xref: /unit/src/nxt_atomic.h (revision 2595:00349e24e6b7)

/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) NGINX, Inc.
 */

#ifndef _NXT_ATOMIC_H_INCLUDED_
#define _NXT_ATOMIC_H_INCLUDED_


/*
 * nxt_atomic_try_lock() must set an acquire barrier on lock.
 * nxt_atomic_xchg() must set an acquire barrier.
 * nxt_atomic_release() must set a release barrier.
 */
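
/*
 * For illustration, a hypothetical spin lock built on these primitives
 * would use them as follows:
 *
 *     while (!nxt_atomic_try_lock(&lock)) {
 *         nxt_cpu_pause();
 *     }
 *
 *     ... critical section ...
 *
 *     nxt_atomic_release(&lock);
 *
 * The acquire barrier keeps critical section loads and stores from being
 * performed before the lock is taken; the release barrier keeps them from
 * being performed after the lock is dropped.
 */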

#if (NXT_HAVE_GCC_ATOMIC) /* GCC 4.1 builtin atomic operations */

typedef intptr_t                    nxt_atomic_int_t;
typedef uintptr_t                   nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;

/*
 * __sync_bool_compare_and_swap() is a full barrier.
 * __sync_lock_test_and_set() is an acquire barrier.
 * __sync_lock_release() is a release barrier.
 */

#define nxt_atomic_cmp_set(lock, cmp, set)                                    \
    __sync_bool_compare_and_swap(lock, cmp, set)


#define nxt_atomic_xchg(lock, set)                                            \
    __sync_lock_test_and_set(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __sync_fetch_and_add(value, add)


#define nxt_atomic_try_lock(lock)                                             \
    nxt_atomic_cmp_set(lock, 0, 1)


#define nxt_atomic_release(lock)                                              \
    __sync_lock_release(lock)


#define nxt_atomic_or_fetch(ptr, val)                                         \
    __sync_or_and_fetch(ptr, val)


#define nxt_atomic_and_fetch(ptr, val)                                        \
    __sync_and_and_fetch(ptr, val)


#if (__i386__ || __i386 || __amd64__ || __amd64)
#define nxt_cpu_pause()                                                       \
    __asm__ ("pause")

#elif (__aarch64__ || __arm64__)
#define nxt_cpu_pause()                                                       \
    __asm__ ("isb")

#else
#define nxt_cpu_pause()
#endif
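

/*
 * A minimal sketch, for illustration only: a reference-count drop built
 * on nxt_atomic_fetch_add(), which returns the previous value.  The
 * nxt_example_* name is hypothetical and not part of this interface.
 */

nxt_inline nxt_bool_t
nxt_example_refcount_drop(nxt_atomic_t *count)
{
    /* The caller that sees the previous value 1 held the last reference. */
    return nxt_atomic_fetch_add(count, -1) == 1;
}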


#elif (NXT_HAVE_SOLARIS_ATOMIC) /* Solaris 10 */

#include <atomic.h>

typedef long                        nxt_atomic_int_t;
typedef ulong_t                     nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;


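/*
 * atomic_cas_ulong() returns the previous value of *lock, so comparing it
 * with "cmp" reports whether the swap has happened.  The "_nv" functions
 * return the new value, which is why nxt_atomic_fetch_add() below
 * subtracts "add" to get fetch-and-add semantics.
 */
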
#define nxt_atomic_cmp_set(lock, cmp, set)                                    \
    (atomic_cas_ulong(lock, cmp, set) == (ulong_t) cmp)


#define nxt_atomic_xchg(lock, set)                                            \
    atomic_add_swap(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    (atomic_add_long_nv(value, add) - add)


#define nxt_atomic_or_fetch(ptr, val)                                         \
    atomic_or_ulong_nv(ptr, val)


#define nxt_atomic_and_fetch(ptr, val)                                        \
    atomic_and_ulong_nv(ptr, val)


/*
 * Solaris uses the SPARC Total Store Order model.  In this model:
 * 1) Each atomic load-store instruction behaves as if it were followed by
 *    #LoadLoad, #LoadStore, and #StoreStore barriers.
 * 2) Each load instruction behaves as if it were followed by
 *    #LoadLoad and #LoadStore barriers.
 * 3) Each store instruction behaves as if it were followed by
 *    a #StoreStore barrier.
 *
 * On x86_64, atomic instructions set a full barrier and ordinary
 * instructions set implicit #LoadLoad, #LoadStore, and #StoreStore
 * barriers.
 *
 * An acquire barrier requires at least #LoadLoad and #LoadStore barriers,
 * and they are provided by an atomic load-store instruction.
 *
 * A release barrier requires at least #LoadStore and #StoreStore barriers,
 * so a lock release does not require an explicit barrier: all load
 * instructions in the critical section are followed by an implicit
 * #LoadStore barrier and all store instructions are followed by an
 * implicit #StoreStore barrier.
 */

#define nxt_atomic_try_lock(lock)                                             \
    nxt_atomic_cmp_set(lock, 0, 1)


#define nxt_atomic_release(lock)                                              \
    *lock = 0;


/*
 * "rep; nop" is used instead of "pause" to avoid the "[ PAUSE ]" hardware
 * capability added by the linker, since Solaris ld.so.1 does not know
 * about it:
 *
 *   ld.so.1: ...: fatal: hardware capability unsupported: 0x2000 [ PAUSE ]
 */

#if (__i386__ || __i386 || __amd64__ || __amd64)
#define nxt_cpu_pause()                                                       \
    __asm__ ("rep; nop")

#else
#define nxt_cpu_pause()
#endif
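

/*
 * A minimal sketch, for illustration only: a spin lock over the
 * primitives above.  As the comment on the TSO model explains, the unlock
 * is just the plain store behind nxt_atomic_release().  The nxt_example_*
 * names are hypothetical and not part of this interface.
 */

nxt_inline void
nxt_example_spin_lock(nxt_atomic_t *lock)
{
    while (!nxt_atomic_try_lock(lock)) {
        nxt_cpu_pause();
    }
}


nxt_inline void
nxt_example_spin_unlock(nxt_atomic_t *lock)
{
    nxt_atomic_release(lock);
}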


/* elif (NXT_HAVE_MACOSX_ATOMIC) */

/*
 * The atomic(3) interface was introduced in MacOSX 10.4 (Tiger) and
 * extended in 10.5 (Leopard).  However, its support is omitted because:
 *
 * 1) the interface is still incomplete:
 *    *) there are OSAtomicAdd32Barrier() and OSAtomicAdd64Barrier()
 *       but no OSAtomicAddLongBarrier();
 *    *) there is no interface for the XCHG operation.
 *
 * 2) the interface is tuned for non-SMP systems: it omits the LOCK prefix
 *    on single-CPU systems, but nowadays MacOSX systems are at least
 *    dual-core.  Thus these indirect calls just add overhead compared
 *    with the inlined atomic operations supported by GCC and Clang on
 *    modern MacOSX systems.
 */


#elif (NXT_HAVE_XLC_ATOMIC) /* XL C/C++ V8.0 for AIX */

#if (NXT_64BIT)

typedef long                        nxt_atomic_int_t;
typedef unsigned long               nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


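/*
 * __compare_and_swaplp() stores "set" into *lock and returns true only if
 * *lock equals the value passed through "old"; the builtin may update
 * "old" with the observed value of *lock, so a scratch copy of "cmp" is
 * used.  The 32-bit __compare_and_swap() below behaves the same way.
 */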
nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swaplp(lock, &old, set);
}


#define nxt_atomic_xchg(lock, set)                                            \
    __fetch_and_swaplp(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __fetch_and_addlp(value, add)


#else /* NXT_32BIT */

typedef int                         nxt_atomic_int_t;
typedef unsigned int                nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swap(lock, &old, set);
}


#define nxt_atomic_xchg(lock, set)                                            \
    __fetch_and_swap(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __fetch_and_add(value, add)


#endif /* NXT_32BIT */


/*
 * __lwsync() is a "lwsync" instruction that sets #LoadLoad, #LoadStore,
 * and #StoreStore barriers.
 *
 * __compare_and_swap() is a pair of "ldarx" and "stdcx" instructions.
 * A "lwsync" does not set a #StoreLoad barrier, so it cannot be used after
 * this pair, since a subsequent load inside the critical section can be
 * performed after the "ldarx" instruction but before the "stdcx"
 * instruction.  However, this subsequent load will read correct data,
 * because otherwise the "ldarx/stdcx" pair fails and the loaded data is
 * discarded.  Nevertheless, the "isync" instruction is used to be on the
 * safe side.
 *
 * A full barrier can be set with __sync(), a "sync" instruction, but there
 * is also a faster __isync(), an "isync" instruction.  This instruction is
 * not a memory barrier but an instruction barrier.  An "isync" instruction
 * causes the processor to complete execution of all previous instructions
 * and then to discard instructions (which may have begun execution)
 * following the "isync".  After the "isync" is executed, the following
 * instructions then begin execution.  The "isync" is used to ensure that
 * the loads following entry into the critical section are not performed
 * (because of aggressive out-of-order or speculative execution in the
 * processor) until the lock is granted.
 */

nxt_inline nxt_bool_t
nxt_atomic_try_lock(nxt_atomic_t *lock)
{
    if (nxt_atomic_cmp_set(lock, 0, 1)) {
        __isync();
        return 1;
    }

    return 0;
}


#define nxt_atomic_release(lock)                                              \
    do { __lwsync(); *lock = 0; } while (0)


#define nxt_cpu_pause()


#endif /* NXT_HAVE_XLC_ATOMIC */


#endif /* _NXT_ATOMIC_H_INCLUDED_ */