
/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) NGINX, Inc.
 */

#ifndef _NXT_ATOMIC_H_INCLUDED_
#define _NXT_ATOMIC_H_INCLUDED_


/*
 * nxt_atomic_try_lock() must set an acquire barrier on lock.
 * nxt_atomic_xchg() must set an acquire barrier.
 * nxt_atomic_release() must set a release barrier.
 */
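
/*
 * Usage sketch (illustrative only, not part of this header): a spinlock
 * built on these primitives.  The acquire barrier in nxt_atomic_try_lock()
 * and the release barrier in nxt_atomic_release() confine the critical
 * section.  The names example_lock, example_counter, and example() are
 * hypothetical.
 *
 *     static nxt_atomic_t  example_lock;
 *     static int           example_counter;
 *
 *     void
 *     example(void)
 *     {
 *         while (!nxt_atomic_try_lock(&example_lock)) {
 *             nxt_cpu_pause();
 *         }
 *
 *         example_counter++;
 *
 *         nxt_atomic_release(&example_lock);
 *     }
 */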

#if (NXT_HAVE_GCC_ATOMIC) /* GCC 4.1 builtin atomic operations */

typedef intptr_t                    nxt_atomic_int_t;
typedef uintptr_t                   nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;

/*
 * __sync_bool_compare_and_swap() is a full barrier.
 * __sync_lock_test_and_set() is an acquire barrier.
 * __sync_lock_release() is a release barrier.
 */

#define nxt_atomic_cmp_set(lock, cmp, set) \
    __sync_bool_compare_and_swap(lock, cmp, set)


#define nxt_atomic_xchg(lock, set) \
    __sync_lock_test_and_set(lock, set)


#define nxt_atomic_fetch_add(value, add) \
    __sync_fetch_and_add(value, add)


#define nxt_atomic_try_lock(lock) \
    nxt_atomic_cmp_set(lock, 0, 1)


#define nxt_atomic_release(lock) \
    __sync_lock_release(lock)


#define nxt_atomic_or_fetch(ptr, val) \
    __sync_or_and_fetch(ptr, val)


#define nxt_atomic_and_fetch(ptr, val) \
    __sync_and_and_fetch(ptr, val)
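
/*
 * Note on return values (a sketch; "refs" is a hypothetical counter):
 * nxt_atomic_fetch_add() returns the previous value, while
 * nxt_atomic_or_fetch() and nxt_atomic_and_fetch() return the new value.
 * This makes fetch_add suitable for reference counting:
 *
 *     static nxt_atomic_t  refs;
 *
 *     nxt_atomic_fetch_add(&refs, 1);              returns the old count
 *
 *     if (nxt_atomic_fetch_add(&refs, -1) == 1) {  last reference gone
 *         ...
 *     }
 */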


#if (__i386__ || __i386 || __amd64__ || __amd64)
#define nxt_cpu_pause() \
    __asm__ ("pause")

#elif (__aarch64__ || __arm64__)
#define nxt_cpu_pause() \
    __asm__ ("isb")

#else
#define nxt_cpu_pause()
#endif


#elif (NXT_HAVE_SOLARIS_ATOMIC) /* Solaris 10 */

#include <atomic.h>

typedef long                        nxt_atomic_int_t;
typedef ulong_t                     nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;


#define nxt_atomic_cmp_set(lock, cmp, set) \
    (atomic_cas_ulong(lock, cmp, set) == (ulong_t) cmp)


#define nxt_atomic_xchg(lock, set) \
    atomic_swap_ulong(lock, set)


#define nxt_atomic_fetch_add(value, add) \
    (atomic_add_long_nv(value, add) - add)


#define nxt_atomic_or_fetch(ptr, val) \
    atomic_or_ulong_nv(ptr, val)


#define nxt_atomic_and_fetch(ptr, val) \
    atomic_and_ulong_nv(ptr, val)


/*
 * Solaris uses the SPARC Total Store Order model.  In this model:
 * 1) Each atomic load-store instruction behaves as if it were followed by
 *    #LoadLoad, #LoadStore, and #StoreStore barriers.
 * 2) Each load instruction behaves as if it were followed by
 *    #LoadLoad and #LoadStore barriers.
 * 3) Each store instruction behaves as if it were followed by a
 *    #StoreStore barrier.
 *
 * On x86_64, atomic instructions set a full barrier, and ordinary
 * instructions set implicit #LoadLoad, #LoadStore, and #StoreStore
 * barriers.
 *
 * An acquire barrier requires at least #LoadLoad and #LoadStore barriers,
 * and these are provided by an atomic load-store instruction.
 *
 * A release barrier requires at least #LoadStore and #StoreStore barriers,
 * so a lock release does not require an explicit barrier: all load
 * instructions in the critical section are followed by an implicit
 * #LoadStore barrier and all store instructions are followed by an
 * implicit #StoreStore barrier.
 */

#define nxt_atomic_try_lock(lock) \
    nxt_atomic_cmp_set(lock, 0, 1)


#define nxt_atomic_release(lock) \
    *lock = 0

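/*
 * Illustration of the reasoning above (a sketch; "a", "b", "p", and "q"
 * are hypothetical): under TSO the plain store in nxt_atomic_release()
 * stays ordered after every load and store of the critical section:
 *
 *     a = *p;                      followed by implicit #LoadLoad and
 *                                  #LoadStore barriers
 *     *q = b;                      followed by an implicit #StoreStore
 *                                  barrier
 *     nxt_atomic_release(lock);    plain store, no explicit barrier
 */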

/*
 * "rep; nop" is used instead of "pause" to avoid the "[ PAUSE ]" hardware
 * capability bit that the linker would otherwise add, since Solaris
 * ld.so.1 does not know about it:
 *
 *   ld.so.1: ...: fatal: hardware capability unsupported: 0x2000 [ PAUSE ]
 */

#if (__i386__ || __i386 || __amd64__ || __amd64)
#define nxt_cpu_pause() \
    __asm__ ("rep; nop")

#else
#define nxt_cpu_pause()
#endif


/* elif (NXT_HAVE_MACOSX_ATOMIC) */

/*
 * The atomic(3) interface was introduced in MacOSX 10.4 (Tiger) and
 * extended in 10.5 (Leopard).  However, its support is omitted because:
 *
 * 1) the interface is still incomplete:
 *    *) there are OSAtomicAdd32Barrier() and OSAtomicAdd64Barrier()
 *       but no OSAtomicAddLongBarrier();
 *    *) there is no interface for the XCHG operation.
 *
 * 2) the interface is tuned for non-SMP systems: the LOCK prefix is
 *    omitted on single CPU systems, but nowadays MacOSX systems are at
 *    least dual core.  Thus these indirect calls just add overhead
 *    compared with the inlined atomic operations that GCC and Clang
 *    support on modern MacOSX systems.
 */


#elif (NXT_HAVE_XLC_ATOMIC) /* XL C/C++ V8.0 for AIX */

#if (NXT_64BIT)

typedef long                        nxt_atomic_int_t;
typedef unsigned long               nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swaplp(lock, &old, set);
}


#define nxt_atomic_xchg(lock, set) \
    __fetch_and_swaplp(lock, set)


#define nxt_atomic_fetch_add(value, add) \
    __fetch_and_addlp(value, add)


#else /* NXT_32BIT */

typedef int                         nxt_atomic_int_t;
typedef unsigned int                nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swap(lock, &old, set);
}


#define nxt_atomic_xchg(lock, set) \
    __fetch_and_swap(lock, set)


#define nxt_atomic_fetch_add(value, add) \
    __fetch_and_add(value, add)


#endif /* NXT_32BIT */


/*
 * __lwsync() is a "lwsync" instruction that sets #LoadLoad, #LoadStore,
 * and #StoreStore barriers.
 *
 * __compare_and_swap() is a pair of "ldarx" and "stdcx" instructions.
 * A "lwsync" does not set a #StoreLoad barrier, so it cannot be used
 * after this pair: a subsequent load inside the critical section could
 * be performed after the "ldarx" instruction but before the "stdcx"
 * instruction.  However, that load would still return correct data,
 * because otherwise the "ldarx/stdcx" pair would fail and the loaded
 * data would be discarded.  Nevertheless, an "isync" instruction is
 * used to be safe.
 *
 * A full barrier can be set with __sync(), a "sync" instruction, but there
 * is also the faster __isync(), an "isync" instruction.  This instruction
 * is not a memory barrier but an instruction barrier.  An "isync"
 * instruction causes the processor to complete execution of all previous
 * instructions and then to discard instructions (which may have begun
 * execution) following the "isync".  After the "isync" is executed, the
 * following instructions then begin execution.  The "isync" is used to
 * ensure that the loads following entry into a critical section are not
 * performed (because of aggressive out-of-order or speculative execution
 * in the processor) until the lock is granted.
 */

nxt_inline nxt_bool_t
nxt_atomic_try_lock(nxt_atomic_t *lock)
{
    if (nxt_atomic_cmp_set(lock, 0, 1)) {
        __isync();
        return 1;
    }

    return 0;
}


#define nxt_atomic_release(lock) \
    do { __lwsync(); *lock = 0; } while (0)


#define nxt_cpu_pause()


#endif /* NXT_HAVE_XLC_ATOMIC */


#endif /* _NXT_ATOMIC_H_INCLUDED_ */
