xref: /unit/src/nxt_atomic.h (revision 0:a63ceefd6ab0)

/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) NGINX, Inc.
 */

#ifndef _NXT_ATOMIC_H_INCLUDED_
#define _NXT_ATOMIC_H_INCLUDED_


/*
 * nxt_atomic_try_lock() must set an acquire barrier on lock.
 * nxt_atomic_xchg() must set an acquire barrier.
 * nxt_atomic_release() must set a release barrier.
 */
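
/*
 * Illustrative sketch (not part of the original interface): the primitives
 * below are meant to compose into a simple spin lock, where the acquire
 * barrier taken on lock and the release barrier on unlock confine the memory
 * accesses of the critical section.  The function names are hypothetical.
 *
 *     static void
 *     example_spin_lock(nxt_atomic_t *lock)
 *     {
 *         while (!nxt_atomic_try_lock(lock)) {
 *             nxt_cpu_pause();
 *         }
 *     }
 *
 *     static void
 *     example_spin_unlock(nxt_atomic_t *lock)
 *     {
 *         nxt_atomic_release(lock);
 *     }
 */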

#if (NXT_HAVE_GCC_ATOMIC) /* GCC 4.1 builtin atomic operations */

typedef intptr_t                    nxt_atomic_int_t;
typedef uintptr_t                   nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;

/*
 * __sync_bool_compare_and_swap() is a full barrier.
 * __sync_lock_test_and_set() is an acquire barrier.
 * __sync_lock_release() is a release barrier.
 */

#define                                                                       \
nxt_atomic_cmp_set(lock, cmp, set)                                            \
    __sync_bool_compare_and_swap(lock, cmp, set)

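/*
 * Illustrative sketch (not part of the original file): a typical
 * compare-and-swap retry loop built on nxt_atomic_cmp_set(), here atomically
 * raising a shared maximum.  The function name is hypothetical.
 *
 *     static void
 *     example_store_max(nxt_atomic_t *max, nxt_atomic_uint_t value)
 *     {
 *         nxt_atomic_uint_t  current;
 *
 *         do {
 *             current = *max;
 *
 *             if (value <= current) {
 *                 return;
 *             }
 *
 *         } while (!nxt_atomic_cmp_set(max, current, value));
 *     }
 */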

#define                                                                       \
nxt_atomic_xchg(lock, set)                                                    \
    __sync_lock_test_and_set(lock, set)


#define                                                                       \
nxt_atomic_fetch_add(value, add)                                              \
    __sync_fetch_and_add(value, add)

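/*
 * Illustrative sketch (not part of the original file): nxt_atomic_fetch_add()
 * returns the value observed before the addition, which lets the caller that
 * drops the last reference be detected.  The function name is hypothetical.
 *
 *     static nxt_bool_t
 *     example_release_ref(nxt_atomic_t *refcount)
 *     {
 *         return (nxt_atomic_fetch_add(refcount, -1) == 1);
 *     }
 */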

#define                                                                       \
nxt_atomic_try_lock(lock)                                                     \
    nxt_atomic_cmp_set(lock, 0, 1)


#define                                                                       \
nxt_atomic_release(lock)                                                      \
    __sync_lock_release(lock)


#if (__i386__ || __i386 || __amd64__ || __amd64)
#define                                                                       \
nxt_cpu_pause()                                                               \
    __asm__ ("pause")

#else
#define                                                                       \
nxt_cpu_pause()
#endif


#elif (NXT_HAVE_SOLARIS_ATOMIC) /* Solaris 10 */

#include <atomic.h>

typedef long                        nxt_atomic_int_t;
typedef ulong_t                     nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;


#define                                                                       \
nxt_atomic_cmp_set(lock, cmp, set)                                            \
    (atomic_cas_ulong(lock, cmp, set) == (ulong_t) cmp)

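/*
 * atomic_cas_ulong() returns the value previously stored in *lock, so the
 * swap succeeded exactly when that return value equals cmp: for example,
 * if *lock held 0 and cmp is 0, set is stored, 0 is returned, and the
 * comparison above yields true.
 */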

#define                                                                       \
nxt_atomic_xchg(lock, set)                                                    \
    atomic_swap_ulong(lock, set)


#define                                                                       \
nxt_atomic_fetch_add(value, add)                                              \
    (atomic_add_long_nv(value, add) - add)

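/*
 * atomic_add_long_nv() returns the new value after the addition, so
 * subtracting add recovers the previous value and yields the usual
 * fetch-and-add result: for example, if *value was 5 and add is 3,
 * atomic_add_long_nv() returns 8 and the macro evaluates to 5.
 */
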
/*
 * Solaris uses the SPARC Total Store Order model.  In this model:
 * 1) Each atomic load-store instruction behaves as if it were followed by
 *    #LoadLoad, #LoadStore, and #StoreStore barriers.
 * 2) Each load instruction behaves as if it were followed by
 *    #LoadLoad and #LoadStore barriers.
 * 3) Each store instruction behaves as if it were followed by
 *    a #StoreStore barrier.
 *
 * On x86_64, atomic instructions set a full barrier and ordinary
 * instructions set implicit #LoadLoad, #LoadStore, and #StoreStore barriers.
 *
 * An acquire barrier requires at least #LoadLoad and #LoadStore barriers,
 * and these are provided by an atomic load-store instruction.
 *
 * A release barrier requires at least #LoadStore and #StoreStore barriers,
 * so a lock release does not require an explicit barrier: every load
 * instruction in the critical section is followed by an implicit #LoadStore
 * barrier and every store instruction is followed by an implicit #StoreStore
 * barrier.
 */

#define                                                                       \
nxt_atomic_try_lock(lock)                                                     \
    nxt_atomic_cmp_set(lock, 0, 1)


#define                                                                       \
nxt_atomic_release(lock)                                                      \
    *lock = 0;


/*
 * "rep; nop" is used instead of "pause" to avoid the "[ PAUSE ]" hardware
 * capability that the linker would add, since Solaris ld.so.1 does not know
 * about it:
 *
 *   ld.so.1: ...: fatal: hardware capability unsupported: 0x2000 [ PAUSE ]
 */

#if (__i386__ || __i386 || __amd64__ || __amd64)
#define                                                                       \
nxt_cpu_pause()                                                               \
    __asm__ ("rep; nop")

#else
#define                                                                       \
nxt_cpu_pause()
#endif


/* elif (NXT_HAVE_MACOSX_ATOMIC) */

/*
 * The atomic(3) interface was introduced in Mac OS X 10.4 (Tiger) and
 * extended in 10.5 (Leopard).  However, its support is omitted because:
 *
 * 1) the interface is still incomplete:
 *    *) there are OSAtomicAdd32Barrier() and OSAtomicAdd64Barrier(),
 *       but no OSAtomicAddLongBarrier();
 *    *) there is no interface for the XCHG operation.
 *
 * 2) the interface is tuned for non-SMP systems: the LOCK prefix is omitted
 *    on single-CPU systems, but nowadays Mac OS X systems are at least
 *    dual-core.  Thus these indirect calls just add overhead compared with
 *    the inlined atomic operations supported by GCC and Clang on modern
 *    Mac OS X systems.
 */


#elif (NXT_HAVE_XLC_ATOMIC) /* XL C/C++ V8.0 for AIX */

#if (NXT_64BIT)

typedef long                        nxt_atomic_int_t;
typedef unsigned long               nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


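/*
 * Note (an assumption about the XL C builtins, not from the original file):
 * __compare_and_swaplp() and the 32-bit __compare_and_swap() below take a
 * pointer to the expected value and may overwrite it with the value actually
 * observed in *lock, so a local copy of cmp is used.
 */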
nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swaplp(lock, &old, set);
}


#define                                                                       \
nxt_atomic_xchg(lock, set)                                                    \
    __fetch_and_swaplp(lock, set)


#define                                                                       \
nxt_atomic_fetch_add(value, add)                                              \
    __fetch_and_addlp(value, add)


#else /* NXT_32BIT */

typedef int                         nxt_atomic_int_t;
typedef unsigned int                nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swap(lock, &old, set);
}


#define                                                                       \
nxt_atomic_xchg(lock, set)                                                    \
    __fetch_and_swap(lock, set)


#define                                                                       \
nxt_atomic_fetch_add(value, add)                                              \
    __fetch_and_add(value, add)


#endif /* NXT_32BIT */


/*
 * __lwsync() is an "lwsync" instruction that sets #LoadLoad, #LoadStore,
 * and #StoreStore barriers.
 *
 * __compare_and_swap() is a pair of "ldarx" and "stdcx" instructions.
 * An "lwsync" does not set a #StoreLoad barrier, so it cannot be used after
 * this pair, since a subsequent load inside the critical section can be
 * performed after the "ldarx" instruction but before the "stdcx" instruction.
 * However, this subsequent load will still read correct data, because
 * otherwise the "ldarx/stdcx" pair would fail and the loaded data would be
 * discarded.  Nevertheless, the "isync" instruction is used to be safe.
 *
 * A full barrier can be set with __sync(), a "sync" instruction, but there
 * is also a faster __isync(), an "isync" instruction.  This instruction is
 * not a memory barrier but an instruction barrier.  An "isync" instruction
 * causes the processor to complete execution of all previous instructions
 * and then to discard instructions (which may have begun execution) following
 * the "isync".  After the "isync" is executed, the following instructions
 * then begin execution.  The "isync" is used to ensure that the loads
 * following entry into a critical section are not performed (because of
 * aggressive out-of-order or speculative execution in the processor) until
 * the lock is granted.
 */

nxt_inline nxt_bool_t
nxt_atomic_try_lock(nxt_atomic_t *lock)
{
    if (nxt_atomic_cmp_set(lock, 0, 1)) {
        __isync();
        return 1;
    }

    return 0;
}


#define                                                                       \
nxt_atomic_release(lock)                                                      \
    do { __lwsync(); *lock = 0; } while (0)


#define                                                                       \
nxt_cpu_pause()


#endif /* NXT_HAVE_XLC_ATOMIC */


#endif /* _NXT_ATOMIC_H_INCLUDED_ */
