/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) NGINX, Inc.
 */

#ifndef _NXT_ATOMIC_H_INCLUDED_
#define _NXT_ATOMIC_H_INCLUDED_


/*
 * nxt_atomic_try_lock() must set an acquire barrier on lock.
 * nxt_atomic_xchg() must set an acquire barrier.
 * nxt_atomic_release() must set a release barrier.
 */

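/*
 * Usage sketch (illustrative only, not part of this header): the
 * primitives above are sufficient for a minimal spinlock.  The lock
 * variable name below is hypothetical.
 *
 *     static nxt_atomic_t  lock;
 *
 *     while (!nxt_atomic_try_lock(&lock)) {  // acquire barrier on success
 *         nxt_cpu_pause();                   // ease off the CPU while
 *     }                                      // spinning
 *
 *     ... critical section ...
 *
 *     nxt_atomic_release(&lock);             // release barrier, unlock
 */
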
#if (NXT_HAVE_GCC_ATOMIC) /* GCC 4.1 builtin atomic operations */

typedef intptr_t                    nxt_atomic_int_t;
typedef uintptr_t                   nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;

/*
 * __sync_bool_compare_and_swap() is a full barrier.
 * __sync_lock_test_and_set() is an acquire barrier.
 * __sync_lock_release() is a release barrier.
 */

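/*
 * For reference, a rough sketch of the same operations in C11
 * <stdatomic.h> terms (an editorial comparison; this header predates a
 * C11 requirement and does not use <stdatomic.h>):
 *
 *     atomic_compare_exchange_strong(lock, &expected, set); // full barrier
 *     atomic_exchange_explicit(lock, set,
 *                              memory_order_acquire);       // acquire
 *     atomic_store_explicit(lock, 0, memory_order_release); // release
 */
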
#define nxt_atomic_cmp_set(lock, cmp, set)                                    \
    __sync_bool_compare_and_swap(lock, cmp, set)


#define nxt_atomic_xchg(lock, set)                                            \
    __sync_lock_test_and_set(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __sync_fetch_and_add(value, add)


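/*
 * nxt_atomic_fetch_add() returns the value *before* the addition, which
 * suits reference counting.  A hypothetical sketch; the names below are
 * not from this header:
 *
 *     static nxt_atomic_t  refcount;
 *
 *     nxt_atomic_fetch_add(&refcount, 1);             // take a reference
 *
 *     if (nxt_atomic_fetch_add(&refcount, -1) == 1) { // old value was 1,
 *         destroy();                                  // so this was the
 *     }                                               // last reference
 */
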
#define nxt_atomic_try_lock(lock)                                             \
    nxt_atomic_cmp_set(lock, 0, 1)


#define nxt_atomic_release(lock)                                              \
    __sync_lock_release(lock)


#define nxt_atomic_or_fetch(ptr, val)                                         \
    __sync_or_and_fetch(ptr, val)


#define nxt_atomic_and_fetch(ptr, val)                                        \
    __sync_and_and_fetch(ptr, val)


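/*
 * The or/and pairs return the value *after* the operation and suit
 * atomic bit flags.  An illustrative sketch; the variable is
 * hypothetical:
 *
 *     static nxt_atomic_t  flags;
 *
 *     nxt_atomic_or_fetch(&flags, 0x01);    // set a flag bit
 *     nxt_atomic_and_fetch(&flags, ~0x01);  // clear a flag bit
 */
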
#if (__i386__ || __i386 || __amd64__ || __amd64)
#define nxt_cpu_pause()                                                       \
    __asm__ ("pause")

#else
#define nxt_cpu_pause()
#endif


#elif (NXT_HAVE_SOLARIS_ATOMIC) /* Solaris 10 */

#include <atomic.h>

typedef long                        nxt_atomic_int_t;
typedef ulong_t                     nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;


#define nxt_atomic_cmp_set(lock, cmp, set)                                    \
    (atomic_cas_ulong(lock, cmp, set) == (ulong_t) cmp)


#define nxt_atomic_xchg(lock, set)                                            \
    atomic_swap_ulong(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    (atomic_add_long_nv(value, add) - add)


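/*
 * atomic_add_long_nv() returns the *new* value, so subtracting "add"
 * recovers the old value and matches __sync_fetch_and_add() semantics.
 * For example, if *value is 5 and add is 3, atomic_add_long_nv() stores
 * and returns 8, and 8 - 3 yields the old value 5.
 */
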
#define nxt_atomic_or_fetch(ptr, val)                                         \
    atomic_or_ulong_nv(ptr, val)


#define nxt_atomic_and_fetch(ptr, val)                                        \
    atomic_and_ulong_nv(ptr, val)


/*
 * Solaris uses the SPARC Total Store Order model.  In this model:
 * 1) Each atomic load-store instruction behaves as if it were followed by
 *    #LoadLoad, #LoadStore, and #StoreStore barriers.
 * 2) Each load instruction behaves as if it were followed by
 *    #LoadLoad and #LoadStore barriers.
 * 3) Each store instruction behaves as if it were followed by a
 *    #StoreStore barrier.
 *
 * On x86_64, atomic instructions set a full barrier and ordinary
 * instructions set implicit #LoadLoad, #LoadStore, and #StoreStore
 * barriers.
 *
 * An acquire barrier requires at least #LoadLoad and #LoadStore barriers,
 * and they are provided by an atomic load-store instruction.
 *
 * A release barrier requires at least #LoadStore and #StoreStore barriers,
 * so a lock release does not require an explicit barrier: all load
 * instructions in the critical section are followed by an implicit
 * #LoadStore barrier and all store instructions are followed by an
 * implicit #StoreStore barrier.
 */

#define nxt_atomic_try_lock(lock)                                             \
    nxt_atomic_cmp_set(lock, 0, 1)


#define nxt_atomic_release(lock)                                              \
    *lock = 0


/*
 * "rep; nop" is used instead of "pause" to omit the "[ PAUSE ]" hardware
 * capability that the linker would add, since Solaris ld.so.1 does not
 * know about it:
 *
 *   ld.so.1: ...: fatal: hardware capability unsupported: 0x2000 [ PAUSE ]
 */

#if (__i386__ || __i386 || __amd64__ || __amd64)
#define nxt_cpu_pause()                                                       \
    __asm__ ("rep; nop")

#else
#define nxt_cpu_pause()
#endif


/* elif (NXT_HAVE_MACOSX_ATOMIC) */

/*
 * The atomic(3) interface was introduced in Mac OS X 10.4 (Tiger) and
 * extended in 10.5 (Leopard).  However, its support is omitted because:
 *
 * 1) the interface is still incomplete:
 *    *) there are OSAtomicAdd32Barrier() and OSAtomicAdd64Barrier()
 *       but no OSAtomicAddLongBarrier();
 *    *) there is no interface for the XCHG operation.
 *
 * 2) the interface is tuned for non-SMP systems: the LOCK prefix is
 *    omitted on single-CPU systems, but nowadays Mac OS X systems are
 *    at least dual-core.  Thus these indirect calls just add overhead
 *    compared with the inlined atomic operations supported by GCC and
 *    Clang on modern Mac OS X systems.
 */


#elif (NXT_HAVE_XLC_ATOMIC) /* XL C/C++ V8.0 for AIX */

#if (NXT_64BIT)

typedef long                        nxt_atomic_int_t;
typedef unsigned long               nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


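/*
 * As documented for XL C, __compare_and_swaplp() takes the expected
 * value by address and overwrites it with the current contents of
 * *lock, so the local copy below shields the caller's "cmp" argument.
 */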
nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swaplp(lock, &old, set);
}


#define nxt_atomic_xchg(lock, set)                                            \
    __fetch_and_swaplp(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __fetch_and_addlp(value, add)


#else /* NXT_32BIT */

typedef int                         nxt_atomic_int_t;
typedef unsigned int                nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swap(lock, &old, set);
}


#define nxt_atomic_xchg(lock, set)                                            \
    __fetch_and_swap(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __fetch_and_add(value, add)


#endif /* NXT_32BIT */


/*
 * __lwsync() is an "lwsync" instruction that sets #LoadLoad, #LoadStore,
 * and #StoreStore barriers.
 *
 * __compare_and_swap() is a pair of "ldarx" and "stdcx" instructions.
 * An "lwsync" does not set a #StoreLoad barrier, so it cannot be used
 * after this pair: the next load inside the critical section can be
 * performed after the "ldarx" instruction but before the "stdcx"
 * instruction.  However, this next load will still fetch correct data,
 * because otherwise the "ldarx/stdcx" pair would fail and the loaded
 * data would be discarded.  Nevertheless, the "isync" instruction is
 * used to be on the safe side.
 *
 * A full barrier can be set with __sync(), a "sync" instruction, but there
 * is also a faster __isync(), an "isync" instruction.  This instruction is
 * not a memory barrier but an instruction barrier.  An "isync" instruction
 * causes the processor to complete execution of all previous instructions
 * and then to discard instructions (which may have begun execution)
 * following the "isync".  After the "isync" is executed, the following
 * instructions then begin execution.  The "isync" is used to ensure that
 * the loads following entry into a critical section are not performed
 * (because of aggressive out-of-order or speculative execution in the
 * processor) until the lock is granted.
 */

nxt_inline nxt_bool_t
nxt_atomic_try_lock(nxt_atomic_t *lock)
{
    if (nxt_atomic_cmp_set(lock, 0, 1)) {
        __isync();
        return 1;
    }

    return 0;
}


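/*
 * The "lwsync" below provides the #LoadStore and #StoreStore barriers
 * that a release requires, so the loads and stores of the critical
 * section cannot be reordered past the unlocking "*lock = 0" store.
 */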
#define nxt_atomic_release(lock)                                              \
    do { __lwsync(); *lock = 0; } while (0)


#define nxt_cpu_pause()


#endif /* NXT_HAVE_XLC_ATOMIC */


#endif /* _NXT_ATOMIC_H_INCLUDED_ */