/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) NGINX, Inc.
 */

#ifndef _NXT_ATOMIC_H_INCLUDED_
#define _NXT_ATOMIC_H_INCLUDED_


/*
 * nxt_atomic_try_lock() must set an acquire barrier on lock.
 * nxt_atomic_xchg() must set an acquire barrier.
 * nxt_atomic_release() must set a release barrier.
 */

#if (NXT_HAVE_GCC_ATOMIC) /* GCC 4.1 builtin atomic operations */

typedef intptr_t                    nxt_atomic_int_t;
typedef uintptr_t                   nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;

/*
 * __sync_bool_compare_and_swap() is a full barrier.
 * __sync_lock_test_and_set() is an acquire barrier.
 * __sync_lock_release() is a release barrier.
 */

#define nxt_atomic_cmp_set(lock, cmp, set)                                    \
    __sync_bool_compare_and_swap(lock, cmp, set)


#define nxt_atomic_xchg(lock, set)                                            \
    __sync_lock_test_and_set(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __sync_fetch_and_add(value, add)


#define nxt_atomic_try_lock(lock)                                             \
    nxt_atomic_cmp_set(lock, 0, 1)


#define nxt_atomic_release(lock)                                              \
    __sync_lock_release(lock)


#define nxt_atomic_or_fetch(ptr, val)                                         \
    __sync_or_and_fetch(ptr, val)


#define nxt_atomic_and_fetch(ptr, val)                                        \
    __sync_and_and_fetch(ptr, val)


#if (__i386__ || __i386 || __amd64__ || __amd64)
#define nxt_cpu_pause()                                                       \
    __asm__ ("pause")

#else
#define nxt_cpu_pause()
#endif
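
/*
 * Usage sketch (editor's addition, not part of the original interface):
 * nxt_atomic_fetch_add() returns the previous value, so a reference count
 * can be dropped and tested in a single atomic step.  The "obj" structure
 * and nxt_object_destroy() below are hypothetical:
 *
 *     if (nxt_atomic_fetch_add(&obj->refcount, -1) == 1) {
 *         nxt_object_destroy(obj);
 *     }
 */
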

#elif (NXT_HAVE_SOLARIS_ATOMIC) /* Solaris 10 */

#include <atomic.h>

typedef long                        nxt_atomic_int_t;
typedef ulong_t                     nxt_atomic_uint_t;
typedef volatile nxt_atomic_uint_t  nxt_atomic_t;


#define nxt_atomic_cmp_set(lock, cmp, set)                                    \
    (atomic_cas_ulong(lock, cmp, set) == (ulong_t) cmp)


#define nxt_atomic_xchg(lock, set)                                            \
    atomic_add_swap(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    (atomic_add_long_nv(value, add) - add)


#define nxt_atomic_or_fetch(ptr, val)                                         \
    atomic_or_ulong_nv(ptr, val)


#define nxt_atomic_and_fetch(ptr, val)                                        \
    atomic_and_ulong_nv(ptr, val)


/*
 * Solaris uses the SPARC Total Store Order model.  In this model:
 * 1) Each atomic load-store instruction behaves as if it were followed by
 *    #LoadLoad, #LoadStore, and #StoreStore barriers.
 * 2) Each load instruction behaves as if it were followed by
 *    #LoadLoad and #LoadStore barriers.
 * 3) Each store instruction behaves as if it were followed by
 *    a #StoreStore barrier.
 *
 * On x86_64, atomic instructions set a full barrier, and ordinary
 * instructions set implicit #LoadLoad, #LoadStore, and #StoreStore barriers.
 *
 * An acquire barrier requires at least #LoadLoad and #LoadStore barriers,
 * and they are provided by an atomic load-store instruction.
 *
 * A release barrier requires at least #LoadStore and #StoreStore barriers,
 * so a lock release does not require an explicit barrier: all load
 * instructions in a critical section are followed by an implicit #LoadStore
 * barrier and all store instructions are followed by an implicit #StoreStore
 * barrier.
 */

#define nxt_atomic_try_lock(lock)                                             \
    nxt_atomic_cmp_set(lock, 0, 1)


#define nxt_atomic_release(lock)                                              \
    *lock = 0;


/*
 * The "rep; nop" is used instead of "pause" to omit the "[ PAUSE ]" hardware
 * capability added by the linker, since Solaris ld.so.1 does not know about
 * it:
 *
 *   ld.so.1: ...: fatal: hardware capability unsupported: 0x2000  [ PAUSE ]
 */

#if (__i386__ || __i386 || __amd64__ || __amd64)
#define nxt_cpu_pause()                                                       \
    __asm__ ("rep; nop")

#else
#define nxt_cpu_pause()
#endif
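
/*
 * Sketch (editor's addition; "lock" and "shared_counter" are hypothetical):
 * under TSO the plain store in nxt_atomic_release() above is a sufficient
 * release, because every store inside the critical section is already
 * followed by an implicit #StoreStore barrier:
 *
 *     while (!nxt_atomic_try_lock(&lock)) {
 *         nxt_cpu_pause();
 *     }
 *
 *     shared_counter++;
 *
 *     nxt_atomic_release(&lock);
 */
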

/* elif (NXT_HAVE_MACOSX_ATOMIC) */

/*
 * The atomic(3) interface was introduced in MacOSX 10.4 (Tiger) and
 * extended in 10.5 (Leopard).  However, its support is omitted because:
 *
 * 1) the interface is still incomplete:
 *    *) there are OSAtomicAdd32Barrier() and OSAtomicAdd64Barrier()
 *       but no OSAtomicAddLongBarrier();
 *    *) there is no interface for the XCHG operation.
 *
 * 2) the interface is tuned for non-SMP systems due to omission of the
 *    LOCK prefix on single-CPU systems, but nowadays MacOSX systems are at
 *    least dual-core.  Thus these indirect calls just add overhead compared
 *    with the inlined atomic operations supported by GCC and Clang on
 *    modern MacOSX systems.
 */


#elif (NXT_HAVE_XLC_ATOMIC) /* XL C/C++ V8.0 for AIX */

#if (NXT_64BIT)

typedef long                        nxt_atomic_int_t;
typedef unsigned long               nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swaplp(lock, &old, set);
}


#define nxt_atomic_xchg(lock, set)                                            \
    __fetch_and_swaplp(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __fetch_and_addlp(value, add)


#else /* NXT_32BIT */

typedef int                         nxt_atomic_int_t;
typedef unsigned int                nxt_atomic_uint_t;
typedef volatile nxt_atomic_int_t   nxt_atomic_t;


nxt_inline nxt_bool_t
nxt_atomic_cmp_set(nxt_atomic_t *lock, nxt_atomic_int_t cmp,
    nxt_atomic_int_t set)
{
    nxt_atomic_int_t  old;

    old = cmp;

    return __compare_and_swap(lock, &old, set);
}


#define nxt_atomic_xchg(lock, set)                                            \
    __fetch_and_swap(lock, set)


#define nxt_atomic_fetch_add(value, add)                                      \
    __fetch_and_add(value, add)


#endif /* NXT_32BIT */


/*
 * __lwsync() is a "lwsync" instruction that sets #LoadLoad, #LoadStore,
 * and #StoreStore barriers.
 *
 * __compare_and_swap() is a pair of "ldarx" and "stdcx" instructions.
 * A "lwsync" does not set a #StoreLoad barrier, so it cannot be used after
 * this pair, since a subsequent load inside the critical section could be
 * performed after the "ldarx" instruction but before the "stdcx"
 * instruction.  However, this subsequent load will read correct data,
 * because otherwise the "ldarx/stdcx" pair would fail and that data would
 * be discarded.  Nevertheless, the "isync" instruction is used to be safe.
 *
 * A full barrier can be set with __sync(), a "sync" instruction, but there
 * is also a faster __isync(), an "isync" instruction.  This instruction is
 * not a memory barrier but an instruction barrier.  An "isync" instruction
 * causes the processor to complete execution of all previous instructions
 * and then to discard instructions (which may have begun execution)
 * following the "isync".  After the "isync" is executed, the following
 * instructions then begin execution.  The "isync" is used to ensure that
 * the loads following entry into a critical section are not performed
 * (because of aggressive out-of-order or speculative execution in the
 * processor) until the lock is granted.
 */

nxt_inline nxt_bool_t
nxt_atomic_try_lock(nxt_atomic_t *lock)
{
    if (nxt_atomic_cmp_set(lock, 0, 1)) {
        __isync();
        return 1;
    }

    return 0;
}


#define nxt_atomic_release(lock)                                              \
    do { __lwsync(); *lock = 0; } while (0)


#define nxt_cpu_pause()


#endif /* NXT_HAVE_XLC_ATOMIC */
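
/*
 * Editor's sketch, not part of the original header: a minimal spinlock
 * composed from the primitives defined above, illustrating the barrier
 * contract stated at the top of this file.  The helper names
 * nxt_atomic_spin_lock() and nxt_atomic_spin_unlock() are hypothetical.
 * The guard mirrors the #if/#elif chain above so that the sketch
 * disappears on platforms where no primitives are defined.
 */

#if (NXT_HAVE_GCC_ATOMIC || NXT_HAVE_SOLARIS_ATOMIC || NXT_HAVE_XLC_ATOMIC)

nxt_inline void
nxt_atomic_spin_lock(nxt_atomic_t *lock)
{
    /*
     * The acquire barrier set by nxt_atomic_try_lock() keeps loads in the
     * critical section from being performed before the lock is taken.
     */
    while (!nxt_atomic_try_lock(lock)) {
        nxt_cpu_pause();
    }
}


nxt_inline void
nxt_atomic_spin_unlock(nxt_atomic_t *lock)
{
    /*
     * The release barrier makes all critical-section stores visible
     * before the lock word is cleared.
     */
    nxt_atomic_release(lock);
}

#endif
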

#endif /* _NXT_ATOMIC_H_INCLUDED_ */