# Atomic Functions
22
3- # TODO : support for 64-bit atomics via atom_cmpxchg (from cl_khr_int64_base_atomics)
4-
5- # "atomic operations on 32-bit signed, unsigned integers and single precision
6- # floating-point to locations in __global or __local memory"
# Provides atomic functions that rely on the OpenCL base atomics, as well as the
# cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions.
75
# Integer element types considered by the atomic definitions in this file.
const atomic_integer_types = [UInt32, Int32, UInt64, Int64]
9- # TODO : 64-bit atomics with ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS
10- # TODO : additional floating-point atomics with ZE_extension_float_atomics
# Address spaces the atomic definitions are generated for; iterated by the
# `for as in atomic_memory_types` loops in this file.
const atomic_memory_types = [AS.Workgroup, AS.CrossWorkgroup]
128
139
# Floating-point `atomic_xchg!` / `atomic_cmpxchg!` methods, generated once per
# address space listed in `atomic_memory_types`. Every method forwards to the
# unsigned-integer method of the same width by reinterpreting the pointer and
# the operands, then reinterprets the returned integer back to the float type.
for as in atomic_memory_types
@eval begin

# There is native support for atomic_xchg on Float32, but not for Float64,
# so we always reinterpret for consistency.
@device_function atomic_xchg!(p::LLVMPtr{Float32,$as}, val::Float32) =
    reinterpret(Float32, atomic_xchg!(reinterpret(LLVMPtr{UInt32,$as}, p),
                                      reinterpret(UInt32, val)))
@device_function atomic_xchg!(p::LLVMPtr{Float64,$as}, val::Float64) =
    reinterpret(Float64, atomic_xchg!(reinterpret(LLVMPtr{UInt64,$as}, p),
                                      reinterpret(UInt64, val)))

# `cmp` and `val` are handed to the integer method as reinterpreted UInt32 /
# UInt64 values, so the comparison is carried out by that integer method on the
# reinterpreted operands rather than on the floating-point values themselves.
@device_function atomic_cmpxchg!(p::LLVMPtr{Float32,$as}, cmp::Float32, val::Float32) =
    reinterpret(Float32, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt32,$as}, p),
                                         reinterpret(UInt32, cmp),
                                         reinterpret(UInt32, val)))
@device_function atomic_cmpxchg!(p::LLVMPtr{Float64,$as}, cmp::Float64, val::Float64) =
    reinterpret(Float64, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt64,$as}, p),
                                         reinterpret(UInt64, cmp),
                                         reinterpret(UInt64, val)))

end
end
239243 atomic_arrayset (A, Base. _to_linear_index (A, Is... ), op, convert (T, val))
240244
241245# native atomics
# TODO: support inc/dec
# TODO: this depends on available extensions
#       - UInt64: requires cl_khr_int64_base_atomics for add/sub/inc/dec,
#                 requires cl_khr_int64_extended_atomics for min/max/and/or/xor
#       - Float64: should always hit the fallback
242251for (op,impl) in [(+ ) => atomic_add!,
243252 (- ) => atomic_sub!,
244253 (& ) => atomic_and!,
@@ -252,6 +261,7 @@ for (op,impl) in [(+) => atomic_add!,
252261end
253262
254263# fallback using compare-and-swap
# TODO: for 64-bit types, this depends on cl_khr_int64_base_atomics
255265function atomic_arrayset (A:: AbstractArray{T} , I:: Integer , op:: Function , val) where {T}
256266 ptr = pointer (A, I)
257267 old = Base. unsafe_load (ptr, 1 )
0 commit comments