Bump version + clean-ups.

maleadt · maleadt · commit af1720390bbe · 2025-07-30T13:10:10.000+02:00
diff --git a/Project.toml b/Project.toml
@@ -31,7 +31,7 @@ Preferences = "1"
 Printf = "1"
 Random = "1"
 Reexport = "1"
-SPIRVIntrinsics = "0.4"
+SPIRVIntrinsics = "0.5"
 SPIRV_LLVM_Backend_jll = "20"
 SPIRV_Tools_jll = "2025.1"
 StaticArrays = "1"
diff --git a/lib/intrinsics/Project.toml b/lib/intrinsics/Project.toml
@@ -1,7 +1,7 @@
 name = "SPIRVIntrinsics"
 uuid = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
 authors = ["Tim Besard <tim.besard@gmail.com>"]
-version = "0.4.0"
+version = "0.5.0"
 
 [deps]
 ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
diff --git a/lib/intrinsics/src/atomic.jl b/lib/intrinsics/src/atomic.jl
@@ -1,13 +1,9 @@
 # Atomic Functions
 
-# TODO: support for 64-bit atomics via atom_cmpxchg (from cl_khr_int64_base_atomics)
-
-# "atomic operations on 32-bit signed, unsigned integers and single precision
-#  floating-point to locations in __global or __local memory"
+# Provides atomic functions that rely on the OpenCL base atomics, as well as on the
+# cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions.
 
 const atomic_integer_types = [UInt32, Int32, UInt64, Int64]
-# TODO: 64-bit atomics with ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS
-# TODO: additional floating-point atomics with ZE_extension_float_atomics
 const atomic_memory_types = [AS.Workgroup, AS.CrossWorkgroup]
 
 
@@ -67,15 +63,23 @@ end
 for as in atomic_memory_types
 @eval begin
 
+# There is native support for atomic_xchg on Float32, but not for Float64, so for
+# consistency we always reinterpret to the same-width unsigned integer instead.
 @device_function atomic_xchg!(p::LLVMPtr{Float32,$as}, val::Float32) =
-    @builtin_ccall("atomic_xchg", Float32, (LLVMPtr{Float32,$as}, Float32,), p, val)
+    reinterpret(Float32, atomic_xchg!(reinterpret(LLVMPtr{UInt32,$as}, p),
+                                      reinterpret(UInt32, val)))
+@device_function atomic_xchg!(p::LLVMPtr{Float64,$as}, val::Float64) =
+    reinterpret(Float64, atomic_xchg!(reinterpret(LLVMPtr{UInt64,$as}, p),
+                                      reinterpret(UInt64, val)))
 
-# XXX: why is only xchg supported on floats? isn't it safe for cmpxchg too,
-#      which should only perform bitwise comparisons?
 @device_function atomic_cmpxchg!(p::LLVMPtr{Float32,$as}, cmp::Float32, val::Float32) =
     reinterpret(Float32, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt32,$as}, p),
                                          reinterpret(UInt32, cmp),
                                          reinterpret(UInt32, val)))
+@device_function atomic_cmpxchg!(p::LLVMPtr{Float64,$as}, cmp::Float64, val::Float64) =
+    reinterpret(Float64, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt64,$as}, p),
+                                         reinterpret(UInt64, cmp),
+                                         reinterpret(UInt64, val)))
 
 end
 end
@@ -239,6 +243,11 @@ end
     atomic_arrayset(A, Base._to_linear_index(A, Is...), op, convert(T, val))
 
 # native atomics
+# TODO: support inc/dec
+# TODO: this depends on available extensions
+#       - UInt64: requires cl_khr_int64_base_atomics for add/sub/inc/dec,
+#                 requires cl_khr_int64_extended_atomics for min/max/and/or/xor
+#       - Float64: should always hit the fallback
 for (op,impl) in [(+)      => atomic_add!,
                   (-)      => atomic_sub!,
                   (&)      => atomic_and!,
@@ -252,6 +261,7 @@ for (op,impl) in [(+)      => atomic_add!,
 end
 
 # fallback using compare-and-swap
+# TODO: for 64-bit types, this depends on cl_khr_int64_base_atomics
 function atomic_arrayset(A::AbstractArray{T}, I::Integer, op::Function, val) where {T}
     ptr = pointer(A, I)
     old = Base.unsafe_load(ptr, 1)
diff --git a/src/compiler/execution.jl b/src/compiler/execution.jl
@@ -4,7 +4,7 @@ export @opencl, clfunction
 ## high-level @opencl interface
 
 const MACRO_KWARGS = [:launch]
-const COMPILER_KWARGS = [:kernel, :name, :always_inline, :extensions]
+const COMPILER_KWARGS = [:kernel, :name, :always_inline]
 const LAUNCH_KWARGS = [:global_size, :local_size, :queue]
 
 macro opencl(ex...)