Patch summary: enable 64-bit integer and Float64 atomics in SPIRVIntrinsics.

* Project.toml, lib/intrinsics/Project.toml: bump SPIRVIntrinsics from 0.4 to 0.5.
* lib/intrinsics/src/atomic.jl: add UInt64/Int64 to atomic_integer_types; implement
  atomic_xchg!/atomic_cmpxchg! for Float32 and Float64 by reinterpreting to the
  same-width unsigned integer; widen the native atomic_arrayset method to every
  type in atomic_integer_types.
* test/atomics.jl (new), test/setup.jl: add an "atomics" test and list it among the
  tests that require native execution. The 64-bit cases are gated on
  cl_khr_int64_base_atomics, which is the extension atomic.jl documents as the
  requirement for add/sub/inc/dec.

NOTE(review): this patch was restored from a whitespace-collapsed copy. Leading
indentation, in-line alignment and blank context lines are reconstructed, the
`authors` context line looks truncated, and the `index` hashes predate the edits
to the added lines. Confirm against the target tree, or apply with
`git apply --ignore-whitespace`.

diff --git a/Project.toml b/Project.toml
index 2121c75a..c76fb007 100644
--- a/Project.toml
+++ b/Project.toml
@@ -31,7 +31,7 @@ Preferences = "1"
 Printf = "1"
 Random = "1"
 Reexport = "1"
-SPIRVIntrinsics = "0.4"
+SPIRVIntrinsics = "0.5"
 SPIRV_LLVM_Backend_jll = "20"
 SPIRV_Tools_jll = "2025.1"
 StaticArrays = "1"
diff --git a/lib/intrinsics/Project.toml b/lib/intrinsics/Project.toml
index daa9f09b..8142aabc 100644
--- a/lib/intrinsics/Project.toml
+++ b/lib/intrinsics/Project.toml
@@ -1,7 +1,7 @@
 name = "SPIRVIntrinsics"
 uuid = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
 authors = ["Tim Besard "]
-version = "0.4.0"
+version = "0.5.0"
 
 [deps]
 ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
diff --git a/lib/intrinsics/src/atomic.jl b/lib/intrinsics/src/atomic.jl
index a1c6007d..9bbbdbe6 100644
--- a/lib/intrinsics/src/atomic.jl
+++ b/lib/intrinsics/src/atomic.jl
@@ -1,13 +1,9 @@
 # Atomic Functions
 
-# TODO: support for 64-bit atomics via atom_cmpxchg (from cl_khr_int64_base_atomics)
+# provides atomic functions that rely on the OpenCL base atomics, as well as the
+# cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions.
 
-# "atomic operations on 32-bit signed, unsigned integers and single precision
-# floating-point to locations in __global or __local memory"
-
-const atomic_integer_types = [UInt32, Int32]
-# TODO: 64-bit atomics with ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS
-# TODO: additional floating-point atomics with ZE_extension_float_atomics
+const atomic_integer_types = [UInt32, Int32, UInt64, Int64]
 const atomic_memory_types = [AS.Workgroup, AS.CrossWorkgroup]
 
 
@@ -67,15 +63,23 @@ end
 for as in atomic_memory_types
 @eval begin
 
+# There is native support for atomic_xchg on Float32, but not for Float64,
+# so we always reinterpret for consistency.
 @device_function atomic_xchg!(p::LLVMPtr{Float32,$as}, val::Float32) =
-    @builtin_ccall("atomic_xchg", Float32, (LLVMPtr{Float32,$as}, Float32,), p, val)
+    reinterpret(Float32, atomic_xchg!(reinterpret(LLVMPtr{UInt32,$as}, p),
+                                      reinterpret(UInt32, val)))
+@device_function atomic_xchg!(p::LLVMPtr{Float64,$as}, val::Float64) =
+    reinterpret(Float64, atomic_xchg!(reinterpret(LLVMPtr{UInt64,$as}, p),
+                                      reinterpret(UInt64, val)))
 
-# XXX: why is only xchg supported on floats? isn't it safe for cmpxchg too,
-#      which should only perform bitwise comparisons?
 @device_function atomic_cmpxchg!(p::LLVMPtr{Float32,$as}, cmp::Float32, val::Float32) =
     reinterpret(Float32, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt32,$as}, p),
                                          reinterpret(UInt32, cmp),
                                          reinterpret(UInt32, val)))
+@device_function atomic_cmpxchg!(p::LLVMPtr{Float64,$as}, cmp::Float64, val::Float64) =
+    reinterpret(Float64, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt64,$as}, p),
+                                         reinterpret(UInt64, cmp),
+                                         reinterpret(UInt64, val)))
 
 end
 end
@@ -239,6 +243,11 @@ end
     atomic_arrayset(A, Base._to_linear_index(A, Is...), op, convert(T, val))
 
 # native atomics
+# TODO: support inc/dec
+# TODO: this depends on available extensions
+#       - UInt64: requires cl_khr_int64_base_atomics for add/sub/inc/dec,
+#                 requires cl_khr_int64_extended_atomics for min/max/and/or/xor
+#       - Float64: should always hit the fallback
 for (op,impl) in [(+)      => atomic_add!,
                   (-)      => atomic_sub!,
                   (&)      => atomic_and!,
@@ -247,11 +256,12 @@ for (op,impl) in [(+)      => atomic_add!,
                   Base.max => atomic_max!,
                   Base.min => atomic_min!]
     @eval @inline atomic_arrayset(A::AbstractArray{T}, I::Integer, ::typeof($op),
-                                  val::T) where {T <: Union{Int32,UInt32}} =
+                                  val::T) where {T <: Union{atomic_integer_types...}} =
         $impl(pointer(A, I), val)
 end
 
 # fallback using compare-and-swap
+# TODO: for 64-bit types, this depends on cl_khr_int64_base_atomics
 function atomic_arrayset(A::AbstractArray{T}, I::Integer, op::Function, val) where {T}
     ptr = pointer(A, I)
     old = Base.unsafe_load(ptr, 1)
diff --git a/test/atomics.jl b/test/atomics.jl
new file mode 100644
index 00000000..068e8cd7
--- /dev/null
+++ b/test/atomics.jl
@@ -0,0 +1,19 @@
+@testset "atomics" begin
+
+# Kernel body: atomically increment the single element of `counter` by one.
+function atomic_count(counter)
+    OpenCL.@atomic counter[] += 1
+    return
+end
+
+@testset "atomic_add! ($T)" for T in [Int32, UInt32, Int64, UInt64]
+    # 64-bit add only needs cl_khr_int64_base_atomics; the extended extension
+    # is what min/max/and/or/xor require, so it is not the right gate here.
+    if sizeof(T) == 4 || "cl_khr_int64_base_atomics" in cl.device().extensions
+        a = OpenCL.zeros(T)
+        @opencl global_size=1000 atomic_count(a)
+        @test OpenCL.@allowscalar a[] == 1000
+    end
+end
+
+end
diff --git a/test/setup.jl b/test/setup.jl
index e826d0d5..066424c6 100644
--- a/test/setup.jl
+++ b/test/setup.jl
@@ -90,7 +90,7 @@ function runtests(f, name, platform_filter)
     end
 
     # some tests require native execution capabilities
-    requires_il = name in ["execution", "intrinsics", "kernelabstractions"] ||
+    requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
                   startswith(name, "gpuarrays/")
 
     ex = quote