diff --git a/src/host/broadcast.jl b/src/host/broadcast.jl
index 5ae6b9a8..fa73a311 100644
--- a/src/host/broadcast.jl
+++ b/src/host/broadcast.jl
@@ -61,7 +61,7 @@ end
     axes(dest) == axes(bc) || Broadcast.throwdm(axes(dest), axes(bc))
     bc′ = Broadcast.preprocess(dest, bc)
     gpu_call(dest, bc′; name="broadcast") do ctx, dest, bc′
-        let I = CartesianIndex(@cartesianidx(dest))
+        let I = @cartesianidx(dest)
             @inbounds dest[I] = bc′[I]
         end
         return
@@ -81,12 +81,42 @@ end
 allequal(x) = true
 allequal(x, y, z...) = x == y && allequal(y, z...)
 
-function Base.map!(f, y::GPUDestArray, xs::AbstractArray...)
-    @assert allequal(size.((y, xs...))...)
-    return y .= f.(xs...)
+function Base.map!(f, dest::GPUDestArray, xs::AbstractArray...)
+    indices = LinearIndices.((dest, xs...))
+    common_length = minimum(length.(indices))
+
+    # custom broadcast, ignoring the container size mismatches
+    # (avoids the reshape + view that our mapreduce impl has to do)
+    bc = Broadcast.instantiate(Broadcast.broadcasted(f, xs...))
+    bc′ = Broadcast.preprocess(dest, bc)
+    gpu_call(dest, bc′; name="map!", total_threads=common_length) do ctx, dest, bc′
+        i = linear_index(ctx)
+        if i <= common_length
+            I = CartesianIndices(axes(bc′))[i]
+            @inbounds dest[i] = bc′[I]
+        end
+        return
+    end
+
+    return dest
 end
 
-function Base.map(f, y::GPUDestArray, xs::AbstractArray...)
-    @assert allequal(size.((y, xs...))...)
-    return f.(y, xs...)
+function Base.map(f, x::GPUDestArray, xs::AbstractArray...)
+    # if argument sizes match, their shape needs to be preserved
+    xs = (x, xs...)
+    if allequal(size.(xs)...)
+        return f.(xs...)
+    end
+
+    # if not, treat them as iterators
+    indices = LinearIndices.(xs)
+    common_length = minimum(length.(indices))
+
+    # construct a broadcast to figure out the destination container
+    bc = Broadcast.instantiate(Broadcast.broadcasted(f, xs...))
+    ElType = Broadcast.combine_eltypes(bc.f, bc.args)
+    isbitstype(ElType) || error("Cannot map function returning non-isbits $ElType.")
+    dest = similar(bc, ElType, common_length)
+
+    return map!(f, dest, xs...)
 end
diff --git a/src/host/mapreduce.jl b/src/host/mapreduce.jl
index e8def4fd..d4634718 100644
--- a/src/host/mapreduce.jl
+++ b/src/host/mapreduce.jl
@@ -1,9 +1,14 @@
 # map-reduce
 
+const AbstractArrayOrBroadcasted = Union{AbstractArray,Broadcast.Broadcasted}
+
 # GPUArrays' mapreduce methods build on `Base.mapreducedim!`, but with an additional
 # argument `init` value to avoid eager initialization of `R` (if set to something).
-mapreducedim!(f, op, R::AbstractGPUArray, As::AbstractArray...; init=nothing) = error("Not implemented") # COV_EXCL_LINE
+mapreducedim!(f, op, R::AbstractGPUArray, A::AbstractArrayOrBroadcasted;
+              init=nothing) = error("Not implemented") # COV_EXCL_LINE
+# resolve ambiguities
 Base.mapreducedim!(f, op, R::AbstractGPUArray, A::AbstractArray) = mapreducedim!(f, op, R, A)
+Base.mapreducedim!(f, op, R::AbstractGPUArray, A::Broadcast.Broadcasted) = mapreducedim!(f, op, R, A)
 
 neutral_element(op, T) =
     error("""GPUArrays.jl needs to know the neutral element for your operator `$op`.
@@ -18,11 +23,30 @@ neutral_element(::typeof(Base.mul_prod), T) = one(T)
 neutral_element(::typeof(Base.min), T) = typemax(T)
 neutral_element(::typeof(Base.max), T) = typemin(T)
 
-function Base.mapreduce(f, op, As::AbstractGPUArray...; dims=:, init=nothing)
+# resolve ambiguities
+Base.mapreduce(f, op, A::AbstractGPUArray, As::AbstractArrayOrBroadcasted...;
+               dims=:, init=nothing) = _mapreduce(f, op, A, As...; dims=dims, init=init)
+Base.mapreduce(f, op, A::Broadcast.Broadcasted{<:AbstractGPUArrayStyle}, As::AbstractArrayOrBroadcasted...;
+               dims=:, init=nothing) = _mapreduce(f, op, A, As...; dims=dims, init=init)
+
+function _mapreduce(f, op, As...; dims, init)
+    # mapreduce should apply `f` like `map` does, consuming elements like iterators.
+    bc = if allequal(size.(As)...)
+        Broadcast.instantiate(Broadcast.broadcasted(f, As...))
+    else
+        # TODO: can we avoid the reshape + view?
+        indices = LinearIndices.(As)
+        common_length = minimum(length.(indices))
+        Bs = map(As) do A
+            view(reshape(A, length(A)), 1:common_length)
+        end
+        Broadcast.instantiate(Broadcast.broadcasted(f, Bs...))
+    end
+
     # figure out the destination container type by looking at the initializer element,
     # or by relying on inference to reason through the map and reduce functions.
     if init === nothing
-        ET = Base.promote_op(f, map(eltype, As)...)
+        ET = Broadcast.combine_eltypes(bc.f, bc.args)
         ET = Base.promote_op(op, ET, ET)
         (ET === Union{} || ET === Any) &&
             error("mapreduce cannot figure the output element type, please pass an explicit init value")
@@ -32,14 +56,10 @@ function Base.mapreduce(f, op, As::AbstractGPUArray...; dims=:, init=nothing)
         ET = typeof(init)
     end
 
-    # TODO: Broadcast-semantics after JuliaLang-julia#31020
-    A = first(As)
-    all(B -> size(A) == size(B), As) || throw(DimensionMismatch("dimensions of containers must be identical"))
-
-    sz = size(A)
-    red = ntuple(i->(dims==Colon() || i in dims) ? 1 : sz[i], ndims(A))
-    R = similar(A, ET, red)
-    mapreducedim!(f, op, R, As...; init=init)
+    sz = size(bc)
+    red = ntuple(i->(dims==Colon() || i in dims) ? 1 : sz[i], length(sz))
+    R = similar(bc, ET, red)
+    mapreducedim!(identity, op, R, bc; init=init)
 
     if dims==Colon()
         @allowscalar R[]
@@ -57,7 +77,7 @@
 Base.count(pred::Function, A::AbstractGPUArray) = mapreduce(pred, +, A; init = 0)
 Base.:(==)(A::AbstractGPUArray, B::AbstractGPUArray) = Bool(mapreduce(==, &, A, B))
 
-# avoid calling into `initarray!``
+# avoid calling into `initarray!`
 Base.sum!(R::AbstractGPUArray, A::AbstractGPUArray) = Base.reducedim!(Base.add_sum, R, A)
 Base.prod!(R::AbstractGPUArray, A::AbstractGPUArray) = Base.reducedim!(Base.mul_prod, R, A)
 Base.maximum!(R::AbstractGPUArray, A::AbstractGPUArray) = Base.reducedim!(max, R, A)
diff --git a/src/reference.jl b/src/reference.jl
index 832f7c55..4d2afddc 100644
--- a/src/reference.jl
+++ b/src/reference.jl
@@ -298,11 +298,12 @@ Adapt.adapt_storage(::Adaptor, x::JLArray{T,N}) where {T,N} =
 GPUArrays.unsafe_reinterpret(::Type{T}, A::JLArray, size::Tuple) where T =
     reshape(reinterpret(T, A.data), size)
 
-function GPUArrays.mapreducedim!(f, op, R::JLArray, As::AbstractArray...; init=nothing)
+function GPUArrays.mapreducedim!(f, op, R::JLArray, A::Union{AbstractArray,Broadcast.Broadcasted};
+                                 init=nothing)
     if init !== nothing
         fill!(R, init)
     end
-    @allowscalar Base.reducedim!(op, R.data, map(f, As...))
+    @allowscalar Base.reducedim!(op, R.data, map(f, A))
 end
 
 end
diff --git a/test/testsuite/broadcasting.jl b/test/testsuite/broadcasting.jl
index bd54fcac..419d1c76 100644
--- a/test/testsuite/broadcasting.jl
+++ b/test/testsuite/broadcasting.jl
@@ -117,6 +117,30 @@ function broadcasting(AT)
            @test compare((A, B) -> A .* B, AT, rand(ET, 40, 40), rand(ET, 40, 40))
            @test compare((A, B) -> A .* B .+ ET(10), AT, rand(ET, 40, 40), rand(ET, 40, 40))
        end
+
+       @testset "map! $ET" begin
+           @test compare(AT, rand(2,2), rand(2,2)) do x,y
+               map!(+, x, y)
+           end
+           @test compare(AT, rand(2), rand(2,2)) do x,y
+               map!(+, x, y)
+           end
+           @test compare(AT, rand(2,2), rand(2)) do x,y
+               map!(+, x, y)
+           end
+       end
+
+       @testset "map $ET" begin
+           @test compare(AT, rand(2,2), rand(2,2)) do x,y
+               map(+, x, y)
+           end
+           @test compare(AT, rand(2), rand(2,2)) do x,y
+               map(+, x, y)
+           end
+           @test compare(AT, rand(2,2), rand(2)) do x,y
+               map(+, x, y)
+           end
+       end
    end
 
    @testset "0D" begin
diff --git a/test/testsuite/mapreduce.jl b/test/testsuite/mapreduce.jl
index 1d635204..47615e5c 100644
--- a/test/testsuite/mapreduce.jl
+++ b/test/testsuite/mapreduce.jl
@@ -158,6 +158,15 @@ function test_mapreduce(AT)
                ET <: Complex || @test compare(minimum, AT, rand(range, dims))
            end
        end
+       @testset "broadcasting behavior" begin
+           @test compare((x,y)->mapreduce(+, +, x, y), AT,
+                         rand(range, 1), rand(range, 2, 2))
+           @test compare(AT, rand(range, 1), rand(range, 2, 2)) do x, y
+               bc = Broadcast.instantiate(Broadcast.broadcasted(*, x, y))
+               reduce(+, bc)
+           end
+
+       end
    end
 end
 @testset "any all ==" begin
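
Usage sketch of the semantics this patch enables, assuming a GPUArrays build
with the changes applied; JLArray is the reference backend from
src/reference.jl, and the array values are arbitrary:

    using GPUArrays: JLArray

    a = JLArray(rand(Float32, 2))      # 2 elements
    b = JLArray(rand(Float32, 2, 2))   # 4 elements

    # map now consumes size-mismatched containers like iterators: the result
    # has min(length.(args)...) elements instead of throwing a DimensionMismatch.
    map(+, a, b)                       # 2-element JLArray{Float32,1}

    # map! likewise only writes the common number of linear indices.
    d = JLArray(zeros(Float32, 2, 2))
    map!(+, d, a)                      # writes d[1] and d[2], leaves the rest

    # mapreduce applies `f` with the same iterator-like semantics ...
    mapreduce(+, +, a, b)              # reduces the two mapped elements

    # ... and reductions also accept instantiated Broadcasted objects directly.
    bc = Broadcast.instantiate(Broadcast.broadcasted(*, a, b))
    reduce(+, bc)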