Skip to content

Commit f17f4c0

Browse files
committed
Support mapreduce over dimensions with SkipMissing
Allows calling mapreduce and specialized functinos with the dims argument on SkipMissing objects. The implementation is copied on the generic methods, but missing values need to be handled directly inside functions for efficiency and because mapreduce_impl returns a Some object which needs special handling.
1 parent 875607b commit f17f4c0

File tree

4 files changed

+382
-12
lines changed

4 files changed

+382
-12
lines changed

base/missing.jl

Lines changed: 175 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,9 @@ float(A::AbstractArray{Missing}) = A
146146
skipmissing(itr)
147147
148148
Return an iterator over the elements in `itr` skipping [`missing`](@ref) values.
149+
In addition to supporting any function taking iterators, the resulting object
150+
implements reductions over dimensions (i.e. the `dims` argument to
151+
[`mapreduce`](@ref), [`reduce`](@ref) and special functions like [`sum`](@ref)).
149152
150153
Use [`collect`](@ref) to obtain an `Array` containing the non-`missing` values in
151154
`itr`. Note that even if `itr` is a multidimensional array, the result will always
@@ -154,9 +157,6 @@ of the input.
154157
155158
# Examples
156159
```jldoctest
157-
julia> sum(skipmissing([1, missing, 2]))
158-
3
159-
160160
julia> collect(skipmissing([1, missing, 2]))
161161
2-element Array{Int64,1}:
162162
1
@@ -166,6 +166,31 @@ julia> collect(skipmissing([1 missing; 2 missing]))
166166
2-element Array{Int64,1}:
167167
1
168168
2
169+
170+
julia> sum(skipmissing([1, missing, 2]))
171+
3
172+
173+
julia> B = [1 missing; 3 4]
174+
2×2 Array{Union{Missing, Int64},2}:
175+
1 missing
176+
3 4
177+
178+
julia> sum(skipmissing(B), dims=1)
179+
1×2 Array{Int64,2}:
180+
4 4
181+
182+
julia> sum(skipmissing(B), dims=2)
183+
2×1 Array{Int64,2}:
184+
1
185+
7
186+
187+
julia> reduce(*, skipmissing(B), dims=1)
188+
1×2 Array{Int64,2}:
189+
3 4
190+
191+
julia> mapreduce(cos, +, skipmissing(B), dims=1)
192+
1×2 Array{Float64,2}:
193+
-0.44969 -0.653644
169194
```
170195
"""
171196
skipmissing(itr) = SkipMissing(itr)
@@ -282,6 +307,153 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
282307
end
283308
end
284309

310+
# mapreducedim implementation
311+
312+
mapreduce(f, op, itr::SkipMissing{<:AbstractArray}; dims=:, kw...) =
313+
_mapreduce_dim(f, op, kw.data, itr, dims)
314+
315+
_mapreduce_dim(f, op, nt::NamedTuple{(:init,)}, itr::SkipMissing{<:AbstractArray}, ::Colon) =
316+
mapfoldl(f, op, itr; nt...)
317+
318+
_mapreduce_dim(f, op, ::NamedTuple{()}, itr::SkipMissing{<:AbstractArray}, ::Colon) =
319+
_mapreduce(f, op, IndexStyle(itr.x), itr)
320+
321+
_mapreduce_dim(f, op, nt::NamedTuple{(:init,)}, itr::SkipMissing{<:AbstractArray}, dims) =
322+
mapreducedim!(f, op, reducedim_initarray(itr, dims, nt.init), itr)
323+
324+
_mapreduce_dim(f, op, ::NamedTuple{()}, itr::SkipMissing{<:AbstractArray}, dims) =
325+
mapreducedim!(f, op, reducedim_init(f, op, itr, dims), itr)
326+
327+
for Op in (:(typeof(max)), :(typeof(min)))
328+
@eval _mapreduce_dim(f, op::$(Op), ::NamedTuple{()}, itr::SkipMissing{<:AbstractArray}, dims) =
329+
mapreducedim!(f, op, reducedim_init(f, op, itr, dims), itr)
330+
@eval _mapreduce_dim(f, op::$(Op), ::NamedTuple{()}, itr::SkipMissing{<:AbstractArray}, ::Colon) =
331+
_mapreduce(f, op, IndexStyle(itr.x), itr)
332+
end
333+
334+
reducedim_initarray(itr::SkipMissing{<:AbstractArray}, region, init, ::Type{R}) where {R} =
335+
reducedim_initarray(itr.x, region, init, R)
336+
reducedim_initarray(itr::SkipMissing{<:AbstractArray}, region, init::T) where {T} =
337+
reducedim_initarray(itr.x, region, init, T)
338+
339+
function reducedim_initarray0(itr::SkipMissing{<:AbstractArray}, region, f, ops)
340+
A = itr.x
341+
T = eltype(itr)
342+
ri = reduced_indices0(A, region)
343+
if isempty(itr)
344+
if prod(length, reduced_indices(A, region)) != 0
345+
reducedim_initarray0_empty(A, region, f, ops) # ops over empty slice of A
346+
else
347+
R = f == identity ? T : Core.Compiler.return_type(f, (T,))
348+
similar(A, R, ri)
349+
end
350+
else
351+
R = f == identity ? T : typeof(f(first(itr)))
352+
si = similar(A, R, ri)
353+
mapfirst!(f, si, itr)
354+
end
355+
si
356+
end
357+
358+
function mapfirst!(f, R::AbstractArray, itr::SkipMissing{<:AbstractArray})
359+
A = itr.x
360+
lsiz = check_reducedims(R,A)
361+
indsAt, indsRt = safe_tail(axes(A)), safe_tail(axes(R)) # handle d=1 manually
362+
keep, Idefault = Broadcast.shapeindexer(indsRt)
363+
if reducedim1(R, A)
364+
# keep the accumulator as a local variable when reducing along the first dimension
365+
i1 = first(indices1(R))
366+
@inbounds for IA in CartesianIndices(indsAt)
367+
IR = Broadcast.newindex(IA, keep, Idefault)
368+
filled = false
369+
for i in axes(A, 1)
370+
x = A[i, IA]
371+
if x !== missing
372+
R[i1,IR] = f(x)
373+
filled = true
374+
break
375+
end
376+
end
377+
if !filled
378+
throw(ArgumentError("cannot reduce over slices with only missing values"))
379+
end
380+
end
381+
else
382+
filled = fill!(similar(R, Bool), false)
383+
allfilled = false
384+
@inbounds for IA in CartesianIndices(indsAt)
385+
IR = Broadcast.newindex(IA, keep, Idefault)
386+
for i in axes(A, 1)
387+
filled[i,IR] && continue
388+
x = A[i, IA]
389+
if x !== missing
390+
R[i,IR] = f(x)
391+
filled[i,IR] = true
392+
end
393+
end
394+
(allfilled = all(filled)) && break
395+
end
396+
if !allfilled
397+
throw(ArgumentError("cannot reduce over slices with only missing values"))
398+
end
399+
end
400+
end
401+
402+
function _mapreducedim!(f, op, R::AbstractArray, itr::SkipMissing{<:AbstractArray})
403+
A = itr.x
404+
lsiz = check_reducedims(R,A)
405+
isempty(A) && return R
406+
407+
if has_fast_linear_indexing(A) && lsiz > 16
408+
# use mapreduce_impl, which is probably better tuned to achieve higher performance
409+
nslices = div(_length(A), lsiz)
410+
ibase = first(LinearIndices(A))-1
411+
for i = 1:nslices
412+
x = mapreduce_impl(f, op, itr, ibase+1, ibase+lsiz)
413+
if x !== nothing
414+
@inbounds R[i] = op(R[i], something(x))
415+
end
416+
ibase += lsiz
417+
end
418+
return R
419+
end
420+
indsAt, indsRt = safe_tail(axes(A)), safe_tail(axes(R)) # handle d=1 manually
421+
keep, Idefault = Broadcast.shapeindexer(indsRt)
422+
if reducedim1(R, A)
423+
# keep the accumulator as a local variable when reducing along the first dimension
424+
i1 = first(indices1(R))
425+
@inbounds for IA in CartesianIndices(indsAt)
426+
IR = Broadcast.newindex(IA, keep, Idefault)
427+
r = R[i1,IR]
428+
@simd for i in axes(A, 1)
429+
x = A[i, IA]
430+
if x !== missing
431+
r = op(r, f(x))
432+
end
433+
end
434+
435+
R[i1,IR] = r
436+
end
437+
else
438+
@inbounds for IA in CartesianIndices(indsAt)
439+
IR = Broadcast.newindex(IA, keep, Idefault)
440+
@simd for i in axes(A, 1)
441+
x = A[i, IA]
442+
if x !== missing
443+
R[i,IR] = op(R[i,IR], f(x))
444+
end
445+
end
446+
end
447+
end
448+
return R
449+
end
450+
451+
mapreducedim!(f, op, R::AbstractArray, A::SkipMissing{<:AbstractArray}) =
452+
(_mapreducedim!(f, op, R, A); R)
453+
454+
reducedim!(op, R::AbstractArray{RT}, A::SkipMissing{<:AbstractArray}) where {RT} =
455+
mapreducedim!(identity, op, R, A)
456+
285457
"""
286458
coalesce(x, y...)
287459

0 commit comments

Comments
 (0)