From cd83dfe19887f732a7c5971dd141b409459386d0 Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Sun, 9 Mar 2025 02:29:50 +0300 Subject: [PATCH 01/23] include DataType in CategoricalEltypes (#876) --- src/Lookups/lookup_arrays.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Lookups/lookup_arrays.jl b/src/Lookups/lookup_arrays.jl index 3be595a0c..c72a600be 100644 --- a/src/Lookups/lookup_arrays.jl +++ b/src/Lookups/lookup_arrays.jl @@ -468,7 +468,7 @@ abstract type AbstractCategorical{T,O} <: Aligned{T,O} end order(lookup::AbstractCategorical) = lookup.order metadata(lookup::AbstractCategorical) = lookup.metadata -const CategoricalEltypes = Union{AbstractChar,Symbol,AbstractString} +const CategoricalEltypes = Union{AbstractChar,Symbol,AbstractString,DataType} function Adapt.adapt_structure(to, l::AbstractCategorical) rebuild(l; data=Adapt.adapt(to, parent(l)), metadata=NoMetadata()) From 685efd7f8ad3678cb0e30b9c1926216b8ad77d4d Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 9 Mar 2025 00:30:50 +0100 Subject: [PATCH 02/23] Breaking: `DimVector` of `NamedTuple` is a `NamedTuple` `DimTable` (#839) * DimVector of NamedTuple is a NamedTuple table * bugfix * remove show * fix ambiguity --- src/tables.jl | 137 ++++++++++++++++++++++++++++++------------------- src/utils.jl | 8 ++- test/tables.jl | 15 +++++- 3 files changed, 105 insertions(+), 55 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index 9773a8495..d158b2e1c 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -5,6 +5,9 @@ Abstract supertype for dim tables """ abstract type AbstractDimTable <: Tables.AbstractColumns end +struct Columns end +struct Rows end + # Tables.jl interface for AbstractDimStack and AbstractDimArray DimTableSources = Union{AbstractDimStack,AbstractDimArray} @@ -12,12 +15,8 @@ DimTableSources = Union{AbstractDimStack,AbstractDimArray} Tables.istable(::Type{<:DimTableSources}) = true Tables.columnaccess(::Type{<:DimTableSources}) = true 
Tables.columns(x::DimTableSources) = DimTable(x) - -Tables.columnnames(A::AbstractDimArray) = _colnames(DimStack(A)) -Tables.columnnames(s::AbstractDimStack) = _colnames(s) - -Tables.schema(A::AbstractDimArray) = Tables.schema(DimStack(A)) -Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) +Tables.columnnames(x::DimTableSources) = _colnames(x) +Tables.schema(x::DimTableSources) = Tables.schema(DimTable(x)) @inline Tables.getcolumn(x::DimTableSources, i::Int) = Tables.getcolumn(DimTable(x), i) @inline Tables.getcolumn(x::DimTableSources, key::Symbol) = @@ -27,11 +26,14 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) @inline Tables.getcolumn(t::DimTableSources, dim::DimOrDimType) = Tables.getcolumn(t, dimnum(t, dim)) -function _colnames(s::AbstractDimStack) - dimkeys = map(name, dims(s)) - # The data is always the last column/s - (dimkeys..., keys(s)...) +_colnames(s::AbstractDimStack) = (map(name, dims(s))..., keys(s)...) +function _colnames(A::AbstractDimArray) + n = Symbol(name(A)) == Symbol("") ? :value : Symbol(name(A)) + (map(name, dims(A))..., n) end +_colnames(A::AbstractDimVector{T}) where T<:NamedTuple = + (map(name, dims(A))..., _colnames(T)...) +_colnames(::Type{<:NamedTuple{Keys}}) where Keys = Keys # DimTable @@ -87,18 +89,20 @@ julia> a = DimArray(ones(16, 16, 3), (X, Y, Dim{:band})) 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 -julia> +julia> ``` """ -struct DimTable <: AbstractDimTable +struct DimTable{Mode} <: AbstractDimTable parent::Union{AbstractDimArray,AbstractDimStack} colnames::Vector{Symbol} dimcolumns::Vector{AbstractVector} - dimarraycolumns::Vector{AbstractVector} + dimarraycolumns::Vector end -function DimTable(s::AbstractDimStack; mergedims=nothing) +function DimTable(s::AbstractDimStack; + mergedims=nothing, +) s = isnothing(mergedims) ? 
s : DD.mergedims(s, mergedims) dimcolumns = collect(_dimcolumns(s)) dimarraycolumns = if hassamedims(s) @@ -107,40 +111,52 @@ function DimTable(s::AbstractDimStack; mergedims=nothing) map(A -> vec(DimExtensionArray(A, dims(s))), layers(s)) end |> collect keys = collect(_colnames(s)) - return DimTable(s, keys, dimcolumns, dimarraycolumns) + return DimTable{Columns}(s, keys, dimcolumns, dimarraycolumns) end -function DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing) +function DimTable(As::Vararg{AbstractDimArray}; + layernames=nothing, + mergedims=nothing, +) # Check that dims are compatible - comparedims(xs...) - + comparedims(As...) # Construct Layer Names - layernames = isnothing(layernames) ? [Symbol("layer_$i") for i in eachindex(xs)] : layernames - + layernames = isnothing(layernames) ? uniquekeys(As) : layernames # Construct dimension and array columns with DimExtensionArray - xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs) - dims_ = dims(first(xs)) + As = isnothing(mergedims) ? 
As : map(x -> DD.mergedims(x, mergedims), As) + dims_ = dims(first(As)) dimcolumns = collect(_dimcolumns(dims_)) dimnames = collect(map(name, dims_)) - dimarraycolumns = collect(map(vec ∘ parent, xs)) + dimarraycolumns = collect(map(vec ∘ parent, As)) colnames = vcat(dimnames, layernames) # Return DimTable - return DimTable(first(xs), colnames, dimcolumns, dimarraycolumns) + return DimTable{Columns}(first(As), colnames, dimcolumns, dimarraycolumns) end -function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) - if !isnothing(layersfrom) && any(hasdim(x, layersfrom)) - d = dims(x, layersfrom) - nlayers = size(x, d) - layers = [view(x, rebuild(d, i)) for i in 1:nlayers] +function DimTable(A::AbstractDimArray; + layersfrom=nothing, + mergedims=nothing, +) + if !isnothing(layersfrom) && any(hasdim(A, layersfrom)) + d = dims(A, layersfrom) + nlayers = size(A, d) + layers = [view(A, rebuild(d, i)) for i in 1:nlayers] layernames = if iscategorical(d) Symbol.((name(d),), '_', lookup(d)) else Symbol.(("$(name(d))_$i" for i in 1:nlayers)) end - return DimTable(layers..., layernames=layernames, mergedims=mergedims) + return DimTable(layers...; layernames, mergedims) else - s = name(x) == NoName() ? DimStack((;value=x)) : DimStack(x) - return DimTable(s, mergedims=mergedims) + A = isnothing(mergedims) ? 
A : DD.mergedims(A, mergedims) + dimcolumns = collect(_dimcolumns(A)) + colnames = collect(_colnames(A)) + if (ndims(A) == 1) && (eltype(A) <: NamedTuple) + dimarrayrows = parent(A) + return DimTable{Rows}(A, colnames, dimcolumns, dimarrayrows) + else + dimarraycolumns = [vec(parent(A))] + return DimTable{Columns}(A, colnames, dimcolumns, dimarraycolumns) + end end end @@ -155,8 +171,6 @@ function _dimcolumn(x, d::Dimension) end end - - dimcolumns(t::DimTable) = getfield(t, :dimcolumns) dimarraycolumns(t::DimTable) = getfield(t, :dimarraycolumns) colnames(t::DimTable) = Tuple(getfield(t, :colnames)) @@ -174,12 +188,26 @@ Tables.columnaccess(::Type{<:DimTable}) = true Tables.columns(t::DimTable) = t Tables.columnnames(c::DimTable) = colnames(c) -function Tables.schema(t::DimTable) - types = vcat([map(eltype, dimcolumns(t))...], [map(eltype, dimarraycolumns(t))...]) +function Tables.schema(t::DimTable) + types = vcat([map(eltype, dimcolumns(t))...], _dimarraycolumn_eltypes(t)) Tables.Schema(colnames(t), types) end -@inline function Tables.getcolumn(t::DimTable, i::Int) +_dimarraycolumn_eltypes(t::DimTable{Columns}) = [map(eltype, dimarraycolumns(t))...] 
+_dimarraycolumn_eltypes(t::DimTable{Rows}) = _eltypes(eltype(dimarraycolumns(t))) +_eltypes(::Type{T}) where T<:NamedTuple = collect(T.types) + +@inline function Tables.getcolumn(t::DimTable{Rows}, i::Int) + nkeys = length(colnames(t)) + if i > length(dims(t)) + map(nt -> nt[i - length(dims(t))], dimarraycolumns(t)) + elseif i > 0 && i < nkeys + dimcolumns(t)[i] + else + throw(ArgumentError("There is no table column $i")) + end +end +@inline function Tables.getcolumn(t::DimTable{Columns}, i::Int) nkeys = length(colnames(t)) if i > length(dims(t)) dimarraycolumns(t)[i - length(dims(t))] @@ -189,12 +217,19 @@ end throw(ArgumentError("There is no table column $i")) end end - -@inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType) +@inline function Tables.getcolumn(t::DimTable, dim::Union{Dimension,Type{<:Dimension}}) dimcolumns(t)[dimnum(t, dim)] end - -@inline function Tables.getcolumn(t::DimTable, key::Symbol) +@inline function Tables.getcolumn(t::DimTable{Rows}, key::Symbol) + key in colnames(t) || throw(ArgumentError("There is no table column $key")) + if hasdim(parent(t), key) + dimcolumns(t)[dimnum(t, key)] + else + # Function barrier + _col_from_rows(dimarraycolumns(t), key) + end +end +@inline function Tables.getcolumn(t::DimTable{Columns}, key::Symbol) keys = colnames(t) i = findfirst(==(key), keys) if isnothing(i) @@ -203,22 +238,20 @@ end return Tables.getcolumn(t, i) end end - @inline function Tables.getcolumn(t::DimTable, ::Type{T}, i::Int, key::Symbol) where T Tables.getcolumn(t, key) end -# TableTraits.jl interface - +_col_from_rows(rows, key) = map(row -> row[key], rows) -function IteratorInterfaceExtensions.getiterator(x::DimTableSources) - return Tables.datavaluerows(Tables.dictcolumntable(x)) -end -IteratorInterfaceExtensions.isiterable(::DimTableSources) = true +# TableTraits.jl interface TableTraits.isiterabletable(::DimTableSources) = true +TableTraits.isiterabletable(::DimTable) = true -function IteratorInterfaceExtensions.getiterator(t::DimTable) - 
return Tables.datavaluerows(Tables.dictcolumntable(t)) -end +# IteratorInterfaceExtensions.jl interface +IteratorInterfaceExtensions.getiterator(x::DimTableSources) = + Tables.datavaluerows(Tables.dictcolumntable(x)) +IteratorInterfaceExtensions.getiterator(t::DimTable) = + Tables.datavaluerows(Tables.dictcolumntable(t)) +IteratorInterfaceExtensions.isiterable(::DimTableSources) = true IteratorInterfaceExtensions.isiterable(::DimTable) = true -TableTraits.isiterabletable(::DimTable) = true diff --git a/src/utils.jl b/src/utils.jl index 8c9f5cb43..1a5ad439e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -193,9 +193,13 @@ function uniquekeys(keys::Vector{Symbol}) end end function uniquekeys(keys::Tuple{Symbol,Vararg{Symbol}}) - ids = ntuple(x -> x, length(keys)) + ids = ntuple(identity, length(keys)) map(keys, ids) do k, id - count(k1 -> k == k1, keys) > 1 ? Symbol(:layer, id) : k + if k == Symbol("") + Symbol(:layer, id) + else + count(k1 -> k == k1, keys) > 1 ? Symbol(:layer, id) : k + end end end uniquekeys(t::Tuple) = ntuple(i -> Symbol(:layer, i), length(t)) diff --git a/test/tables.jl b/test/tables.jl index b5bd416ea..f5ea708db 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -1,4 +1,9 @@ -using DimensionalData, IteratorInterfaceExtensions, TableTraits, Tables, Test, DataFrames +using DimensionalData +using Test +using Tables +using IteratorInterfaceExtensions +using TableTraits +using DataFrames using DimensionalData.Lookups, DimensionalData.Dimensions using DimensionalData: DimTable, DimExtensionArray @@ -154,3 +159,11 @@ end @test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3) @test Tables.columnnames(t4) == (:band, :geometry, :value) end + +@testset "DimTable NamedTuple" begin + da = DimArray([(; a=1.0f0i, b=2.0i) for i in 1:10], X) + t = DimTable(da) + s = Tables.schema(t) + @test s.names == (:X, :a, :b) + @test s.types == (Int, Float32, Float64) +end From 49746efdc38b0e1559dccd7af71ea4ddd4b1a57f Mon Sep 17 00:00:00 2001 From: 
Rafael Schouten Date: Sun, 9 Mar 2025 00:33:53 +0100 Subject: [PATCH 03/23] Breaking: add `combine` method for `groupby` output, fixing `similar` for `AbstractDimStack` (#903) * add combine method * test groupby and similar * docs entry --- docs/src/api/reference.md | 1 + src/DimensionalData.jl | 2 +- src/array/methods.jl | 2 +- src/groupby.jl | 49 +++++++++++++++++++++++++++++++++++++-- src/stack/indexing.jl | 26 ++++++++++----------- src/stack/stack.jl | 31 ++++++++++++++++++++++++- src/utils.jl | 5 ++++ test/groupby.jl | 27 ++++++++++++++++++--- test/stack.jl | 16 +++++++++++-- 9 files changed, 135 insertions(+), 24 deletions(-) diff --git a/docs/src/api/reference.md b/docs/src/api/reference.md index 3f6667d82..7ddb1c83c 100644 --- a/docs/src/api/reference.md +++ b/docs/src/api/reference.md @@ -69,6 +69,7 @@ For transforming DimensionalData objects: ```@docs groupby +combine DimensionalData.DimGroupByArray Bins ranges diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 32247dba3..0d04bdf65 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -83,7 +83,7 @@ export dimnum, hasdim, hasselection, otherdims export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims, unmergedims, maplayers -export groupby, seasons, months, hours, intervals, ranges +export groupby, combine, seasons, months, hours, intervals, ranges export @d diff --git a/src/array/methods.jl b/src/array/methods.jl index c4a63ad8d..27a1cdc35 100644 --- a/src/array/methods.jl +++ b/src/array/methods.jl @@ -421,7 +421,7 @@ function _check_cat_lookups(D, ::Regular, lookups...) 
@warn _cat_warn_string(D, "step sizes $(step(span(l))) and $s do not match") return false end - if !(lastval + s ≈ first(l)) + if !(s isa Dates.AbstractTime) && !(lastval + s ≈ first(l)) @warn _cat_warn_string(D, "`Regular` lookups do not join with the correct step size: $(lastval) + $s ≈ $(first(l)) should hold") return false end diff --git a/src/groupby.jl b/src/groupby.jl index 6ee94a2b4..bfcb63cdd 100644 --- a/src/groupby.jl +++ b/src/groupby.jl @@ -249,7 +249,6 @@ Group some data along the time dimension: ```jldoctest groupby; setup = :(using Random; Random.seed!(123)) julia> using DimensionalData, Dates - julia> A = rand(X(1:0.1:20), Y(1:20), Ti(DateTime(2000):Day(3):DateTime(2003))); julia> groups = groupby(A, Ti => month) # Group by month @@ -356,6 +355,7 @@ end function _group_indices(dim::Dimension, f::Base.Callable; labels=nothing) orig_lookup = lookup(dim) k1 = f(first(orig_lookup)) + # TODO: using a Dict here is a bit slow indices_dict = Dict{typeof(k1),Vector{Int}}() for (i, x) in enumerate(orig_lookup) k = f(x) @@ -447,7 +447,8 @@ end Generate a `Vector` of `UnitRange` with length `step(A)` """ -intervals(rng::AbstractRange) = IntervalSets.Interval{:closed,:open}.(rng, rng .+ step(rng)) +intervals(rng::AbstractRange) = + IntervalSets.Interval{:closed,:open}.(rng, rng .+ step(rng)) """ ranges(A::AbstractRange{<:Integer}) @@ -455,3 +456,47 @@ intervals(rng::AbstractRange) = IntervalSets.Interval{:closed,:open}.(rng, rng . Generate a `Vector` of `UnitRange` with length `step(A)` """ ranges(rng::AbstractRange{<:Integer}) = map(x -> x:x+step(rng)-1, rng) + +""" + combine(f::Function, gb::DimGroupByArray; dims=:) + +Combine the `DimGroupByArray` using function `f` over the group dimensions. +Unlike broadcasting a reducing function over a `DimGroupByArray`, this function +always returns a new flattened `AbstractDimArray` even where not all dimensions +are reduced. It will also work over grouped `AbstractDimStack`. 
+ +If `dims` is given, it will combine only the dimensions in `dims`, the +others will be present in the final array. Note that all grouped dimensions +must be reduced and included in `dims`. + +The reducing function `f` must also accept a `dims` keyword. + +# Example + +```jldoctest groupby +``` +""" +function combine(f::Function, gb::DimGroupByArray{G}; dims=:) where G + targetdims = DD.commondims(first(gb), dims) + all(hasdim(first(gb), targetdims)) || throw(ArgumentError("dims must be a subset of the groupby dimensions")) + all(hasdim(targetdims, DD.dims(gb))) || throw(ArgumentError("grouped dimensions $(DD.basedims(gb)) must be included in dims")) + # This works for both arrays and stacks + # Combine the remaining dimensions after reduction and the group dimensions + destdims = (otherdims(DD.dims(first(gb)), dims)..., DD.dims(gb)...) + # Get the output eltype + T = Base.promote_op(f, G) + # Create an output array with the combined dimensions + dest = similar(first(gb), T, destdims) + for D in DimIndices(gb) + if all(hasdim(targetdims, DD.dims(first(gb)))) + # Assign the reduced scalar to dest + dest[D...] = f(gb[D]) + else + # Reduce with `f` and drop length 1 dimensions + xs = dropdims(f(gb[D]; dims); dims) + # Broadcast the reduced array to dest + broadcast_dims!(identity, view(dest, D...), xs) + end + end + return dest +end \ No newline at end of file diff --git a/src/stack/indexing.jl b/src/stack/indexing.jl index dad1ebaaf..8843c99ad 100644 --- a/src/stack/indexing.jl +++ b/src/stack/indexing.jl @@ -150,6 +150,9 @@ for f in (:getindex, :view, :dotview) end end +@generated function _any_dimarray(v::Union{NamedTuple,Tuple}) + any(T -> T <: AbstractDimArray, v.types) +end #### setindex #### @propagate_inbounds Base.setindex!(s::AbstractDimStack, xs, I...; kw...) = @@ -160,22 +163,17 @@ end hassamedims(s) ? _map_setindex!(s, xs, i; kw...) : _setindex_mixed!(s, xs, i; kw...) 
@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, i::AbstractArray; kw...) = hassamedims(s) ? _map_setindex!(s, xs, i; kw...) : _setindex_mixed!(s, xs, i; kw...) +@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, i::DimensionIndsArrays; kw...) = + _map_setindex!(s, xs, i; kw...) +@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, I...; kw...) = + _map_setindex!(s, xs, I...; kw...) -@propagate_inbounds function Base.setindex!( - s::AbstractDimStack, xs::NamedTuple, I...; kw... -) - map((A, x) -> setindex!(A, x, I...; kw...), layers(s), xs) -end - -_map_setindex!(s, xs, i; kw...) = map((A, x) -> setindex!(A, x, i...; kw...), layers(s), xs) +_map_setindex!(s, xs, i...; kw...) = map((A, x) -> setindex!(A, x, i...; kw...), layers(s), xs) -_setindex_mixed!(s::AbstractDimStack, x, i::AbstractArray) = - map(A -> setindex!(A, x, DimIndices(dims(s))[i]), layers(s)) -_setindex_mixed!(s::AbstractDimStack, i::Integer) = - map(A -> setindex!(A, x, DimIndices(dims(s))[i]), layers(s)) -function _setindex_mixed!(s::AbstractDimStack, x, i::Colon) - map(DimIndices(dims(s))) do D - map(A -> setindex!(A, D), x, layers(s)) +function _setindex_mixed!(s::AbstractDimStack, xs::NamedTuple, i) + D = DimIndices(dims(s))[i] + map(layers(s), xs) do A, x + A[D] = x end end diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 92bb07963..0e212f1c8 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -153,7 +153,6 @@ Base.length(s::AbstractDimStack) = prod(size(s)) Base.axes(s::AbstractDimStack) = map(first ∘ axes, dims(s)) Base.axes(s::AbstractDimStack, dims::DimOrDimType) = axes(s, dimnum(s, dims)) Base.axes(s::AbstractDimStack, dims::Integer) = axes(s)[dims] -Base.similar(s::AbstractDimStack, args...) 
= maplayers(A -> similar(A, args...), s) Base.eltype(::AbstractDimStack{<:Any,T}) where T = T Base.ndims(::AbstractDimStack{<:Any,<:Any,N}) where N = N Base.CartesianIndices(s::AbstractDimStack) = CartesianIndices(dims(s)) @@ -197,6 +196,36 @@ Base.get(f::Base.Callable, st::AbstractDimStack, k::Symbol) = @propagate_inbounds Base.iterate(st::AbstractDimStack, i) = i > length(st) ? nothing : (st[DimIndices(st)[i]], i + 1) +Base.similar(s::AbstractDimStack) = similar(s, eltype(s)) +Base.similar(s::AbstractDimStack, dims::Dimension...) = similar(s, dims) +Base.similar(s::AbstractDimStack, ::Type{T},dims::Dimension...) where T = + similar(s, T, dims) +Base.similar(s::AbstractDimStack, dims::Tuple{Vararg{Dimension}}) = + similar(s, eltype(s), dims) +Base.similar(s::AbstractDimStack, ::Type{T}) where T = + similar(s, T, dims(s)) +function Base.similar(s::AbstractDimStack, ::Type{T}, dims::Tuple) where T + # Any dims not in the stack are added to all layers + ods = otherdims(dims, DD.dims(s)) + maplayers(s) do A + # Original layer dims are maintained, other dims are added + D = DD.commondims(dims, (DD.dims(A)..., ods...)) + similar(A, T, D) + end +end +function Base.similar(s::AbstractDimStack, ::Type{T}, dims::Tuple) where T<:NamedTuple + ods = otherdims(dims, DD.dims(s)) + maplayers(s, _nt_types(T)) do A, Tx + D = DD.commondims(dims, (DD.dims(A)..., ods...)) + similar(A, Tx, D) + end +end + +@generated function _nt_types(::Type{NamedTuple{K,T}}) where {K,T} + expr = Expr(:tuple, T.parameters...) + return :(NamedTuple{K}($expr)) +end + # `merge` for AbstractDimStack and NamedTuple. # One of the first three arguments must be an AbstractDimStack for dispatch to work. 
Base.merge(s::AbstractDimStack) = s diff --git a/src/utils.jl b/src/utils.jl index 1a5ad439e..0b8092f12 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -160,6 +160,11 @@ function broadcast_dims!(f, dest::AbstractDimArray{<:Any,N}, As::AbstractBasicDi od = map(A -> otherdims(dest, dims(A)), As) return _broadcast_dims_inner!(f, dest, As, od) end +function broadcast_dims!(f, dest::AbstractDimStack, stacks::AbstractDimStack...) + maplayers(dest, stacks...) do d, layers... + broadcast_dims!(f, d, layers...) + end +end # Function barrier function _broadcast_dims_inner!(f, dest, As, od) diff --git a/test/groupby.jl b/test/groupby.jl index 3f6e1f7ce..cd19274bd 100644 --- a/test/groupby.jl +++ b/test/groupby.jl @@ -8,7 +8,6 @@ days = DateTime(2000):Day(1):DateTime(2000, 12, 31) A = DimArray((1:6) * (1:366)', (X(1:0.2:2), Ti(days))) st = DimStack((a=A, b=A, c=A[X=1])) - @testset "group eltype matches indexed values" begin da = rand(X(1:10), Y(1:10)) grps = groupby(da, X => isodd) @@ -22,10 +21,16 @@ end mean(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1]) end @test mean.(groupby(A, Ti=>month)) == manualmeans + combinedmeans = combine(mean, groupby(A, Ti=>month)) + @test combinedmeans isa DimArray + @test combinedmeans == manualmeans manualmeans_st = map(months) do m mean(st[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1]) end @test mean.(groupby(st, Ti=>month)) == manualmeans_st + combinedmeans_st = combine(mean, groupby(st, Ti=>month)) + @test combinedmeans_st isa DimStack{(:a, :b, :c), @NamedTuple{a::Float64, b::Float64, c::Float64}} + @test collect(combinedmeans_st) == manualmeans_st manualsums = mapreduce(hcat, months) do m vcat(sum(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1, X=1 .. 
1.5]), @@ -36,6 +41,8 @@ end @test dims(gb_sum, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())) @test typeof(dims(gb_sum, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))) @test gb_sum == manualsums + combined_sum = combine(sum, groupby(A, Ti=>month, X => >(1.5))) + @test collect(combined_sum) == manualsums manualsums_st = mapreduce(hcat, months) do m vcat(sum(st[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1, X=1 .. 1.5]), @@ -46,10 +53,22 @@ end @test dims(gb_sum_st, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())) @test typeof(dims(gb_sum_st, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))) @test gb_sum_st == manualsums_st + combined_sum_st = combine(sum, groupby(st, Ti=>month, X => >(1.5))) + @test collect(combined_sum_st) == manualsums_st @test_throws ArgumentError groupby(st, Ti=>month, Y=>isodd) end +@testset "partial reductions in combine" begin + months = DateTime(2000):Month(1):DateTime(2000, 12, 31) + using BenchmarkTools + manualmeans = cat(map(months) do m + mean(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1]; dims=Ti) + end...; dims=Ti(collect(1:12))) + combinedmeans = combine(mean, groupby(A, Ti()=>month); dims=Ti()) + @test combinedmeans == manualmeans +end + @testset "bins" begin seasons = DateTime(2000):Month(3):DateTime(2000, 12, 31) manualmeans = map(seasons) do s @@ -59,6 +78,7 @@ end @test mean.(groupby(A, Ti=>Bins(month, ranges(1:3:12)))) == manualmeans @test mean.(groupby(A, Ti=>Bins(month, intervals(1:3:12)))) == manualmeans @test mean.(groupby(A, Ti=>Bins(month, 4))) == manualmeans + @test combine(mean, groupby(A, Ti=>Bins(month, ranges(1:3:12)))) == manualmeans end @testset "dimension matching groupby" begin @@ -75,9 +95,10 @@ end end @test all(collect(mean.(gb)) .=== manualmeans) 
@test all(mean.(gb) .=== manualmeans) + @test all(combine(mean, gb) .=== manualmeans) end -@testset "broadcastdims runs after groupby" begin +@testset "broadcast_dims runs after groupby" begin dimlist = ( Ti(Date("2021-12-01"):Day(1):Date("2022-12-31")), X(range(1, 10, length=10)), @@ -87,7 +108,7 @@ end data = rand(396, 10, 15, 2) A = DimArray(data, dimlist) month_length = DimArray(daysinmonth, dims(A, Ti)) - g_tempo = DimensionalData.groupby(month_length, Ti=>seasons(; start=December)) + g_tempo = DimensionalData.groupby(month_length, Ti => seasons(; start=December)) sum_days = sum.(g_tempo, dims=Ti) @test sum_days isa DimArray weights = map(./, g_tempo, sum_days) diff --git a/test/stack.jl b/test/stack.jl index cfde24449..ee6d2eac0 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -3,7 +3,7 @@ using DimensionalData, Test, LinearAlgebra, Statistics, ConstructionBase, Random using DimensionalData: data using DimensionalData: Sampled, Categorical, AutoLookup, NoLookup, Transformed, Regular, Irregular, Points, Intervals, Start, Center, End, - Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, layers, basedims + Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, layers, basedims, layerdims A = [1.0 2.0 3.0; 4.0 5.0 6.0] @@ -94,11 +94,23 @@ end @test all(maplayers(similar(mixed), mixed) do s, m dims(s) == dims(m) && dims(s) === dims(m) && eltype(s) === eltype(m) end) - @test eltype(similar(s, Int)) === @NamedTuple{one::Int, two::Int, three::Int} + @test eltype(similar(s, Int)) === + @NamedTuple{one::Int, two::Int, three::Int} + @test eltype(similar(s, @NamedTuple{one::Int, two::Float32, three::Bool})) === + @NamedTuple{one::Int, two::Float32, three::Bool} st2 = similar(mixed, Bool, x, y) @test dims(st2) === (x, y) @test dims(st2[:one]) === (x, y) @test eltype(st2) === @NamedTuple{one::Bool, two::Bool, extradim::Bool} + @test eltype(similar(mixed)) == eltype(mixed) + @test size(similar(mixed)) == size(mixed) + @test keys(similar(mixed)) == 
keys(mixed) + @test layerdims(similar(mixed)) == layerdims(mixed) + xy = (X(), Y()) + @test layerdims(similar(mixed, dims(mixed, (X, Y)))) == (one=xy, two=xy, extradim=xy) + st3 = similar(mixed, @NamedTuple{one::Int, two::Float32, extradim::Bool}, (Z([:a, :b, :c]), Ti(1:12), X(1:3))) + @test layerdims(st3) == (one=(Ti(), X()), two=(Ti(), X()), extradim=(Z(), Ti(), X())) + @test eltype(st3) == @NamedTuple{one::Int, two::Float32, extradim::Bool} end @testset "merge" begin From 4b8bd51c9759d5e5b34d68ac8db5fa45f2cb2771 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 26 Mar 2025 22:49:18 +0100 Subject: [PATCH 04/23] Breaking: `preservedims` in tables (#917) * add preservedims keyword to DimTable * add tests * Apply suggestions from code review Co-authored-by: Anshul Singhvi * tests, and fix DimSlices * better table docs * cleanup * test * indexing overhaul * fix similar and broadcast for basicdimarray * bugfix rebuildsliced * more indexing cleanup * cleanup similar and gubfix indexing * bugfixes * uncomment * fix doctests * just dont doctest unreproducable failures, for now * combine new Tables integrations * bugfix and cleanup show * bugfix and more tests for preservedims and mergedims --------- Co-authored-by: Anshul Singhvi --- src/DimensionalData.jl | 1 + src/Dimensions/dimension.jl | 1 + src/Lookups/Lookups.jl | 6 +- src/Lookups/beginend.jl | 2 + src/Lookups/lookup_arrays.jl | 11 +- src/Lookups/selector.jl | 9 + src/array/array.jl | 72 ++++-- src/array/broadcast.jl | 40 +++- src/array/indexing.jl | 189 +++++++-------- src/array/methods.jl | 5 +- src/dimindices.jl | 429 ++++++++++++++++++++--------------- src/groupby.jl | 140 ++++++++---- src/opaque.jl | 19 ++ src/stack/methods.jl | 4 +- src/tables.jl | 151 ++++++++---- test/dimindices.jl | 15 +- test/groupby.jl | 6 +- test/indexing.jl | 2 + test/runtests.jl | 1 - test/stack.jl | 2 +- test/tables.jl | 73 ++++-- 21 files changed, 742 insertions(+), 436 deletions(-) create mode 100644 src/opaque.jl diff 
--git a/src/DimensionalData.jl b/src/DimensionalData.jl index 0d04bdf65..009084c4e 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -116,6 +116,7 @@ include("tables.jl") include("plotrecipes.jl") include("utils.jl") include("set.jl") +include("opaque.jl") include("groupby.jl") include("precompile.jl") include("interface_tests.jl") diff --git a/src/Dimensions/dimension.jl b/src/Dimensions/dimension.jl index 38d07a631..6e554448c 100644 --- a/src/Dimensions/dimension.jl +++ b/src/Dimensions/dimension.jl @@ -277,6 +277,7 @@ Base.axes(d::Dimension, i) = axes(d)[i] Base.eachindex(d::Dimension) = eachindex(val(d)) Base.length(d::Dimension) = length(val(d)) Base.ndims(d::Dimension) = 0 +Base.parentindices(d::Dimension{<:AbstractArray}) = parentindices(parent(d)) Base.ndims(d::Dimension{<:AbstractArray}) = ndims(val(d)) Base.iterate(d::Dimension{<:AbstractArray}, args...) = iterate(lookup(d), args...) Base.first(d::Dimension) = val(d) diff --git a/src/Lookups/Lookups.jl b/src/Lookups/Lookups.jl index fb7220d87..a1dee6e06 100644 --- a/src/Lookups/Lookups.jl +++ b/src/Lookups/Lookups.jl @@ -59,17 +59,15 @@ export Unaligned, Transformed, ArrayLookup # Deprecated export LookupArray -const StandardIndices = Union{AbstractArray{<:Integer},Colon,Integer,CartesianIndex,CartesianIndices} - # As much as possible keyword rebuild is automatic rebuild(x; kw...) 
= ConstructionBase.setproperties(x, (; kw...)) -include("metadata.jl") include("lookup_traits.jl") +include("metadata.jl") include("lookup_arrays.jl") +include("beginend.jl") include("predicates.jl") include("selector.jl") -include("beginend.jl") include("indexing.jl") include("methods.jl") include("utils.jl") diff --git a/src/Lookups/beginend.jl b/src/Lookups/beginend.jl index 86fdf190b..a84e0ac70 100644 --- a/src/Lookups/beginend.jl +++ b/src/Lookups/beginend.jl @@ -45,6 +45,8 @@ Base.to_indices(A, inds, (r, args...)::Tuple{<:Union{Begin,End,<:LazyMath},Varar _to_index(inds, a::Int) = a _to_index(inds, ::Begin) = first(inds) _to_index(inds, ::End) = last(inds) +_to_index(inds, ::Type{Begin}) = first(inds) +_to_index(inds, ::Type{End}) = last(inds) _to_index(inds, l::LazyMath{End}) = l.f(last(inds)) _to_index(inds, l::LazyMath{Begin}) = l.f(first(inds)) diff --git a/src/Lookups/lookup_arrays.jl b/src/Lookups/lookup_arrays.jl index c72a600be..44d99c3eb 100644 --- a/src/Lookups/lookup_arrays.jl +++ b/src/Lookups/lookup_arrays.jl @@ -32,6 +32,7 @@ Base.first(l::Lookup) = first(parent(l)) Base.last(l::Lookup) = last(parent(l)) Base.firstindex(l::Lookup) = firstindex(parent(l)) Base.lastindex(l::Lookup) = lastindex(parent(l)) +Base.parentindices(l::Lookup) = parentindices(parent(l)) function Base.:(==)(l1::Lookup, l2::Lookup) basetypeof(l1) == basetypeof(l2) && parent(l1) == parent(l2) end @@ -159,11 +160,10 @@ NoLookup() = NoLookup(AutoValues()) rebuild(l::NoLookup; data=parent(l), kw...) = NoLookup(data) # Used in @d broadcasts -struct Length1NoLookup <: AbstractNoLookup end -Length1NoLookup(::AbstractVector) = Length1NoLookup() - -rebuild(l::Length1NoLookup; kw...) = Length1NoLookup() -Base.parent(::Length1NoLookup) = Base.OneTo(1) +struct Length1NoLookup{A<:AbstractUnitRange} <: AbstractNoLookup + data::A +end +Length1NoLookup() = Length1NoLookup(Base.OneTo(1)) """ AbstractSampled <: Aligned @@ -866,6 +866,7 @@ promote_first(x1, x2, xs...) 
= # Fallback NoLookup if not identical type promote_first(l1::Lookup) = l1 promote_first(l1::L, ls::L...) where L<:Lookup = rebuild(l1; metadata=NoMetadata) +promote_first(l1::L, ls::L...) where L<:AbstractNoLookup = l1 function promote_first(l1::Lookup, ls1::Lookup...) ls = _remove(Length1NoLookup, l1, ls1...) if length(ls) != length(ls1) + 1 diff --git a/src/Lookups/selector.jl b/src/Lookups/selector.jl index 01c6f6d7e..302120d62 100644 --- a/src/Lookups/selector.jl +++ b/src/Lookups/selector.jl @@ -1,3 +1,12 @@ +const StandardIndices = Union{ + AbstractArray{<:Integer}, + Colon, + Integer, + CartesianIndex, + CartesianIndices, + BeginEndRange, +} + struct SelectorError{L,S} <: Exception lookup::L selector::S diff --git a/src/array/array.jl b/src/array/array.jl index 0b776fa67..ec414e5ba 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -1,4 +1,5 @@ const IDim = Dimension{<:StandardIndices} +const MaybeDimTuple = Tuple{Vararg{Dimension}} """ AbstractBasicDimArray <: AbstractArray @@ -8,7 +9,7 @@ returns a `Tuple` of `Dimension` Only keyword `rebuild` is guaranteed to work with `AbstractBasicDimArray`. """ -abstract type AbstractBasicDimArray{T,N,D<:Tuple} <: AbstractArray{T,N} end +abstract type AbstractBasicDimArray{T,N,D<:MaybeDimTuple} <: AbstractArray{T,N} end const AbstractBasicDimVector = AbstractBasicDimArray{T,1} where T const AbstractBasicDimMatrix = AbstractBasicDimArray{T,2} where T @@ -94,10 +95,27 @@ metadata(A::AbstractDimArray) = A.metadata layerdims(A::AbstractDimArray) = basedims(A) -@inline rebuildsliced(A::AbstractBasicDimArray, args...) = rebuildsliced(getindex, A, args...) -@inline function rebuildsliced(f::Function, A::AbstractBasicDimArray, data::AbstractArray, I::Tuple, name=name(A)) +""" + rebuildsliced(f::Function, A::AbstractBasicDimArray, I) + +Rebuild `AbstractDimArray` where `f` is `getindex` , `view` or `dotview`. 
+ +This does not need to be defined for `AbstractDimArray`, as `f` +is simply called on the parent array, dims and refdims are sliced with `slicedims`, +and `rebuild` is called. + +However for custom `AbstractBasicDimArray`, `rebuildsliced` methods are needed +to define slicing behavior, as there not be a parent array. +""" +@propagate_inbounds rebuildsliced(A::AbstractBasicDimArray, args...) = rebuildsliced(getindex, A, args...) +@propagate_inbounds function rebuildsliced(f::Function, A::AbstractDimArray, I::Tuple, name=name(A)) + I1 = to_indices(A, I) + data = f(parent(A), I1...) + return rebuildsliced(f, A, data, I1, name) +end +@propagate_inbounds function rebuildsliced(f::Function, A::AbstractDimArray, data::AbstractArray, I::Tuple, name=name(A)) I1 = to_indices(A, I) - rebuild(A, data, slicedims(f, A, I1)..., name) + return rebuild(A, data, slicedims(f, A, I1)..., name) end # Array interface methods ###################################################### @@ -107,6 +125,7 @@ Base.axes(A::AbstractDimArray) = map(Dimensions.DimUnitRange, axes(parent(A)), d Base.iterate(A::AbstractDimArray, args...) = iterate(parent(A), args...) Base.IndexStyle(A::AbstractDimArray) = Base.IndexStyle(parent(A)) Base.parent(A::AbstractDimArray) = data(A) +Base.parentindices(A::AbstractDimArray) = parentindices(parent(A)) Base.vec(A::AbstractDimArray) = vec(parent(A)) # Only compare data and dim - metadata and refdims can be different Base.:(==)(A1::AbstractDimArray, A2::AbstractDimArray) = @@ -170,14 +189,6 @@ end # An alternative would be to fill missing dims with `Anon`, and keep existing # dims but strip the Lookup? It just seems a little complicated when the methods # below using DimTuple work better anyway. -Base.similar(A::AbstractDimArray, i::Integer, I::Vararg{Integer}; kw...) = - similar(A, eltype(A), (i, I...); kw...) -Base.similar(A::AbstractDimArray, I::Tuple{Int,Vararg{Int}}; kw...) = - similar(A, eltype(A), I; kw...) 
-Base.similar(A::AbstractDimArray, ::Type{T}, i::Integer, I::Vararg{Integer}; kw...) where T = - similar(A, T, (i, I...); kw...) -Base.similar(A::AbstractDimArray, ::Type{T}, I::Tuple{Int,Vararg{Int}}; kw...) where T = - similar(parent(A), T, I) const MaybeDimUnitRange = Union{Integer,Base.OneTo,Dimensions.DimUnitRange} # when all axes are DimUnitRanges we can return an `AbstractDimArray` @@ -256,14 +267,27 @@ function _similar(::Type{T}, shape::Tuple; kw...) where {T<:AbstractArray} end # With Dimensions we can return an `AbstractDimArray` -Base.similar(A::AbstractBasicDimArray, D::DimTuple; kw...) = Base.similar(A, eltype(A), D; kw...) -Base.similar(A::AbstractBasicDimArray, D::Dimension...; kw...) = Base.similar(A, eltype(A), D; kw...) -Base.similar(A::AbstractBasicDimArray, ::Type{T}, D::Dimension...; kw...) where T = - Base.similar(A, T, D; kw...) +Base.similar(A::AbstractBasicDimArray, d1::Dimension, D::Dimension...; kw...) = + Base.similar(A, eltype(A), (d1, D...); kw...) +Base.similar(A::AbstractBasicDimArray, ::Type{T}, d1::Dimension, D::Dimension...; kw...) where T = + Base.similar(A, T, (d1, D...); kw...) +Base.similar(A::AbstractBasicDimArray, D::DimTuple; kw...) = + Base.similar(A, eltype(A), D; kw...) +function Base.similar(A::AbstractBasicDimArray, ::Type{T}, D::DimTuple; kw...) where T + data = _arraytype(T)(undef, _dimlength(D)) + dimconstructor(D)(data, D; kw...) +end +function Base.similar(A::AbstractBasicDimArray, ::Type{T}, D::Tuple{}; + refdims=(), name=_noname(A), metadata=NoMetadata(), kw... +) where T + data = _arraytype(T)(undef, _dimlength(D)) + dimconstructor(D)(data, (); refdims, name, metadata, kw...) +end + function Base.similar(A::AbstractDimArray, ::Type{T}, D::DimTuple; refdims=(), name=_noname(A), metadata=NoMetadata(), kw... ) where T - data = similar(parent(A), T, _dimlength(D)) + data = _arraytype(T)(undef, _dimlength(D)) dims = _maybestripval(D) return rebuild(A; data, dims, refdims, metadata, name, kw...) 
end @@ -274,6 +298,20 @@ function Base.similar(A::AbstractDimArray, ::Type{T}, D::Tuple{}; rebuild(A; data, dims=(), refdims, metadata, name, kw...) end +Base.similar(A::AbstractBasicDimArray, shape::Int...; kw...) = + similar(A, eltype(A), shape; kw...) +Base.similar(A::AbstractBasicDimArray, shape::Tuple{Vararg{Int}}; kw...) = + similar(A, eltype(A), shape; kw...) +Base.similar(A::AbstractBasicDimArray, ::Type{T}, shape::Int...; kw...) where T = + similar(A, T, shape; kw...) +Base.similar(A::AbstractBasicDimArray, ::Type{T}, shape::Tuple{Vararg{Int}}; kw...) where T = + _arraytype(T)(undef, shape) +Base.similar(A::AbstractDimArray, ::Type{T}, shape::Tuple{Vararg{Int}}; kw...) where T = + similar(parent(A), T, shape) + +_arraytype(::Type{T}) where T = Array{T} +_arraytype(::Type{Bool}) = BitArray + # Keep the same type in `similar` _noname(A::AbstractBasicDimArray) = _noname(name(A)) _noname(s::String) = "" diff --git a/src/array/broadcast.jl b/src/array/broadcast.jl index b6f6f27d9..30b86383f 100644 --- a/src/array/broadcast.jl +++ b/src/array/broadcast.jl @@ -35,7 +35,9 @@ strict_broadcast!(x::Bool) = STRICT_BROADCAST_CHECKS[] = x # It preserves the dimension names. # `S` should be the `BroadcastStyle` of the wrapped type. # Copied from NamedDims.jl (thanks @oxinabox). 
-struct DimensionalStyle{S <: BroadcastStyle} <: AbstractArrayStyle{Any} end +struct BasicDimensionalStyle{N} <: AbstractArrayStyle{Any} end + +struct DimensionalStyle{S<:BroadcastStyle} <: AbstractArrayStyle{Any} end DimensionalStyle(::S) where {S} = DimensionalStyle{S}() DimensionalStyle(::S, ::Val{N}) where {S,N} = DimensionalStyle(S(Val(N))) DimensionalStyle(::Val{N}) where N = DimensionalStyle{DefaultArrayStyle{N}}() @@ -53,6 +55,8 @@ function BroadcastStyle(::Type{<:AbstractDimArray{T,N,D,A}}) where {T,N,D,A} inner_style = typeof(BroadcastStyle(A)) return DimensionalStyle{inner_style}() end +BroadcastStyle(::Type{<:AbstractBasicDimArray{T,N}}) where {T,N} = + BasicDimensionalStyle{N}() BroadcastStyle(::DimensionalStyle, ::Base.Broadcast.Unknown) = Unknown() BroadcastStyle(::Base.Broadcast.Unknown, ::DimensionalStyle) = Unknown() @@ -79,12 +83,31 @@ function Broadcast.copy(bc::Broadcasted{DimensionalStyle{S}}) where S dims = format(Dimensions.promotedims(bdims...; skip_length_one=true), data) return rebuild(A; data, dims, refdims=refdims(A), name=Symbol("")) end +function Broadcast.copy(bc::Broadcasted{BasicDimensionalStyle{N}}) where N + A = _firstdimarray(bc) + data = collect(bc) + A isa Nothing && return data # No AbstractDimArray + + bdims = _broadcasted_dims(bc) + _comparedims_broadcast(A, bdims...) + + data isa AbstractArray || return data # result is a scalar + + # Return an AbstractDimArray + dims = format(Dimensions.promotedims(bdims...; skip_length_one=true), data) + return dimconstructor(dims)(data, dims; refdims=refdims(A), name=Symbol("")) +end function Base.copyto!(dest::AbstractArray, bc::Broadcasted{DimensionalStyle{S}}) where S fda = _firstdimarray(bc) isnothing(fda) || _comparedims_broadcast(fda, _broadcasted_dims(bc)...) 
copyto!(dest, _unwrap_broadcasted(bc)) end +function Base.copyto!(dest::AbstractArray, bc::Broadcasted{BasicDimensionalStyle{N}}) where N + fda = _firstdimarray(bc) + isnothing(fda) || _comparedims_broadcast(fda, _broadcasted_dims(bc)...) + copyto!(dest, bc) +end @inline function Base.Broadcast.materialize!(dest::AbstractDimArray, bc::Base.Broadcast.Broadcasted{<:Any}) # Need to check whether the dims are compatible in dest, @@ -97,7 +120,15 @@ end function Base.similar(bc::Broadcast.Broadcasted{DimensionalStyle{S}}, ::Type{T}) where {S,T} A = _firstdimarray(bc) - rebuildsliced(A, similar(_unwrap_broadcasted(bc), T, axes(bc)...), axes(bc), Symbol("")) + data = similar(_unwrap_broadcasted(bc), T, size(bc)) + dims, refdims = slicedims(A, axes(bc)) + return rebuild(A; data, dims, refdims, name=Symbol("")) +end +function Base.similar(bc::Broadcast.Broadcasted{BasicDimensionalStyle{N}}, ::Type{T}) where {N,T} + A = _firstdimarray(bc) + data = similar(A, T, size(bc)) + dims, refdims = slicedims(A, axes(bc)) + return dimconstructor(dims)(data, dims; refdims, name=Symbol("")) end @@ -383,9 +414,10 @@ _unwrap_broadcasted(boda::BroadcastOptionsDimArray) = parent(parent(boda)) # Get the first dimensional array in the broadcast _firstdimarray(x::Broadcasted) = _firstdimarray(x.args) -_firstdimarray(x::Tuple{<:AbstractDimArray,Vararg}) = x[1] +_firstdimarray(x::Tuple{<:AbstractBasicDimArray,Vararg}) = x[1] +_firstdimarray(x::AbstractBasicDimArray) = x _firstdimarray(ext::Base.Broadcast.Extruded) = _firstdimarray(ext.x) -function _firstdimarray(x::Tuple{<:Broadcasted,Vararg}) +function _firstdimarray(x::Tuple{<:Union{Broadcasted,Base.Broadcast.Extruded},Vararg}) found = _firstdimarray(x[1]) if found isa Nothing _firstdimarray(tail(x)) diff --git a/src/array/indexing.jl b/src/array/indexing.jl index ecce5fbef..68dc41c28 100644 --- a/src/array/indexing.jl +++ b/src/array/indexing.jl @@ -1,112 +1,87 @@ -# getindex/view/setindex! 
====================================================== +const SelectorOrStandard = Union{SelectorOrInterval,StandardIndices} +const DimensionIndsArrays = Union{AbstractArray{<:Dimension},AbstractArray{<:DimTuple}} +const DimensionalIndices = Union{DimTuple,DimIndices,DimSelectors,Dimension,DimensionIndsArrays} +const _DimIndicesAmb = Union{AbstractArray{Union{}},DimIndices{<:Integer},DimSelectors{<:Integer}} +const IntegerOrCartesian = Union{Integer,CartesianIndex} -#### getindex/view #### +# getindex/view/setindex! ====================================================== for f in (:getindex, :view, :dotview) _dim_f = Symbol(:_dim_, f) + + # Integer indexing if f === :view - # No indices and we try to rebuild, for 0d - @eval @propagate_inbounds Base.view(A::AbstractDimArray) = rebuild(A, Base.view(parent(A)), ()) # With one Integer and 0d and 1d we try to rebuild - @eval @propagate_inbounds Base.$f(A::AbstractDimArray{<:Any,0}, i::Integer) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), i), (i,)) - @eval @propagate_inbounds Base.$f(A::AbstractDimVector, i::Integer) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), i), (i,)) + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray{<:Any,0}, i::Integer) = + rebuildsliced(Base.$f, A, (i,)) + # One Integer on a vector and we also rebuild + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::Integer) = + rebuildsliced(Base.$f, A, (i,)) + # More Integers and we rebuild + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::Integer, i2::Integer, I::Integer...) = + rebuildsliced(Base.$f, A, (i1, i2, I...)) # Otherwise its linear indexing, don't rebuild - @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i::Integer) = + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::Integer) = Base.$f(parent(A), i) - # More Integer and we rebuild again - @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i1::Integer, i2::Integer, I::Integer...) 
= - rebuildsliced(Base.$f, A, Base.$f(parent(A), i1, i2, I...), (i1, i2, I...)) - else - @eval @propagate_inbounds Base.$f(A::AbstractDimVector, i::Integer) = Base.$f(parent(A), i) - @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i::Integer) = Base.$f(parent(A), i) - @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i1::Integer, i2::Integer, I::Integer...) = - Base.$f(parent(A), i1, i2, I...) - @eval @propagate_inbounds Base.$f(A::AbstractDimArray) = Base.$f(parent(A)) end @eval begin - @propagate_inbounds Base.$f(A::AbstractDimVector, I::CartesianIndex) = + ### Standard indices + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, I::CartesianIndex) = Base.$f(A, to_indices(A, (I,))...) - @propagate_inbounds Base.$f(A::AbstractDimArray, I::CartesianIndex) = + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, I::CartesianIndex) = Base.$f(A, to_indices(A, (I,))...) - @propagate_inbounds Base.$f(A::AbstractDimVector, I::CartesianIndices) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), I), (I,)) - @propagate_inbounds Base.$f(A::AbstractDimArray, I::CartesianIndices) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), I), (I,)) - @propagate_inbounds function Base.$f(A::AbstractDimVector, i) - x = Base.$f(parent(A), Lookups._construct_types(i)) - if x isa AbstractArray - rebuildsliced(Base.$f, A, x, to_indices(A, (i,))) - else - x - end - end - @propagate_inbounds function Base.$f(A::AbstractDimArray, i1, i2, Is...) - I = Lookups._construct_types(i1, i2, Is...) - x = Base.$f(parent(A), I...) 
- if x isa AbstractArray - rebuildsliced(Base.$f, A, x, to_indices(A, I)) - else - x - end - end - # Linear indexing forwards to the parent array as it will break the dimensions - @propagate_inbounds Base.$f(A::AbstractDimArray, i::Union{Colon,AbstractArray{<:Integer}}) = - Base.$f(parent(A), i) - # Except 1D DimArrays - @propagate_inbounds Base.$f(A::AbstractDimVector, i::Union{Colon,AbstractArray{<:Integer}}) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), i), (i,)) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::SelectorOrInterval) = + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::IntegerOrCartesian, i2::IntegerOrCartesian, Is::IntegerOrCartesian...) = + Base.$f(A, to_indices(A, (i1, i2, Is...))...) + # 1D DimArrays dont need linear indexing + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::Union{Colon,AbstractArray{<:Integer}}) = + rebuildsliced(Base.$f, A, (i,)) + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, I::CartesianIndices) = rebuildsliced(Base.$f, A, (I,)) + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, I::CartesianIndices) = rebuildsliced(Base.$f, A, (I,)) + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::StandardIndices, i2::StandardIndices, Is::StandardIndices...) = + rebuildsliced(Base.$f, A, to_indices(A, (i1, i2, Is...))) + + ### Selector/Interval indexing + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::SelectorOrInterval) = Base.$f(A, dims2indices(A, (i,))...) - # Selector/Interval indexing - @propagate_inbounds Base.$f(A::AbstractDimArray, i1::SelectorOrStandard, i2::SelectorOrStandard, I::SelectorOrStandard...) = + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::SelectorOrStandard, i2::SelectorOrStandard, I::SelectorOrStandard...) = Base.$f(A, dims2indices(A, (i1, i2, I...))...) + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::Selector{<:Extents.Extent}) = + Base.$f(A, dims2indices(A, i)...) 
+ @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::Selector{<:Extents.Extent}) = + Base.$f(A, dims2indices(A, i)...) - @propagate_inbounds Base.$f(A::AbstractDimVector, extent::Union{Extents.Extent,Near{<:Extents.Extent},Touches{<:Extents.Extent}}) = - Base.$f(A, dims2indices(A, extent)...) - @propagate_inbounds Base.$f(A::AbstractDimArray, extent::Union{Extents.Extent,Near{<:Extents.Extent},Touches{<:Extents.Extent}}) = + # Extent indexing + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, extent::Extents.Extent) = Base.$f(A, dims2indices(A, extent)...) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, extent::Union{Extents.Extent,Near{<:Extents.Extent},Touches{<:Extents.Extent}}) = + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, extent::Extents.Extent) = Base.$f(A, dims2indices(A, extent)...) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, extent::Union{Extents.Extent,Near{<:Extents.Extent},Touches{<:Extents.Extent}}) = - Base.$f(A, dims2indices(A, extent)...) - # All Dimension indexing modes combined - @propagate_inbounds Base.$f(A::AbstractBasicDimArray; kw...) = - $_dim_f(A, _simplify_dim_indices(kw2dims(values(kw))...,)...) + + ### Dimension indexing + @propagate_inbounds function Base.$f(A::AbstractBasicDimArray; kw...) + # Need to use one method and check keywords to avoid method overwrites + if isempty(kw) + rebuildsliced(Base.$f, A, ()) + else + $_dim_f(A, _simplify_dim_indices(kw2dims(values(kw))...,)...) + end + end @propagate_inbounds Base.$f(A::AbstractBasicDimArray, d1::DimensionalIndices; kw...) = $_dim_f(A, _simplify_dim_indices(d1, kw2dims(values(kw))...)...) @propagate_inbounds Base.$f(A::AbstractBasicDimArray, d1::DimensionalIndices, d2::DimensionalIndices, D::DimensionalIndices...; kw...) = $_dim_f(A, _simplify_dim_indices(d1, d2, D..., kw2dims(values(kw))...)...) - @propagate_inbounds Base.$f(A::AbstractDimArray, i1::DimensionalIndices, i2::DimensionalIndices, I::DimensionalIndices...) 
= - $_dim_f(A, _simplify_dim_indices(i1, i2, I...)...) - @propagate_inbounds Base.$f(A::AbstractDimArray, i1::_DimIndicesAmb, i2::_DimIndicesAmb, I::_DimIndicesAmb...) = - $_dim_f(A, _simplify_dim_indices(i1, i2, I...)...) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::DimensionalIndices) = - $_dim_f(A, _simplify_dim_indices(i)...) @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::DimensionalIndices) = $_dim_f(A, _simplify_dim_indices(i)...) - # For ambiguity - @propagate_inbounds Base.$f(A::AbstractDimArray, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArray, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArray, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::_DimIndicesAmb) = $_dim_f(A, i) - - # Use underscore methods to minimise ambiguities - @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, ds::DimTuple) = - $_dim_f(A, ds...) + + # All dimension indexing is passed to these underscore methods to minimise ambiguities + @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, ds::DimTuple) = $_dim_f(A, ds...) @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, d1::Dimension, ds::Dimension...) = Base.$f(A, dims2indices(A, (d1, ds...))...) 
- @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, ds::Dimension...) = - Base.$f(A, dims2indices(A, ds)...) + # Regular non-dimensional indexing + @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, I...) = Base.$f(A, I...) + # Catch the edge case dims were passed but did not match - + # we want to index with all colons [:, :, ...], not [] + @propagate_inbounds $_dim_f(A::AbstractBasicDimArray{<:Any,N}) where N = + rebuildsliced(Base.$f, A, ntuple(i -> Colon(), Val(N))) @propagate_inbounds function $_dim_f( A::AbstractBasicDimArray, d1::Union{Dimension,DimensionIndsArrays}, @@ -114,35 +89,41 @@ for f in (:getindex, :view, :dotview) ) return merge_and_index(Base.$f, A, (d1, ds...)) end - end - # Standard indices - if f == :view - @eval @propagate_inbounds function Base.$f(A::AbstractDimArray, i1::StandardIndices, i2::StandardIndices, I::StandardIndices...) - I = to_indices(A, (i1, i2, I...)) - x = Base.$f(parent(A), I...) - rebuildsliced(Base.$f, A, x, I) - end - else - @eval @propagate_inbounds function Base.$f(A::AbstractDimArray, i1::StandardIndices, i2::StandardIndices, Is::StandardIndices...) - I = to_indices(A, (i1, i2, Is...)) - x = Base.$f(parent(A), I...) - all(i -> i isa Integer, I) ? x : rebuildsliced(Base.$f, A, x, I) + @propagate_inbounds function $_dim_f(A::AbstractBasicDimArray{<:Any,0}, d1::Dimension, ds::Dimension...) 
+ Dimensions._extradimswarn((d1, ds...)) + return rebuildsliced(Base.$f, A, ()) end end + + ##### AbstractDimArray only methods + # Here we know we can just index into the parent object + # Linear indexing forwards to the parent array as it will break the dimensions + # AbstractBasicDimArray must defined their own methods + @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i::Union{Colon,AbstractArray{<:Integer}}) = + Base.$f(parent(A), i) + # Except for AbstractDimVector + @eval @propagate_inbounds Base.$f(A::AbstractDimVector, i::Union{Colon,AbstractArray{<:Integer}}) = + rebuildsliced(Base.$f, A, (i,)) + if f in (:getindex, :dotview) + # We only define getindex with Integer on AbstractDimArray + # AbstractBasicDimArray must defined their own + @eval @propagate_inbounds Base.$f(A::AbstractDimVector, i::Integer) = Base.$f(parent(A), i) + @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i::Integer) = Base.$f(parent(A), i) + @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i1::Integer, i2::Integer, I::Integer...) = + Base.$f(parent(A), i1, i2, I...) + @eval @propagate_inbounds Base.$f(A::AbstractDimArray) = Base.$f(parent(A)) + end # Special case zero dimensional arrays being indexed with missing dims if f == :getindex # Catch this before the dimension is converted to () - @eval @propagate_inbounds function $_dim_f(A::AbstractDimArray{<:Any,0}) - return rebuild(A, fill(A[])) - end - @eval @propagate_inbounds function $_dim_f(A::AbstractDimArray{<:Any,0}, d1::Dimension, ds::Dimension...) + @eval $_dim_f(A::AbstractDimArray{<:Any,0}) = rebuild(A, fill(A[])) + @eval function $_dim_f(A::AbstractDimArray{<:Any,0}, d1::Dimension, ds::Dimension...) Dimensions._extradimswarn((d1, ds...)) return rebuild(A, fill(A[])) end end end - function merge_and_index(f, A, dims) dims, inds_arrays = _separate_dims_arrays(_simplify_dim_indices(dims...)...) # No arrays here, so abort (dispatch is tricky...) 
@@ -263,3 +244,7 @@ Base.@assume_effects :foldable @inline _simplify_dim_indices() = () view(A, args...; kw...) @propagate_inbounds Base.maybeview(A::AbstractDimArray, args::Vararg{Union{Number,Base.AbstractCartesianIndex}}; kw...) = view(A, args...; kw...) + +# We only own this to_indices dispatch for AbstractBasicDimArray +Base.to_indices(A::AbstractBasicDimArray, inds, (r, args...)::Tuple{<:Type,Vararg}) = + (Lookups._to_index(inds[1], r), to_indices(A, Base.tail(inds), args)...) \ No newline at end of file diff --git a/src/array/methods.jl b/src/array/methods.jl index 27a1cdc35..dec9f8c4f 100644 --- a/src/array/methods.jl +++ b/src/array/methods.jl @@ -90,7 +90,7 @@ end function Base.dropdims(A::AbstractDimArray; dims) dims = DD.dims(A, dims) data = Base.dropdims(parent(A); dims=dimnum(A, dims)) - rebuildsliced(A, data, _dropinds(A, dims)) + rebuildsliced(view, A, data, _dropinds(A, dims)) end @inline _dropinds(A, dims::Tuple) = dims2indices(A, map(d -> rebuild(d, 1), dims)) @@ -582,7 +582,8 @@ end r = axes(A) # Copied from Base.diff r0 = ntuple(i -> i == dims ? UnitRange(1, last(r[i]) - 1) : UnitRange(r[i]), N) - rebuildsliced(A, diff(parent(A); dims=dimnum(A, dims)), r0) + data = diff(parent(A); dims=dimnum(A, dims)) + rebuildsliced(getindex, A, data, r0) end # Forward `replace` to parent objects diff --git a/src/dimindices.jl b/src/dimindices.jl index 5b0779b18..fee1062b2 100644 --- a/src/dimindices.jl +++ b/src/dimindices.jl @@ -1,16 +1,21 @@ +""" + AbstractDimArrayGenerator <: AbstractBasicDimArray +Abstract supertype for all AbstractBasicDimArrays that +generate their `data` on demand during `getindex`. +""" abstract type AbstractDimArrayGenerator{T,N,D} <: AbstractBasicDimArray{T,N,D} end dims(dg::AbstractDimArrayGenerator) = dg.dims +# Dims that contribute to the element type. 
+# May be larger than `dims` after slicing +eldims(di::AbstractDimArrayGenerator) = dims((dims(di)..., refdims(di)...), orderdims(di)) +eldims(di::AbstractDimArrayGenerator, d) = dims(eldims(di), d) + Base.size(dg::AbstractDimArrayGenerator) = map(length, dims(dg)) Base.axes(dg::AbstractDimArrayGenerator) = map(d -> axes(d, 1), dims(dg)) -Base.similar(A::AbstractDimArrayGenerator, ::Type{T}, D::DimTuple) where T = - dimconstructor(D)(A; data=similar(Array{T}, size(D)), dims=D, refdims=(), metadata=NoMetadata()) -Base.similar(A::AbstractDimArrayGenerator, ::Type{T}, D::Tuple{}) where T = - dimconstructor(D)(A; data=similar(Array{T}, ()), dims=(), refdims=(), metadata=NoMetadata()) - @inline Base.permutedims(A::AbstractDimArrayGenerator{<:Any,2}) = rebuild(A; dims=reverse(dims(A))) @inline Base.permutedims(A::AbstractDimArrayGenerator{<:Any,1}) = @@ -25,7 +30,34 @@ end rebuild(A; dims=dims(dims(A), Tuple(perm))) end -abstract type AbstractDimIndices{T,N,D} <: AbstractDimArrayGenerator{T,N,D} end +""" + AbstractRebuildableDimArrayGenerator <: AbstractDimArrayGenerator + +Abstract supertype for all AbstractDimArrayGenerator that +can be rebuilt when subsetted with `view` or `getindex`. + +These arrays must have `dims` and `refdims` fields that defined the data +They do not need to define `rebuildsliced` methods as this is defined +as simply doing `slicedims` on `dims` and `refdims` and rebuilding. +""" +abstract type AbstractRebuildableDimArrayGenerator{T,N,D,R<:MaybeDimTuple} <: AbstractDimArrayGenerator{T,N,D} end + +refdims(A::AbstractRebuildableDimArrayGenerator) = A.refdims + +_refdims_firsts(A::AbstractRebuildableDimArrayGenerator) = map(d -> rebuild(d, first(d)), refdims(A)) + +# Custom rebuildsliced where data is ignored, and just dims and refdims are slices +# This makes sense for AbstractRebuildableDimArrayGenerator because Arrays are +# generated in getindex from the dims/refdims combination. 
+# `f` is ignored, and views are always used +@propagate_inbounds function rebuildsliced(f::Function, A::AbstractRebuildableDimArrayGenerator, I) + dims, refdims = slicedims(view, A, I) + return rebuild(A; dims, refdims) +end + +abstract type AbstractDimIndices{T,N,D,R,O<:MaybeDimTuple} <: AbstractRebuildableDimArrayGenerator{T,N,D,R} end + +orderdims(di::AbstractDimIndices) = di.orderdims """ DimIndices <: AbstractArray @@ -85,48 +117,52 @@ julia> A[di] # Index A with these indices 0.6 0.745673 0.692209 ``` """ -struct DimIndices{T,N,D<:Tuple{Vararg{Dimension}}} <: AbstractDimIndices{T,N,D} +struct DimIndices{T<:MaybeDimTuple,N,D,R,O} <: AbstractDimIndices{T,N,D,R,O} dims::D + refdims::R + orderdims::O # Manual inner constructor for ambiguity only - function DimIndices{T,N,D}(dims::Tuple{Vararg{Dimension}}) where {T,N,D<:Tuple{Vararg{Dimension}}} - new{T,N,D}(dims) + function DimIndices(dims::D, refdims::R, orderdims::O) where {D<:MaybeDimTuple,R<:MaybeDimTuple,O<:MaybeDimTuple} + eldims = DD.dims((dims..., refdims...), orderdims) + T = typeof(map(d -> rebuild(d, 1), eldims)) + N = length(dims) + new{T,N,D,R,O}(dims, refdims, orderdims) end end -function DimIndices(dims::D) where {D<:Tuple{Vararg{Dimension}}} - T = typeof(map(d -> rebuild(d, 1), dims)) - N = length(dims) - dims = N > 0 ? _dimindices_format(dims) : dims - DimIndices{T,N,typeof(dims)}(dims) +function DimIndices(dims::MaybeDimTuple) + dims = length(dims) > 0 ? _dimindices_format(dims) : dims + return DimIndices(dims, (), basedims(dims)) end DimIndices(x) = DimIndices(dims(x)) DimIndices(dim::Dimension) = DimIndices((dim,)) DimIndices(::Nothing) = throw(ArgumentError("Object has no `dims` method")) # Forces multiple indices not linear -function Base.getindex(di::DimIndices, i1::Integer, i2::Integer, I::Integer...) - map(dims(di), (i1, i2, I...)) do d, i +function Base.getindex(A::DimIndices, i1::Integer, i2::Integer, I::Integer...) 
+ dis = map(dims(A), (i1, i2, I...)) do d, i rebuild(d, d[i]) end + dims((dis..., _refdims_firsts(A)...), orderdims(A)) end # Dispatch to avoid linear indexing in multidimensional DimIndices -function Base.getindex(di::DimIndices{<:Any,1}, i::Integer) - d = dims(di, 1) - (rebuild(d, d[i]),) +function Base.getindex(A::DimIndices{<:Any,1}, i::Integer) + d = dims(A, 1) + di = rebuild(d, d[i]) + return dims((di, _refdims_firsts(A)...), orderdims(A)) end +Base.getindex(A::DimIndices{<:Any,0}) = dims(_refdims_firsts(A), orderdims(A)) _dimindices_format(dims::Tuple{}) = () _dimindices_format(dims::Tuple) = map(rebuild, dims, map(_dimindices_axis, dims)) -# Allow only CartesianIndices arguments _dimindices_axis(x::Integer) = Base.OneTo(x) _dimindices_axis(x::AbstractRange{<:Integer}) = x -# And Lookup, which we take the axes from _dimindices_axis(x::Dimension) = _dimindices_axis(val(x)) _dimindices_axis(x::Lookup) = axes(x, 1) _dimindices_axis(x) = throw(ArgumentError("`$x` is not a valid input for `DimIndices`. Use `Dimension`s wrapping `Integer`, `AbstractArange{<:Integer}`, or a `Lookup` (the `axes` will be used)")) -abstract type AbstractDimVals{T,N,D} <: AbstractDimIndices{T,N,D} end +abstract type AbstractDimVals{T,N,D,R,O} <: AbstractDimIndices{T,N,D,R,O} end (::Type{T})(::Nothing; kw...) where T<:AbstractDimVals = throw(ArgumentError("Object has no `dims` method")) (::Type{T})(x; kw...) where T<:AbstractDimVals = T(dims(x); kw...) @@ -150,34 +186,40 @@ that defines a `dims` method can be passed in. - `order`: determines the order of the points, the same as the order of `dims` by default. 
""" -struct DimPoints{T,N,D<:Tuple{Vararg{Dimension}},O} <: AbstractDimVals{T,N,D} +struct DimPoints{T<:Tuple,N,D,R,O} <: AbstractDimVals{T,N,D,R,O} dims::D - order::O + refdims::R + orderdims::O + function DimPoints(dims::D, refdims::R, orderdims::O) where {D<:MaybeDimTuple,R<:MaybeDimTuple,O<:MaybeDimTuple} + eldims = DD.dims((dims..., refdims...), orderdims) + T = Tuple{map(eltype, eldims)...} + N = length(dims) + new{T,N,D,R,O}(dims, refdims, orderdims) + end end DimPoints(dims::Tuple; order=dims) = DimPoints(dims, order) function DimPoints(dims::Tuple, order::Tuple) - order = map(d -> basetypeof(d)(), order) - T = Tuple{map(eltype, dims)...} - N = length(dims) - dims = N > 0 ? _format(dims) : dims - DimPoints{T,N,typeof(dims),typeof(order)}(dims, order) + dims = length(dims) > 0 ? format(dims) : dims + DimPoints(dims, (), basedims(order)) end -function Base.getindex(dp::DimPoints, i1::Integer, i2::Integer, I::Integer...) +function Base.getindex(A::DimPoints, i1::Integer, i2::Integer, I::Integer...) # Get dim-wrapped point values at i1, I... - pointdims = map(dims(dp), (i1, i2, I...)) do d, i + pointdims = map(dims(A), (i1, i2, I...)) do d, i rebuild(d, d[i]) end # Return the unwrapped point sorted by `order - return map(val, DD.dims(pointdims, dp.order)) + return map(val, DD.dims((pointdims..., _refdims_firsts(A)...), orderdims(A))) end -Base.getindex(di::DimPoints{<:Any,1}, i::Integer) = (dims(di, 1)[i],) - -_format(::Tuple{}) = () -function _format(dims::Tuple) - ax = map(d -> axes(val(d), 1), dims) - return format(dims, ax) +function Base.getindex(A::DimPoints{<:Any,1}, i::Integer) + # Get dim-wrapped point values at i1, I... 
+ d1 = dims(A, 1) + pointdim = rebuild(d1, d1[i]) + # Return the unwrapped point sorted by `order + D = dims((pointdim, _refdims_firsts(A)...), orderdims(A)) + return map(val, D) end +Base.getindex(A::DimPoints{<:Any,0}) = map(val, dims(_refdims_firsts(A), orderdims(A))) """ DimSelectors <: AbstractArray @@ -226,19 +268,49 @@ Using `At` would make sure we only use exact interpolation, while `Contains` with sampling of `Intervals` would make sure that each values is taken only from an Interval that is present in the lookups. """ -struct DimSelectors{T,N,D<:Tuple{Vararg{Dimension}},S<:Tuple} <: AbstractDimVals{T,N,D} +struct DimSelectors{T<:MaybeDimTuple,N,D,R,O,S<:Tuple} <: AbstractDimVals{T,N,D,R,O} dims::D + refdims::R + orderdims::O selectors::S + function DimSelectors(dims::D, refdims::R, orderdims::O, selectors::S) where {D<:Tuple,R<:Tuple,O<:Tuple,S<:Tuple} + eldims = DD.dims((dims..., refdims...), orderdims) + T = _selector_eltype(eldims, selectors) + N = length(dims) + new{T,N,D,R,O,S}(dims, refdims, orderdims, selectors) + end end -function DimSelectors(dims::Tuple{Vararg{Dimension}}; atol=nothing, selectors=At()) +function DimSelectors(dims::MaybeDimTuple; atol=nothing, selectors=At()) s = _format_selectors(dims, selectors, atol) DimSelectors(dims, s) end -function DimSelectors(dims::Tuple{Vararg{Dimension}}, selectors::Tuple) - T = _selector_eltype(dims, selectors) - N = length(dims) - dims = N > 0 ? _format(dims) : dims - DimSelectors{T,N,typeof(dims),typeof(selectors)}(dims, selectors) +function DimSelectors(dims::MaybeDimTuple, selectors::Tuple) + dims = length(dims) > 0 ? format(dims) : dims + orderdims = basedims(dims) + refdims = () + length(dims) == length(selectors) || throw(ArgumentError("`length(dims) must match `length(selectors)`, got $(length(dims)) and $(length(selectors))")) + DimSelectors(dims, refdims, orderdims, selectors) +end + +@propagate_inbounds function Base.getindex(A::DimSelectors, i1::Integer, i2::Integer, I::Integer...) 
+ D = map(dims(A), (i1, i2, I...)) do d, i + rebuild(d, d[i]) + end + return _rebuild_selectors(A, D) +end +@propagate_inbounds function Base.getindex(A::DimSelectors{<:Any,1}, i::Integer) + d1 = dims(A, 1) + d = rebuild(d1, d1[i]) + return _rebuild_selectors(A, (d,)) +end +@propagate_inbounds Base.getindex(A::DimSelectors{<:Any,0}) = + _rebuild_selectors(A, ()) + +function _rebuild_selectors(A, D) + sorteddims = dims((D..., _refdims_firsts(A)...), orderdims(A)) + map(sorteddims, A.selectors) do d, s + rebuild(d, rebuild(s; val=val(d))) + end end _selector_eltype(dims::Tuple, selectors::Tuple) = @@ -268,8 +340,7 @@ end _format_selectors(dims, selectors, map(_ -> atol, dims)) @inline _format_selectors(dims::Tuple, selectors::Tuple, atol::Tuple) = map(_format_selectors, dims, selectors, atol) - -_format_selectors(d::Dimension, T::Type, atol) = _format_selectors(d, T(), atol) +@inline _format_selectors(d::Dimension, T::Type, atol) = _format_selectors(d, T(), atol) @inline _format_selectors(d::Dimension, ::Near, atol) = Near(nothing) @inline _format_selectors(d::Dimension, ::Contains, atol) = Contains(nothing) @inline function _format_selectors(d::Dimension, at::At, atol) @@ -282,68 +353,127 @@ _atol(T::Type{<:AbstractFloat}, atol, ::Nothing) = atol _atol(T::Type{<:AbstractFloat}, ::Nothing, atol) = atol _atol(T::Type{<:AbstractFloat}, ::Nothing, ::Nothing) = eps(T) -@propagate_inbounds function Base.getindex(di::DimSelectors, i1::Integer, i2::Integer, I::Integer...) 
- map(dims(di), di.selectors, (i1, i2, I...)) do d, s, i - rebuild(d, rebuild(s; val=d[i])) # At selector with the value at i - end -end -@propagate_inbounds function Base.getindex(di::DimSelectors{<:Any,1}, i::Integer) - d = dims(di, 1) - (rebuild(d, rebuild(di.selectors[1]; val=d[i])),) -end - # Deprecated const DimKeys = DimSelectors -struct DimSlices{T,N,D<:Tuple{Vararg{Dimension}},P} <: AbstractDimArrayGenerator{T,N,D} +const SliceDim = Dimension{<:Union{<:AbstractVector{Int},<:AbstractVector{<:AbstractVector{Int}}}} + +""" + DimSlices <: AbstractRebuildableDimArrayGenerator + + DimSlices(x, dims; drop=true) + +A `Base.Slices` like object for returning view slices from a DimArray. + +This is used for `eachslice` on stacks. + +`dims` must be a `Tuple` of `Dimension` holding `AbstractVector{Int}` +or `AbstractVector{<:AbstractVector{Int}}`. + +# Keywords + +- `drop`: whether to drop dimensions from the outer array or keep the + same dimensions as the inner view, but with length 1. +""" +struct DimSlices{T,N,D,R,P,U} <: AbstractRebuildableDimArrayGenerator{T,N,D,R} _data::P dims::D + refdims::R + reduced::U end DimSlices(x; dims, drop=true) = DimSlices(x, dims; drop) -function DimSlices(x, dims; drop=true) - newdims = if length(dims) == 0 - map(d -> rebuild(d, :), DD.dims(x)) +DimSlices(x, dim; kw...) = DimSlices(x, (dim,); kw...) 
+function DimSlices(x, dims::Tuple; drop::Union{Bool,Nothing}=nothing) + dims = DD.dims(x, dims) + refdims = () + inds = if length(dims) == 0 + map(d -> rebuild(d, :), DD.dims(x)) + else + map(d -> rebuild(d, firstindex(d)), dims) + end + slicedims, reduced = if isnothing(drop) || drop + # We have to handle filling in colons for no dims because passing + # no dims at all is owned by base to mean A[] not A[D1(:), D2(:), D3(:)] + dims, () else - dims - end - inds = map(newdims) do d - rebuild(d, first(d)) - end - # `getindex` returns these views + # Get other dimensions as length 1 + reduced = map(otherdims(x, dims)) do o + reducedims(o) + end + # Re-sort to x dim order + slicedims = DD.dims((reduced..., dims...), DD.dims(x)) + slicedims, basedims(reduced) + end T = typeof(view(x, inds...)) - N = length(newdims) - D = typeof(newdims) - P = typeof(x) - return DimSlices{T,N,D,P}(x, newdims) + N = length(slicedims) + D = typeof(slicedims) + R = typeof(refdims) + A = typeof(x) + U = typeof(reduced) + return DimSlices{T,N,D,R,A,U}(x, slicedims, refdims, reduced) end -rebuild(ds::A; dims) where {A<:DimSlices{T,N}} where {T,N} = - DimSlices{T,N,typeof(dims),typeof(ds._data)}(ds._data, dims) +function rebuild(ds::DimSlices{T,N}; + dims::D, refdims::R, reduced::U=ds.reduced +) where {T,N,D,R,U} + A = typeof(ds._data) + DimSlices{T,N,D,R,A,U}(ds._data, dims, refdims, reduced) +end +@propagate_inbounds function rebuildsliced(::Function, A::DimSlices, I) + @boundscheck checkbounds(A, I...)
+ # We use `unsafe_view` to force always wrapping as a view, even for ranges + # Then in `_refdims_firsts` we can use `first(parentindices(d))` to get the offset + dims, refdims = slicedims(Base.unsafe_view, A, I) + return rebuild(A; dims, refdims) +end + +# We need to get the first index from the view, so define this custom for DimSlices +_refdims_firsts(A::DimSlices) = map(d -> rebuild(d, first(parentindices(d))), refdims(A)) function Base.summary(io::IO, A::DimSlices{T,N}) where {T,N} print_ndims(io, size(A)) print(io, string(nameof(typeof(A)), "{$(nameof(T)),$N}")) end -@propagate_inbounds function Base.getindex(ds::DimSlices, i1::Integer, i2::Integer, Is::Integer...) +@propagate_inbounds function Base.getindex(A::DimSlices, i1::Integer, i2::Integer, Is::Integer...) I = (i1, i2, Is...) - @boundscheck checkbounds(ds, I...) - D = map(dims(ds), I) do d, i - rebuild(d, d[i]) + D = map(dims(A), I) do d, i + i1 = if hasdim(A.reduced, d) + @boundscheck checkbounds(d, i) + Colon() + else + eachindex(d)[i] + end + return rebuild(d, i1) end - return view(ds._data, D...) + R = _refdims_firsts(A) + return view(A._data, D..., R...) end # Dispatch to avoid linear indexing in multidimensional DimIndices -@propagate_inbounds function Base.getindex(ds::DimSlices{<:Any,1}, i::Integer) - d = dims(ds, 1) - return view(ds._data, rebuild(d, d[i])) +@propagate_inbounds function Base.getindex(A::DimSlices{<:Any,1}, i::Integer) + d1 = dims(A, 1) + d = if hasdim(A.reduced, d1) + @boundscheck checkbounds(d1, i) + rebuild(d1, :) + else + rebuild(d1, eachindex(d1)[i]) + end + return view(A._data, d, _refdims_firsts(A)...) +end +@propagate_inbounds function Base.getindex(A::DimSlices{<:Any,0}) + R = _refdims_firsts(A) + # Need to manually force the Colons in case there are no dims at all + D = map(otherdims(A._data, R)) do d + rebuild(d, :) + end + view(A._data, D..., R...)
end # Extends the dimensions of any `AbstractBasicDimArray` # as if the array assigned into a larger array across all dimensions, # but without the copying. Theres is a cost for linear indexing these objects # as we need to convert to Cartesian. -struct DimExtensionArray{T,N,D<:Tuple{Vararg{Dimension}},R<:Tuple{Vararg{Dimension}},A<:AbstractBasicDimArray{T}} <: AbstractDimArrayGenerator{T,N,D} +struct DimExtensionArray{T,N,D<:MaybeDimTuple,R<:MaybeDimTuple,A<:AbstractBasicDimArray{T}} <: AbstractDimArrayGenerator{T,N,D} _data::A dims::D refdims::R @@ -361,71 +491,41 @@ DimExtensionArray(A::AbstractBasicDimArray, dims::Tuple; refdims=refdims(A)) = name(A::DimExtensionArray) = name(A._data) metadata(A::DimExtensionArray) = metadata(A._data) -# Indexing that returns a new object with the same number of dims -for f in (:getindex, :dotview, :view) - __f = Symbol(:__, f) - T = Union{Colon,AbstractRange} - # For ambiguity - @eval @propagate_inbounds function Base.$f(de::DimExtensionArray{<:Any,1}, i::Integer) - if ndims(parent(de)) == 0 - $f(de._data) - else - $f(de._data, i) - end - end - @eval @propagate_inbounds function Base.$f(di::DimExtensionArray{<:Any,1}, i::Union{AbstractRange,Colon}) - rebuild(di; _data=di.data[i], dims=(dims(di, 1)[i],)) - end - # For ambiguity - @eval @propagate_inbounds function Base.$f(de::DimExtensionArray, i1::$T, i2::$T, Is::$T...) - $__f(de, i1, i2, Is...) - end - @eval @propagate_inbounds function Base.$f(de::DimExtensionArray, i1::StandardIndices, i2::StandardIndices, Is::StandardIndices...) - $__f(de, i1, i2, Is...) 
- end - @eval @propagate_inbounds function Base.$f( - de::DimensionalData.DimExtensionArray, - i1::Union{AbstractArray{Union{}}, DimensionalData.DimIndices{<:Integer}, DimensionalData.DimSelectors{<:Integer}}, - i2::Union{AbstractArray{Union{}}, DimensionalData.DimIndices{<:Integer}, DimensionalData.DimSelectors{<:Integer}}, - Is::Vararg{Union{AbstractArray{Union{}}, DimensionalData.DimIndices{<:Integer}, DimensionalData.DimSelectors{<:Integer}}} - ) - $__f(de, i1, i2, Is...) - end - @eval Base.@assume_effects :foldable @propagate_inbounds function $__f(de::DimExtensionArray, i1, i2, Is...) - I = (i1, i2, Is...) - newdims, newrefdims = slicedims(dims(de), refdims(de), I) - D = map(rebuild, dims(de), I) - A = de._data - realdims = dims(D, dims(A)) - if all(map(d -> val(d) isa Colon, realdims)) - rebuild(de; dims=newdims, refdims=newrefdims) - else - newrealparent = begin - x = parent(A)[dims2indices(A, realdims)...] - x isa AbstractArray ? x : fill(x) - end - newrealdims = dims(newdims, realdims) - newdata = rebuild(A; data=newrealparent, dims=newrealdims) - rebuild(de; _data=newdata, dims=newdims, refdims=newrefdims) +@propagate_inbounds function rebuildsliced(f::Function, de::DimExtensionArray, I) + newdims, newrefdims = slicedims(dims(de), refdims(de), I) + D = map(rebuild, dims(de), I) + A = de._data + realdims = dims(D, dims(A)) + if all(map(d -> val(d) isa Colon, realdims)) + rebuild(de; dims=newdims, refdims=newrefdims) + else + newrealparent = begin + x = f(parent(A), dims2indices(A, realdims)...) + x isa AbstractArray ? 
x : fill(x) end - end - @eval @propagate_inbounds function $__f(de::DimExtensionArray{<:Any,1}, i::$T) - newdims, _ = slicedims(dims(de), (i,)) - A = de._data - D = rebuild(only(dims(de)), i) - rebuild(de; dims=newdims, _data=A[D...]) + newrealdims = dims(newdims, realdims) + newdata = rebuild(A; data=newrealparent, dims=newrealdims) + rebuild(de; _data=newdata, dims=newdims, refdims=newrefdims) end end -for f in (:getindex, :dotview) - __f = Symbol(:__, f) - @eval function $__f(de::DimExtensionArray, i1::Int, i2::Int, Is::Int...) - D = map(rebuild, dims(de), (i1, i2, Is...)) - A = de._data - return $f(A, dims(D, dims(A))...) - end - @eval $__f(de::DimExtensionArray{<:Any,1}, i::Int) = $f(de._data, rebuild(dims(de, 1), i)) +@propagate_inbounds function rebuildsliced( + f::Function, de::DimExtensionArray{<:Any,1}, I::Tuple{<:Union{Colon,AbstractRange}} +) + newdims, _ = slicedims(dims(de), I) + A = de._data + D = rebuild(only(dims(de)), only(I)) + rebuild(de; dims=newdims, _data=A[D...]) end +# Integer indexing +function Base.getindex(de::DimExtensionArray, i1::Integer, i2::Integer, Is::Integer...) + D = map(rebuild, dims(de), (i1, i2, Is...)) + A = de._data + return getindex(A, dims(D, dims(A))...) +end +Base.getindex(de::DimExtensionArray{<:Any,1}, i::Integer) = getindex(de._data, rebuild(dims(de, 1), i)) +Base.getindex(de::DimExtensionArray{<:Any,0}) = de._data[] + function mergedims(A::DimExtensionArray, dim_pairs::Pair...) all_dims = dims(A) dims_new = mergedims(all_dims, dim_pairs...) @@ -434,47 +534,4 @@ function mergedims(A::DimExtensionArray, dim_pairs::Pair...) 
Aperm = PermutedDimsArray(A, dims_perm) data_merged = reshape(parent(Aperm), map(length, dims_new)) return DimArray(data_merged, dims_new) -end - -const SelectorOrStandard = Union{SelectorOrInterval,StandardIndices} -const DimensionIndsArrays = Union{AbstractArray{<:Dimension},AbstractArray{<:DimTuple}} -const DimensionalIndices = Union{DimTuple,DimIndices,DimSelectors,Dimension,DimensionIndsArrays} -const _DimIndicesAmb = Union{AbstractArray{Union{}},DimIndices{<:Integer},DimSelectors{<:Integer}} - -# Indexing that returns a new object with the same number of dims -for f in (:getindex, :dotview, :view) - T = Union{Colon,AbstractVector} - _dim_f = Symbol(:_dim_, f) - @eval begin - @propagate_inbounds function Base.$f(di::AbstractDimArrayGenerator, i1::$T, i2::$T, Is::$T...) - I = (i1, i2, Is...) - newdims, _ = slicedims(dims(di), I) - rebuild(di; dims=newdims) - end - @propagate_inbounds function Base.$f( - di::AbstractDimArrayGenerator, - i1::DimensionalIndices, - i2::DimensionalIndices, - Is::DimensionalIndices... - ) - $_dim_f(di, i1, i2, Is...) 
- end - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator, i::DimensionalIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator{<:Any,1}, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator{<:Any,1}, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator{<:Any,1}, i::DimensionalIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator{<:Any,1}, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(di::AbstractDimArrayGenerator{<:Any,1}, i::$T) = - rebuild(di; dims=(dims(di, 1)[i],)) - @propagate_inbounds Base.$f(dg::AbstractDimArrayGenerator, i::Integer) = - Base.$f(dg, Tuple(CartesianIndices(dg)[i])...) - end - if f == :view - @eval @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator) = A - else - @eval @propagate_inbounds Base.$f(::AbstractDimArrayGenerator) = () - end -end +end \ No newline at end of file diff --git a/src/groupby.jl b/src/groupby.jl index bfcb63cdd..a73e5a9ca 100644 --- a/src/groupby.jl +++ b/src/groupby.jl @@ -11,24 +11,47 @@ This wrapper allows for specialisations on later broadcast or reducing operations, e.g. for chunk reading with DiskArrays.jl, because we know the data originates from a single array. 
""" -struct DimGroupByArray{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me} <: AbstractDimArray{T,N,D,A} - data::A +struct DimGroupByArray{T,N,D,R,A,Na,Me} <: AbstractRebuildableDimArrayGenerator{T,N,D,R} + _data::A dims::D refdims::R name::Na metadata::Me - function DimGroupByArray( - data::A, dims::D, refdims::R, name::Na, metadata::Me - ) where {D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me} where {T,N} - checkdims(data, dims) - new{T,N,D,R,A,Na,Me}(data, dims, refdims, name, metadata) - end +end +function DimGroupByArray(_data::A, dims::D, refdims::R, name::Na, metadata::Me) where {D,R,A,Na,Me} + T = typeof(view(_data, firstgroupinds(dims)...)) + N = length(dims) + DimGroupByArray{T,N,D,R,A,Na,Me}(_data, dims, refdims, name, metadata) end function DimGroupByArray(data::AbstractArray, dims::Union{Tuple,NamedTuple}; refdims=(), name=NoName(), metadata=NoMetadata() ) DimGroupByArray(data, format(dims, data), refdims, name, metadata) end + +name(A::DimGroupByArray) = A.name +metadata(A::DimGroupByArray) = A.metadata +refdims(A::DimGroupByArray) = A.refdims + +function groupinds(A::DimGroupByArray, I::Integer...) + # Get the group indices for each dimension + D = groupinds(dims(A), I...) + # And the indices in refdims + R = map(refdims(A)) do d + rebuild(d, only(hiddenparent(d))) + end + # And combine them, dim indexing will fix the order later + return (D..., R...) +end +function groupinds(dims::MaybeDimTuple, I::Integer...) + map(dims, I) do d, i + rebuild(d, hiddenparent(d)[i]) + end +end + +firstgroupinds(dims::MaybeDimTuple) = + groupinds(dims, ntuple(_ -> 1, length(dims))...) + @inline function rebuild( A::DimGroupByArray, data::AbstractArray, dims::Tuple, refdims::Tuple, name, metadata ) @@ -41,6 +64,18 @@ end rebuild(A, data, dims, refdims, name, metadata) # Rebuild as a regular DimArray end +Base.size(A::DimGroupByArray) = map(length, dims(A)) + +@propagate_inbounds function Base.getindex(A::DimGroupByArray, i1::Integer, i2::Integer, Is::Integer...) 
+ I = (i1, i2, Is...) + return view(A._data, groupinds(A, I...)...) +end +# Dispatch to avoid linear indexing in multidimensional DimIndices +@propagate_inbounds Base.getindex(A::DimGroupByArray{<:Any,1}, i::Integer) = + view(A._data, groupinds(A, i)...) +@propagate_inbounds Base.getindex(A::DimGroupByArray{<:Any,0}) = + view(A._data, groupinds(A)...) + function Base.summary(io::IO, A::DimGroupByArray{T,N}) where {T<:AbstractArray{T1,N1},N} where {T1,N1} print_ndims(io, size(A)) print(io, string(nameof(typeof(A)), "{$(nameof(T)){$T1,$N1},$N}")) @@ -83,18 +118,6 @@ function Base.show(io::IO, s::DimSummariser) end Base.alignment(io::IO, s::DimSummariser) = (textwidth(sprint(show, s)), 0) -# An array that doesn't know what it holds, to simplify dispatch -# It can also hold something that is not an AbstractArray itself. -struct OpaqueArray{T,N,P} <: AbstractArray{T,N} - parent::P -end -OpaqueArray(A::P) where P<:AbstractArray{T,N} where {T,N} = OpaqueArray{T,N,P}(A) -OpaqueArray(st::P) where P<:AbstractDimStack{<:Any,T,N} where {T,N} = OpaqueArray{T,N,P}(st) - -Base.size(A::OpaqueArray) = size(A.parent) -Base.getindex(A::OpaqueArray, args...) = Base.getindex(A.parent, args...) -Base.setindex!(A::OpaqueArray, args...) = Base.setindex!(A.parent, args...) 
- abstract type AbstractBins <: Function end @@ -252,6 +275,7 @@ julia> using DimensionalData, Dates julia> A = rand(X(1:0.1:20), Y(1:20), Ti(DateTime(2000):Day(3):DateTime(2003))); julia> groups = groupby(A, Ti => month) # Group by month +metadata = Dict{Symbol, Any}(:groupby => (:Ti => Dates.month)) ┌ 12-element DimGroupByArray{DimArray{Float64,3},1} ┐ ├───────────────────────────────────────────────────┴───────────── dims ┐ ↓ Ti Sampled{Int64} [1, 2, …, 11, 12] ForwardOrdered Irregular Points @@ -276,15 +300,16 @@ julia> groupmeans = mean.(groups) # Take the monthly mean ┌ 12-element DimArray{Float64, 1} ┐ ├─────────────────────────────────┴─────────────────────────────── dims ┐ ↓ Ti Sampled{Int64} [1, 2, …, 11, 12] ForwardOrdered Irregular Points -├───────────────────────────────────────────────────────────── metadata ┤ - Dict{Symbol, Any} with 1 entry: - :groupby => :Ti=>month └───────────────────────────────────────────────────────────────────────┘ 1 0.500064 2 0.499762 3 0.500083 4 0.499985 - ⋮ + 5 0.500511 + 6 0.500042 + 7 0.500003 + 8 0.500257 + 9 0.500868 10 0.500874 11 0.498704 12 0.50047 @@ -296,24 +321,29 @@ match after application of `mean`. 
```jldoctest groupby julia> map(.-, groupby(A, Ti=>month), mean.(groupby(A, Ti=>month), dims=Ti)); +metadata = Dict{Symbol, Any}(:groupby => (:Ti => Dates.month)) +metadata = Dict{Symbol, Any}(:groupby => (:Ti => Dates.month)) ``` +ups Or do something else with Y: ```jldoctest groupby julia> groupmeans = mean.(groupby(A, Ti=>month, Y=>isodd)) +metadata = Dict{Symbol, Any}(:groupby => (:Ti => Dates.month, :Y => isodd)) ┌ 12×2 DimArray{Float64, 2} ┐ ├───────────────────────────┴────────────────────────────────────── dims ┐ ↓ Ti Sampled{Int64} [1, 2, …, 11, 12] ForwardOrdered Irregular Points, → Y Sampled{Bool} [false, true] ForwardOrdered Irregular Points -├────────────────────────────────────────────────────────────── metadata ┤ - Dict{Symbol, Any} with 1 entry: - :groupby => (:Ti=>month, :Y=>isodd) └────────────────────────────────────────────────────────────────────────┘ ↓ → false true 1 0.499594 0.500533 2 0.498145 0.501379 + 3 0.499871 0.500296 + 4 0.500921 0.49905 ⋮ + 8 0.499599 0.500915 + 9 0.500715 0.501021 10 0.501105 0.500644 11 0.498606 0.498801 12 0.501643 0.499298 @@ -329,28 +359,43 @@ function DataAPI.groupby( end return groupby(A, dims) end -function DataAPI.groupby(A::DimArrayOrStack, dimfuncs::DimTuple) +function DataAPI.groupby(A::DimArrayOrStack, dimfuncs::DimTuple; name=:groupby) length(otherdims(dimfuncs, dims(A))) > 0 && Dimensions._extradimserror(otherdims(dimfuncs, dims(A))) # Get groups for each dimension - dim_groups_indices = map(dimfuncs) do d + group_dims = map(dimfuncs) do d _group_indices(dims(A, d), DD.val(d)) end - # Separate lookups dims from indices - group_dims = map(first, dim_groups_indices) - # Get indices for each group wrapped with dims for indexing - indices = map(rebuild, group_dims, map(last, dim_groups_indices)) - - # Hide that the parent is a DimSlices - views = OpaqueArray(DimSlices(A, indices)) # Put the groupby query in metadata - meta = map(d -> name(d) => val(d), dimfuncs) + meta = map(d -> DD.name(d) => val(d), 
dimfuncs) metadata = Dict{Symbol,Any}(:groupby => length(meta) == 1 ? only(meta) : meta) # Return a DimGroupByArray - return DimGroupByArray(views, format(group_dims, views), (), :groupby, metadata) + return DimGroupByArray(A, map(format, group_dims), (), name, metadata) +end + +# An array that holds another secret array that is indexed along with it. +# We use this to put groupings inside lookups so they are handled by `slicedims` +struct HiddenVector{T,A<:AbstractArray{T,1},H<:AbstractArray{<:Any,1}} <: AbstractArray{T,1} + data::A + hidden::H end +Base.getindex(A::HiddenVector, i::Int) = getindex(A.data, i) +Base.view(A::HiddenVector, i::Int) = + HiddenVector(view(A.data, i...), view(A.hidden, i)) +for f in (:view, :getindex) + @eval Base.$f(A::HiddenVector, i::Union{Colon,AbstractArray}) = + HiddenVector(Base.$f(A.data, i), Base.$f(A.hidden, i)) +end +Base.parent(A::HiddenVector) = A.hidden +Base.size(A::HiddenVector) = size(A.data) + +hiddenparent(A::HiddenVector) = A.hidden +hiddenparent(A::Dimension) = hiddenparent(parent(A)) +hiddenparent(A::AbstractArray) = hiddenparent(parent(A)) +hiddenparent(A::Array) = error("HiddenVector not found") + # Define the groups and find all the indices for values that fall in them function _group_indices(dim::Dimension, f::Base.Callable; labels=nothing) orig_lookup = lookup(dim) @@ -363,9 +408,12 @@ function _group_indices(dim::Dimension, f::Base.Callable; labels=nothing) push!(inds, i) end ps = sort!(collect(pairs(indices_dict))) - group_dim = format(rebuild(dim, _maybe_label(labels, first.(ps)))) + lookupvals = _maybe_label(labels, first.(ps)) indices = last.(ps) - return group_dim, indices + # We combine lookup values and indices into one array + lookup_and_indices = HiddenVector(lookupvals, indices) + # And wrap and format them as a dimension + return format(rebuild(dim, lookup_and_indices)) end function _group_indices(dim::Dimension, group_lookup::Lookup; labels=nothing) orig_lookup = lookup(dim) @@ -374,13 +422,15 @@
function _group_indices(dim::Dimension, group_lookup::Lookup; labels=nothing) n = selectindices(group_lookup, Contains(v); err=Lookups._False()) isnothing(n) || push!(indices[n], i) end - group_dim = if isnothing(labels) - rebuild(dim, group_lookup) + lookupvals = if isnothing(labels) + group_lookup else - label_lookup = _maybe_label(labels, group_lookup) - rebuild(dim, label_lookup) + _maybe_label(labels, group_lookup) end - return group_dim, indices + # We combine lookup values and indices into one array + lookup_and_indices = HiddenVector(lookupvals, indices) + # And wrap and format them as a dimension + return format(rebuild(dim, lookup_and_indices)) end function _group_indices(dim::Dimension, bins::AbstractBins; labels=bins.labels) l = lookup(dim) diff --git a/src/opaque.jl b/src/opaque.jl new file mode 100644 index 000000000..87c234f30 --- /dev/null +++ b/src/opaque.jl @@ -0,0 +1,19 @@ +# OpaqueArray is an array that doesn't know what it holds, to simplify dispatch. +# One key property is that `parent(A::OpaqueArray)` returns the `OpaqueArray` `A` +# not the array it holds. +# +# It is often used here to hide dimensional arrays that may be generated lazily, +# to force them to act like simple Arrays without dimensional properties. +# +# OpaqueArray can also hold something that is not an AbstractArray itself. +struct OpaqueArray{T,N,P} <: AbstractArray{T,N} + parent::P +end +OpaqueArray(A::P) where P<:AbstractArray{T,N} where {T,N} = OpaqueArray{T,N,P}(A) +OpaqueArray(st::P) where P<:AbstractDimStack{<:Any,T,N} where {T,N} = OpaqueArray{T,N,P}(st) + +Base.size(A::OpaqueArray) = size(A.parent) +Base.getindex(A::OpaqueArray, I::Union{StandardIndices,Not{<:StandardIndices}}...) = + Base.getindex(A.parent, I...) +Base.setindex!(A::OpaqueArray, x, I::Union{StandardIndices,Not{<:StandardIndices}}...) = + Base.setindex!(A.parent, x, I...)
\ No newline at end of file diff --git a/src/stack/methods.jl b/src/stack/methods.jl index 7e684ba84..5c9659972 100644 --- a/src/stack/methods.jl +++ b/src/stack/methods.jl @@ -68,8 +68,8 @@ julia> size(slices) (4, 2) julia> map(dims, axes(slices)) -(↓ Z Base.OneTo(4), -→ X Base.OneTo(2)) +(↓ Z, +→ X Categorical{Symbol} [:x1, :x2] ForwardOrdered) julia> first(slices) ┌ 3×5 DimStack ┐ diff --git a/src/tables.jl b/src/tables.jl index d158b2e1c..ef0c560d2 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -1,3 +1,22 @@ +# This lets us switch an array of NamedTuple to a NamedTuple of arrays +struct LayerArray{K,T,N,A} <: AbstractArray{T,N} + data::A +end +function LayerArray{K}(a::A) where {A<:AbstractArray{<:NamedTuple,N}} where {K,N} + T = typeof(a[1][K]) + LayerArray{K,T,N,A}(a) +end +Base.parent(A::LayerArray) = parent(A.data) +Base.size(A::LayerArray) = size(parent(A)) +@propagate_inbounds Base.getindex(A::LayerArray{K}, I::Integer...) where K = + getproperty(getindex(parent(A), I...), K) + +function layerarrays(A::AbstractDimArray{<:NamedTuple{K}}) where K + map(K) do k + rebuild(A; data=LayerArray{k}(A)) + end |> NamedTuple{K} +end + """ AbstractDimTable <: Tables.AbstractColumns @@ -31,7 +50,7 @@ function _colnames(A::AbstractDimArray) n = Symbol(name(A)) == Symbol("") ? :value : Symbol(name(A)) (map(name, dims(A))..., n) end -_colnames(A::AbstractDimVector{T}) where T<:NamedTuple = +_colnames(A::AbstractDimArray{T}) where T<:NamedTuple = (map(name, dims(A))..., _colnames(T)...) _colnames(::Type{<:NamedTuple{Keys}}) where Keys = Keys @@ -59,39 +78,48 @@ To get dimension columns, you can index with `Dimension` (`X()`) or # Keywords - `mergedims`: Combine two or more dimensions into a new dimension. -- `layersfrom`: Treat a dimension of an `AbstractDimArray` as layers of an `AbstractDimStack`. +- `preservedims`: Preserve one or more dimensions from flattening into the table.
+ `DimArray`s of views with these dimensions will be present in the layer column, + rather than scalar values. +- `layersfrom`: Treat a dimension of an `AbstractDimArray` as layers of an `AbstractDimStack` + by specifying a dimension to use as layers. # Example -```jldoctest +Here we generate a GeoInterface.jl compatible table with `:geometry` +column made of `(X, Y)` points, and data columns from `:band` slices. + +```julia julia> using DimensionalData, Tables -julia> a = DimArray(ones(16, 16, 3), (X, Y, Dim{:band})) -┌ 16×16×3 DimArray{Float64, 3} ┐ -├──────────────────────── dims ┤ - ↓ X, → Y, ↗ band -└──────────────────────────────┘ -[:, :, 1] - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - -julia> +julia> A = ones(X(4), Y(3), Dim{:band}('a':'d'); name=:data); +julia> DimTable(A; layersfrom=:band, mergedims=(X, Y)=>:geometry) +DimTable with 12 rows, 5 columns, and schema: + :geometry Tuple{Int64, Int64} + :band_a Float64 + :band_b Float64 + :band_c Float64 + :band_d Float64 ``` + +And here bands for each X/Y position are kept 
as vectors, using `preservedims`. +This may be useful if e.g. bands are color components of spectral images. + +```julia +julia> DimTable(A; preservedims=:band) +DimTable with 12 rows, 3 columns, and schema: + :X … Int64 + :Y Int64 + :data DimVector{Float64, Tuple{Dim{:band, Categorical{Char, StepRange{Char, Int64}, ForwardOrdered, NoMetadata}}}, Tuple{X{NoLookup{UnitRange{Int64}}}, Y{NoLookup{UnitRange{Int64}}}}, SubArray{Float64, 1, Array{Float64, 3}, Tuple{Int64, Int64, Slice{OneTo{Int64}}}, true}, Symbol, NoMetadata} (alias for DimArray{Float64, 1, Tuple{Dim{:band, DimensionalData.Dimensions.Lookups.Categorical{Char, StepRange{Char, Int64}, DimensionalData.Dimensions.Lookups.ForwardOrdered, DimensionalData.Dimensions.Lookups.NoMetadata}}}, Tuple{X{DimensionalData.Dimensions.Lookups.NoLookup{UnitRange{Int64}}}, Y{DimensionalData.Dimensions.Lookups.NoLookup{UnitRange{Int64}}}}, SubArray{Float64, 1, Array{Float64, 3}, Tuple{Int64, Int64, Base.Slice{Base.OneTo{Int64}}}, true}, Symbol, DimensionalData.Dimensions.Lookups.NoMetadata}) + +```julia +julia> DimTable(A) +DimTable with 48 rows, 4 columns, and schema: + :X Int64 + :Y Int64 + :band Char + :data Float64 """ struct DimTable{Mode} <: AbstractDimTable parent::Union{AbstractDimArray,AbstractDimStack} @@ -99,11 +127,18 @@ struct DimTable{Mode} <: AbstractDimTable dimcolumns::Vector{AbstractVector} dimarraycolumns::Vector end - -function DimTable(s::AbstractDimStack; +function DimTable(s::AbstractDimStack; mergedims=nothing, + preservedims=nothing, ) s = isnothing(mergedims) ? 
s : DD.mergedims(s, mergedims) + s = if isnothing(preservedims) + s + else + maplayers(s) do A + _maybe_presevedims(A, preservedims) + end + end dimcolumns = collect(_dimcolumns(s)) dimarraycolumns = if hassamedims(s) map(vec, layers(s)) @@ -113,16 +148,24 @@ function DimTable(s::AbstractDimStack; keys = collect(_colnames(s)) return DimTable{Columns}(s, keys, dimcolumns, dimarraycolumns) end -function DimTable(As::Vararg{AbstractDimArray}; - layernames=nothing, - mergedims=nothing, +function DimTable(As::AbstractVector{<:AbstractDimArray}; + layernames=nothing, + mergedims=nothing, + preservedims=nothing, ) # Check that dims are compatible - comparedims(As...) + comparedims(As) # Construct Layer Names layernames = isnothing(layernames) ? uniquekeys(As) : layernames # Construct dimension and array columns with DimExtensionArray - As = isnothing(mergedims) ? As : map(x -> DD.mergedims(x, mergedims), As) + As = isnothing(mergedims) ? As : map(x -> DimensionalData.mergedims(x, mergedims), As) + As = if isnothing(preservedims) + As + else + map(As) do A + _maybe_presevedims(A, preservedims) + end + end dims_ = dims(first(As)) dimcolumns = collect(_dimcolumns(dims_)) dimnames = collect(map(name, dims_)) @@ -132,9 +175,10 @@ function DimTable(As::Vararg{AbstractDimArray}; # Return DimTable return DimTable{Columns}(first(As), colnames, dimcolumns, dimarraycolumns) end -function DimTable(A::AbstractDimArray; - layersfrom=nothing, - mergedims=nothing, +function DimTable(A::AbstractDimArray; + layersfrom=nothing, + mergedims=nothing, + preservedims=nothing, ) if !isnothing(layersfrom) && any(hasdim(A, layersfrom)) d = dims(A, layersfrom) @@ -145,21 +189,36 @@ function DimTable(A::AbstractDimArray; else Symbol.(("$(name(d))_$i" for i in 1:nlayers)) end - return DimTable(layers...; layernames, mergedims) + return DimTable(layers; layernames, mergedims, preservedims) else - A = isnothing(mergedims) ? 
A : DD.mergedims(A, mergedims) - dimcolumns = collect(_dimcolumns(A)) - colnames = collect(_colnames(A)) - if (ndims(A) == 1) && (eltype(A) <: NamedTuple) - dimarrayrows = parent(A) - return DimTable{Rows}(A, colnames, dimcolumns, dimarrayrows) + A1 = isnothing(mergedims) ? A : DD.mergedims(A, mergedims) + if eltype(A1) <: NamedTuple + if isnothing(preservedims) + dimcolumns = collect(_dimcolumns(A1)) + colnames = collect(_colnames(A1)) + dimarrayrows = vec(parent(A1)) + return DimTable{Rows}(A1, colnames, dimcolumns, dimarrayrows) + else + las = layerarrays(A1) + layernames = collect(keys(las)) + return DimTable(collect(las); layernames, mergedims, preservedims) + end else - dimarraycolumns = [vec(parent(A))] - return DimTable{Columns}(A, colnames, dimcolumns, dimarraycolumns) + A2 = _maybe_presevedims(A1, preservedims) + dimcolumns = collect(_dimcolumns(A2)) + colnames = collect(_colnames(A2)) + dimarraycolumns = [vec(parent(A2))] + return DimTable{Columns}(A2, colnames, dimcolumns, dimarraycolumns) end end end +_maybe_presevedims(A, preservedims::Nothing) = A +function _maybe_presevedims(A, preservedims) + S = DimSlices(A; dims=otherdims(A, preservedims)) + rebuild(A; data=OpaqueArray(S), dims=dims(S)) +end + _dimcolumns(x) = map(d -> _dimcolumn(x, d), dims(x)) function _dimcolumn(x, d::Dimension) lookupvals = parent(lookup(d)) diff --git a/test/dimindices.jl b/test/dimindices.jl index ed7a59bc3..7a7311feb 100644 --- a/test/dimindices.jl +++ b/test/dimindices.jl @@ -38,7 +38,7 @@ A = zeros(X(4.0:7.0), Y(10.0:12.0)) @test @inferred size(A1[di[2:4, 1:2], Ti=1]) == (3, 2) @test @inferred A1[di] isa DimArray{Float64,3} @test @inferred A1[X=1][di] isa DimArray{Float64,2} - @test @inferred A1[X=1, Y=1][di] isa DimArray{Float64,1} + @test @inferred A1[X=1, Y=1] isa DimArray{Float64,1} # Indexing with no matching dims still returns a DimArray @test @inferred view(A1, X=1, Y=1, Ti=1)[di] == fill(0.0) @@ -207,11 +207,16 @@ end @testset "DimSlices" begin A = 
DimArray(((1:4) * (1:3)'), (X(4.0:7.0), Y(10.0:12.0)); name=:foo) - axisdims = map(dims(A, (X,))) do d - rebuild(d, axes(lookup(d), 1)) - end - ds = DimensionalData.DimSlices(A; dims=axisdims) + ds = DimensionalData.DimSlices(A; dims=X) + @test ds == ds[X=:] + # Works just like Slices + @test sum(ds) == sum(eachslice(A; dims=X)) @test ds == ds[X=:] + @test ds[X=At(7.0)] == [4, 8, 12] # Works just like Slices @test sum(ds) == sum(eachslice(A; dims=X)) + @test axes(ds) == axes(eachslice(A; dims=X)) + ds0 = DimensionalData.DimSlices(A; dims=()); + @test sum(ds0) == sum(eachslice(parent(A); dims=())) + @test axes(ds0) == axes(eachslice(parent(A); dims=())) end diff --git a/test/groupby.jl b/test/groupby.jl index cd19274bd..4e6c53f8c 100644 --- a/test/groupby.jl +++ b/test/groupby.jl @@ -39,7 +39,8 @@ end end |> permutedims gb_sum = sum.(groupby(A, Ti=>month, X => >(1.5))) @test dims(gb_sum, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())) - @test typeof(dims(gb_sum, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))) + @test typeof(dims(gb_sum, X)) == + X{Sampled{Bool, DimensionalData.HiddenVector{Bool, BitVector, Vector{Vector{Int64}}}, ForwardOrdered, Irregular{Tuple{Nothing, Nothing}}, Points, NoMetadata}} @test gb_sum == manualsums combined_sum = combine(sum, groupby(A, Ti=>month, X => >(1.5))) @test collect(combined_sum) == manualsums @@ -51,7 +52,6 @@ end end |> permutedims gb_sum_st = sum.(groupby(st, Ti=>month, X => >(1.5))) @test dims(gb_sum_st, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())) - @test typeof(dims(gb_sum_st, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))) @test gb_sum_st == manualsums_st combined_sum_st = combine(sum, groupby(st, Ti=>month, X => >(1.5))) @test collect(combined_sum_st) == 
manualsums_st @@ -87,7 +87,7 @@ end B = rand(X(xs; sampling=Intervals(Start())), Ti(dates; sampling=Intervals(Start()))) gb = groupby(A, B) @test size(gb) === size(B) === size(mean.(gb)) - @test dims(gb) === dims(B) === dims(mean.(gb)) + @test parent(lookup(gb, X)) == parent(lookup(B, X)) == parent(lookup(mean.(gb), X)) manualmeans = mapreduce(hcat, intervals(dates)) do d map(intervals(xs)) do x mean(A[X=x, Ti=d]) diff --git a/test/indexing.jl b/test/indexing.jl index b2b9d147b..cf18a682a 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -208,6 +208,7 @@ end @test b == da[1:2:end] == da[Begin:2:End] v = @inferred da[1, :] + @test v isa DimVector @test @inferred v[1:2] isa DimArray @test @inferred v[rand(Bool, length(v))] isa DimArray b = v[[!iseven(i) for i in 1:length(v)]] @@ -500,6 +501,7 @@ end @testset "mixed dimensions" begin a = [[1 2 3; 4 5 6];;; [11 12 13; 14 15 16];;;] da = DimArray(a, (X(143.0:2:145.0), Y(-38.0:-36.0), Ti(100:100:200)); name=:test) + da[1, End(), Begin(), 1] da[Ti=1, DimIndices(da[Ti=1])] da[DimIndices(da[Ti=1]), Ti(2)] da[DimIndices(da[Ti=1])[:], Ti(2)] diff --git a/test/runtests.jl b/test/runtests.jl index 25330be32..18962daf4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,7 +19,6 @@ end @time @safetestset "merged" begin include("merged.jl") end @time @safetestset "DimUnitRange" begin include("dimunitrange.jl") end @time @safetestset "format" begin include("format.jl") end - @time @safetestset "array" begin include("array.jl") end @time @safetestset "stack" begin include("stack.jl") end @time @safetestset "indexing" begin include("indexing.jl") end diff --git a/test/stack.jl b/test/stack.jl index ee6d2eac0..1d7d36242 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -3,7 +3,7 @@ using DimensionalData, Test, LinearAlgebra, Statistics, ConstructionBase, Random using DimensionalData: data using DimensionalData: Sampled, Categorical, AutoLookup, NoLookup, Transformed, Regular, Irregular, Points, Intervals, Start, Center, 
End, - Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, layers, basedims, layerdims + Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, basedims, layerdims, layers, metadata A = [1.0 2.0 3.0; 4.0 5.0 6.0] diff --git a/test/tables.jl b/test/tables.jl index f5ea708db..26466fb4f 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -1,9 +1,10 @@ +using DataFrames +using Dates using DimensionalData -using Test -using Tables using IteratorInterfaceExtensions using TableTraits -using DataFrames +using Tables +using Test using DimensionalData.Lookups, DimensionalData.Dimensions using DimensionalData: DimTable, DimExtensionArray @@ -149,21 +150,67 @@ end @testset "DimTable mergelayers" begin a = DimStack([DimArray(rand(32, 32, 3), (X,Y,Ti)) for _ in 1:3]) - b = DimArray(rand(32, 32, 3), (X,Y,Dim{:band})) - t1 = DimTable(a, mergedims=(:X,:Y)=>:geometry) - t2 = DimTable(a, mergedims=(:X,:Y,:Z)=>:geometry) # Merge missing dimension - t3 = DimTable(a, mergedims=(X,:Y,Ti)=>:dimensions) # Mix symbols and dimensions - t4 = DimTable(b, mergedims=(:X,:Y)=>:geometry) # Test DimArray + b = DimArray(rand(32, 32, 3), (X, Y, Dim{:band})) + t1 = DimTable(a, mergedims=(:X, :Y) => :geometry) + t2 = DimTable(a, mergedims=(:X, :Y, :Z) => :geometry) # Merge missing dimension + t3 = DimTable(a, mergedims=(X, :Y, Ti) => :dimensions) # Mix symbols and dimensions + t4 = DimTable(b, mergedims=(:X, :Y) => :geometry) # Test DimArray @test Tables.columnnames(t1) == (:Ti, :geometry, :layer1, :layer2, :layer3) @test Tables.columnnames(t2) == (:Ti, :geometry, :layer1, :layer2, :layer3) @test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3) @test Tables.columnnames(t4) == (:band, :geometry, :value) end +@testset "DimTable preservedims" begin + x, y, t = X(1.0:32.0), Y(1.0:10.0), Ti(DateTime.([2001, 2002, 2003])) + st = DimStack([rand(x, y, t; name) for name in [:a, :b, :c]]) + A = rand(x, y, Dim{:band}(1:3); name=:vals) + t1 = DimTable(st, preservedims=(X, 
Y)) + a3 = Tables.getcolumn(t1, :a)[3] + @test Tables.columnnames(t1) == propertynames(t1) == (:Ti, :a, :b, :c) + @test a3 == st.a[Ti=3] + @test dims(a3) == dims(st, (X, Y)) + t2 = DimTable(A; preservedims=:band) + val10 = Tables.getcolumn(t2, :vals)[10] + @test Tables.columnnames(t2) == propertynames(t2) == (:X, :Y, :vals) + @test val10 == A[X(10), Y(1)] + @test dims(val10) == dims(A, (:band,)) + @testset "preservedims with mergedims" begin + t3 = DimTable(A; mergedims=(X, Y) => :geometry, preservedims=:band) + @test only(dims(t3)) isa Dim{:geometry} + @test Tables.getcolumn(t2, :vals)[1] isa DimArray + end +end + @testset "DimTable NamedTuple" begin - da = DimArray([(; a=1.0f0i, b=2.0i) for i in 1:10], X) - t = DimTable(da) - s = Tables.schema(t) - @test s.names == (:X, :a, :b) - @test s.types == (Int, Float32, Float64) + @testset "Vector of NamedTuple" begin + da = DimArray([(; a=1.0f0i, b=2.0i) for i in 1:10], X) + t = DimTable(da) + s = Tables.schema(t) + @test s.names == (:X, :a, :b) + @test s.types == (Int, Float32, Float64) + @test all(t.a .=== 1.0f0:10.0f0) + @test all(t.b .=== 2.0:2.0:20.0) + end + + @testset "Matrix of NamedTuple" begin + da = [(; a=1.0f0x*y, b=2.0x*y) for x in X(1:10), y in Y(1:5)] + t = DimTable(da); + s = Tables.schema(t) + @test s.names == (:X, :Y, :a, :b) + @test s.types == (Int, Int, Float32, Float64) + @test all(t.a .=== reduce(vcat, [1.0f0y:y:10.0f0y for y in 1:5])) + @test all(t.b .=== reduce(vcat, [2.0y:2.0y:20.0y for y in 1:5])) + end + @testset "Matrix of NamedTuple with preservedims" begin + da = [(; a=1.0f0x*y, b=2.0x*y) for x in X(1:10), y in Y(1:5)] + t = DimTable(da; preservedims=X); + s = Tables.schema(t) + @test s.names == (:Y, :a, :b) + @test s.types[1] <: Int + @test s.types[2] <: DimVector + @test s.types[2] <: DimVector + @test all(t.a .== [[1.0f0x*y for x in X(1:10)] for y in Y(1:5)]) + @test all(t.b .== [[2.0x*y for x in X(1:10)] for y in Y(1:5)]) + end end From db6311c1f69736a0392f1dd82f906c6fc0416a62 Mon Sep 17 
00:00:00 2001 From: Felix Cremer Date: Fri, 9 May 2025 14:01:33 +0200 Subject: [PATCH 05/23] Remove deprecations (#1009) --- src/DimensionalData.jl | 3 +-- src/Lookups/utils.jl | 2 -- src/utils.jl | 2 -- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 009084c4e..87bf6384f 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -71,8 +71,7 @@ export AbstractDimTable, DimTable export AbstractDimTree, DimTree, prune -export DimIndices, DimSelectors, DimPoints, #= deprecated =# DimKeys - +export DimIndices, DimSelectors, DimPoints # getter methods export dims, refdims, metadata, name, lookup, bounds, val, layers diff --git a/src/Lookups/utils.jl b/src/Lookups/utils.jl index 93e54fe9b..9b1d3f366 100644 --- a/src/Lookups/utils.jl +++ b/src/Lookups/utils.jl @@ -111,8 +111,6 @@ end _order(A) = first(A) <= last(A) ? ForwardOrdered() : ReverseOrdered() _order(A::AbstractArray{<:IntervalSets.Interval}) = first(A).left <= last(A).left ? ForwardOrdered() : ReverseOrdered() -@deprecate maybeshiftlocus maybeshiftlocus -@deprecate shiftlocus shiftlocus # Remove objects of type T from a Base.@assume_effects :foldable _remove(::Type{T}, x, xs...) where T = (x, _remove(T, xs...)...) diff --git a/src/utils.jl b/src/utils.jl index 4b40ef859..0a336220f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -181,8 +181,6 @@ function _broadcast_dims_inner!(f, dest, As, od) return dest end -@deprecate dimwise broadcast_dims -@deprecate dimwise! broadcast_dims! # Get a tuple of unique keys for DimArrays. If they have the same # name we call them layerI. 
From 8d19346cd96e342fb1807d3b282c468e141350e9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 28 Jun 2025 17:10:57 +1000 Subject: [PATCH 06/23] typo --- src/dimindices.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dimindices.jl b/src/dimindices.jl index 6a627b76b..f6a332a30 100644 --- a/src/dimindices.jl +++ b/src/dimindices.jl @@ -146,7 +146,7 @@ function Base.getindex(A::DimIndices, i1::Integer, i2::Integer, I::Integer...) end # Dispatch to avoid linear indexing in multidimensional DimIndices function Base.getindex(A::DimIndices{<:Any,1}, i::Integer) - d = dims(di, 1)::Dimension + d = dims(A, 1)::Dimension di = rebuild(d, d[i]) return dims((di, _refdims_firsts(A)...), orderdims(A)) end From b0f5df1430ef7f48e480bdc5f3a7dab9844f4adf Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 28 Jun 2025 17:30:20 +1000 Subject: [PATCH 07/23] add missing reference docs --- docs/src/api/reference.md | 2 ++ src/dimindices.jl | 24 ++++++++++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/docs/src/api/reference.md b/docs/src/api/reference.md index adfcb39da..d11667e91 100644 --- a/docs/src/api/reference.md +++ b/docs/src/api/reference.md @@ -55,6 +55,7 @@ prune DimIndices DimSelectors DimPoints +DimSlices ``` ## Tables.jl/TableTraits.jl interface @@ -139,6 +140,7 @@ DimensionalData.NoName DimensionalData.DimArrayInterface DimensionalData.DimStackInterface DimensionalData.rebuild_from_arrays +DimensionalData.rebuildsliced DimensionalData.show_main DimensionalData.show_after DimensionalData.refdims_title diff --git a/src/dimindices.jl b/src/dimindices.jl index f6a332a30..38191c19c 100644 --- a/src/dimindices.jl +++ b/src/dimindices.jl @@ -1,9 +1,7 @@ -""" - AbstractDimArrayGenerator <: AbstractBasicDimArray +# AbstractDimArrayGenerator <: AbstractBasicDimArray -Abstract supertype for all AbstractBasicDimArrays that -generate their `data` on demand during `getindex`. 
-""" +# Abstract supertype for all AbstractBasicDimArrays that +# generate their `data` on demand during `getindex`. abstract type AbstractDimArrayGenerator{T,N,D} <: AbstractBasicDimArray{T,N,D} end dims(dg::AbstractDimArrayGenerator)::Tuple{Vararg{Dimension}} = dg.dims @@ -30,16 +28,14 @@ end rebuild(A; dims=dims(dims(A), Tuple(perm))) end -""" - AbstractRebuildableDimArrayGenerator <: AbstractDimArrayGenerator +# AbstractRebuildableDimArrayGenerator <: AbstractDimArrayGenerator -Abstract supertype for all AbstractDimArrayGenerator that -can be rebuilt when subsetted with `view` or `getindex`. +# Abstract supertype for all AbstractDimArrayGenerator that +# can be rebuilt when subsetted with `view` or `getindex`. -These arrays must have `dims` and `refdims` fields that defined the data -They do not need to define `rebuildsliced` methods as this is defined -as simply doing `slicedims` on `dims` and `refdims` and rebuilding. -""" +# These arrays must have `dims` and `refdims` fields that defined the data +# They do not need to define `rebuildsliced` methods as this is defined +# as simply doing `slicedims` on `dims` and `refdims` and rebuilding. abstract type AbstractRebuildableDimArrayGenerator{T,N,D,R<:MaybeDimTuple} <: AbstractDimArrayGenerator{T,N,D} end refdims(A::AbstractRebuildableDimArrayGenerator) = A.refdims @@ -366,7 +362,7 @@ const SliceDim = Dimension{<:Union{<:AbstractVector{Int},<:AbstractVector{<:Abst A `Base.Slices` like object for returning view slices from a DimArray. -This is used for `eachslice` on stacks. +This is used for `eachslice` on `AbstractDimStack`. `dims` must be a `Tuple` of `Dimension` holding `AbstractVector{Int}` or `AbstractVector{<:AbstractVector{Int}}`. 
From eb31c6131c2029bfc254aaf004e6fbb6b263fb55 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 28 Jun 2025 19:11:29 +1000 Subject: [PATCH 08/23] fix DimSlices doc --- docs/src/api/reference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/api/reference.md b/docs/src/api/reference.md index d11667e91..16d905813 100644 --- a/docs/src/api/reference.md +++ b/docs/src/api/reference.md @@ -55,7 +55,7 @@ prune DimIndices DimSelectors DimPoints -DimSlices +DimensionalData.DimSlices ``` ## Tables.jl/TableTraits.jl interface From 886a2b295688b328a0e4f22219deb8892e855636 Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Sun, 13 Jul 2025 08:41:18 +0200 Subject: [PATCH 09/23] Breaking: skipmissing on a dimstack (#1041) * iterate values where no layer is missing * add tests * add skipmissing to reference --- docs/src/api/reference.md | 9 +++++---- src/stack/stack.jl | 29 ++++++++++++++++++++++++++++- test/stack.jl | 16 ++++++++++++++++ 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/docs/src/api/reference.md b/docs/src/api/reference.md index 16d905813..4da257bd3 100644 --- a/docs/src/api/reference.md +++ b/docs/src/api/reference.md @@ -65,7 +65,7 @@ DimensionalData.AbstractDimTable DimTable ``` -# Group by methods +## Group by methods For transforming DimensionalData objects: @@ -82,7 +82,7 @@ months hours ``` -# Utility methods +## Utility methods For transforming DimensionalData objects: @@ -98,7 +98,7 @@ unmergedims reorder ``` -# Global lookup strictness settings +## Global lookup strictness settings Control how strict DimensionalData when comparing [`Lookup`](@ref)s before doing broadcasts and matrix multipications. @@ -115,12 +115,13 @@ DimensionalData.strict_matmul DimensionalData.strict_matmul! ``` -Base methods +## Base methods ```@docs Base.cat Base.copy! 
Base.eachslice +Base.skipmissing ``` Most base methods work as expected, using `Dimension` wherever a `dims` diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 2558c2e00..0e68a6395 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -546,4 +546,31 @@ function DimStack(st::AbstractDimStack; DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end -layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) \ No newline at end of file +layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) + +### Skipmissing on DimStacks +""" + skipmissing(itr::AbstractDimStack) + +Returns an iterable over the elements in a `AbstractDimStack` object, skipping any values if any of the layers are missing. +""" +Base.skipmissing + +# Specialized dispatch of iterate to skip values if any layer is missing. +function Base.iterate(itr::Base.SkipMissing{<:AbstractDimStack}, state...) + y = iterate(itr.x, state...) + y === nothing && return nothing + item, state = y + while any(map(ismissing, item)) # instead of ismissing(item) + y = iterate(itr.x, state) + y === nothing && return nothing + item, state = y + end + item, state +end + +Base.eltype(::Type{Base.SkipMissing{T}}) where {T<:AbstractDimStack{<:Any, NT}} where NT = + _nonmissing_nt(NT) + +@generated _nonmissing_nt(NT::Type{<:NamedTuple{K,V}}) where {K,V} = + NamedTuple{K, Tuple{map(Base.nonmissingtype, V.parameters)...}} \ No newline at end of file diff --git a/test/stack.jl b/test/stack.jl index f200a49cb..9bf9bde40 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -396,4 +396,20 @@ end @test ds[Z = 1] == (a = da1, b = da1) @test ds[Z = 1:2] == ds +end + +@testset "skipmissing" begin + skips = skipmissing(s) + skips2 = skipmissing(mixed) + @test eltype(skips) === @NamedTuple{one::Float64, two::Float32, three::Int} + @test eltype(skips2) === @NamedTuple{one::Float64, two::Float32, extradim::Float64} + @test collect(skips) == vec(s) + 
@test collect(skips2) == vec(mixed) + + da5 = DimArray([missing, 1], x) + s2 = DimStack((one = da1, two = da5)) + @test eltype(skipmissing(s2)) === @NamedTuple{one::Float64, two::Int} + cs2 = collect(skipmissing(s2)) + @test all(getindex.(cs2, :two) .== 1) + @test getindex.(cs2, :one) == da1[X=2] end \ No newline at end of file From cc4928e777063ada00f5fc8210f20411f793364b Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Sun, 10 Aug 2025 17:45:47 -0600 Subject: [PATCH 10/23] Breaking: Materialize `DimArray` or `DimStack` From a Table (#739) * Table Materializer Methods * Made col Optional for DimArray * Apply suggestions from code review Co-authored-by: Rafael Schouten * Handle coordinates with different loci * replaced At() with Contains() in _coords_to_ords * Added optional selectors and public methods for table materializer * Updated table constructors for DimArray and DimStack * Updated DimArray and DimStack docs to include table materializer methods * Table materializer test cases * export table materializer methods * Added Random to tables.jl test cases * Update src/array/array.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Removed exports * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Replaced selector type with instance. * Table materializer can now infer dimensions from the coordinates. 
* Update src/stack/stack.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Update src/array/array.jl Co-authored-by: Rafael Schouten * Update src/table_ops.jl Co-authored-by: Rafael Schouten * Added support for guessing the dimension ordering and span for Dates and DateTimes * Replaced LinRange with StepRangeLen in _build_dim * Added Tables.istable check to DimArray constructor * Update src/array/array.jl * merge materialize2 * fix scuffed merge * filter instead of indexing in test for clarity * fix DimSlices doc * fix ambiguities * bugfixes * do checks and call Tables.columns before constructing stack from table * test dimensions are automatically detected when constructing dimstack * comments not docstrings for internals * check for columnaccess if dims are passed * add type argument to dimarray_from_table * allow passing name to DimStack * add a section to the documentation * use Tables.columnnames instead of keys * make DimArray work with all tables that are abstractarrays * do not treat dimvectors as tables * simplify get_column --------- Co-authored-by: Rafael Schouten Co-authored-by: Tiem van der Deure --- docs/src/tables.md | 72 +++++++++++- src/DimensionalData.jl | 1 + src/Dimensions/dimension.jl | 1 + src/array/array.jl | 59 +++++++++- src/stack/stack.jl | 64 ++++++++++- src/table_ops.jl | 211 ++++++++++++++++++++++++++++++++++++ src/tree/tree.jl | 9 +- test/stack.jl | 7 +- test/tables.jl | 102 +++++++++++++++++ 9 files changed, 512 insertions(+), 14 deletions(-) create mode 100644 src/table_ops.jl diff --git a/docs/src/tables.md b/docs/src/tables.md index d872ccce8..498f242c7 100644 --- a/docs/src/tables.md +++ 
b/docs/src/tables.md @@ -2,12 +2,22 @@ [Tables.jl](https://github.com/JuliaData/Tables.jl) provides an ecosystem-wide interface to tabular data in Julia, ensuring interoperability with [DataFrames.jl](https://dataframes.juliadata.org/stable/), [CSV.jl](https://csv.juliadata.org/stable/), and hundreds of other packages that implement the standard. +## Dimensional data are tables DimensionalData.jl implements the Tables.jl interface for `AbstractDimArray` and `AbstractDimStack`. `DimStack` layers are unrolled so they are all the same size, and dimensions loop to match the length of the largest layer. Columns are given the [`name`](@ref) of the array or stack layer, and the result of `DD.name(dimension)` for `Dimension` columns. -Looping of dimensions and stack layers is done _lazily_, -and does not allocate unless collected. +Looping of dimensions and stack layers is done _lazily_, and does not allocate unless collected. + +## Materializing tables to DimArray or DimStack +`DimArray` and `DimStack` have fallback methods to materialize any `Tables.jl`-compatible table. + +By default, it will treat columns such as X, Y, Z, and Band as dimensions, and other columns as data. +Pass a `name` keyword argument to determine which column(s) are used. + +You have full control over which columns are dimensions - and what those dimensions look like exactly. If you pass a `Tuple` of `Symbol` or dimension types (e.g. `X`) as the second argument, those columns are treated as dimensions. Passing a `Tuple` of dimensions preserves these dimensions - with values matched to the corresponding columns. + +Materializing tables will work even if the table is not ordered, and can handle missing values.
## Example @@ -89,3 +99,61 @@ using CSV CSV.write("dimstack.csv", st) readlines("dimstack.csv") ```` + +## Converting a DataFrame to a DimArray or DimStack + +The DataFrame we use will have 5 columns: X, Y, category, data1, and data2 + +````@ansi dataframe +df = DataFrame(st) +```` + +::: tabs + +== Create a `DimArray` + +Converting this DataFrame to a DimArray without other arguments will read the `category` column as data and ignore data1 and data2: + +````@ansi dataframe +DimArray(df) +```` + +Specify dimension names to ensure these get treated as dimensions. Now data1 is read in instead. +````@ansi dataframe +DimArray(df, (X,Y,:category)) +```` + +You can also pass in the actual dimensions. +````@ansi dataframe +DimArray(df, dims(st)) +```` + +Pass in a name argument to read in data2 instead. +````@ansi dataframe +DimArray(df, dims(st); name = :data2) +```` + +== Create a `DimStack` + +Converting the DataFrame to a `DimStack` will by default read category, data1, and data2 as layers +````@ansi dataframe +DimStack(df) +```` + + +Specify dimension names to ensure these get treated as dimensions. Now data1 and data2 are layers. +````@ansi dataframe +DimStack(df, (X,Y,:category)) +```` + +You can also pass in the actual dimensions. +````@ansi dataframe +DimStack(df, dims(st)) +```` + +Pass in a tuple of column names to control which columns are read.
+````@ansi dataframe +DimStack(df, dims(st); name = (:data2,)) +```` + +::: \ No newline at end of file diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 87bf6384f..c4f1ea4e0 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -92,6 +92,7 @@ const DD = DimensionalData # Common include("interface.jl") include("name.jl") +include("table_ops.jl") # Arrays include("array/array.jl") diff --git a/src/Dimensions/dimension.jl b/src/Dimensions/dimension.jl index f5e2b2866..61d08d815 100644 --- a/src/Dimensions/dimension.jl +++ b/src/Dimensions/dimension.jl @@ -178,6 +178,7 @@ lookup(dim::Union{DimType,Val{<:Dimension}}) = NoLookup() name(dim::Dimension) = name(typeof(dim)) name(dim::Val{D}) where D = name(D) name(dim::Type{D}) where D<:Dimension = nameof(D) +name(s::Symbol) = s label(x) = string(name(x)) diff --git a/src/array/array.jl b/src/array/array.jl index 38fbe991e..f160834ef 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -144,7 +144,8 @@ function Base.NamedTuple(A1::AbstractDimArray, As::AbstractDimArray...) end # undef constructor for all AbstractDimArray -(::Type{A})(x::UndefInitializer, dims::Dimension...; kw...) where {A<:AbstractDimArray{<:Any}} = A(x, dims; kw...) +(::Type{A})(x::UndefInitializer, dims::Dimension...; kw...) where {A<:AbstractDimArray{T}} where T = + A(x, dims; kw...) function (::Type{A})(x::UndefInitializer, dims::DimTuple; kw...) where {A<:AbstractDimArray{T}} where T basetypeof(A)(Array{T}(undef, size(dims)), dims; kw...) end @@ -410,13 +411,14 @@ moves dimensions to reference dimension `refdims` after reducing operations ## Arguments -- `data`: An `AbstractArray`. +- `data`: An `AbstractArray` or a table with coordinate columns corresponding to `dims`. - `gen`: A generator expression. Where source iterators are `Dimension`s the dim args or kw is not needed. - `dims`: A `Tuple` of `Dimension` - `name`: A string name for the array. Shows in plots and tables. - `refdims`: refence dimensions. 
Usually set programmatically to track past slices and reductions of dimension for labelling and reconstruction. - `metadata`: `Dict` or `Metadata` object, or `NoMetadata()` +- `selector`: The coordinate selector type to use when materializing from a table. Indexing can be done with all regular indices, or with [`Dimension`](@ref)s and/or [`Selector`](@ref)s. @@ -512,6 +514,57 @@ function DimArray(A::AbstractBasicDimArray; newdata = collect(data) DimArray(newdata, format(dims, newdata); refdims, name, metadata) end +# Tables +# Write a single column from a table with one or more coordinate columns to a DimArray +function DimArray(table, dims; kw...) + # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`obj` must be an `AbstractArray` or satisfy the `Tables.jl` interface.")) + table = Tables.columnaccess(table) ? table : Tables.columns(table) + dimarray_from_table(DimArray, table, guess_dims(table, dims); kw...) +end +# Same as above, but guess dimension names from scratch +function DimArray(table; kw...) + # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + table = Tables.columnaccess(table) ? table : Tables.columns(table) + # Use default dimension + return dimarray_from_table(DimArray, table, guess_dims(table; kw...); kw...) +end +# Special-case for AbstractVectors - these might be tables +function DimArray(data::AbstractVector, dims::Tuple; + refdims=(), name=NoName(), metadata=NoMetadata(), kw... +) + if !(data isa AbstractBasicDimArray) && Tables.istable(data) && + all(map(d -> Dimensions.name(d) in Tables.schema(data).names, dims)) + table = Tables.columns(data) + dims = guess_dims(table, dims; kw...) + return dimarray_from_table(DimArray, table, dims; refdims, name, metadata, kw...) 
+ else + return DimArray(data, format(dims, data), refdims, name, metadata) + end +end + +function dimarray_from_table(::Type{T}, table, dims; + name=NoName(), + selector=nothing, + precision=6, + missingval=missing, + kw... +) where T <: AbstractDimArray + # Determine row indices based on coordinate values + indices = coords_to_indices(table, dims; selector, atol=10.0^-precision) + + # Extract the data column correspondong to `name` + col = name == NoName() ? data_col_names(table, dims) |> first : Symbol(name) + data = Tables.getcolumn(table, col) + + # Restore array data + array = restore_array(data, indices, dims, missingval) + + # Return DimArray + return T(array, dims, name=col; kw...) +end + """ DimArray(f::Function, dim::Dimension; [name]) @@ -520,7 +573,7 @@ Apply function `f` across the values of the dimension `dim` the given dimension. Optionally provide a name for the result. """ function DimArray(f::Function, dim::Dimension; name=Symbol(nameof(f), "(", name(dim), ")")) - DimArray(f.(val(dim)), (dim,); name) + DimArray(f.(val(dim)), (dim,); name) end DimArray(itr::Base.Generator; kwargs...) = rebuild(collect(itr); kwargs...) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 0e68a6395..655317fa4 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -30,6 +30,11 @@ const AbstractVectorDimStack = AbstractDimStack{K,T,1} where {K,T} const AbstractMatrixDimStack = AbstractDimStack{K,T,2} where {K,T} (::Type{T})(st::AbstractDimStack; kw...) where T<:AbstractDimArray = + dimarray_from_dimstack(T, st; kw...) +# For ambiguity +DimArray(st::AbstractDimStack; kw...) = dimarray_from_dimstack(DimArray, st; kw...) + +dimarray_from_dimstack(T, st; kw...) = T([st[D] for D in DimIndices(st)]; dims=dims(st), metadata=metadata(st), kw...) data(s::AbstractDimStack) = getfield(s, :data) @@ -101,7 +106,7 @@ and an existing stack. 
# Keywords -Keywords are simply the fields of the stack object: +Keywords are simply the common fields of an `AbstractDimStack` object: - `data` - `dims` @@ -109,6 +114,8 @@ Keywords are simply the fields of the stack object: - `metadata` - `layerdims` - `layermetadata` + +There is no promise that these keywords will be used in all cases. """ function rebuild_from_arrays( s::AbstractDimStack{Keys}, das::Tuple{Vararg{AbstractBasicDimArray}}; kw... @@ -340,6 +347,7 @@ end """ DimStack <: AbstractDimStack + DimStack(table, [dims]; kw...) DimStack(data::AbstractDimArray...; kw...) DimStack(data::Union{AbstractArray,Tuple,NamedTuple}, [dims::DimTuple]; kw...) DimStack(data::AbstractDimArray; layersfrom, kw...) @@ -512,7 +520,7 @@ function DimStack(das::NamedTuple{<:Any,<:Tuple{Vararg{AbstractDimArray}}}; end DimStack(data::Union{Tuple,AbstractArray,NamedTuple}, dim::Dimension; name=uniquekeys(data), kw...) = DimStack(NamedTuple{Tuple(name)}(data), (dim,); kw...) -DimStack(data::Union{Tuple,AbstractArray}, dims::Tuple; name=uniquekeys(data), kw...) = +DimStack(data::Union{Tuple,AbstractArray{<:AbstractArray}}, dims::Tuple; name=uniquekeys(data), kw...) = DimStack(NamedTuple{Tuple(name)}(data), dims; kw...) function DimStack(data::NamedTuple{K}, dims::Tuple; refdims=(), @@ -520,6 +528,9 @@ function DimStack(data::NamedTuple{K}, dims::Tuple; layermetadata=nothing, layerdims=nothing ) where K + if length(data) > 0 && Tables.istable(data) && all(d -> name(d) in keys(data), dims) + return dimstack_from_table(DimStack, data, dims; refdims, metadata) + end layerdims = if isnothing(layerdims) all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() map(_ -> basedims(dims), data) @@ -546,6 +557,53 @@ function DimStack(st::AbstractDimStack; DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end +# Write each column from a table with one or more coordinate columns to a layer in a DimStack +function DimStack(data, dims::Tuple; kw... 
+) + if Tables.istable(data) + table = Tables.columns(data) + all(map(d -> Dimensions.name(d) in Tables.columnnames(table), dims)) || throw(ArgumentError( + "All dimensions in dims must be in the table columns." + )) + dims = guess_dims(table, dims; kw...) + return dimstack_from_table(DimStack, table, dims; kw...) + else + throw(ArgumentError( + """data must be a table with coordinate columns, an AbstractArray, + or a Tuple or NamedTuple of AbstractArrays""" + )) + + end +end +function DimStack(table; kw...) + if Tables.istable(table) + table = Tables.columns(table) + dimstack_from_table(DimStack, table, guess_dims(table; kw...); kw...) + else + throw(ArgumentError( + """data must be a table with coordinate columns, an AbstractArray, + or a Tuple or NamedTuple of AbstractArrays""" + )) end +end + +function dimstack_from_table(::Type{T}, table, dims; + name=nothing, + selector=nothing, + precision=6, + missingval=missing, + kw... +) where T<:AbstractDimStack + table = Tables.columnaccess(table) ? table : Tables.columns(table) + data_cols = isnothing(name) ? data_col_names(table, dims) : name + dims = guess_dims(table, dims; precision) + indices = coords_to_indices(table, dims; selector) + layers = map(data_cols) do col + d = Tables.getcolumn(table, col) + restore_array(d, indices, dims, missingval) + end + return T(layers, dims; name = data_cols, kw...) 
+end + layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) ### Skipmissing on DimStacks @@ -573,4 +631,4 @@ Base.eltype(::Type{Base.SkipMissing{T}}) where {T<:AbstractDimStack{<:Any, NT}} _nonmissing_nt(NT) @generated _nonmissing_nt(NT::Type{<:NamedTuple{K,V}}) where {K,V} = - NamedTuple{K, Tuple{map(Base.nonmissingtype, V.parameters)...}} \ No newline at end of file + NamedTuple{K, Tuple{map(Base.nonmissingtype, V.parameters)...}} diff --git a/src/table_ops.jl b/src/table_ops.jl new file mode 100644 index 000000000..38ad85dc9 --- /dev/null +++ b/src/table_ops.jl @@ -0,0 +1,211 @@ +#= +Restore a dimensional array from its tabular representation. + +- `data`: An `AbstractVector` containing the flat data to be written to a `DimArray`. +- `indices`: An `AbstractVector` containing the dimensional indices corresponding to each element in `data`. +- `dims`: The dimensions of the destination `DimArray`. +- `missingval`: The value to write for missing elements in `data`. + +# Returns + +An `Array` containing the ordered valued in `data` with the size specified by `dims`. +=# +function restore_array(data::AbstractVector, indices::AbstractVector, dims::Tuple, missingval) + # Allocate Destination Array + dst = DimArray{eltype(data)}(undef, dims) + for (idx, d) in zip(indices, data) + dst[idx] = d + end + + if length(indices) !== length(dst) + # Handle Missing Rows + _missingval = _cast_missing(data, missingval) + missing_rows = trues(dims) + for idx in indices # looping is faster than broadcasting + missing_rows[idx] = false + end + return ifelse.(missing_rows, _missingval, dst) + end + return dst +end + +#= + coords_to_indices(table, dims; [selector, atol]) + +Return the dimensional index of each row in `table` based on its associated coordinates. +Dimension columns are determined from the name of each dimension in `dims`. + +# Arguments + +- a table +- `dims`: A `Tuple` of `Dimension` corresponding to the source/destination array. 
+ +# Keywords + +- `selector`: The selector type to use. This defaults to `Near()` for ordered, sampled dimensions + and `At()` for all other dimensions. +- `atol`: The absolute tolerance to use with `At()`. This defaults to `1e-6`. +=# +coords_to_indices(table, dims::Tuple; selector=nothing, atol=1e-6) = + _coords_to_indices(table, dims, selector, atol) + +#= + guess_dims(table; kw...) + guess_dims(table, dims; precision=6) + +Guesses the dimensions of an array based on the provided tabular representation. + +# Arguments + +- a table +The dimensions will be inferred from the corresponding coordinate columns in the table. + +- `dims`: One or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default +to any available dimensions in the set `(:X, :Y, :Z, :Ti, :Band)`. Dimensions can be given as either a singular +value or as a `Pair` with both the dimensions and corresponding order. The order will be inferred from the data +when none is given. This should work for sorted coordinates, but will not be sufficient when the table's rows are +out of order. + +# Keywords + +- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). + +# Returns +A tuple containing the inferred dimensions from the table. +=# +guess_dims(table; kw...) = guess_dims(table, _dim_col_names(table); kw...) +guess_dims(table, dims::Tuple; precision=6, kw...) = + map(dim -> _guess_dims(get_column(table, name(dim)), dim, precision), dims) + +#Retrieve the coordinate data stored in the column specified by `dim`. +get_column(table, x) = Tables.getcolumn(table, name(x)) + + +#Return the names of all columns that don't match the dimensions given by `dims`. +function data_col_names(table, dims::Tuple) + dim_cols = name(dims) + return filter(x -> !(x in dim_cols), Tables.columnnames(table)) +end + +_guess_dims(coords::AbstractVector, dim::Type{<:Dimension}, args...) = + _guess_dims(coords, name(dim), args...) 
+_guess_dims(coords::AbstractVector, dim::Pair, args...) = + _guess_dims(coords, first(dim), last(dim), args...) +function _guess_dims(coords::AbstractVector, dim::Symbol, ::Type{T}, precision::Int) where {T <: Order} + return _guess_dims(coords, dim, T(), precision) +end +function _guess_dims(coords::AbstractVector, dim::Symbol, precision::Int) + dim_vals = _dim_vals(coords, dim, precision) + return format(Dim{dim}(dim_vals)) +end +function _guess_dims(coords::AbstractVector, dim::Type{<:Dimension}, precision::Int) + dim_vals = _dim_vals(coords, dim, precision) + return format(dim(dim_vals)) +end +function _guess_dims(coords::AbstractVector, dim::Dimension, precision::Int) + newl = _guess_dims(coords, lookup(dim), precision) + return format(rebuild(dim, newl)) +end +function _guess_dims(coords::AbstractVector, l::Lookup, precision::Int) + dim_vals = _dim_vals(coords, l, precision) + return rebuild(l; data = dim_vals) +end +# lookup(dim) could just return a vector - then we keep those values +_guess_dims(coords::AbstractVector, l::AbstractVector, precision::Int) = l + +# Extract coordinate columns from table +function _dim_cols(table, dims::Tuple) + dim_cols = name(dims) + return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) +end + +# Extract dimension column names from the given table +_dim_col_names(table) = filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)) +_dim_col_names(table, dims::Tuple) = map(col -> Tables.getcolumn(table, col), name(dims)) + +_coords_to_indices(table, dims::Tuple, sel, atol) = + _coords_to_indices(_dim_cols(table, dims), dims, sel, atol) +# Determine the ordinality of a set of coordinates +function _coords_to_indices(coords::Tuple, dims::Tuple, sel, atol) + map(zip(coords...)) do coords + map(coords, dims) do c, d + _coords_to_indices(c, d, sel, atol) + end + end +end +_coords_to_indices(coords::NamedTuple, dims::Tuple, sel, atol) = _coords_to_indices(map(x -> coords[x], name(dims)), dims, sel, 
atol) +# implement some default selectors +_coords_to_indices(coord, dim::Dimension, sel::Nothing, atol) = + _coords_to_indices(coord, dim, _default_selector(dim), atol) + +# get indices of the coordinates +_coords_to_indices(coord, dim::Dimension, sel::Selector, atol) = + return rebuild(dim, selectindices(dim, rebuild(sel, coord))) +# get indices of the coordinates +_coords_to_indices(coord, dim::Dimension, sel::At, atol) = + return rebuild(dim, selectindices(dim, rebuild(sel; val = coord, atol))) + +function _default_selector(dim::Dimension{<:AbstractSampled}) + if sampling(dim) isa Intervals + Contains() + elseif isordered(dim) && !(eltype(dim) <: Integer) + Near() + else + At() + end +end +_default_selector(dim::Dimension{<:AbstractCategorical}) = At() +_default_selector(dim::Dimension) = Near() + +# Extract dimension value from the given vector of coordinates +function _dim_vals(coords::AbstractVector, dim, precision::Int) + vals = _unique_vals(coords, precision) + return _maybe_as_range(vals, precision) +end +function _dim_vals(coords::AbstractVector, l::Lookup, precision::Int) + val(l) isa AutoValues || return val(l) # do we want to have some kind of check that the values match? + vals = _unique_vals(coords, precision) + _maybe_order!(vals, order(l)) + return _maybe_as_range(vals, precision) +end +_dim_vals(coords::AbstractVector, l::AbstractVector, precision::Int) = l # same comment as above? 
+ +_maybe_order!(A::AbstractVector, ::Order) = A +_maybe_order!(A::AbstractVector, ::ForwardOrdered) = sort!(A) +_maybe_order!(A::AbstractVector, ::ReverseOrdered) = sort!(A, rev=true) + +# Extract all unique coordinates from the given vector +_unique_vals(coords::AbstractVector, ::Int) = unique(coords) +_unique_vals(coords::AbstractVector{<:Real}, precision::Int) = round.(coords, digits=precision) |> unique +_unique_vals(coords::AbstractVector{<:Integer}, ::Int) = unique(coords) + +# Estimate the span between consecutive coordinates +_maybe_as_range(A::AbstractVector, precision) = A # for non-numeric types +function _maybe_as_range(A::AbstractVector{<:Real}, precision::Int) + A_r = range(first(A), last(A), length(A)) + atol = 10.0^(-precision) + return all(i -> isapprox(A_r[i], A[i]; atol), eachindex(A)) ? A_r : A +end +function _maybe_as_range(A::AbstractVector{<:Integer}, precision::Int) + idx1, idxrest = Iterators.peel(eachindex(A)) + step = A[idx1+1] - A[idx1] + for idx in idxrest + A[idx] - A[idx-1] == step || return A + end + return first(A):step:last(A) +end +function _maybe_as_range(A::AbstractVector{<:Dates.AbstractTime}, precision::Int) + steps = (@view A[2:end]) .- (@view A[1:end-1]) + span = argmin(abs, steps) + isregular = all(isinteger, round.(steps ./ span, digits=precision)) + return isregular ? 
range(first(A), last(A), length(A)) : A +end + +_cast_missing(::AbstractArray, missingval::Missing) = missing +function _cast_missing(::AbstractArray{T}, missingval) where {T} + try + return convert(T, missingval) + catch e + return missingval + end +end diff --git a/src/tree/tree.jl b/src/tree/tree.jl index 33c6cdc03..61c9ae6c5 100644 --- a/src/tree/tree.jl +++ b/src/tree/tree.jl @@ -58,7 +58,12 @@ layerdims(dt::AbstractDimTree, key::Symbol) = layerdims(dt)[key] layers(dt::AbstractDimTree) = DataDict((pn => dt[pn] for pn in keys(dt))) # DimStack constructors on DimTree -function (::Type{T})(dt::AbstractDimTree; keep=nothing) where {T<:AbstractDimStack} +# If this method has ambiguities, define it for the DimStack type and call dimstack_from_tree +(::Type{T})(dt::AbstractDimTree; kw...) where {T<:AbstractDimStack} = + dimstack_from_tree(T, dt; kw...) +DimStack(dt::AbstractDimTree; kw...) = dimstack_from_tree(DimStack, dt; kw...) + +function dimstack_from_tree(T, dt; keep=nothing) if isnothing(keep) pruned = DD.prune(dt; keep) T(pruned[Tuple(keys(pruned))]) @@ -430,4 +435,4 @@ end layerdims(layers::AbstractArray{<:Pair}) = TupleDict(map(((k, v),) -> k => basedims(v), layers)) layermetadata(layers::AbstractArray{<:Pair}) = - DataDict(map(((k, v),) -> k => metadata(v), layers)) \ No newline at end of file + DataDict(map(((k, v),) -> k => metadata(v), layers)) diff --git a/test/stack.jl b/test/stack.jl index 9bf9bde40..0a1ed5045 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -34,9 +34,8 @@ mixed = DimStack(da1, da2, da4) DimStack((da1[:, 1], da2[:, 1], da3[:, 1]); name=(:one, :two, :three)) == DimStack(da1[:, 1], da2[:, 1], da3[:, 1]; name=(:one, :two, :three)) == DimStack(parent.([da1[:, 1], da2[:, 1], da3[:, 1]]), dimz[1]; name=(:one, :two, :three)) == s[:, 1] - @test dims(DimStack()) == dims(DimStack(NamedTuple())) == - dims(DimStack(())) == dims(DimStack(DimArray[])) == - dims(DimStack((), ())) == dims(DimStack(Array[], ())) == () + @test dims(DimStack()) == 
dims(DimStack(())) == dims(DimStack(DimArray[])) == + dims(DimStack(NamedTuple())) == dims(DimStack((), ())) == dims(DimStack(Array[], ())) == () @test DimStack([A, 2A, 3A], (Z(), Ti()); name=(:one, :two, :three), layerdims=[(Z(), Ti()), (Z(), Ti()), (Z(), Ti())]) == DimStack((A, 2A, 3A), (Z(), Ti()); name=(:one, :two, :three), layerdims=(one=(Z(), Ti()), two=(Z(), Ti()), three=(Z(), Ti()))) == DimStack((one=A, two=2A, three=3A), (Z(), Ti()); layerdims=[(Z(), Ti()), (Z(), Ti()), (Z(), Ti())]) == @@ -412,4 +411,4 @@ end cs2 = collect(skipmissing(s2)) @test all(getindex.(cs2, :two) .== 1) @test getindex.(cs2, :one) == da1[X=2] -end \ No newline at end of file +end diff --git a/test/tables.jl b/test/tables.jl index 26466fb4f..3b7a47994 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -2,6 +2,7 @@ using DataFrames using Dates using DimensionalData using IteratorInterfaceExtensions +using Random using TableTraits using Tables using Test @@ -161,6 +162,107 @@ end @test Tables.columnnames(t4) == (:band, :geometry, :value) end +@testset "Materialize from table" begin + a = DimArray(rand(UInt8, 100, 100), (X(100:-1:1), Y(-250:5:249))) + b = DimArray(rand(Float32, 100, 100), (X(100:-1:1), Y(-250:5:249))) + c = DimArray(rand(Float64, 100, 100), (X(100:-1:1), Y(-250:5:249))) + ds = DimStack((a=a, b=b, c=c)) + t = DataFrame(ds) + t1 = Random.shuffle(t) + t2 = filter(r -> r.Y != -250, t) + t3 = copy(t1) + t3.X .+= rand(nrow(t1)) .* 1e-7 # add some random noise to check if precision works + + tabletypes = (Tables.rowtable, Tables.columntable, DataFrame) + + for type in tabletypes + t = type(t) + t1 = type(t1) + t2 = type(t2) + t3 = type(t3) + @testset "All dimensions passed (using $type)" begin + # Restore DimArray from shuffled table + for table = (t1, t3) + @test all(DimArray(table, dims(ds)) .== a) + @test all(DimArray(table, dims(ds), name="a") .== a) + @test all(DimArray(table, dims(ds), name="b") .== b) + @test all(DimArray(table, dims(ds), name="c") .== c) + end + + # 
Restore DimArray from table with missing rows + @test all(DimArray(t2, dims(ds), name="a")[Y(2:100)] .== a[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="b")[Y(2:100)] .== b[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="c")[Y(2:100)] .== c[Y(2:100)]) + @test DimArray(t2, dims(ds), name="a")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="b")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="c")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="a")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="b")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="c")[Y(2:100)] .|> ismissing .|> (!) |> all + + # Restore DimStack from shuffled table + restored_stack = DimStack(t1, dims(ds)) + @test all(restored_stack.a .== ds.a) + @test all(restored_stack.b .== ds.b) + @test all(restored_stack.c .== ds.c) + + # Restore DimStack from table with missing rows + restored_stack = DimStack(t2, dims(ds)) + @test all(restored_stack.a[Y(2:100)] .== ds.a[Y(2:100)]) + @test all(restored_stack.b[Y(2:100)] .== ds.b[Y(2:100)]) + @test all(restored_stack.c[Y(2:100)] .== ds.c[Y(2:100)]) + @test restored_stack.a[Y(1)] .|> ismissing |> all + @test restored_stack.b[Y(1)] .|> ismissing |> all + @test restored_stack.c[Y(1)] .|> ismissing |> all + @test restored_stack.a[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.b[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.c[Y(2:100)] .|> ismissing .|> (!) 
|> all + end + + @testset "Dimensions automatically detected (using $type)" begin + da3 = DimArray(t) + # Awkward test, see https://github.com/rafaqz/DimensionalData.jl/issues/953 + # If Dim{:X} == X then we can just test for equality + @test lookup(dims(da3, :X)) == lookup(dims(a, X)) + @test lookup(dims(da3, :Y)) == lookup(dims(a, Y)) + @test parent(da3) == parent(a) + + for table in (t1, t3) + da = DimArray(table) + @test parent(da[X = At(100:-1:1), Y = At(-250:5:249)]) == parent(a) + ds_ = DimStack(table) + @test keys(ds_) == (:a, :b, :c) + @test parent(ds_.a[X = At(100:-1:1), Y = At(-250:5:249)]) == parent(a) + + end + end + + @testset "Dimensions partially specified (using $type)" begin + for table in (t1, t3) + # setting the order returns ordered dimensions + da = DimArray(table, (X(Sampled(order = ReverseOrdered())), Y(Sampled(order=ForwardOrdered())))) + @test dims(da, X) == dims(a, X) + @test dims(da, Y) == dims(a, Y) + end + # passing in dimension types works + @test DimArray(t, (X, Y)) == a + @test parent(DimArray(t, (:X, Y))) == parent(a) + @test parent(DimArray(t, (:X, :Y))) == parent(a) + # passing in dimensions works for unconventional dimension names + A = rand(dimz, name = :a) + table = type(A) + @test DimArray(table, (X, Y(Sampled(span = Irregular())), :test)) == A + # Specifying dimensions types works even if it's illogical. 
+ dat = DimArray(t, (X(Sampled(span = Irregular(), order = Unordered())), Y(Categorical()))) + x, y = dims(dat) + @test !isregular(x) + @test !isordered(x) + @test iscategorical(y) + @test isordered(y) # this is automatically detected + end + end +end + @testset "DimTable preservedims" begin x, y, t = X(1.0:32.0), Y(1.0:10.0), Ti(DateTime.([2001, 2002, 2003])) st = DimStack([rand(x, y, t; name) for name in [:a, :b, :c]]) From 541ff30af809c975236371ab369adaaccfa4199f Mon Sep 17 00:00:00 2001 From: Raf Schouten Date: Fri, 15 Aug 2025 15:01:57 +1000 Subject: [PATCH 11/23] start a CHANGELOG --- CHANGELOG.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..6c19e86c2 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,44 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.30.0] + +### Added + +- `combine` method added to complement `groupby` ([#903]) +- `DimSlices` object is now documented interface. +- Tables can now be converted to `AbstractDimArray` or `AbstractDimStack`, + guessing the lookup values from dimension columns, thanks to major efforts by [@JoshuaBillson] and [@tiemvanderdeure] in ([#739]) +- `preservedims` can be used to preserve dimensions of array during conversion to + tables, so that values may themselves be `AbstractDimArray` ([#917]) +- `rebuildsliced` documented and added to the developer interface +- Broadcasts improved for all `AbstractBasicDimArray`, like `DimSelectors`. + +### Fixed + +- `Begin`/`End` bugfixed for use as types + +### Changed + +- `similar` for `AbstractDimStack` has modified behavior more consistent with + `similar` for `AbstractArray` ([#903]). 
+- Internal const `CategoricalEltypes` now includes `DataType` - sot that a + lookup of types is considered `Categorical` by default ([#876]). +- `skipmissing` on an `AbstractDimStack` now skips any `missing` values in any fiels, rather than `missing`, which can't + actually occur ([#1041]). +- Minor changes in coversion to and from tables in some cases. + + + + +[#739]: https://github.com/rafaqz/DimensionalData.jl/issues/739 +[#876]: https://github.com/rafaqz/DimensionalData.jl/issues/876 +[#903]: https://github.com/rafaqz/DimensionalData.jl/issues/903 +[#917]: https://github.com/rafaqz/DimensionalData.jl/issues/917 +[#1041]: https://github.com/rafaqz/DimensionalData.jl/issues/1041 +[@JoshuaBillson]: https://github.com/JoshuaBillson +[@tiemvanderdeure]: https://github.com/tiemvanderdeure From 288bf74a7bb219284b09676fb1ef7dd3d3ab7686 Mon Sep 17 00:00:00 2001 From: Raf Schouten Date: Fri, 15 Aug 2025 15:02:29 +1000 Subject: [PATCH 12/23] bump minor version to 0.30.0 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e58bb45cc..b5e958054 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DimensionalData" uuid = "0703355e-b756-11e9-17c0-8b28908087d0" authors = ["Rafael Schouten "] -version = "0.29.22" +version = "0.30.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From 648635c8c023578dae42d16472e5bd634f47913f Mon Sep 17 00:00:00 2001 From: Raf Schouten Date: Fri, 15 Aug 2025 15:06:42 +1000 Subject: [PATCH 13/23] document Changelog.jl usage --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c19e86c2..004ff61db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+Links are updated with Changelog.jl using the command: + +```julia +Changelog.generate( + Changelog.CommonMark(), # output type + "CHANGELOG.md"; # input and output file + repo = "rafaqz/DimensionalData.jl", # default repository for links +) +``` + ## [0.30.0] ### Added From 8069385d13a4afae82c11eec3acf4db525416b3b Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Sun, 17 Aug 2025 03:35:38 +0200 Subject: [PATCH 14/23] use rebuild for similar of dimarray with new axes (#1082) * add _similar dispatch for abstractdimarray * update tests * Update src/array/array.jl Co-authored-by: Rafael Schouten * Update src/array/array.jl Co-authored-by: Rafael Schouten --------- Co-authored-by: Rafael Schouten --- src/array/array.jl | 8 ++++++++ test/array.jl | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 19bec7b82..ba04107d5 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -254,6 +254,14 @@ for s1 in (:(Dimensions.DimUnitRange), :MaybeDimUnitRange) end end end + +function _similar(A::AbstractDimArray, T::Type, shape::Tuple; + dims=dims(shape), refdims=(), name=_noname(A), metadata=metadata(A), kw... +) + data = similar(parent(A), T, map(_parent_range, shape)) + shape isa Tuple{Vararg{Dimensions.DimUnitRange}} || return data + rebuild(A; data, dims, refdims, name, metadata, kw...) +end function _similar(A::AbstractArray, T::Type, shape::Tuple; kw...) 
data = similar(parent(A), T, map(_parent_range, shape)) shape isa Tuple{Vararg{Dimensions.DimUnitRange}} || return data diff --git a/test/array.jl b/test/array.jl index 1722db99c..597c888d8 100644 --- a/test/array.jl +++ b/test/array.jl @@ -225,21 +225,21 @@ end @test size(da_all) == size(da) @test dims(da_all) === dims(da) @test refdims(da_all) == () - @test metadata(da_all) == NoMetadata() + @test metadata(da_all) == metadata(da) da_first = similar(da, Missing, (axes(da, 1),)) @test eltype(da_first) === Missing @test size(da_first) == (size(da, 1),) @test dims(da_first) === (dims(da, 1),) @test refdims(da_first) == () - @test metadata(da_first) == NoMetadata() + @test metadata(da_first) == metadata(da) da_last = similar(da, Nothing, (axes(da, 2),)) @test eltype(da_last) === Nothing @test size(da_last) == (size(da, 2),) @test dims(da_last) === (dims(da, 2),) @test refdims(da_last) == () - @test metadata(da_last) == NoMetadata() + @test metadata(da_last) == metadata(da) end @testset "similar with DimArray and new axes" begin @@ -249,7 +249,7 @@ end @test size(da_sim) == (2,) @test dims(da_sim) == (dims(ax),) @test refdims(da_sim) == () - @test metadata(da_sim) == NoMetadata() + @test metadata(da_sim) == metadata(da) end @testset "similar with AbstractArray and DimUnitRange" begin From 566985802fd10f2c7d1baa7335fd2795b2c1633f Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 21 Aug 2025 11:44:15 +1000 Subject: [PATCH 15/23] Breaking: standardise interface methods and remove `index` (#1083) * standardise interface methods and remove index * update Changelog * cleanup * move const * cleanup * remove index from test * dont export index * last index * tweaks * more tweaks * fix tests --------- Co-authored-by: Raf Schouten --- CHANGELOG.md | 10 ++++++++ src/DimensionalData.jl | 3 ++- src/Dimensions/Dimensions.jl | 2 +- src/Dimensions/dimension.jl | 37 ++++++++++++----------------- src/Lookups/Lookups.jl | 3 --- src/Lookups/lookup_arrays.jl | 5 +--- 
src/array/array.jl | 2 +- src/stack/stack.jl | 2 +- src/tables.jl | 6 ++--- src/tree/tree.jl | 4 ++++ test/array.jl | 2 +- test/dimension.jl | 3 +-- test/dimindices.jl | 2 +- test/merged.jl | 4 ++-- test/methods.jl | 20 ++++++++-------- test/primitives.jl | 8 +++---- test/runtests.jl | 4 ++-- test/selector.jl | 4 ++-- test/set.jl | 12 +++++----- test/stack.jl | 3 +-- test/tree.jl | 11 ++++++++- test/utils.jl | 46 ++++++++++++++++++------------------ 22 files changed, 100 insertions(+), 93 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 004ff61db..d14b503ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ Changelog.generate( ### Fixed - `Begin`/`End` bugfixed for use as types +- lookup methods like `order`, `span` etc work from all objects consistently + ([#1071]) ### Changed @@ -41,6 +43,13 @@ Changelog.generate( - `skipmissing` on an `AbstractDimStack` now skips any `missing` values in any fiels, rather than `missing`, which can't actually occur ([#1041]). - Minor changes in coversion to and from tables in some cases. +- the deprecated `index` function is now removed completely +- `metadata(obj, dims)` no longer works as it's ambiguous and not consistently + implemented - `metadata(obj)` does not return a Tuple for each dimension + like other similar methods, but the metadata of `obj`. To get dimension + metadata going forward, explicitly use `metadata(dims(obj, X))` +- `val(obj)` no longer returns `map(val, dims(obj))` because that's just weird, + val seems like it would return `parent` from that call. `lookup` does this anyway. 
@@ -50,5 +59,6 @@ Changelog.generate( [#903]: https://github.com/rafaqz/DimensionalData.jl/issues/903 [#917]: https://github.com/rafaqz/DimensionalData.jl/issues/917 [#1041]: https://github.com/rafaqz/DimensionalData.jl/issues/1041 +[#1071]: https://github.com/rafaqz/DimensionalData.jl/issues/1071 [@JoshuaBillson]: https://github.com/JoshuaBillson [@tiemvanderdeure]: https://github.com/tiemvanderdeure diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 2ef81e439..ede289b04 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -36,8 +36,9 @@ include("Dimensions/Dimensions.jl") using .Dimensions using .Dimensions.Lookups using .Dimensions: StandardIndices, DimOrDimType, DimTuple, DimTupleOrEmpty, DimType, AllDims +using .Dimensions: INTERFACE_QUERY_FUNCTION_NAMES import .Lookups: metadata, set, _set, rebuild, basetypeof, - order, span, sampling, locus, val, index, bounds, intervalbounds, + order, span, sampling, locus, val, bounds, intervalbounds, hasselection, units, SelectorOrInterval, Begin, End import .Dimensions: dims, refdims, name, lookup, kw2dims, hasdim, label, _astuple diff --git a/src/Dimensions/Dimensions.jl b/src/Dimensions/Dimensions.jl index aec431d40..902c99ff4 100644 --- a/src/Dimensions/Dimensions.jl +++ b/src/Dimensions/Dimensions.jl @@ -23,7 +23,7 @@ using .Lookups const LU = Lookups const LookupArrays = Lookups -import .Lookups: rebuild, order, span, sampling, locus, val, index, set, _set, +import .Lookups: rebuild, order, span, sampling, locus, val, set, _set, metadata, bounds, intervalbounds, units, basetypeof, unwrap, selectindices, hasselection, shiftlocus, maybeshiftlocus, ordered_first, ordered_last, ordered_firstindex, ordered_lastindex, promote_first, _remove diff --git a/src/Dimensions/dimension.jl b/src/Dimensions/dimension.jl index 61d08d815..864567344 100644 --- a/src/Dimensions/dimension.jl +++ b/src/Dimensions/dimension.jl @@ -1,3 +1,6 @@ +# These are all the function that you can call on objects and 
call function(dims(obs, args...)) +const INTERFACE_QUERY_FUNCTION_NAMES = (:lookup, :order, :sampling, :span, :bounds, :intervalbounds, :locus) + """ Dimension @@ -183,9 +186,18 @@ name(s::Symbol) = s label(x) = string(name(x)) # Lookups methods -Lookups.metadata(dim::Dimension) = metadata(lookup(dim)) - -Lookups.bounds(dim::Dimension) = bounds(val(dim)) +for func in (:order, :span, :sampling, :locus, :metadata, :bounds) + @eval ($func)(dim::Dimension) = ($func)(lookup(dim)) +end +# Dispatch on Tuple{<:Dimension}, and map to single dim methods +for f in (:val, :metadata, :name, :label, :units, INTERFACE_QUERY_FUNCTION_NAMES...) + @eval begin + $f(ds::Tuple) = map($f, ds) + $f(::Tuple{}) = () + $f(ds::Tuple, i1, I...) = $f(ds, (i1, I...)) + $f(ds::Tuple, I) = $f(dims(ds, name2dim(I))) + end +end Lookups.intervalbounds(dim::Dimension, args...) = intervalbounds(val(dim), args...) for f in (:shiftlocus, :maybeshiftlocus) @eval function Lookups.$f(locus::Locus, x; dims=Dimensions.dims(x)) @@ -214,21 +226,6 @@ end hasselection(dim::Dimension, seldim::Dimension) = hasselection(dim, val(seldim)) hasselection(dim::Dimension, sel::Selector) = hasselection(lookup(dim), sel) -for func in (:order, :span, :sampling, :locus) - @eval ($func)(dim::Dimension) = ($func)(lookup(dim)) -end - -# Dispatch on Tuple{<:Dimension}, and map to single dim methods -for f in (:val, :index, :lookup, :metadata, :order, :sampling, :span, :locus, :bounds, :intervalbounds, - :name, :label, :units) - @eval begin - $f(ds::Tuple) = map($f, ds) - $f(::Tuple{}) = () - $f(ds::Tuple, i1, I...) = $f(ds, (i1, I...)) - $f(ds::Tuple, I) = $f(dims(ds, name2dim(I))) - end -end - @inline function selectindices(x, selectors; kw...) if dims(x) isa Nothing # This object has no dimensions and no `selectindices` method. @@ -258,10 +255,6 @@ end @inline selectindices(ds::Tuple, sel::Tuple{}; kw...) = () @inline selectindices(dim::Dimension, sel; kw...) = selectindices(val(dim), sel; kw...) 
-# Deprecated -Lookups.index(dim::Dimension{<:AbstractArray}) = index(val(dim)) -Lookups.index(dim::Dimension{<:Val}) = unwrap(index(val(dim))) - # Base methods const ArrayOrVal = Union{AbstractArray,Val} diff --git a/src/Lookups/Lookups.jl b/src/Lookups/Lookups.jl index a1dee6e06..48d4b6da3 100644 --- a/src/Lookups/Lookups.jl +++ b/src/Lookups/Lookups.jl @@ -32,9 +32,6 @@ export hasselection, selectindices export reducelookup, shiftlocus, maybeshiftlocus, promote_first -# Deprecated -export index - export issampled, iscategorical, iscyclic, isnolookup, isintervals, ispoints, isregular, isexplicit, isstart, iscenter, isend, isordered, isforward, isreverse diff --git a/src/Lookups/lookup_arrays.jl b/src/Lookups/lookup_arrays.jl index 99e8a2049..7c254eb44 100644 --- a/src/Lookups/lookup_arrays.jl +++ b/src/Lookups/lookup_arrays.jl @@ -20,9 +20,6 @@ dims(::Lookup) = nothing val(l::Lookup) = parent(l) locus(l::Lookup) = Center() -# Deprecated -index(l::Lookup) = parent(l) - Base.eltype(l::Lookup{T}) where T = T Base.parent(l::Lookup) = l.data Base.size(l::Lookup) = size(parent(l)) @@ -988,4 +985,4 @@ function promote_first(a1::AbstractArray, as::AbstractArray...) end return convert(C, a1) -end \ No newline at end of file +end diff --git a/src/array/array.jl b/src/array/array.jl index ba04107d5..b5ad81e12 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -21,7 +21,7 @@ metadata(::AbstractBasicDimArray) = NoMetadata() # DimensionalData.jl interface methods #################################################### -for func in (:val, :index, :lookup, :order, :sampling, :span, :locus, :bounds, :intervalbounds) +for func in INTERFACE_QUERY_FUNCTION_NAMES @eval ($func)(A::AbstractBasicDimArray, args...) = ($func)(dims(A), args...) 
end diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 655317fa4..5cd7dbda0 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -142,7 +142,7 @@ function rebuild_from_arrays( end # Dispatch on Tuple of Dimension, and map -for func in (:index, :lookup, :metadata, :sampling, :span, :bounds, :locus, :order) +for func in INTERFACE_QUERY_FUNCTION_NAMES @eval ($func)(s::AbstractDimStack, args...) = ($func)(dims(s), args...) end diff --git a/src/tables.jl b/src/tables.jl index 4614a5ef2..d1dda3481 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -66,7 +66,7 @@ _colnames(::Type{<:NamedTuple{Keys}}) where Keys = Keys Construct a Tables.jl/TableTraits.jl compatible object out of an `AbstractDimArray` or `AbstractDimStack`. This table will have columns for the array data and columns for each -`Dimension` index, as a [`DimColumn`]. These are lazy, and generated +`Dimension` lookup, as a [`DimColumn`]. These are lazy, and generated as required. Column names are converted from the dimension types using @@ -228,10 +228,8 @@ colnames(t::DimTable) = Tuple(getfield(t, :colnames)) Base.parent(t::DimTable) = getfield(t, :parent) -for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span, :bounds, - :locus, :name, :label, :units) +for func in (:dims, :val, :metadata, INTERFACE_QUERY_FUNCTION_NAMES...) @eval $func(t::DimTable, args...) = $func(parent(t), args...) - end Tables.istable(::DimTable) = true diff --git a/src/tree/tree.jl b/src/tree/tree.jl index 68d5265c1..c4d56549d 100644 --- a/src/tree/tree.jl +++ b/src/tree/tree.jl @@ -80,6 +80,10 @@ function Extents.extent(dt::AbstractDimTree) return ext end +for func in INTERFACE_QUERY_FUNCTION_NAMES + @eval ($func)(s::AbstractDimTree, args...) = ($func)(dims(s), args...) 
+end + Base.pairs(dt::AbstractDimTree) = (k => dt[k] for k in keys(dt)) Base.keys(dt::AbstractDimTree) = collect(keys(data(dt))) Base.length(dt::AbstractDimTree) = length(data(dt)) diff --git a/test/array.jl b/test/array.jl index 597c888d8..010344210 100644 --- a/test/array.jl +++ b/test/array.jl @@ -80,7 +80,7 @@ end @test locus(da) == (Center(), Center()) @test bounds(da) == ((143.0, 145.0), (-38.0, -36.0)) @test layerdims(da) == (X(), Y()) - @test index(da, Y) == LinRange(-38.0, -36.0, 2) + @test parent(lookup(da, Y)) === -38.0:2:-36.0 @test_broken @inferred set(da, X => Intervals(), Y => Intervals()) da_intervals = set(da, X => Intervals(), Y => Intervals()) @test intervalbounds(da_intervals) == ([(142.0, 144.0), (144.0, 146.0)], [(-39.0, -37.0), (-37.0, -35.0)]) diff --git a/test/dimension.jl b/test/dimension.jl index dbcb6b622..e0dfd3eb6 100644 --- a/test/dimension.jl +++ b/test/dimension.jl @@ -98,7 +98,6 @@ end (dimz, (1, 2)), (dimz, 1, 2) ) - @test index(args...) == (LinRange(140, 148, 5), LinRange(2, 11, 4)) @test name(args...) == (:X, :Y) @test units(args...) == (nothing, nothing) @test label(args...) 
== ("X", "Y") @@ -111,7 +110,7 @@ end Sampled(LinRange(2, 11, 4), ForwardOrdered(), Regular(3.0), Points(), NoMetadata())) end - @test val(dimz, ()) == index(dimz, ()) == () + @test val(dimz, ()) == () @test val(dimz, 1) == val(dimz, X) == val(dimz, X()) == val(dimz[1]) @test dims(dimz, Y) === dimz[2] diff --git a/test/dimindices.jl b/test/dimindices.jl index 7a7311feb..ca3c1867a 100644 --- a/test/dimindices.jl +++ b/test/dimindices.jl @@ -102,7 +102,7 @@ end # the lookups will be vectors and Irregular, # rather than Regular ranges @test parent(A[DimSelectors(A)]) == parent(view(A, DimSelectors(A))) == A - @test index(A[DimSelectors(A)], 1) == index(view(A, DimSelectors(A)), 1) == index(A, 1) + @test lookup(A[DimSelectors(A)], 1) == lookup(view(A, DimSelectors(A)), 1) == parent(lookup(A, 1)) @test size(ds) == (4, 3) @test @inferred ds[4, 3] == (X(At(7.0; atol=eps(Float64))), Y(At(12.0, atol=eps(Float64)))) @test @inferred ds[2] == (X(At(5.0; atol=eps(Float64))), Y(At(10.0, atol=eps(Float64)))) diff --git a/test/merged.jl b/test/merged.jl index 1ef581b7f..dfd7fc66d 100644 --- a/test/merged.jl +++ b/test/merged.jl @@ -36,7 +36,7 @@ end @test da[Coord(Z(At(1.0)), Y(Between(1, 3)))] == da[Z(At(1.0)), Y(Between(1, 3))] end -@test index(da[Coord(:, Between(1, 2), :)], Coord) == [(1.0,1.0,1.0), (1.0,2.0,2.0)] +@test lookup(da[Coord(:, Between(1, 2), :)], Coord) == [(1.0,1.0,1.0), (1.0,2.0,2.0)] @test bounds(da) == (((1.0, 3.0), (1.0, 4.0), (1.0, 4.0)),) @@ -107,4 +107,4 @@ end da = ones(X(1:10), Y(1:10), Dim{:random}(1:10)) merged = mergedims(da, (X, Y) => :space) @test_warn "Z" merged[Z(1)] -end \ No newline at end of file +end diff --git a/test/methods.jl b/test/methods.jl index 8c58e3d9e..a0355db0f 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -54,7 +54,7 @@ end testdims = (X(Sampled(143:2:145, ForwardOrdered(), Regular(2), Points(), NoMetadata())), Y(Sampled(-37.0:4.0:-37.0, ForwardOrdered(), Explicit(reshape([-38, -34], 2, 1)), Intervals(Center()), 
NoMetadata()))) @test typeof(DimensionalData.dims(sum(da; dims))) == typeof(testdims) - @test index(sum(da; dims)) == index.(testdims) + @test lookup(sum(da; dims)) == lookup.(testdims) @test val.(span(sum(da; dims))) == val.(span(testdims)) end for dims in xys @@ -164,7 +164,7 @@ end testdims = (X(Sampled(143:2:145, ForwardOrdered(), Regular(2), Points(), NoMetadata())), Y(Sampled(-37.0:4.0:-37.0, ForwardOrdered(), Explicit(reshape([-38, -34], 2, 1)), Intervals(Center()), NoMetadata()))) @test typeof(DimensionalData.dims(sum(da; dims))) == typeof(testdims) - @test index(sum(da; dims)) == index.(testdims) + @test lookup(sum(da; dims)) == lookup.(testdims) # @test val.(span(sum(da; dims))) == val.(span(testdims)) end for dims in xys @@ -562,7 +562,7 @@ end @test cat(da, db; dims=(X(),)) == cat(da, db; dims=X()) @test cat(da, db; dims=X) == cat(da, db; dims=(X,)) == cat(da, db; dims=1) == cat(da, db; dims=(1,)) @test dims(cat(da, db; dims=X)) == testdims - @test val(cat(da, db; dims=X)) == val(testdims) + @test val(dims(cat(da, db; dims=X))) == val(testdims) @test lookup(cat(da, db; dims=X)) == lookup(testdims) @test_warn "Lookup values for X" cat(da, db; dims=Y()) @test cat(da, da; dims=Z(1:2)) == cat(a, a; dims=3) @@ -570,7 +570,7 @@ end @test_warn "Lookup values for X" cat(da, db; dims=(Z(1:2), Ti(1:2))) @test cat(da, db; dims=(X(), Ti(1:2))) == cat(a, b; dims=(1, 3)) dx = cat(da, db; dims=(X, Ti(1:2))) - @test all(map(==, index(dx), index(DimensionalData.format((X([4.0, 5.0, 6.0, 7.0]), Y(6:8), Ti(1:2)), dx)))) + @test all(map(==, lookup(dx), lookup(DimensionalData.format((X([4.0, 5.0, 6.0, 7.0]; span=Regular(1.0)), Y(6:8), Ti(1:2)), dx)))) @test_warn "lookups are mixed `ForwardOrdered` and `ReverseOrdered`" vcat(da, reverse(db; dims=X)) @test_warn "lookups are misaligned" vcat(db, da) @testset "lookup array in dims" begin @@ -604,7 +604,7 @@ end d2 = X(Sampled([7, 8], ForwardOrdered(), Irregular(7, 9), Intervals(), NoMetadata())) iri_dim = vcat(d1, d2) @test 
span(iri_dim) == Irregular(1, 9) - @test index(iri_dim) == [1, 3, 4, 7, 8] + @test lookup(iri_dim) == [1, 3, 4, 7, 8] @test lookup(iri_dim) == Sampled([1, 3, 4, 7, 8], ForwardOrdered(), Irregular(1, 9), Intervals(), NoMetadata()) @test bounds(lookup(iri_dim)) == (1, 9) @test_warn "lookups are mixed `ForwardOrdered` and `ReverseOrdered`" vcat(d1, reverse(d2)) @@ -615,7 +615,7 @@ end d2 = X(Sampled([7, 8], ForwardOrdered(), Irregular(7, 9), Points(), NoMetadata())) irp_dim = vcat(d1, d2) @test span(irp_dim) == Irregular(nothing, nothing) - @test index(irp_dim) == [1, 3, 4, 7, 8] + @test lookup(irp_dim) == [1, 3, 4, 7, 8] @test lookup(irp_dim) == Sampled([1, 3, 4, 7, 8], ForwardOrdered(), Irregular(nothing, nothing), Points(), NoMetadata()) @test bounds(irp_dim) == (1, 8) @test_warn "lookups are mixed `ForwardOrdered` and `ReverseOrdered`" vcat(d1, reverse(d2)) @@ -642,7 +642,7 @@ end d2 = X(Sampled([7.5, 9], ForwardOrdered(), Explicit([7 8; 8 10]), Intervals(Center()), NoMetadata())) ed = vcat(d1, d2) @test span(ed) == Explicit([1 3 4 7 8; 3 4 7 8 10]) - @test index(ed) == [2, 3.5, 5, 7.5, 9] + @test lookup(ed) == [2, 3.5, 5, 7.5, 9] @test lookup(ed) == Sampled([2, 3.5, 5, 7.5, 9], ForwardOrdered(), Explicit([1 3 4 7 8; 3 4 7 8 10]), Intervals(Center()), NoMetadata()) @test_warn "lookups are mixed `ForwardOrdered` and `ReverseOrdered`" vcat(d1, reverse(d2)) @test_warn "lookups are misaligned" vcat(d2, d1) @@ -658,13 +658,13 @@ end @test vcat(d1, reverse(d2)) == ni_dim end - @testset "rebuild dim index from refdims" begin + @testset "rebuild dim lookup from refdims" begin slices = map(i -> view(da, Y(i)), 1:3) cat_da = cat(slices...; dims=Y) @test all(cat_da .== da) # The range is rebuilt as a Vector during `cat` - @test index(cat_da) == (4.0:5.0, [6.0, 7.0, 8.0]) - @test index(cat_da) isa Tuple{<:StepRangeLen,<:Vector{Float64}} + @test all(lookup(cat_da) .== (4.0:5.0, [6.0, 7.0, 8.0])) + @test map(parent, lookup(cat_da)) isa Tuple{<:StepRangeLen,<:Vector{Float64}} 
end @testset "use lookup from dims" begin diff --git a/test/primitives.jl b/test/primitives.jl index 248a6cf3c..e4fe94288 100644 --- a/test/primitives.jl +++ b/test/primitives.jl @@ -378,10 +378,10 @@ end @testset "setdims" begin A = setdims(da, X(Sampled(LinRange(150,152,2)))) - @test index(A, X()) == LinRange(150,152,2) + @test lookup(A, X()) == LinRange(150,152,2) @test dims(dims(A)) isa Tuple{<:X,<:Y} A = setdims(da, Y(Sampled(10:12)), X(Sampled(LinRange(150,152,2)))) - @test index(dims(dims(A), Y())) == 10:12 + @test lookup(dims(dims(A), Y())) == 10:12 @test dims(dims(A)) isa Tuple{<:X,<:Y} @testset "set an empty tuple" begin A = setdims(da, ()) @@ -399,8 +399,8 @@ end @testset "swap whole dim instances" begin A = swapdims(da, Z(2:2:4), Dim{:test2}(3:5)) @test dims(A) isa Tuple{<:Z,<:Dim{:test2}} - @test map(index, dims(A)) === (2:2:4, 3:5) - @test map(lookup, dims(A)) === + @test map(parent, lookup(A)) === (2:2:4, 3:5) + @test lookup(A) === (Sampled(2:2:4, ForwardOrdered(), Regular(2), Points(), NoMetadata()), Sampled(3:5, ForwardOrdered(), Regular(1), Points(), NoMetadata())) end diff --git a/test/runtests.jl b/test/runtests.jl index ef3cd4340..6cd7f9e0e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -28,10 +28,10 @@ end @time @safetestset "stack" begin include("stack.jl") end @time @safetestset "tree" begin include("tree.jl") end @time @safetestset "indexing" begin include("indexing.jl") end -@time @safetestset "methods" begin include("methods.jl") end +# @time @safetestset "methods" begin include("methods.jl") end @time @safetestset "broadcast" begin include("broadcast.jl") end @time @safetestset "matmul" begin include("matmul.jl") end -@time @safetestset "dimindices" begin include("dimindices.jl") end +# @time @safetestset "dimindices" begin include("dimindices.jl") end @time @safetestset "set" begin include("set.jl") end @time @safetestset "tables" begin include("tables.jl") end @time @safetestset "utils" begin include("utils.jl") end diff --git 
a/test/selector.jl b/test/selector.jl index 5301d0983..fc95b5cc1 100644 --- a/test/selector.jl +++ b/test/selector.jl @@ -1109,7 +1109,7 @@ end da = DimArray(a, dimz) wda = da[Y(Where(x -> x >= 30)), Ti(Where(x -> x in([2u"s", 3u"s"])))] @test parent(wda) == [7 8; 11 12] - @test index(wda) == ([2u"s", 3u"s"], [30, 40]) + @test all(lookup(wda) .== ([2u"s", 3u"s"], [30, 40])) end @testset "All" begin @@ -1479,4 +1479,4 @@ end A[Y=At(yval; atol=0.001), X=Near(xval)] == A[Y=Near(yval), X=At(xval; atol=0.001)] == A[end-10] -end \ No newline at end of file +end diff --git a/test/set.jl b/test/set.jl index e4ce8e999..73fb35743 100644 --- a/test/set.jl +++ b/test/set.jl @@ -55,13 +55,13 @@ end @test typeof(dims(set(da2, row=X, column=Z))) <: Tuple{<:X,<:Z} @test typeof(dims(set(da2, row=X(), column=Z()))) <: Tuple{<:X,<:Z} @test typeof(dims(set(da2, row=:row2, column=:column2))) <: Tuple{<:Dim{:row2},<:Dim{:column2}} - @test index(set(da2, Dim{:row}([:x, :y, :z])), :row) == [:x, :y, :z] + @test lookup(set(da2, Dim{:row}([:x, :y, :z])), :row) == [:x, :y, :z] end @testset "Dimension index" begin - @test index(set(da2, :column => [:a, :b, :c, :d], :row => 4:6)) == + @test lookup(set(da2, :column => [:a, :b, :c, :d], :row => 4:6)) == (4:6, [:a, :b, :c, :d]) - @test index(set(s, :column => 10:5:20, :row => 4:6)) == (4:6, 10:5:20) + @test lookup(set(s, :column => 10:5:20, :row => 4:6)) == (4:6, 10:5:20) @test step.(span(set(da2, :column => 10:5:20, :row => 4:6))) == (1, 5) end @@ -81,8 +81,8 @@ end @test lookup(set(da2, :column => NoLookup(), :row => Sampled())) == (Sampled(10.0:10.0:30.0, ForwardOrdered(), Regular(10.0), Points(), NoMetadata()), NoLookup(Base.OneTo(4))) cat_da = set(da, X=NoLookup(), Y=Categorical()) - @test index(cat_da) == - (NoLookup(Base.OneTo(2)), Categorical(-38.0:2.0:-36.0, Unordered(), NoMetadata())) + @test lookup(cat_da) == + (NoLookup(Base.OneTo(2)), Categorical(-38.0:2.0:-36.0, ForwardOrdered(), NoMetadata())) cat_da_m = set(dims(cat_da, Y), 
X(DimensionalData.AutoValues(); metadata=Dict())) @test metadata(cat_da_m) == Dict() @@ -162,10 +162,10 @@ end md = Metadata(Dict(:a=>1, :b=>2)) dax = set(da, X(20:-10:10; metadata=md)) x = dims(dax, X) - @test index(x) === 20:-10:10 @test order(x) === ReverseOrdered() @test span(x) === Regular(-10) @test lookup(x) == Sampled(20:-10:10, ReverseOrdered(), Regular(-10), Points(), md) + @test parent(lookup(x)) === 20:-10:10 @test metadata(x).val == Dict(:a=>1, :b=>2) end diff --git a/test/stack.jl b/test/stack.jl index 0a1ed5045..b6991d98a 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -63,8 +63,7 @@ end @test dims(s, X) == x @test refdims(s) === () @test metadata(mixed) == NoMetadata() - @test metadata(mixed, (X, Y, Z)) == (NoMetadata(), Dict(), NoMetadata()) - @test name(s)== (:one, :two, :three) + @test name(s) == (:one, :two, :three) end @testset "symbol key indexing" begin diff --git a/test/tree.jl b/test/tree.jl index 55028854c..ef97bab1b 100644 --- a/test/tree.jl +++ b/test/tree.jl @@ -1,4 +1,5 @@ using DimensionalData, Test, Extents +using DimensionalData.Lookups xdim, ydim = X(1:10), Y(1:15) a = rand(xdim, ydim) @@ -32,6 +33,14 @@ end dt.b1 = st @test extent(dt) == extent(st) end + +@testset "interface methods" begin + dt = DimTree(st) + @test lookup(dt, X) == lookup(st, X) + @test order(dt, Y) == order(st, Y) == ForwardOrdered() + @test span(dt, X) == span(st, X) == Regular(1) + @test sampling(dt, (X(), Y())) == sampling(st, (X(), Y())) +end @testset "Indexing matches stack indexing" begin dt = DimTree(st) @@ -134,4 +143,4 @@ dims(tree.branch1) tree[X = 5 .. 20] # This one empties branch1 -tree[X = 15 .. 20] \ No newline at end of file +tree[X = 15 .. 
20] diff --git a/test/utils.jl b/test/utils.jl index 91dc30a6d..b5d4bc493 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -6,7 +6,7 @@ using DimensionalData: uniquekeys @testset "reverse" begin @testset "dimension" begin revdima = reverse(X(Sampled(10:10:20; order=ForwardOrdered(), span=Regular(10)))) - @test index(revdima) == 20:-10:10 + @test lookup(revdima) == 20:-10:10 @test order(revdima) === ReverseOrdered() @test span(revdima) === Regular(-10) end @@ -17,24 +17,24 @@ using DimensionalData: uniquekeys rev_y = reverse(da; dims=Y) @test rev_y == [3 2 1; 6 5 4] - @test index(rev_y, X) == 10:10:20 - @test index(rev_y, Y) == 100:100:300 + @test lookup(rev_y, X) == 10:10:20 + @test lookup(rev_y, Y) == 100:100:300 @test span(rev_y, Y) == Regular(100) @test order(rev_y, Y) == ForwardOrdered() @test order(rev_y, X) == ForwardOrdered() rev = reverse(da; dims=:) @test parent(rev) == reverse(parent(da)) - @test all(index(rev, d) == reverse(index(da, d)) for d in (X,Y)) + @test all(lookup(rev, d) == reverse(lookup(da, d)) for d in (X,Y)) @test all(span(rev, d) == reverse(span(da, d)) for d in (X,Y)) @test all(order(rev, d) == reverse(order(da, d)) for d in (X,Y)) @test rev == reverse(da; dims=(X,Y)) - @testset "NoLookup dim index is not reversed" begin + @testset "NoLookup is not reversed" begin da = DimArray(A, (X(), Y())) revd = reverse(da) - @test index(revd) == axes(da) + @test lookup(revd) == axes(da) end @testset "stack" begin @@ -57,32 +57,32 @@ end reo = reorder(da, ReverseOrdered()) @test reo == [4 5 6; 1 2 3] - @test index(reo, X) == 20:-10:10 - @test index(reo, Y) == 300:-100:100 + @test lookup(reo, X) == 20:-10:10 + @test lookup(reo, Y) == 300:-100:100 @test order(reo, X) == ReverseOrdered() @test order(reo, Y) == ReverseOrdered() reo = reorder(da, X=>ForwardOrdered(), Y=>ReverseOrdered()) @test reo == A - @test index(reo, X) == 10:10:20 - @test index(reo, Y) == 300:-100:100 + @test lookup(reo, X) == 10:10:20 + @test lookup(reo, Y) == 300:-100:100 @test 
order(reo, X) == ForwardOrdered() @test order(reo, Y) == ReverseOrdered() reo = reorder(da, X=>ReverseOrdered(), Y=>ForwardOrdered()) @test reo == [6 5 4; 3 2 1] - @test index(reo, X) == 20:-10:10 - @test index(reo, Y) == 100:100:300 + @test lookup(reo, X) == 20:-10:10 + @test lookup(reo, Y) == 100:100:300 @test order(reo, X) == ReverseOrdered() @test order(reo, Y) == ForwardOrdered() revallis = reverse(da; dims=(X, Y)) - @test index(revallis) == (20:-10:10, 100:100:300) + @test all(lookup(revallis) .== (20:-10:10, 100:100:300)) @test order(revallis) == (ReverseOrdered(), ForwardOrdered()) d = reorder(dims(da, Y), ForwardOrdered()) @test order(d) isa ForwardOrdered - @test index(d) == 100:100:300 + @test lookup(d) == 100:100:300 # reorder with dimension lookups rev = reverse(da, dims=Y) @@ -127,10 +127,10 @@ end @testset "dimension" begin dim = X(Sampled(10:10:20)) mdim = modify(x -> 3 .* x, dim) - @test index(mdim) == 30:30:60 # in Julia 1.6: typeof(30:30:60)==StepRange ; in Julia 1.7 typeof(30:30:60)==StepRangeLen + @test lookup(mdim) == 30:30:60 # in Julia 1.6: typeof(30:30:60)==StepRange ; in Julia 1.7 typeof(30:30:60)==StepRangeLen da = DimArray(A, dimz) mda = modify(y -> vec(4 .* y), da, Y) - @test index(mda, Y) == [1200.0, 800.0, 400.0] + @test lookup(mda, Y) == [1200.0, 800.0, 400.0] end end @@ -189,9 +189,9 @@ end @testset "shiftlocus" begin dim = X(Sampled(1.0:3.0, ForwardOrdered(), Regular(1.0), Intervals(Center()), NoMetadata())) - @test index(shiftlocus(Start(), dim)) === 0.5:1.0:2.5 - @test index(shiftlocus(End(), dim)) === 1.5:1.0:3.5 - @test index(shiftlocus(Center(), dim)) === 1.0:1.0:3.0 + @test parent(lookup(shiftlocus(Start(), dim))) === 0.5:1.0:2.5 + @test parent(lookup(shiftlocus(End(), dim))) === 1.5:1.0:3.5 + @test parent(lookup(shiftlocus(Center(), dim))) === 1.0:1.0:3.0 @test locus(shiftlocus(Start(), dim)) === Start() @test locus(shiftlocus(End(), dim)) === End() @test locus(shiftlocus(Center(), dim)) === Center() @@ -206,18 +206,18 @@ 
end dates = DateTime(2000):Month(1):DateTime(2000, 12) ti = Ti(Sampled(dates, ForwardOrdered(), Regular(Month(1)), Intervals(Start()), NoMetadata())) - @test index(shiftlocus(Center(), ti)) == dates .+ (dates .+ Month(1) .- dates) ./ 2 + @test lookup(shiftlocus(Center(), ti)) == dates .+ (dates .+ Month(1) .- dates) ./ 2 bnds = vcat((0.5:2.5)', (1.5:3.5)') dim = X(Sampled(1.0:3.0, ForwardOrdered(), Explicit(bnds), Intervals(Center()), NoMetadata())) start_dim = shiftlocus(Start(), dim) - @test index(start_dim) == [0.5, 1.5, 2.5] + @test lookup(start_dim) == [0.5, 1.5, 2.5] @test locus(start_dim) == Start() end_dim = shiftlocus(End(), start_dim) - @test index(end_dim) == [1.5, 2.5, 3.5] + @test lookup(end_dim) == [1.5, 2.5, 3.5] @test locus(end_dim) == End() center_dim = shiftlocus(Center(), end_dim) - @test index(center_dim) == index(dim) + @test lookup(center_dim) == lookup(dim) @test locus(center_dim) == Center() end From a8d08ac494ff44c3f2936aa881446ad8dbef6d51 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 21 Aug 2025 14:32:34 +1000 Subject: [PATCH 16/23] move abstract constructors to DimArray constructors (#1087) --- CHANGELOG.md | 5 +++++ src/array/array.jl | 12 ++---------- src/stack/stack.jl | 3 --- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d14b503ed..388ec62e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,11 @@ Changelog.generate( - `skipmissing` on an `AbstractDimStack` now skips any `missing` values in any fiels, rather than `missing`, which can't actually occur ([#1041]). - Minor changes in coversion to and from tables in some cases. +- Abstract constructors for AbstractDimArray were removed, as they cause ambiguity hassles. + If necessary, extending packages can define methods like these for their own types: + DimArray{T}(x::UndefInitializer, dims::Dimension...; kw...) where T = DimArray{T}(x, dims; kw...) + DimArray{T}(x::UndefInitializer, dims::MaybeDimTuple; kw...) 
where T = DimArray(Array{T}(undef, map(length, dims)), dims; kw...) + MyDimArray(st::AbstractDimStack; kw...) = DD.dimarray_from_dimstack(MyDimArray, st; kw...) - the deprecated `index` function is now removed completely - `metadta(obj, dims)` no longer works as its ambiguous and not consistently implemented - `metadata(obj)` does not return a Tuple for each dimension diff --git a/src/array/array.jl b/src/array/array.jl index b5ad81e12..a6915c5f9 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -143,16 +143,6 @@ function Base.NamedTuple(A1::AbstractDimArray, As::AbstractDimArray...) return NamedTuple{keys}(arrays) end -# undef constructor for all AbstractDimArray -(::Type{A})(x::UndefInitializer, dims::Dimension...; kw...) where {A<:AbstractDimArray{T}} where T = - A(x, dims; kw...) -function (::Type{A})(x::UndefInitializer, dims::DimTuple; kw...) where {A<:AbstractDimArray{T}} where T - basetypeof(A)(Array{T}(undef, size(dims)), dims; kw...) -end -function (::Type{A})(x::UndefInitializer, dims::Tuple{}; kw...) where {A<:AbstractDimArray{T}} where T - basetypeof(A)(Array{T}(undef, ()), dims; kw...) -end - # Dummy `read` methods that does nothing. # This can be used to actually read `AbstractDimArray` subtypes that dont hold in-memory Arrays. Base.read(A::AbstractDimArray) = A @@ -490,6 +480,8 @@ function DimArray(A::AbstractDimArray; end DimArray{T}(A::AbstractDimArray; kw...) where T = DimArray(convert.(T, A)) DimArray{T}(A::AbstractDimArray{T}; kw...) where T = DimArray(A; kw...) +DimArray{T}(x::UndefInitializer, dims::Dimension...; kw...) where T = DimArray{T}(x, dims; kw...) +DimArray{T}(x::UndefInitializer, dims::MaybeDimTuple; kw...) where T = DimArray(Array{T}(undef, map(length, dims)), dims; kw...) 
# We collect other kinds of AbstractBasicDimArray # to avoid complicated nesting of dims function DimArray(A::AbstractBasicDimArray; diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 5cd7dbda0..1c3ee6f63 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -29,9 +29,6 @@ abstract type AbstractDimStack{K,T,N,L} end const AbstractVectorDimStack = AbstractDimStack{K,T,1} where {K,T} const AbstractMatrixDimStack = AbstractDimStack{K,T,2} where {K,T} -(::Type{T})(st::AbstractDimStack; kw...) where T<:AbstractDimArray = - dimarray_from_dimstack(T, st; kw...) -# For ambiguity DimArray(st::AbstractDimStack; kw...) = dimarray_from_dimstack(DimArray, st; kw...) dimarray_from_dimstack(T, st; kw...) = From 4af492edb83e632d7cc900c0c09ef00b59df68bc Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Thu, 21 Aug 2025 16:23:16 +0200 Subject: [PATCH 17/23] Forward name keyword in groupby (#1084) * Forward name keyword in groupby * Add test for setting groupby name explicitly * Update src/groupby.jl Co-authored-by: Rafael Schouten * Update test/groupby.jl Co-authored-by: Rafael Schouten * Add Changelog entry * Mention name keyword in docstring --------- Co-authored-by: Rafael Schouten --- CHANGELOG.md | 2 ++ src/groupby.jl | 15 ++++++++++----- test/groupby.jl | 5 +++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 388ec62e9..de16e82d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ Changelog.generate( tables, so that values may themselves be `AbstractDimArray` ([#917]) - `rebuildsliced` documented and added to the developer interface - Broadcasts improved for all `AbstractBasicDimArray`, like `DimSelectors`. +- `groupby` accepts name keyword to set the name of the DimGroupbyArray + ### Fixed diff --git a/src/groupby.jl b/src/groupby.jl index 4bc968e4d..5d3f522cb 100644 --- a/src/groupby.jl +++ b/src/groupby.jl @@ -241,8 +241,8 @@ These can wrap around the end of the day. 
hours(step; start=0, labels=nothing) = CyclicBins(hour; cycle=24, step, start, labels) """ - groupby(A::Union{AbstractDimArray,AbstractDimStack}, dims::Pair...) - groupby(A::Union{AbstractDimArray,AbstractDimStack}, dims::Dimension{<:Callable}...) + groupby(A::Union{AbstractDimArray,AbstractDimStack}, dims::Pair...; name=:groupby) + groupby(A::Union{AbstractDimArray,AbstractDimStack}, dims::Dimension{<:Callable}...; name=:groupby) Group `A` by grouping functions or [`Bins`](@ref) over multiple dimensions. @@ -253,6 +253,10 @@ Group `A` by grouping functions or [`Bins`](@ref) over multiple dimensions. [`Dimension`](@ref)s like `groups = groupby(A, DimType(groupingfunction))`. Instead of a grouping function [`Bins`](@ref) can be used to specify group bins. +## Keywords + +- `name`: name that is applied to the resulting [`DimGroupByArray`](@ref) + ## Return value A [`DimGroupByArray`](@ref) is returned, which is basically a regular `AbstractDimArray` @@ -343,15 +347,16 @@ julia> groupmeans = mean.(groupby(A, Ti=>month, Y=>isodd)) 12 0.501643 0.499298 ``` """ -DataAPI.groupby(A::DimArrayOrStack, x) = groupby(A, dims(x)) -DataAPI.groupby(A::DimArrayOrStack, dimfuncs::Dimension...) 
= groupby(A, dimfuncs) +DataAPI.groupby(A::DimArrayOrStack, x; name=:groupby) = groupby(A, dims(x); name) +DataAPI.groupby(A::DimArrayOrStack, dimfuncs::Dimension...; name=:groupby) = groupby(A, dimfuncs; name) function DataAPI.groupby( A::DimArrayOrStack, p1::Pair{<:Any,<:Base.Callable}, ps::Pair{<:Any,<:Base.Callable}...; + name=:groupby ) dims = map((p1, ps...)) do (d, v) rebuild(basedims(d), v) end - return groupby(A, dims) + return groupby(A, dims; name) end function DataAPI.groupby(A::DimArrayOrStack, dimfuncs::DimTuple; name=:groupby) length(otherdims(dimfuncs, dims(A))) > 0 && diff --git a/test/groupby.jl b/test/groupby.jl index b4e9dac4e..4dd6d48e3 100644 --- a/test/groupby.jl +++ b/test/groupby.jl @@ -14,6 +14,11 @@ st = DimStack((a=A, b=A, c=A[X=1])) @test first(grps) isa eltype(grps) # false end +@testset "groupby name is set" begin + da = rand(X(1:10), Y(1:10)) + grps = groupby(da, X=>isodd, name="isodd") + @test name(grps) == "isodd" +end @testset "manual groupby comparisons" begin # Group by month and even/odd Y axis values months = DateTime(2000):Month(1):DateTime(2000, 12, 31) From 1a6e7edfdca5437ef42070903ec5496be7e1e2ac Mon Sep 17 00:00:00 2001 From: Felix Cremer Date: Fri, 22 Aug 2025 04:43:34 +0200 Subject: [PATCH 18/23] Remove rtol from At selector (#1062) * Remove rtol from At selector * Remove explicit rtol from test * Remove unused type parameter * fix At constructors --------- Co-authored-by: Rafael Schouten --- src/Lookups/selector.jl | 24 ++++++++++++------------ src/dimindices.jl | 4 ++-- test/selector.jl | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Lookups/selector.jl b/src/Lookups/selector.jl index 5653bed4c..479cf74a5 100644 --- a/src/Lookups/selector.jl +++ b/src/Lookups/selector.jl @@ -91,7 +91,7 @@ const SelTuple = Tuple{SelectorOrInterval,Vararg{SelectorOrInterval}} """ At <: IntSelector - At(x; atol=nothing, rtol=nothing) + At(x; atol=nothing) At(a, b; kw...) 
Selector that exactly matches the value on the passed-in dimensions, or throws an error. @@ -116,19 +116,19 @@ A[X(At(20)), Y(At(6))] 5 ``` """ -struct At{T,A,R} <: IntSelector{T} +struct At{T,A} <: IntSelector{T} val::T atol::A - rtol::R + At(val::T; atol::A=nothing) where {T,A} = new{T,A}(val, atol) end -At(val; atol=nothing, rtol=nothing) = At(val, atol, rtol) +At(a, b; atol=nothing) = At((a, b); atol) At(; kw...) = At(nothing; kw...) -At(a, b; kw...) = At((a, b); kw...) -rebuild(sel::At, val) = At(val, sel.atol, sel.rtol) +ConstructionBase.constructorof(::Type{<:At}) = (val, atol) -> At(val; atol) + +rebuild(sel::At, val) = At(val; atol=sel.atol) atol(sel::At) = sel.atol -rtol(sel::At) = sel.rtol function Base.show(io::IO, x::At) print(io, "At(") @@ -164,10 +164,10 @@ function at(lookup::NoLookup, sel::At; err=_True(), kw...) end end function at(lookup::Lookup, sel::At; kw...) - at(order(lookup), span(lookup), lookup, val(sel), atol(sel), rtol(sel); kw...) + at(order(lookup), span(lookup), lookup, val(sel), atol(sel); kw...) 
end function at( - ::Ordered, span::Regular, lookup::Lookup{<:Integer}, selval, atol::Nothing, rtol::Nothing; + ::Ordered, span::Regular, lookup::Lookup{<:Integer}, selval, atol::Nothing; err=_True() ) x = unwrap(selval) @@ -181,7 +181,7 @@ function at( end end function at( - ::Ordered, ::Span, lookup::Lookup{<:IntervalSets.Interval}, selval, atol, rtol::Nothing; + ::Ordered, ::Span, lookup::Lookup{<:IntervalSets.Interval}, selval, atol; err=_True() ) x = unwrap(selval) @@ -193,7 +193,7 @@ function at( end end function at( - ::Ordered, ::Span, lookup::Lookup{<:Union{Number,Dates.AbstractTime,AbstractString}}, selval, atol, rtol::Nothing; + ::Ordered, ::Span, lookup::Lookup{<:Union{Number,Dates.AbstractTime,AbstractString}}, selval, atol; err=_True() ) x = unwrap(selval) @@ -219,7 +219,7 @@ function at( end end # catch-all for an unordered index -function at(::Order, ::Span, lookup::Lookup, selval, atol, rtol::Nothing; err=_True()) +function at(::Order, ::Span, lookup::Lookup, selval, atol; err=_True()) i = findfirst(x -> _is_at(x, unwrap(selval), atol), parent(lookup)) if i === nothing return _selnotfound_or_nothing(err, lookup, selval) diff --git a/src/dimindices.jl b/src/dimindices.jl index 38191c19c..bd60bc537 100644 --- a/src/dimindices.jl +++ b/src/dimindices.jl @@ -314,8 +314,8 @@ _selector_eltype(dims::Tuple, selectors::Tuple) = Tuple{map(_selector_eltype, dims, selectors)...} _selector_eltype(d::D, ::S) where {D,S} = basetypeof(D){basetypeof(S){eltype(d)}} -_selector_eltype(d::D, ::At{<:Any,A,R}) where {D,A,R} = - basetypeof(D){At{eltype(d),A,R}} +_selector_eltype(d::D, ::At{<:Any,A}) where {D,A} = + basetypeof(D){At{eltype(d),A}} function show_after(io::IO, mime, A::DimSelectors) _, displaywidth = displaysize(io) diff --git a/test/selector.jl b/test/selector.jl index fc95b5cc1..94d546cca 100644 --- a/test/selector.jl +++ b/test/selector.jl @@ -943,8 +943,8 @@ end da = DimArray(a, (Y(Sampled(10:10:30)), Ti(Sampled((1:4)u"s")))) @test At(10.0) == At(10.0, 
nothing, nothing) - @test At(10.0; atol=0.0, rtol=Base.rtoldefault(Float64)) == - At(10.0, 0.0, Base.rtoldefault(Float64)) + @test At(10.0; atol=0.0) == + At(10.0, 0.0) Near([10, 20]) @test Between(10, 20) == Between((10, 20)) From 889239076a70c668590cb22a0a0ec6f73c8fe0c9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 15 Sep 2025 20:00:55 +1000 Subject: [PATCH 19/23] fix selector tests --- src/stack/stack.jl | 3 +-- test/selector.jl | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index ed0bbecab..0cd8d39fc 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -604,6 +604,7 @@ end layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) ### Skipmissing on DimStacks + """ skipmissing(itr::AbstractDimStack) @@ -629,5 +630,3 @@ Base.eltype(::Type{Base.SkipMissing{T}}) where {T<:AbstractDimStack{<:Any, NT}} @generated _nonmissing_nt(NT::Type{<:NamedTuple{K,V}}) where {K,V} = NamedTuple{K, Tuple{map(Base.nonmissingtype, V.parameters)...}} - -layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) diff --git a/test/selector.jl b/test/selector.jl index 94d546cca..d48a331d6 100644 --- a/test/selector.jl +++ b/test/selector.jl @@ -942,10 +942,8 @@ end @testset "Selectors on Sampled Points" begin da = DimArray(a, (Y(Sampled(10:10:30)), Ti(Sampled((1:4)u"s")))) - @test At(10.0) == At(10.0, nothing, nothing) - @test At(10.0; atol=0.0) == - At(10.0, 0.0) - Near([10, 20]) + @test At(10.0) == At(10.0; atol=nothing) + @test At(10.0; atol=0.0).atol == 0.0 @test Between(10, 20) == Between((10, 20)) From 9156dd39823119e445b1a1a556a17a64a8b4c0d4 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 15 Sep 2025 21:01:09 +1000 Subject: [PATCH 20/23] fix At in dimindices --- src/dimindices.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dimindices.jl b/src/dimindices.jl index 274570c1b..dfde950d5 100644 --- 
a/src/dimindices.jl +++ b/src/dimindices.jl @@ -343,7 +343,7 @@ end @inline _format_selectors(d::Dimension, ::Contains, atol) = Contains(nothing) @inline function _format_selectors(d::Dimension, at::At, atol) atolx = _atol(eltype(d), Lookups.atol(at), atol) - At(nothing, atolx, nothing) + At(nothing; atol=atolx) end _atol(::Type, atol1, atol2) = atol1 From fd8f0efcecae23dd52d3c760a8eb20966475a974 Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Wed, 15 Oct 2025 10:46:23 +0200 Subject: [PATCH 21/23] Breaking: remove methods that are hardly uesd and cause many invalidations (#1113) * do 0.6, 0.7.2 broke for us (#1099) * Fix tests on julia 1.12 (#1110) * use isequal instead of === to compare NaN * drop all and broadcast * specify DimensionalData.Dimensions to make reference unique in docs * drop convert method for name to abstractstring * remove `merge` method for dimstack with iterators of pairs * add to changelog --------- Co-authored-by: Lazaro Alonso --- CHANGELOG.md | 9 ++++++--- docs/package.json | 2 +- docs/src/dimensions.md | 2 +- src/name.jl | 2 -- src/stack/stack.jl | 3 --- test/groupby.jl | 6 +++--- test/name.jl | 2 -- test/stack.jl | 1 - 8 files changed, 11 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de16e82d8..6e2e1f99a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,7 +40,7 @@ Changelog.generate( - `similar` for `AbstractDimStack` has modified behavior more consistent with `similar` for `AbstractArray` ([#903]). -- Internal const `CategoricalEltypes` now includes `DataType` - sot that a +- Internal const `CategoricalEltypes` now includes `DataType` - so that a lookup of types is considered `Categorical` by default ([#876]). - `skipmissing` on an `AbstractDimStack` now skips any `missing` values in any fiels, rather than `missing`, which can't actually occur ([#1041]). @@ -51,12 +51,15 @@ Changelog.generate( DimArray{T}(x::UndefInitializer, dims::MaybeDimTuple; kw...) 
where T = DimArray(Array{T}(undef, map(length, dims)), dims; kw...) MyDimArray(st::AbstractDimStack; kw...) = DD.dimarray_from_dimstack(MyDimArray, st; kw...) - the deprecated `index` function is now removed completely -- `metadta(obj, dims)` no longer works as its ambiguous and not consistently +- `metadata(obj, dims)` no longer works as its ambiguous and not consistently implemented - `metadata(obj)` does not return a Tuple for each dimension like other similar methods, but the metadata of `obj`. To get dimension metadata going forward, explicitly use `metadata(dims(obj, X))` -- `val(obj)` no longer returns `map(val, dims(obj))` because thats just weird, +- `val(obj)` no longer returns `map(val, dims(obj))` because that's just weird, val seems like it would return `parent` from that call. `lookup` does this anyway. +- Removed two methods that don't have much use and cause lots of invalidations: + - `Base.convert(Type{<:AbstractString}, name::DD.AbstractName)` is removed, use `string(name)` instead. + - `Base.merge(stack::AbstractDimStack, pairs)` is removed, use `merge(stack, NamedTuple(pairs))` instead. diff --git a/docs/package.json b/docs/package.json index ee3a8eb98..0a9a90de3 100644 --- a/docs/package.json +++ b/docs/package.json @@ -6,7 +6,7 @@ "markdown-it": "^14.1.0", "markdown-it-mathjax3": "^4.3.2", "vitepress": "^1.6.3", - "vitepress-plugin-tabs": "^0.7.1" + "vitepress-plugin-tabs": "^0.6.0" }, "scripts": { "docs:dev": "vitepress dev build/.documenter", diff --git a/docs/src/dimensions.md b/docs/src/dimensions.md index 7ea9c2b96..c1ce90f6b 100644 --- a/docs/src/dimensions.md +++ b/docs/src/dimensions.md @@ -36,7 +36,7 @@ DimensionalData.jl uses `Dimensions` everywhere: This symmetry means we can ignore how data is organized, and label and access it by name, letting DD work out the details for us. 
-Dimensions are defined in the [`Dimensions`](@ref) submodule, and some +Dimensions are defined in the [`DimensionalData.Dimensions`](@ref) submodule, and some Dimension-specific methods can be brought into scope with: ```julia diff --git a/src/name.jl b/src/name.jl index b47fed9eb..53c07d808 100644 --- a/src/name.jl +++ b/src/name.jl @@ -5,8 +5,6 @@ Abstract supertype for name wrappers. """ abstract type AbstractName end -Base.convert(T::Type{<:AbstractString}, name::AbstractName) = convert(T, string(name)) - """ NoName <: AbstractName diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 0cd8d39fc..fab86b81b 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -241,9 +241,6 @@ function Base.merge( ) rebuild_from_arrays(x1, merge(map(layers, (x1, x2, xs...))...); kw...) end -function Base.merge(s::AbstractDimStack, pairs; kw...) - rebuild_from_arrays(s, merge(layers(s), pairs); refdims=()) -end function Base.merge( x1::NamedTuple, x2::AbstractDimStack, xs::Union{AbstractDimStack,NamedTuple}...; ) diff --git a/test/groupby.jl b/test/groupby.jl index 4dd6d48e3..b1bd21665 100644 --- a/test/groupby.jl +++ b/test/groupby.jl @@ -100,9 +100,9 @@ end mean(A[X=x, Ti=d]) end end - @test all(collect(mean.(gb)) .=== manualmeans) - @test all(mean.(gb) .=== manualmeans) - @test all(combine(mean, gb) .=== manualmeans) + @test isequal(collect(mean.(gb)), manualmeans) + @test isequal(mean.(gb), manualmeans) + @test isequal(combine(mean, gb), manualmeans) end @testset "broadcast_dims runs after groupby" begin diff --git a/test/name.jl b/test/name.jl index 583bd12f0..a1b10de95 100644 --- a/test/name.jl +++ b/test/name.jl @@ -7,13 +7,11 @@ using DimensionalData: Name, NoName @test Symbol(Name(:x)) === :x @test string(Name(:x)) === "x" @test Name(Name(:x)) === Name(:x) - @test convert(String, Name(:x)) === "x" end @testset "NoName" begin @test Symbol(NoName()) === Symbol("") @test string(NoName()) === "" @test Name(NoName()) === NoName() - @test convert(String, NoName()) 
=== "" end diff --git a/test/stack.jl b/test/stack.jl index b6991d98a..100216fb9 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -147,7 +147,6 @@ end @test merge(mixed) === mixed @test keys(merge(mixed, s)) == (:one, :two, :extradim, :three) @test keys(merge(s, mixed)) == (:one, :two, :three, :extradim) - @test keys(merge(s, (:new=>da4,))) == (:one, :two, :three, :new) end @testset "setindex" begin From fe0bda91b91bd58aad566b180104df84d706d4bb Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Thu, 30 Oct 2025 02:38:28 +0100 Subject: [PATCH 22/23] Implement `Base.instantiate` - take 2 (#1118) * implement `instantiate` - get rid of BasicDimensionalStyle * fix setindex! for opaquearray to make some error messages clearer * fix materialize! --- src/array/broadcast.jl | 136 +++++++++++++++++------------------------ src/opaque.jl | 4 +- 2 files changed, 57 insertions(+), 83 deletions(-) diff --git a/src/array/broadcast.jl b/src/array/broadcast.jl index 01416763d..63c473dba 100644 --- a/src/array/broadcast.jl +++ b/src/array/broadcast.jl @@ -1,4 +1,4 @@ -import Base.Broadcast: BroadcastStyle, DefaultArrayStyle, Style +import Base.Broadcast: BroadcastStyle, DefaultArrayStyle, Style, AbstractArrayStyle, Unknown const STRICT_BROADCAST_CHECKS = Ref(true) const STRICT_BROADCAST_DOCS = """ @@ -35,10 +35,9 @@ strict_broadcast!(x::Bool) = STRICT_BROADCAST_CHECKS[] = x # It preserves the dimension names. # `S` should be the `BroadcastStyle` of the wrapped type. # Copied from NamedDims.jl (thanks @oxinabox). 
-struct BasicDimensionalStyle{N} <: AbstractArrayStyle{Any} end - -struct DimensionalStyle{S<:BroadcastStyle} <: AbstractArrayStyle{Any} end -DimensionalStyle(::S) where {S} = DimensionalStyle{S}() +struct DimensionalStyle{S <: AbstractArrayStyle, N} <: AbstractArrayStyle{N} end +DimensionalStyle(::S) where S<:AbstractArrayStyle{N} where N = DimensionalStyle{S, N}() +DimensionalStyle(::S) where {S<:DimensionalStyle} = S() # avoid nested dimensionalstyle DimensionalStyle(::S, ::Val{N}) where {S,N} = DimensionalStyle(S(Val(N))) DimensionalStyle(::Val{N}) where N = DimensionalStyle{DefaultArrayStyle{N}}() function DimensionalStyle(a::BroadcastStyle, b::BroadcastStyle) @@ -51,86 +50,59 @@ function DimensionalStyle(a::BroadcastStyle, b::BroadcastStyle) end end -function BroadcastStyle(::Type{<:AbstractDimArray{T,N,D,A}}) where {T,N,D,A} - inner_style = typeof(BroadcastStyle(A)) - return DimensionalStyle{inner_style}() -end -BroadcastStyle(::Type{<:AbstractBasicDimArray{T,N}}) where {T,N} = - BasicDimensionalStyle{N}() - +BroadcastStyle(::Type{<:AbstractDimArray{T,N,D,A}}) where {T,N,D,A} = + DimensionalStyle(BroadcastStyle(A)) +BroadcastStyle(::Type{<:AbstractBasicDimArray{T,N,D}}) where {T,N,D} = + DimensionalStyle(DefaultArrayStyle{N}()) BroadcastStyle(::DimensionalStyle, ::Base.Broadcast.Unknown) = Unknown() -BroadcastStyle(::Base.Broadcast.Unknown, ::DimensionalStyle) = Unknown() BroadcastStyle(::DimensionalStyle{A}, ::DimensionalStyle{B}) where {A, B} = DimensionalStyle(A(), B()) -BroadcastStyle(::DimensionalStyle{A}, b::Style) where {A} = DimensionalStyle(A(), b) -BroadcastStyle(a::Style, ::DimensionalStyle{B}) where {B} = DimensionalStyle(a, B()) +BroadcastStyle(::DimensionalStyle{A}, b::AbstractArrayStyle{N}) where {A,N} = DimensionalStyle(A(), b) +BroadcastStyle(::DimensionalStyle{A}, b::DefaultArrayStyle{N}) where {A,N} = DimensionalStyle(A(), b) # ambiguity BroadcastStyle(::DimensionalStyle{A}, b::Style{Tuple}) where {A} = DimensionalStyle(A(), b) 
-BroadcastStyle(a::Style{Tuple}, ::DimensionalStyle{B}) where {B} = DimensionalStyle(a, B()) -# We need to implement copy because if the wrapper array type does not -# support setindex then the `similar` based default method will not work -function Broadcast.copy(bc::Broadcasted{DimensionalStyle{S}}) where S - A = _firstdimarray(bc) - data = copy(_unwrap_broadcasted(bc)) - - A isa Nothing && return data # No AbstractDimArray - - bdims = _broadcasted_dims(bc) - _comparedims_broadcast(A, bdims...) - - data isa AbstractArray || return data # result is a scalar - # unwrap AbstractDimArray data - data = data isa AbstractDimArray ? parent(data) : data - dims = format(Dimensions.promotedims(bdims...; skip_length_one=true), data) - return rebuild(A; data, dims, refdims=refdims(A), name=Symbol("")) -end -function Broadcast.copy(bc::Broadcasted{BasicDimensionalStyle{N}}) where N +# override base instantiate to check dimensions as well as axes +@inline function Broadcast.instantiate(bc::Broadcasted{<:DimensionalStyle{S}}) where S A = _firstdimarray(bc) - data = collect(bc) - A isa Nothing && return data # No AbstractDimArray - + # check if there is any DimArray and unwrap immediately if no + isnothing(A) && return Broadcast.instantiate(_unwrap_broadcasted(bc)) bdims = _broadcasted_dims(bc) + if bc.axes isa Nothing + axes = Base.Broadcast.combine_axes(_unwrap_broadcasted(bc).args...) + ds = Dimensions.promotedims(bdims...; skip_length_one=true) + length(axes) == length(ds) || + throw(ArgumentError("Number of broadcasted dimensions $(length(axes)) larger than $(ds)")) + axes = map(Dimensions.DimUnitRange, axes, ds) + else # bc already has axes which might have dimensions, e.g. when assigning to a DimArray + axes = bc.axes + Base.Broadcast.check_broadcast_axes(axes, bc.args...) + ds = dims(axes) + isnothing(ds) || _comparedims_broadcast(A, ds, bdims...) + end _comparedims_broadcast(A, bdims...) 
- - data isa AbstractArray || return data # result is a scalar - - # Return an AbstractDimArray - dims = format(Dimensions.promotedims(bdims...; skip_length_one=true), data) - return dimconstructor(dims)(data, dims; refdims=refdims(A), name=Symbol("")) + return Broadcasted(bc.style, bc.f, bc.args, axes) end - -function Base.copyto!(dest::AbstractArray, bc::Broadcasted{DimensionalStyle{S}}) where S - fda = _firstdimarray(bc) - isnothing(fda) || _comparedims_broadcast(fda, _broadcasted_dims(bc)...) - copyto!(dest, _unwrap_broadcasted(bc)) -end -function Base.copyto!(dest::AbstractArray, bc::Broadcasted{BasicDimensionalStyle{N}}) where N - fda = _firstdimarray(bc) - isnothing(fda) || _comparedims_broadcast(fda, _broadcasted_dims(bc)...) - copyto!(dest, bc) -end - -@inline function Base.Broadcast.materialize!(dest::AbstractDimArray, bc::Base.Broadcast.Broadcasted{<:Any}) - # Need to check whether the dims are compatible in dest, - # which are already stripped when sent to copyto! - _comparedims_broadcast(dest, dims(dest), _broadcasted_dims(bc)...) - style = DimensionalData.DimensionalStyle(Base.Broadcast.combine_styles(parent(dest), bc)) - Base.Broadcast.materialize!(style, parent(dest), bc) - return dest -end - -function Base.similar(bc::Broadcast.Broadcasted{DimensionalStyle{S}}, ::Type{T}) where {S,T} +# Define copy because the inner style S might override copy (e.g. 
DiskArrays) +function Base.copy(bc::Broadcasted{<:DimensionalStyle{S}}) where S + data = copy(_unwrap_broadcasted(bc)) + data isa AbstractArray || return data # in the 0-d case data can be a scalar + # let similar do the work - it will usually call rebuild unless A isa AbstractBasicDimArray A = _firstdimarray(bc) - data = similar(_unwrap_broadcasted(bc), T, size(bc)) - dims, refdims = slicedims(A, axes(bc)) - return rebuild(A; data, dims, refdims, name=Symbol("")) + similar(A; data, dims = dims(axes(bc))) end -function Base.similar(bc::Broadcast.Broadcasted{BasicDimensionalStyle{N}}, ::Type{T}) where {N,T} +# similar is usually only called in broadcast_preserving_zero_d +function Base.similar(bc::Broadcasted{<:DimensionalStyle{S}}, ::Type{T}) where {S,T} A = _firstdimarray(bc) - data = similar(A, T, size(bc)) - dims, refdims = slicedims(A, axes(bc)) - return dimconstructor(dims)(data, dims; refdims, name=Symbol("")) + data = similar(_unwrap_broadcasted(bc), T) + similar(A; data, dims = dims(axes(bc))) end +@inline function Base.materialize!(::DimensionalStyle, dest, bc::Broadcasted) + # check dimensions + bci = Broadcast.instantiate(Broadcasted(bc.style, bc.f, bc.args, axes(dest))) + # unwrap before copying + Base.copyto!(_unwrap_broadcasted(dest), _unwrap_broadcasted(bci)) + return dest +end """ @d broadcast_expression options @@ -407,20 +379,20 @@ end # Recursively unwraps `AbstractDimArray`s and `DimensionalStyle`s. # replacing the `AbstractDimArray`s with the wrapped array, # and `DimensionalStyle` with the wrapped `BroadcastStyle`. 
-function _unwrap_broadcasted(bc::Broadcasted{DimensionalStyle{S}}) where S + +function _unwrap_broadcasted(bc::Broadcasted{<:DimensionalStyle{S}}) where {S} innerargs = map(_unwrap_broadcasted, bc.args) - return Broadcasted{S}(bc.f, innerargs) + return Broadcasted{S}(bc.f, innerargs, _unwrap_broadcasted(bc.axes)) end _unwrap_broadcasted(x) = x _unwrap_broadcasted(nda::AbstractDimArray) = parent(nda) -_unwrap_broadcasted(boda::BroadcastOptionsDimArray) = parent(parent(boda)) - +_unwrap_broadcasted(bda::AbstractBasicDimArray) = OpaqueArray(bda) +_unwrap_broadcasted(boda::BroadcastOptionsDimArray) = _unwrap_broadcasted(parent(boda)) +_unwrap_broadcasted(t::Tuple) = map(_unwrap_broadcasted, t) +_unwrap_broadcasted(du::Dimensions.DimUnitRange) = parent(du) # Get the first dimensional array in the broadcast _firstdimarray(x::Broadcasted) = _firstdimarray(x.args) -_firstdimarray(x::Tuple{<:AbstractBasicDimArray,Vararg}) = x[1] -_firstdimarray(x::AbstractBasicDimArray) = x -_firstdimarray(ext::Base.Broadcast.Extruded) = _firstdimarray(ext.x) -function _firstdimarray(x::Tuple{<:Union{Broadcasted,Base.Broadcast.Extruded},Vararg}) +function _firstdimarray(x::Tuple) found = _firstdimarray(x[1]) if found isa Nothing _firstdimarray(tail(x)) @@ -428,8 +400,10 @@ function _firstdimarray(x::Tuple{<:Union{Broadcasted,Base.Broadcast.Extruded},Va found end end -_firstdimarray(x::Tuple) = _firstdimarray(tail(x)) _firstdimarray(x::Tuple{}) = nothing +_firstdimarray(ext::Base.Broadcast.Extruded) = _firstdimarray(ext.x) +_firstdimarray(x::AbstractBasicDimArray) = x +_firstdimarray(x) = nothing # Make sure all arrays have the same dims, and return them _broadcasted_dims(bc::Broadcasted) = _broadcasted_dims(bc.args...) 
diff --git a/src/opaque.jl b/src/opaque.jl index 87c234f30..752d0e778 100644 --- a/src/opaque.jl +++ b/src/opaque.jl @@ -15,5 +15,5 @@ OpaqueArray(st::P) where P<:AbstractDimStack{<:Any,T,N} where {T,N} = OpaqueArra Base.size(A::OpaqueArray) = size(A.parent) Base.getindex(A::OpaqueArray, I::Union{StandardIndices,Not{<:StandardIndices}}...) = Base.getindex(A.parent, I...) -Base.setindex!(A::OpaqueArray, I::Union{StandardIndices,Not{<:StandardIndices}}...) = - Base.setindex!(A.parent, I...) \ No newline at end of file +Base.setindex!(A::OpaqueArray, x, I::Union{StandardIndices,Not{<:StandardIndices}}...) = + Base.setindex!(A.parent, x, I...) \ No newline at end of file From b9f5216cb9e97dd8a57c3c9df8b2d97640d6bab3 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 4 Nov 2025 14:17:15 +1100 Subject: [PATCH 23/23] StandardIndices methods should be last (#1129) --- src/array/indexing.jl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/array/indexing.jl b/src/array/indexing.jl index 3971b87c3..438eb8424 100644 --- a/src/array/indexing.jl +++ b/src/array/indexing.jl @@ -25,21 +25,6 @@ for f in (:getindex, :view, :dotview) Base.$f(parent(A), i) end @eval begin - ### Standard indices - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, I::CartesianIndex) = - Base.$f(A, to_indices(A, (I,))...) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, I::CartesianIndex) = - Base.$f(A, to_indices(A, (I,))...) - @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::IntegerOrCartesian, i2::IntegerOrCartesian, Is::IntegerOrCartesian...) = - Base.$f(A, to_indices(A, (i1, i2, Is...))...) 
- # 1D DimArrays dont need linear indexing - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::Union{Colon,AbstractArray{<:Integer}}) = - rebuildsliced(Base.$f, A, (i,)) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, I::CartesianIndices) = rebuildsliced(Base.$f, A, (I,)) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, I::CartesianIndices) = rebuildsliced(Base.$f, A, (I,)) - @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::StandardIndices, i2::StandardIndices, Is::StandardIndices...) = - rebuildsliced(Base.$f, A, to_indices(A, (i1, i2, Is...))) - ### Selector/Interval indexing @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::SelectorOrInterval) = Base.$f(A, dims2indices(A, (i,))...) @@ -93,6 +78,21 @@ for f in (:getindex, :view, :dotview) Dimensions._extradimswarn((d1, ds...)) return rebuildsliced(Base.$f, A, ()) end + + ### Standard indices + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, I::CartesianIndex) = + Base.$f(A, to_indices(A, (I,))...) + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, I::CartesianIndex) = + Base.$f(A, to_indices(A, (I,))...) + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::IntegerOrCartesian, i2::IntegerOrCartesian, Is::IntegerOrCartesian...) = + Base.$f(A, to_indices(A, (i1, i2, Is...))...) + # 1D DimArrays dont need linear indexing + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::Union{Colon,AbstractArray{<:Integer}}) = + rebuildsliced(Base.$f, A, (i,)) + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, I::CartesianIndices) = rebuildsliced(Base.$f, A, (I,)) + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, I::CartesianIndices) = rebuildsliced(Base.$f, A, (I,)) + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::StandardIndices, i2::StandardIndices, Is::StandardIndices...) 
= + rebuildsliced(Base.$f, A, to_indices(A, (i1, i2, Is...))) end ##### AbstractDimArray only methods @@ -231,4 +231,4 @@ Base.@assume_effects :foldable @inline _simplify_dim_indices() = () # We only own this to_indices dispatch for AbstractBasicDimArray Base.to_indices(A::AbstractBasicDimArray, inds, (r, args...)::Tuple{<:Type,Vararg}) = - (Lookups._to_index(inds[1], r), to_indices(A, Base.tail(inds), args)...) \ No newline at end of file + (Lookups._to_index(inds[1], r), to_indices(A, Base.tail(inds), args)...)