diff --git a/Project.toml b/Project.toml index 392bf01df..d25c8f06c 100644 --- a/Project.toml +++ b/Project.toml @@ -16,6 +16,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Flatten = "4c728ea3-d9ee-5c9a-9642-b6f7d7dc04fa" GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f" GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +GeometryOps = "3251bfac-6a57-4b6d-aa61-ac1fef2975ab" GeometryOpsCore = "05efe853-fabf-41c8-927e-7063c8b9f013" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" @@ -25,6 +26,7 @@ ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" +SortTileRecursiveTree = "746ee33f-1797-42c2-866d-db2fce69d14d" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [weakdeps] @@ -69,6 +71,7 @@ GeoDataFrames = "0.3" GeoFormatTypes = "0.4" GeoInterface = "1.0" GeometryBasics = "0.4" +GeometryOps = "0.1.19" GeometryOpsCore = "0.1.1" Makie = "0.20, 0.21, 0.22" Missings = "0.4, 1" @@ -83,6 +86,7 @@ Reexport = "0.2, 1.0" SafeTestsets = "0.1" Setfield = "0.6, 0.7, 0.8, 1" Shapefile = "0.10, 0.11" +SortTileRecursiveTree = "0.1.1" Statistics = "1" StatsBase = "0.34" Test = "1" @@ -112,3 +116,8 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", "GeoDataFrames", "GeometryBasics", "GRIBDatasets", "Makie", "NCDatasets", "Plots", "Proj", "RasterDataSources", "SafeTestsets", "Shapefile", "StableRNGs", "StatsBase", "Test", "ZarrDatasets"] + + +[sources] +DimensionalData = {url = "https://github.com/rafaqz/DimensionalData.jl", rev = "as/individualindexing"} +GeometryOps = {url = "https://github.com/JuliaGeo/GeometryOps.jl", rev = "as/extentforwarding_for_predicates"} \ No newline at end of file diff --git a/ext/RastersMakieExt/plotrecipes.jl b/ext/RastersMakieExt/plotrecipes.jl index fac962fb9..342b1c13b 100644 --- a/ext/RastersMakieExt/plotrecipes.jl +++ b/ext/RastersMakieExt/plotrecipes.jl @@ -332,3 +332,34 @@ _prepare_dimarray(A) = DimArray(map(x -> _convert_with_missing(x, missingval(A)) _convert_with_missing(x::Real, missingval) = isequal(x, missingval) || ismissing(x) ? NaN32 : Float32(x) _convert_with_missing(x, missingval) = isequal(x, missingval) ? 
missing : x + +Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} = nothing +Makie.expand_dimensions(::Makie.NoConversion, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} = nothing + +function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} + geometries = lookup(only(dims(A))).data + color = replace_missing(A, NaN).data + label = string(Rasters.DD.name(A)) + isempty(label) && (label = string(Rasters.DD.name(only(dims(A))))) + + return Makie.SpecApi.Poly( + geometries; + color = color, + label = label + ) +end + + +Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Base.SkipMissing{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing +Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Rasters.SkipMissingVal{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing + +Makie.expand_dimensions(::Makie.NoConversion, A::Base.SkipMissing{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing +Makie.expand_dimensions(::Makie.NoConversion, A::Rasters.SkipMissingVal{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing + +function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Base.SkipMissing{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} + return Makie.convert_arguments(Makie.Poly, A.x[ismissing.(A.x)]) +end + +function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Rasters.SkipMissingVal{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} + return Makie.convert_arguments(Makie.Poly, A.x[ismissing.(A.x)]) +end \ No newline at end of file diff --git a/src/Rasters.jl b/src/Rasters.jl index 7480ae533..afd5db110 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -21,6 +21,7 @@ import Adapt, FillArrays, Flatten, GeoInterface, + GeometryOps, GeometryOpsCore, OffsetArrays, ProgressMeter, @@ -29,6 +30,7 @@ import Adapt, RecipesBase, Reexport, Setfield, + SortTileRecursiveTree, Statistics Reexport.@reexport using DimensionalData, GeoFormatTypes @@ -62,8 +64,8 @@ export Planar, Spherical export AbstractRaster, Raster export AbstractRasterStack, RasterStack export AbstractRasterSeries, RasterSeries -export Projected, Mapped -export Band +export Projected, Mapped, GeometryLookup +export Band, Geometry export missingval, boolmask, missingmask, replace_missing, replace_missing!, aggregate, aggregate!, disaggregate, disaggregate!, mask, mask!, resample, warp, zonal, crop, extend, trim, slice, combine, points, @@ -76,6 +78,7 @@ export Extent, extent const DD = DimensionalData const DA = DiskArrays const GI = GeoInterface +const GO = GeometryOps const LA = Lookups # DimensionalData documentation urls @@ -119,6 +122,11 @@ const RasterSeriesOrStack = Union{AbstractRasterSeries,AbstractRasterStack} include("utils.jl") include("skipmissing.jl") +include("geometry_lookup/geometry_lookup.jl") +include("geometry_lookup/lookups.jl") +include("geometry_lookup/methods.jl") +include("geometry_lookup/io.jl") + include("table_ops.jl") include("create.jl") include("read.jl") diff --git a/src/crs.jl b/src/crs.jl index 9cd094596..d32d828d4 100644 --- a/src/crs.jl +++ b/src/crs.jl @@ -9,12 +9,26 @@ coordinate reference system at all. See [`setcrs`](@ref) to set it manually. 
""" function GeoInterface.crs(obj::Union{<:AbstractRaster,<:AbstractRasterStack,<:AbstractRasterSeries, <:DimTuple}) - if hasdim(obj, Y) - crs(dims(obj, Y)) - elseif hasdim(obj, X) - crs(dims(obj, X)) + each_dim_crs = map(crs, dims(obj)) + firstcrs = findfirst(!isnothing, each_dim_crs) + if isnothing(firstcrs) + return nothing else - nothing + for (dim, crs) in zip(dims(obj), each_dim_crs) + if !isnothing(crs) && crs !== each_dim_crs[firstcrs] + throw(ArgumentError(""" + All dimensions must have the same crs, but dims $(name(dim)) and $(name(dims(obj, firstcrs))) + have different CRS: + $(each_dim_crs[firstcrs]) + + and + + $(crs) + """ + )) + end + end + return each_dim_crs[firstcrs] end end GeoInterface.crs(dim::Dimension) = crs(lookup(dim)) diff --git a/src/dimensions.jl b/src/dimensions.jl index 56f2cc7fb..4fec2e82c 100644 --- a/src/dimensions.jl +++ b/src/dimensions.jl @@ -8,7 +8,7 @@ const SpatialDim = Union{XDim,YDim,ZDim} Band [`Dimension`]($DDdimdocs) for multi-band rasters. -## Example: +## Example ```julia banddim = Band(10:10:100) # Or @@ -18,3 +18,23 @@ mean(A; dims=Band) ``` """ @dim Band + +""" + Geometry <: Dimension + + Geometry(geoms) + +Geometry [`Dimension`]($DDdimdocs) for vector data cubes. + +## Example +```julia +geomdim = Geometry(GeometryLookup(polygons)) +# Or +val = A[Geometry(1)] +# Or +val = A[Geometry(Touches(other_geom))] # this is automatically accelerated by spatial trees! +# Or +mean(A; dims=Geometry) +``` +""" +@dim Geometry \ No newline at end of file diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl new file mode 100644 index 000000000..762bf451c --- /dev/null +++ b/src/geometry_lookup/geometry_lookup.jl @@ -0,0 +1,169 @@ +""" + GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) + + +A lookup type for geometry dimensions in vector data cubes. + +`GeometryLookup` provides efficient spatial indexing and lookup for geometries using an STRtree (Sort-Tile-Recursive tree). +It is used as the lookup type for geometry dimensions in vector data cubes, enabling fast spatial queries and operations. + +It spans the dimensions given to it in `dims`, as well as the dimension it's wrapped in - you would construct a DimArray with a GeometryLookup +like `DimArray(data, Geometry(GeometryLookup(data, dims)))`. Here, `Geometry` is a dimension - but selectors in X and Y will also eventually work! + + +# Examples + +```julia +using Rasters + +using NaturalEarth +import GeometryOps as GO + +# construct the polygon lookup +polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry +polygon_lookup = GeometryLookup(polygons, (X(), Y())) + +# create a DimArray with the polygon lookup +dv = rand(Geometry(polygon_lookup)) + +# select the polygon with the centroid of the 88th polygon +dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] # true +``` + +""" +struct GeometryLookup{T, A <: AbstractVector{T}, D, M <: GO.Manifold, Tree, CRS} <: DD.Dimensions.MultiDimensionalLookup{T} + manifold::M + data::A + tree::Tree + dims::D + crs::CRS +end + +function GeometryLookup(data, dims=(X(), Y()); geometrycolumn=nothing, crs=nokw, tree=nokw) + + # First, retrieve the geometries - from a table, vector of geometries, etc. 
+ geometries = _get_geometries(data, geometrycolumn) + geometries = Missings.disallowmissing(geometries) + + if isnokw(crs) + crs = GI.crs(data) + if isnothing(crs) + crs = GI.crs(first(geometries)) + end + end + + # Check that the geometries are actually geometries + if any(!GI.isgeometry, geometries) + throw(ArgumentError(""" + The collection passed in to `GeometryLookup` has some elements that are not geometries + (`GI.isgeometry(x) == false` for some `x` in `data`). + """)) + end + # Make sure there are only two dimensions + if length(dims) != 2 + throw(ArgumentError(""" + The `dims` argument to `GeometryLookup` must have two dimensions, but it has $(length(dims)) dimensions (`$(dims)`). + Please make sure that it has only two dimensions, like `(X(), Y())`. + """)) + end + # Build the lookup accelerator tree + tree = if isnokw(tree) + SortTileRecursiveTree.STRtree(geometries) + elseif GO.SpatialTreeInterface.isspatialtree(tree) + if tree isa DataType + tree(geometries) + else + tree + end + elseif isnothing(tree) + nothing + else + throw(ArgumentError(""" + Got an argument for `tree` which is not a valid spatial tree (according to `GeometryOps.SpatialTreeInterface`) + nor `nokw` or `nothing` + + Type is $(typeof(tree)) + """)) + end + # TODO: auto manifold detection and best tree type for that manifold + GeometryLookup(GO.Planar(), geometries, tree, dims, crs) +end + +GeoInterface.crs(l::GeometryLookup) = l.crs +setcrs(l::GeometryLookup, crs) = rebuild(l; crs) + +#= + +## DD methods for the lookup + +Here we define DimensionalData's methods for the lookup. +This is broadly standard except for the `rebuild` method, which is used to update the tree accelerator when the data changes. +=# + +DD.dims(l::GeometryLookup) = l.dims +DD.dims(d::DD.Dimension{<: GeometryLookup}) = val(d).dims +DD.order(::GeometryLookup) = Lookups.Unordered() +DD.parent(lookup::GeometryLookup) = lookup.data +# TODO: format for geometry lookup +DD.Dimensions.format(l::GeometryLookup, D::Type, values, axis::AbstractRange) = l + +# Make sure that the tree is rebuilt if the data changes +function DD.rebuild( + lookup::GeometryLookup; + data=lookup.data, tree=nokw, + dims=lookup.dims, crs=nokw, + manifold=nokw, metadata=nokw + ) + # TODO: metadata support for geometry lookup + new_tree = if isnokw(tree) + if data == lookup.data + lookup.tree + elseif isempty(data) + nothing + else + SortTileRecursiveTree.STRtree(data) + end + elseif GO.SpatialTreeInterface.isspatialtree(tree) + if tree isa DataType + tree(data) + else + tree + end + else + SortTileRecursiveTree.STRtree(data) + end + new_crs = if isnokw(crs) + data_crs = GI.crs(data) + if isnothing(data_crs) + lookup.crs + else + data_crs + end + else + crs + end + + new_manifold = if isnokw(manifold) + lookup.manifold + else + manifold + end + + GeometryLookup(new_manifold, Missings.disallowmissing(data), new_tree, dims, new_crs) +end + + +# total_area_of_intersection = 0.0 +# current_area_of_intersection = 0.0 +# last_point = nothing +# apply_with_signal(trait, geom) do subgeom, state +# if state == :start +# total_area_of_intersection += current_area_of_intersection +# current_area_of_intersection = 0.0 +# last_point = nothing +# elseif state == :continue +# # shoelace formula for this point +# elseif state == :end +# # finish off the shoelace formula +# end +# end \ No newline at end of file diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl new file mode 100644 index 000000000..4986f99db --- /dev/null +++ b/src/geometry_lookup/io.jl @@ -0,0 +1,334 @@ 
+#= +# Geometry encoding and decoding from CF conventions + +Encode functions will always return a named tuple with the standard +=# +function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) + if all(x -> GI.trait(x) isa GI.PointTrait, geoms) + return (; + node_coordinates_x = GI.x.(geoms), + node_coordinates_y = GI.y.(geoms), + ) + end + # else: we have some multipolygons in here + npoints = sum(GI.npoint, geoms) + flat_xs = Vector{Float64}(undef, npoints) + flat_ys = Vector{Float64}(undef, npoints) + + i::Int = 0 + # flatten is guaranteed to iterate in order. + flattener = GO.flatten(GI.PointTrait, geoms) do point + i += 1 + flat_xs[i] = GI.x(point) + flat_ys[i] = GI.y(point) + end + # iterate over flattener to populate the arrays, + # without allocating an extra array. + foreach(identity, flattener) + + return (; + node_coordinates_x = flat_xs, + node_coordinates_y = flat_ys, + node_count = GI.npoint.(geoms) + ) +end + + +function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) + # There is a fast path without encoding part_node_count if all geoms are linestrings. + npoints = sum(GI.npoint, geoms) + flat_xs = Vector{Float64}(undef, npoints) + flat_ys = Vector{Float64}(undef, npoints) + + i::Int = 0 + # flatten is guaranteed to iterate in order. + flattener = GO.flatten(GI.PointTrait, geoms) do point + i += 1 + flat_xs[i] = GI.x(point) + flat_ys[i] = GI.y(point) + end + # iterate over flattener to populate the arrays, + # without allocating an extra array. + foreach(identity, flattener) + + # If all geoms are linestrings, we can take a fast path. + if all(x -> GI.trait(x) isa GI.LineStringTrait, geoms) + return (; + node_coordinates_x = flat_xs, + node_coordinates_y = flat_ys, + node_count = GI.npoint.(geoms) + ) + end + + # Otherwise, we need to encode part_node_count for multilinestrings. + return (; + node_coordinates_x = flat_xs, + node_coordinates_y = flat_ys, + part_node_count = collect(GO.flatten(GI.npoint, GI.LineStringTrait, geoms)), + node_count = GI.npoint.(geoms) + ) +end + +function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) + + ngeoms = length(geoms) + nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0, threaded = false) + n_points_per_geom_vec = GI.npoint.(geoms) + total_n_points = sum(n_points_per_geom_vec) - nrings + + # Create a vector of the total number of points + xs = fill(0.0, total_n_points) + ys = fill(0.0, total_n_points) + + node_count_vec = fill(0, ngeoms) + part_node_count_vec = fill(0, nrings) + interior_ring_vec = fill(0, nrings) + + current_xy_index = 1 + current_ring_index = 1 + + for (i, geom) in enumerate(geoms) + + this_geom_npoints = GI.npoint(geom) + # Bear in mind, that the last point (which == first point) + # of the linear ring is removed when encoding, so not included + # in the node count. 
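+        # e.g. a polygon whose only ring is the closed sequence (0,0),(1,0),(1,1),(0,0)
+        # has GI.npoint == 4 and GI.nring == 1, so it contributes 3 nodes here.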
+ node_count_vec[i] = this_geom_npoints - GI.nring(geom) + + # push individual components of the ring + for poly in GO.flatten(GI.PolygonTrait, geom) + exterior_ring = GI.getexterior(poly) + for point_idx in 1:GI.npoint(exterior_ring)-1 + point = GI.getpoint(exterior_ring, point_idx) + xs[current_xy_index] = GI.x(point) + ys[current_xy_index] = GI.y(point) + current_xy_index += 1 + end + # Main.@infiltrate + part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring)-1 + interior_ring_vec[current_ring_index] = 0 + current_ring_index += 1 + + if GI.nring(poly) == 1 + continue + else + for hole in GI.gethole(poly) + for point_idx in 1:GI.npoint(hole)-1 + point = GI.getpoint(hole, point_idx) + xs[current_xy_index] = GI.x(point) + ys[current_xy_index] = GI.y(point) + current_xy_index += 1 + end + part_node_count_vec[current_ring_index] = GI.npoint(hole)-1 + interior_ring_vec[current_ring_index] = 1 + current_ring_index += 1 + end + end + end + end + # Note: this does not encode the `lat` and `lon` variables that might hold a representative point of the polygon, like a centroid. + # The names in this named tuple are standard CF conventions. + # node_coordinates_x and node_coordinates_y are the coordinates of the nodes of the rings. + # but cf encodes them as a space separated string. That's the only difference. + return (; + node_coordinates_x = xs, + node_coordinates_y = ys, + node_count = node_count_vec, + part_node_count = part_node_count_vec, + interior_ring = interior_ring_vec + ) +end + +#= +function _def_dim_var!(ds::AbstractDataset, dim::Dimension{<: GeometryLookup}) + dimname = lowercase(string(DD.name(dim))) + haskey(ds.dim, dimname) && return nothing + CDM.defDim(ds, dimname, length(dim)) + lookup(dim) isa NoLookup && return nothing + attribdict = _attribdict(NoMetadata()) + + + CDM.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib=attrib) + +end +=# + + +function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, ds, geometry_container_attribs; crs = nothing) + # First of all, we assert certain things about the geometry container and what it has. + @assert haskey(ds, geometry_container_attribs["node_count"]) + node_count_var = ds[geometry_container_attribs["node_count"]] + # only(CDM.dimnames(node_count_var)) != u_dim_name && throw(ArgumentError("node_count variable $u_dim_name does not match the unknown dimension $u_dim_name")) + + # Load and create all the data we need. + node_count = collect(node_count_var) + node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) + + # We can take a fast path for polygons, if we know that there are no multipart polygons. + if !haskey(geometry_container_attribs, "part_node_count") + node_count_stops = cumsum(node_count) + node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] + return map(node_count_starts, node_count_stops) do start, stop + GI.Polygon([GI.LinearRing(node_coordinates[start:stop]; crs)]; crs) + end + end + + + part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) + interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) + + # First, we assemble all the rings. That's the slightly complex part. + # After rings are assembled, we assemble the polygons and multipolygons from the rings. 
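+    # For example, the `multipolygons.ncgen` test data in this PR has
+    # node_count = [9, 3], part_node_count = [3, 3, 3, 3] and interior_ring = [0, 1, 0, 0]:
+    # the first geometry is made of three 3-node rings (the second ring being a hole),
+    # and the second geometry is a single 3-node exterior ring.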
+ + # Initialize variables for ring assembly + start = 1 + stop = part_node_count[1] + rings = [node_coordinates[start:stop]] + push!(rings[end], node_coordinates[start]) + + # Assemble all rings + for i in 2:length(part_node_count) + start = stop + 1 + stop = start + part_node_count[i] - 1 + push!(rings, node_coordinates[start:stop]) + # Ensure rings are closed by adding the first point at the end + push!(rings[end], node_coordinates[start]) + end + + # Now, we proceed to assemble the polygons and multipolygons from the rings. + # TODO: no better way to get the tuple type, at least for now. + _lr = GI.LinearRing([(0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0)]; crs) + _p = GI.Polygon([_lr]; crs) + _mp = GI.MultiPolygon([_p]; crs) + geoms = Vector{typeof(_mp)}(undef, length(node_count)) + # Assemble multipolygons + current_ring = 1 + for (geom_idx, total_nodes) in enumerate(node_count) + # Find all rings that belong to this polygon + polygon_rings = Tuple{typeof(_lr), Int8}[] + accumulated_nodes = 0 + + while current_ring <= length(part_node_count) && accumulated_nodes < total_nodes + ring = rings[current_ring] + push!(polygon_rings, (GI.LinearRing(ring; crs), interior_ring[current_ring])) + accumulated_nodes += part_node_count[current_ring] + current_ring += 1 + end + + # Create polygons from rings + polygons = typeof(_p)[] + current_exterior = nothing + current_holes = typeof(_lr)[] + + for (ring, is_interior) in polygon_rings + if is_interior == 0 + # If we have a previous exterior ring, create a polygon with it and its holes + if !isnothing(current_exterior) + push!(polygons, GI.Polygon([current_exterior, current_holes...]; crs)) + current_holes = typeof(_lr)[] + end + current_exterior = ring + else + push!(current_holes, ring) + end + end + + # Add the last polygon if we have an exterior ring + if !isnothing(current_exterior) + push!(polygons, GI.Polygon([current_exterior, current_holes...]; crs)) + end + + # Create multipolygon from all polygons + geoms[geom_idx] = GI.MultiPolygon(polygons; crs) + end + + return geoms + +end + + + +function _geometry_cf_decode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, ds, geometry_container_attribs; crs = nothing) + @assert haskey(ds, geometry_container_attribs["node_count"]) + node_count_var = ds[geometry_container_attribs["node_count"]] + + # Load and create all the data we need. + node_count = collect(node_count_var) + node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) + + # we can use a fast path for lines, if we know that there are no multipart lines. + if !haskey(geometry_container_attribs, "part_node_count") + node_count_stops = cumsum(node_count) + node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] + return GI.LineString.(getindex.((node_coordinates,), (:).(node_count_starts, node_count_stops)); crs) + end + + # otherwise, we need to decode the multipart lines. 
+ part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) + + # Initialize variables for line assembly + start = 1 + stop = part_node_count[1] + lines = [node_coordinates[start:stop]] + + # Assemble all lines + for i in 2:length(part_node_count) + start = stop + 1 + stop = start + part_node_count[i] - 1 + push!(lines, node_coordinates[start:stop]) + end + + # Now assemble the multilinestrings + _ls = GI.LineString(node_coordinates[1:2]; crs) + _mls = GI.MultiLineString([_ls]; crs) + geoms = Vector{typeof(_mls)}(undef, length(node_count)) + + # Assemble multilinestrings + current_line = 1 + for (geom_idx, total_nodes) in enumerate(node_count) + # Find all lines that belong to this multilinestring + multilinestring_lines = typeof(_ls)[] + accumulated_nodes = 0 + + while current_line <= length(part_node_count) && accumulated_nodes < total_nodes + line = lines[current_line] + push!(multilinestring_lines, GI.LineString(line; crs)) + accumulated_nodes += part_node_count[current_line] + current_line += 1 + end + + # Create multilinestring from all lines + geoms[geom_idx] = GI.MultiLineString(multilinestring_lines; crs) + end + + return geoms +end + +function _geometry_cf_decode(::Union{GI.PointTrait, GI.MultiPointTrait}, ds, geometry_container_attribs; crs = nothing) + + node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) + # We can take a fast path for points, if we know that there are no multipoints + if haskey(geometry_container_attribs, "node_count") + @assert haskey(ds, geometry_container_attribs["node_count"]) + node_count_var = ds[geometry_container_attribs["node_count"]] + node_count = collect(node_count_var) + # The code below could be a fast path, but we don't want + # to arbitrarily change the output type of the decoder. + # MultiPoints should always roundtrip and write as multipoints. + # if !all(==(1), node_count) + # do nothing + # else + # return a fast path + # end + # we have multipoints + node_count_stops = cumsum(node_count) + node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] + return map(node_count_starts, node_count_stops) do start, stop + GI.MultiPoint(node_coordinates[start:stop]; crs) + end + end + + # finally, if we have no node count, or all node counts are 1, we just return the points + return GI.Point.(node_coordinates; crs) + +end \ No newline at end of file diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl new file mode 100644 index 000000000..3d63ea93e --- /dev/null +++ b/src/geometry_lookup/lookups.jl @@ -0,0 +1,157 @@ +#= +# Lookups methods + +Here we define the methods for the Lookups API. +The main entry point is `selectindices`, which is used to select the indices of the geometries that match the selector. 
+ +We need to define methods that take selectors and convert them to extents, then GeometryOps needs +=# + +# Bounds - get the bounds of the lookup +Lookups.bounds(lookup::GeometryLookup) = if isempty(lookup.data) + Extents.Extent(NamedTuple{DD.name.(lookup.dims)}(ntuple(2) do i; (nothing, nothing); end)) +else + if isnothing(lookup.tree) + mapreduce(GI.extent, Extents.union, lookup.data) + else + GI.extent(lookup.tree) + end +end + +# Return an `Int` or Vector{Bool} +# Base case: got a standard index that can go into getindex on a base Array +Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel +# other cases: +# - decompose selectors +function Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) + selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) +end +function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K + dimsel = map(rebuild, map(name2dim, K), values(sel)) + selectindices(lookup, dimsel) +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) + if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) + i = findfirst(x -> all(map(Lookups._matches, sel, x)), lookup) + isnothing(i) && _coord_not_found_error(sel) + return i + else + return [Lookups._matches(sel, x) for x in lookup] + end +end + +# Selector implementations that use geometry (like Contains, Touches, etc.) +""" + _maybe_get_candidates(lookup, selector_extent) + +Get the candidates for the selector extent. If the selector extent is disjoint from the tree rootnode extent, return an error. +""" +function _maybe_get_candidates(lookup::GeometryLookup, selector_extent) + tree = lookup.tree + isnothing(tree) && return 1:length(lookup) + Extents.disjoint(GI.extent(tree), selector_extent) && return Int[] + potential_candidates = GO.SpatialTreeInterface.query( + tree, + Base.Fix1(Extents.intersects, selector_extent) + ) + isempty(potential_candidates) && return Int[] + return potential_candidates +end + +function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + filter(potential_candidates) do candidate + GO.contains(lookup.data[candidate], val(sel)) + end +end + +function Lookups.selectindices(lookup::GeometryLookup, sel::At) + if GI.trait(val(sel)) isa GI.PointTrait + Lookups.selectindices(lookup, (At(GI.x(val(sel))), At(GI.y(val(sel))))) + else # invoke the default method + Lookups.at(lookup, sel) + end +end + +function Lookups.selectindices(lookup::GeometryLookup, sel::Near) + geom = val(sel) + @assert GI.isgeometry(geom) + # TODO: temporary + @assert GI.trait(geom) isa GI.PointTrait "Only point geometries are supported for the near lookup at this point! We will add more geometry support in the future." + + # Get the nearest geometry + # TODO: this sucks! Use some branch and bound algorithm + # on the spatial tree instead. + # if pointtrait + return findmin(x -> GO.distance(geom, x), lookup.data)[2] + # else + # findmin(x -> GO.distance(GO.GEOS(), geom, x), lookup.data)[2] + # end + # this depends on LibGEOS being installed. 
+ +end + + +function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) + sel_ext = GI.extent(val(sel)) + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + return filter(potential_candidates) do candidate + GO.intersects(lookup.data[candidate], val(sel)) + end +end + +function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{ <: Touches}, Union{ <: Touches}}) + target_ext = Extents.Extent(X = (first(xs), last(xs)), Y = (first(ys), last(ys))) + potential_candidates = _maybe_get_candidates(lookup, target_ext) + return filter(potential_candidates) do candidate + GO.intersects(lookup.data[candidate], target_ext) + end +end + + +function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: DD.IntervalSets.ClosedInterval}, Union{<: DD.IntervalSets.ClosedInterval}}) + target_ext = Extents.Extent(X = extrema(xs), Y = extrema(ys)) + potential_candidates = _maybe_get_candidates(lookup, target_ext) + filter(potential_candidates) do candidate + GO.covers(target_ext, lookup.data[candidate]) + end +end + +function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: At, <: Contains, <: Real}, Union{<: At, <: Contains, <: Real}}) + xval, yval = val(xs), val(ys) + + lookup_ext = Lookups.bounds(lookup) + + if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] + potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) + isempty(potential_candidates) && return Int[] + filter(potential_candidates) do candidate + GO.contains(lookup.data[candidate], (xval, yval)) + end + else + return Int[] + end +end + +@inline Lookups.reducelookup(l::GeometryLookup) = NoLookup(OneTo(1)) + +function Lookups.show_properties(io::IO, mime, lookup::GeometryLookup) + print(io, " ") + show(IOContext(io, :inset => "", :dimcolor => 244), mime, DD.basedims(lookup)) +end + +# Dimension methods + +@inline _reducedims(lookup::GeometryLookup, dim::DD.Dimension) = + rebuild(dim, [map(x -> zero(x), dim.val[1])]) + +function DD._format(dim::DD.Dimension{<:GeometryLookup}, axis::AbstractRange) + checkaxis(dim, axis) + return dim +end + +# Local functions +_val_or_nothing(::Nothing) = nothing +_val_or_nothing(d::DD.Dimension) = val(d) + + diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl new file mode 100644 index 000000000..d1a6cd79a --- /dev/null +++ b/src/geometry_lookup/methods.jl @@ -0,0 +1,75 @@ +#= +## Reproject + +Reproject just forwards to `GO.reproject`. Since regular reproject here in Rasters and GO reproject both need Proj, +this is not too bad. +=# + +function reproject(target::GeoFormat, l::GeometryLookup) + source = l.crs + # TODO: allow GDAL reproject for its antimeridian cutting + return rebuild(l; data = GO.reproject(l.data; source_crs = source, target_crs = target, always_xy = true), crs = target) +end + +#= +## Zonal + +Zonal with a geometry lookup or a geometry dimension should return a vector data cube. +=# +function _zonal(f, x::RasterStackOrArray, ::Nothing, data::GeometryLookup; kw...) + return _zonal(f, x, nothing, Geometry(data); kw...) +end +function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dimension{<: GeometryLookup}; + progress=true, threaded=true, geometrycolumn=nothing, spatialslices, kw... +) + geoms = data.val.data + # TODO: deliberately filter geoms based on extent and tree + # so that we don't waste time descending through the pipeline + # for geometries that are outside the extent of the raster + # but that is for later. 
+ + if istrue(spatialslices) && data.val.dims != (X(), Y()) && dims(x, data.val.dims) != dims(x, (Val{DD.XDim}(), Val{DD.YDim}())) + spatialslices = dims(x, data.val.dims) # use the dimensions in the geometry lookup! + end + + n = length(geoms) + n == 0 && return [] + zs, start_index = _alloc_zonal(f, x, geoms, n; spatialslices, kw...) + if start_index != n + 1 + _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i + zs[i] = _zonal(f, x, geoms[i]; spatialslices, kw...) + end + end + + return_lookup_dims = if spatialslices isa DD.AllDims + dims(data, spatialslices) + elseif istrue(spatialslices) + dims(data, (Val{DD.XDim}(), Val{DD.YDim}())) + else # fallback + (X(), Y()) + end + # Note here that e.g. `X()` is actually `X(:)`. That's why we rebuild the dims with colons - + # so that we get a "neutral materialized dimension" out of it. + return_lookup = rebuild(lookup(data); dims = rebuild.(return_lookup_dims, (:,))) + + return_dimension = rebuild(data, return_lookup) + + if zs isa AbstractVector{<: Union{<: AbstractDimArray, Missing}} + return _cat_and_rebuild_parent(x, zs, return_dimension) + elseif zs isa AbstractVector{<: Union{<: AbstractDimStack, Missing}} + dimarrays = NamedTuple{names(st)}( + ntuple(length(names(st))) do i + _cat_and_rebuild_parent(layers(st)[i], (layers(z)[i] for z in zs), return_dimension) + end + ) + return rebuild(x; data = dimarrays, dims = (dims(first(zs))..., return_dimension)) + else + return Raster(zs, (return_dimension,)) + end +end + +#= +## Crop + +Cropping a geometry lookup to either a geometry lookup, a geometry, or a bounding box should return a geometry lookup. +=# diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl new file mode 100644 index 000000000..d5ebc7c20 --- /dev/null +++ b/src/geometry_lookup/nc_io.jl @@ -0,0 +1,130 @@ +using NCDatasets +import NCDatasets.CommonDataModel as CDM +import NCDatasets.NetCDF_jll + +using Test + +import Rasters +import GeoInterface as GI + +#= +var = ds["someData"] + +knowndims = Rasters._dims(var) + +unknowndims_idxs = findall(Rasters.isnolookup ∘ Rasters.lookup, knowndims) + +if length(unknowndims_idxs) > 1 + throw(ArgumentError("Only one unknown dimension is supported")) +elseif length(unknowndims_idxs) == 0 + return knowndims +end + +u_idx = only(unknowndims_idxs) + +u_dim_name = CDM.dimnames(var)[u_idx] + +has_geometry = haskey(CDM.attribs(var), "geometry") +if !has_geometry + throw(ArgumentError("Variable $u_dim_name does not have a geometry attribute")) +end +=# + +# generate dataset +output_path = tempname() * ".nc" +testfile = "multipolygons.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] + +geometry_container_attribs = CDM.attribs(geometry_container_var) + +haskey(geometry_container_attribs, "geometry_type") && +geometry_container_attribs["geometry_type"] == "polygon" || +throw(ArgumentError("We only support polygon geometry types at this time, got $geometry_type")) + +geoms = Rasters._geometry_cf_decode(GI.PolygonTrait(), ds, geometry_container_attribs) + +encoded = Rasters._geometry_cf_encode(GI.PolygonTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.node_count == ds["node_count"] +@test encoded.part_node_count == ds["part_node_count"] +@test 
encoded.interior_ring == ds["interior_ring"] + +# lines now + +output_path = tempname() * ".nc" +testfile = "lines.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] +geometry_container_attribs = CDM.attribs(geometry_container_var) +geoms = Rasters._geometry_cf_decode(GI.LineStringTrait(), ds, geometry_container_attribs) + +encoded = Rasters._geometry_cf_encode(GI.LineStringTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.node_count == ds["node_count"] +@test !hasproperty(encoded, :part_node_count) + + +output_path = tempname() * ".nc" +testfile = "multilines.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] +geometry_container_attribs = CDM.attribs(geometry_container_var) +geoms = Rasters._geometry_cf_decode(GI.MultiLineStringTrait(), ds, geometry_container_attribs) + +encoded = Rasters._geometry_cf_encode(GI.MultiLineStringTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.part_node_count == ds["part_node_count"] +@test encoded.node_count == ds["node_count"] + + + + +output_path = tempname() * ".nc" +testfile = "multipoints.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) + +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] +geometry_container_attribs = CDM.attribs(geometry_container_var) +geoms = Rasters._geometry_cf_decode(GI.MultiPointTrait(), ds, geometry_container_attribs) + +encoded = Rasters._geometry_cf_encode(GI.MultiPointTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.node_count == ds["node_count"] + + +output_path = tempname() * ".nc" +testfile = "points.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) + +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] +geometry_container_attribs = CDM.attribs(geometry_container_var) +geoms = Rasters._geometry_cf_decode(GI.PointTrait(), ds, geometry_container_attribs) + +encoded = Rasters._geometry_cf_encode(GI.PointTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test !hasproperty(encoded, :node_count) \ No newline at end of file diff --git a/src/methods/extract.jl b/src/methods/extract.jl index d54fecbcd..3b120271f 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -170,10 +170,46 @@ Base.@constprop :aggressive @inline function extract(x::RasterStackOrArray, data gs = _get_geometries(data, geometrycolumn) gs, (isempty(gs) || all(ismissing, gs)) ? missing : first(Base.skipmissing(gs)) end + # Taken from zonal.jl and _mapspatialslices there. 
+ spatialdims = (Val{DD.XDim}(), Val{DD.YDim}()) + dimswewant = DD.otherdims(x, spatialdims) + + if isempty(dimswewant) # the raster is 2 dimensional in spatial dims only + e = Extractor(x, g1; name, skipmissing, flatten, id, geometry, index) + id_init = 1 + return _extract(x, e, id_init, g; kw...) + else # the raster has other dims than spatial dims, so we need to slice through them + # and return...that's right...a VECTOR DATA CUBE! + # Taken from zonal.jl and _mapspatialslices there. + # just get the first index of the "other" dims. + # This assumes they are nonzero in length but that seems reasonable, for now. + slicedims = rebuild.(dims(x, dimswewant), 1) + ras_for_extractor_construction = view(x, slicedims...) + @assert isfalse(skipmissing) "skipmissing must be false for >2d data cubes" + # TODO: this is inefficient, because it's doing the burning once per slice + # instead, it should do it once total and then reuse, even if that allocates. + extractor = Extractor(ras_for_extractor_construction, g1; name, skipmissing = false, flatten, id, geometry, index) + id_init = 1 + result_for_each_spatial_slice = _mapspatialslices(x; spatialdims) do xy_ras + id_init = 1 + res1 = _extract(xy_ras, extractor, id_init, g; kw...) + # Remove the geometry from the result + if isa(res1, AbstractArray) + return Base.structdiff.(res1, ((; geometry=1,),)) + else + return Base.structdiff(res1, (; geometry=1,),) + end + end - e = Extractor(x, g1; name, skipmissing, flatten, id, geometry, index) - id_init = 1 - return _extract(x, e, id_init, g; kw...) + if GI.isgeometry(data) || GI.isfeature(data) + rebuild(result_for_each_spatial_slice; refdims = Geometry(data)) + else # featurecollection, table, vector. + backing_array = __do_cat_with_last_dim_multidim_version(result_for_each_spatial_slice) + backing_dim = Geometry(GeometryLookup(g)) + return rebuild(result_for_each_spatial_slice; data = backing_array, dims = (backing_dim, dims(result_for_each_spatial_slice)...)) + end + end + end # TODO use a GeometryOpsCore method like `applyreduce` here? diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index f522a4ad2..1f91c5189 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -23,6 +23,11 @@ These can be used when `of` is or contains (a) GeoInterface.jl compatible object `f` will be passed an iterator over the values, which loses all spatial information. if `false` `f` will be passes a masked `Raster` or `RasterStack`, and will be responsible for handling missing values itself. The default value is `true`. +- `spatialslices`: if `true`, and the input Raster has more dimensions than `X` and `Y`, then we will + apply the function `f` to each spatial slice of the raster (literally, `mapslices(f, x; dims = (X, Y))`), + and return a vector data cube or stack of the results. + if `false`, then we will apply `f` to the full cropped raster, and return a vector of results (one per geometry) + as usual. # Example @@ -60,15 +65,15 @@ insertcols!(january_stats, 1, :country => first.(split.(countries.ADMIN, r"[^A-Z 3 columns and 243 rows omitted ``` """ -function zonal(f, x::RasterStack; of, skipmissing=true, kw...) +function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=_False(), missingval=isnothing(missingval(x)) ? missing : missingval(x), kw...) # TODO: open currently doesn't work so well for large rasterstacks, # we need to fix that before we can go back to this being a single method # on `RasterStackOrArray`. - _zonal(f, _prepare_for_burning(x), of; skipmissing, kw...) 
+ _zonal(f, _prepare_for_burning(x), of; skipmissing, spatialslices, missingval, kw...) end -function zonal(f, x::Raster; of, skipmissing=true, kw...) +function zonal(f, x::Raster; of, skipmissing=true, spatialslices=_False(), missingval=isnothing(missingval(x)) ? missing : missingval(x), kw...) open(x) do xo - _zonal(f, _prepare_for_burning(xo), of; skipmissing, kw...) + _zonal(f, _prepare_for_burning(xo), of; skipmissing, spatialslices, missingval, kw...) end end @@ -77,13 +82,14 @@ _zonal(f, x::RasterStackOrArray, of::RasterStackOrArray; kw...) = _zonal(f, x::RasterStackOrArray, of::DimTuple; kw...) = _zonal(f, x, Extents.extent(of); kw...) # We don't need to `mask` with an extent, it's square so `crop` will do enough. -_zonal(f, x::Raster, of::Extents.Extent; skipmissing) = - _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) -function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing) +_zonal(f, x::Raster, of::Extents.Extent; skipmissing, spatialslices, missingval) = _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), crop(x; to=of, touches=true), skipmissing) +function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing, spatialslices, missingval) cropped = crop(x; to=ext, touches=true) - length(cropped) > 0 || return missing + if length(cropped) == 0 && skipmissing == true + return map(_ -> missingval, x) + end return maplayers(cropped) do A - _maybe_skipmissing_call(f, A, skipmissing) + _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices, missingval), A, skipmissing) end end # Otherwise of is a geom, table or vector @@ -94,57 +100,199 @@ _zonal(f, x, ::GI.AbstractFeatureCollectionTrait, fc; kw...) = _zonal(f, x::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...) = _zonal(f, x, GI.geometry(feature); kw...) function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; - skipmissing, kw... + skipmissing, spatialslices, missingval, kw... ) cropped = crop(x; to=geom, touches=true) - length(cropped) > 0 || return missing - masked = mask(cropped; with=geom, kw...) - return _maybe_skipmissing_call(f, masked, skipmissing) + masked = if length(cropped) == 0 + if istrue(skipmissing) && isfalse(spatialslices) + return missingval + end + cropped # don't mask if we know there is nothing to mask, otherwise it errors + else + mask(cropped; with=geom, kw...) + end + return _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices, missingval), masked, skipmissing) end function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; - skipmissing, kw... + skipmissing, spatialslices, missingval, kw... ) cropped = crop(st; to=geom, touches=true) - length(cropped) > 0 || return map(_ -> missing, st) - masked = mask(cropped; with=geom, kw...) + masked = if length(cropped) == 0 + if istrue(skipmissing) && isfalse(spatialslices) + return map(_ -> missingval, st) + end + cropped # don't mask if we know there is nothing to mask, otherwise it errors + else + mask(cropped; with=geom, kw...) + end return maplayers(masked) do A - length(A) > 0 || return missing - _maybe_skipmissing_call(f, A, skipmissing) + if length(A) == 0 && (istrue(skipmissing) && isfalse(spatialslices)) + return missingval + end + _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices, missingval), A, skipmissing) end end function _zonal(f, x::RasterStackOrArray, ::Nothing, data; - progress=true, threaded=true, geometrycolumn=nothing, kw... + progress=true, threaded=true, geometrycolumn=nothing, missingval, kw... 
) geoms = _get_geometries(data, geometrycolumn) n = length(geoms) n == 0 && return [] - zs, start_index = _alloc_zonal(f, x, geoms, n; kw...) + zs, start_index = _alloc_zonal(f, x, geoms, n; missingval, kw...) start_index == n + 1 && return zs _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i - zs[i] = _zonal(f, x, geoms[i]; kw...) + zs[i] = _zonal(f, x, geoms[i]; missingval, kw...) + end + + return_dimension = Dim{:Geometry}(axes(zs, 1)) + + if zs isa AbstractVector{<: Union{<: AbstractDimArray, Missing}} + return _cat_and_rebuild_parent(x, zs, return_dimension) + elseif zs isa AbstractVector{<: Union{<: AbstractDimStack, Missing}} + dimarrays = NamedTuple{names(st)}( + ntuple(length(names(st))) do i + _cat_and_rebuild_parent(layers(st)[i], (layers(z)[i] for z in zs), return_dimension) + end + ) + return rebuild(x; data = dimarrays, dims = (dims(first(zs))..., return_dimension)) end return zs end -function _alloc_zonal(f, x, geoms, n; kw...) - # Find first non-missing entry and count number of missing entries +function _alloc_zonal(f, x, geoms, n; spatialslices = _True(), missingval, kw...) n_missing::Int = 0 - z1 = _zonal(f, x, first(geoms); kw...) - for geom in geoms - z1 = _zonal(f, x, geom; kw...) - if !ismissing(z1) - break + if isfalse(spatialslices) + # Find first non-missing entry and count number of missing entries + z1 = _zonal(f, x, first(geoms); spatialslices, missingval, kw...) + for geom in geoms + z1 = _zonal(f, x, geom; spatialslices, missingval, kw...) + if !(ismissing(z1) || z1 === missingval) + break + end + n_missing += 1 end - n_missing += 1 - end - zs = Vector{Union{Missing,typeof(z1)}}(undef, n) - zs[1:n_missing] .= missing - # Exit early when all elements are missing - if n_missing == n + zs = Vector{Union{typeof(missingval), typeof(z1)}}(undef, n) + zs[1:n_missing] .= missingval + # Exit early when all elements are missing + if n_missing == n + return zs, n_missing + 1 + end + zs[n_missing + 1] = z1 return zs, n_missing + 1 + else # spatialslices is true, we know we need an output raster + z1 = _zonal(f, x, first(geoms); spatialslices, missingval, kw...) + _missing_array = z1 + for geom in geoms + z1 = _zonal(f, x, geom; spatialslices, missingval, kw...) + if !all(ismissing, z1) # here, z1 is a raster, so it's fine - but maybe we should have a better check... + break + end + n_missing += 1 + end + # TODO: just bite the bullet and use map. alloc_zonal is a mess. + zs = Vector{Union{typeof(z1), typeof(_missing_array)}}(undef, n) + for i in 1:n + zs[i] = _missing_array + end + # Exit early when all elements are missing + if n_missing == n + return zs, n_missing + 1 + end + zs[n_missing + 1] = z1 + return zs, n_missing + 1 + end +end + +# Optionally wrap the input argument in `skipmissing(A)` is `sm` is true. +_maybe_skipmissing_call(f, A, sm) = istrue(sm) ? f(skipmissing(A)) : f(A) + +# the only reason we have AbstractDimArray here is to make sure that DD.otherdims is available. +# We could probably get away with just AbstractArray here otherwise. +# The reason this is not just mapslices is because this drops the sliced dimensions automatically, +# which is what we want. 
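+# For example, for a raster with dims (X, Y, Ti), `f` is applied once per Ti step to the
+# corresponding (X, Y) slice, and the result has only the Ti dimension, unlike `mapslices`,
+# which would keep X and Y around as length-1 dimensions.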
+function _mapspatialslices(f, x::AbstractDimArray; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}()), missingval = missingval(x)) + dimswewant = DD.otherdims(x, spatialdims) + if isempty(dimswewant) + return f(x) end - zs[n_missing + 1] = z1 - return zs, n_missing + 1 + slicedims = rebuild.(dims(x, dimswewant), axes.((x,), dimswewant)) + if any(isempty, DD.dims(x, spatialdims)) + # If any of the spatial dims are empty, we can just return a constant missing array + # this way we don't construct the dimslices at all... + missing_array = FillArrays.Fill{Union{typeof(missingval), eltype(x)}, length(dimswewant)}(missingval, length.(dimswewant)) + return rebuild(x; data = missing_array, dims = dimswewant, refdims = ()) + end + iterator = (rebuild(x; data = d, dims = dims(d)) for d in DD.DimSlices(x; dims = slicedims, drop = true)) + return rebuild(x; data = f.(iterator), dims = dimswewant, refdims = ()) +end +# SkipMissingVal and SkipMissing both store the initial value in the `x` property, +# so we can use the same thing to extract it. +function _mapspatialslices(f, s::Union{SkipMissingVal, Base.SkipMissing}; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}()), missingval = missingval(s.x)) + return _mapspatialslices(f ∘ skipmissing, s.x; spatialdims, missingval) +end + + +_maybe_spatialsliceify(f, spatialslices, missingval = missing) = istrue(spatialslices) ? _SpatialSliceify(f, (Val{DD.XDim}(), Val{DD.YDim}()), missingval) : f +_maybe_spatialsliceify(f, spatialslices::DD.AllDims, missingval = missing) = _SpatialSliceify(f, spatialslices, missingval) + +""" + _SpatialSliceify(f, dims) + +A callable struct that applies `mapslices(f, x; dims = spatialdims)` to the input array `x`, and removes empty dimensions. + +```jldoctest +data = ones(10, 10, 10, 10); +f = _SpatialSliceify(sum, (1, 2)) +size(f(data)) + +# output +(10, 10) +``` +""" +struct _SpatialSliceify{F, D, M} + f::F + dims::D + missingval::M +end + +(r::_SpatialSliceify{F, D, M})(x) where {F, D, M} = _mapspatialslices(r.f, x; spatialdims = r.dims, missingval = r.missingval) + +# This is a helper function that concatenates an array of arrays along their last dimension. +# and returns a ConcatDiskArray so that it doesn't allocate at all.\ +# Users can always rechunk later. But this saves us a lot of time when doing datacube ops. +# And the chunk pattern is available in the concat diskarray. +function __do_cat_with_last_dim(input_arrays) + # This assumes that the input array is a vector of arrays. + As = Missings.disallowmissing(collect(input_arrays)) + dims = ndims(first(As)) + 1 + sz = ntuple(dims) do i + i == dims ? length(As) : 1 + end + cdas = reshape(As, sz) + backing_array = DiskArrays.ConcatDiskArray(cdas) + return backing_array +end + +function __do_cat_with_last_dim_multidim_version(As) + # This CANNOT assume that the input array is a vector of arrays. + new_n_dims = ndims(As) + ndims(first(As)) + sz = ntuple(new_n_dims) do i + i <= ndims(first(As)) ? 1 : size(As, i-1) + end + cdas = reshape(As, sz) + backing_array = DiskArrays.ConcatDiskArray(cdas) + return backing_array +end +# This is a wrapper around the helper function that performs the final cat and rebuild, but on +# a dimarray. 
+function _cat_and_rebuild_parent(parent, children, newdim) + backing_array = __do_cat_with_last_dim(children) # see zonal.jl for implementation + children_dims = dims(first(children)) + final_dims = DD.format((children_dims..., newdim), backing_array) + return rebuild(parent; data = backing_array, dims = final_dims) end -_maybe_skipmissing_call(f, A, sm) = sm ? f(skipmissing(A)) : f(A) +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 1}},)) +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 2}},)) +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 3}},)) +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 4}},)) diff --git a/src/utils.jl b/src/utils.jl index fb2329544..ef6c4e081 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -459,15 +459,15 @@ _progress(args...; kw...) = ProgressMeter.Progress(args...; dt=0.1, color=:blue, @noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) # Run `f` threaded or not, w -function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) +function _run(f, range::OrdinalRange, threaded, progress::Bool, desc::String) p = progress ? _progress(length(range); desc) : nothing - if threaded - Threads.@threads :static for i in range + if isfalse(threaded) + for i in range f(i) isnothing(p) || ProgressMeter.next!(p) end else - for i in range + Threads.@threads for i in range f(i) isnothing(p) || ProgressMeter.next!(p) end @@ -553,6 +553,13 @@ istrue(::_True) = true istrue(::_False) = false istrue(::Type{_True}) = true istrue(::Type{_False}) = false +istrue(x) = x == true + +isfalse(::_True) = false +isfalse(::_False) = true +isfalse(::Type{_True}) = false +isfalse(::Type{_False}) = true +isfalse(x) = x == false # skipinvalid: can G and I be missing. skipmissing: can nametypes be missing _rowtype(x, g, args...; kw...) = _rowtype(x, typeof(g), args...; kw...) 
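As a rough usage sketch of the new `spatialslices` keyword to `zonal` (the raster, polygon, and values below are illustrative assumptions, not taken from the test suite):

```julia
using Rasters, Dates
import GeoInterface as GI

# A hypothetical (X, Y, Ti) raster and a single illustrative polygon.
ras = Raster(rand(X(-180:179), Y(-90:89), Ti(Date(2000):Year(1):Date(2002))); crs=EPSG(4326))
ring = GI.LinearRing([(0.0, 0.0), (20.0, 0.0), (20.0, 20.0), (0.0, 0.0)])
polys = [GI.Polygon([ring])]

# Default: spatial information is collapsed, giving one value per geometry.
zonal(sum, ras; of=polys)

# With spatialslices=true, `sum` runs on each (X, Y) slice of the masked crop,
# so the per-geometry result keeps the Ti dimension (a small vector data cube).
zonal(sum, ras; of=polys, spatialslices=true)
```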
diff --git a/test/data/lines.ncgen b/test/data/lines.ncgen new file mode 100644 index 000000000..e277f32bc --- /dev/null +++ b/test/data/lines.ncgen @@ -0,0 +1,51 @@ +netcdf lines { +dimensions: + instance = 2 ; + node = 5 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + int datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:longitude_of_prime_meridian = 0.0 ; + datum:semi_major_axis = 6378137.0 ; + datum:inverse_flattening = 298.257223563 ; + int geometry_container ; + geometry_container:geometry_type = "line" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + int node_count(instance) ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + lat = 30, 50 ; + lon = 10, 60 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + node_count = 3, 2 ; + x = 30, 10, 40, 50, 50 ; + y = 10, 30, 40, 60, 50 ; +} \ No newline at end of file diff --git a/test/data/multilines.ncgen b/test/data/multilines.ncgen new file mode 100644 index 000000000..4f1873185 --- /dev/null +++ b/test/data/multilines.ncgen @@ -0,0 +1,60 @@ +netcdf multilines { +dimensions: + node = 12 ; + instance = 2 ; + part = 4 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + float geometry_container ; + geometry_container:geometry_type = "line" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + geometry_container:grid_mapping = "datum" ; + geometry_container:coordinates = "lat lon" ; + geometry_container:part_node_count = "part_node_count" ; + geometry_container:interior_ring = "interior_ring" ; + int node_count(instance) ; + int part_node_count(part) ; + int interior_ring(part) ; + float datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:semi_major_axis = 6378137. ; + datum:inverse_flattening = 298.257223563 ; + datum:longitude_of_prime_meridian = 0. 
; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + x = 20, 10, 0, 5, 10, 15, 20, 10, 0, 50, 40, 30 ; + y = 0, 15, 0, 5, 10, 5, 20, 35, 20, 0, 15, 0 ; + lat = 25, 7 ; + lon = 10, 40 ; + node_count = 9, 3 ; + part_node_count = 3, 3, 3, 3 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + +} diff --git a/test/data/multipoints.ncgen b/test/data/multipoints.ncgen new file mode 100644 index 000000000..5eff70acf --- /dev/null +++ b/test/data/multipoints.ncgen @@ -0,0 +1,51 @@ +netcdf multipoints { +dimensions: + instance = 2 ; + node = 5 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + int datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:longitude_of_prime_meridian = 0.0 ; + datum:semi_major_axis = 6378137.0 ; + datum:inverse_flattening = 298.257223563 ; + int node_count(instance) ; + int geometry_container ; + geometry_container:geometry_type = "point" ; + geometry_container:node_coordinates = "x y" ; + geometry_container:node_count = "node_count" ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + lat = 30, 50 ; + lon = 10, 60 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + node_count = 3, 2 ; + x = 30, 10, 40, 50, 50 ; + y = 10, 30, 40, 60, 50 ; +} \ No newline at end of file diff --git a/test/data/multipolygons.ncgen b/test/data/multipolygons.ncgen new file mode 100644 index 000000000..599f4371e --- /dev/null +++ b/test/data/multipolygons.ncgen @@ -0,0 +1,61 @@ +netcdf multipolygons { +dimensions: + node = 12 ; + instance = 2 ; + part = 4 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + float geometry_container ; + geometry_container:geometry_type = "polygon" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + geometry_container:grid_mapping = "datum" ; + geometry_container:coordinates = "lat lon" ; + geometry_container:part_node_count = "part_node_count" ; + geometry_container:interior_ring = "interior_ring" ; + int node_count(instance) ; + int part_node_count(part) ; + int interior_ring(part) ; + float datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:semi_major_axis = 6378137. ; + datum:inverse_flattening = 298.257223563 ; + datum:longitude_of_prime_meridian = 0. 
+ double someData(instance, time) ;
+ someData:coordinates = "time lat lon" ;
+ someData:grid_mapping = "datum" ;
+ someData:geometry = "geometry_container" ;
+// global attributes:
+ :featureType = "timeSeries" ;
+data:
+ time = 1, 2, 3, 4 ;
+ x = 20, 10, 0, 5, 10, 15, 20, 10, 0, 50, 40, 30 ;
+ y = 0, 15, 0, 5, 10, 5, 20, 35, 20, 0, 15, 0 ;
+ lat = 25, 7 ;
+ lon = 10, 40 ;
+ node_count = 9, 3 ;
+ part_node_count = 3, 3, 3, 3 ;
+ interior_ring = 0, 1, 0, 0 ;
+ someData =
+ 1, 2, 3, 4,
+ 1, 2, 3, 4 ;
+
+}
diff --git a/test/data/points.ncgen b/test/data/points.ncgen
new file mode 100644
index 000000000..1b8e56c9f
--- /dev/null
+++ b/test/data/points.ncgen
@@ -0,0 +1,48 @@
+netcdf points {
+dimensions:
+ instance = 2 ;
+ node = 2 ;
+ time = 4 ;
+variables:
+ int time(time) ;
+ time:units = "days since 2000-01-01" ;
+ double lat(instance) ;
+ lat:units = "degrees_north" ;
+ lat:standard_name = "latitude" ;
+ lat:nodes = "y" ;
+ double lon(instance) ;
+ lon:units = "degrees_east" ;
+ lon:standard_name = "longitude" ;
+ lon:nodes = "x" ;
+ int datum ;
+ datum:grid_mapping_name = "latitude_longitude" ;
+ datum:longitude_of_prime_meridian = 0.0 ;
+ datum:semi_major_axis = 6378137.0 ;
+ datum:inverse_flattening = 298.257223563 ;
+ int geometry_container ;
+ geometry_container:geometry_type = "point" ;
+ geometry_container:node_coordinates = "x y" ;
+ double x(node) ;
+ x:units = "degrees_east" ;
+ x:standard_name = "longitude" ;
+ x:axis = "X" ;
+ double y(node) ;
+ y:units = "degrees_north" ;
+ y:standard_name = "latitude" ;
+ y:axis = "Y" ;
+ double someData(instance, time) ;
+ someData:coordinates = "time lat lon" ;
+ someData:grid_mapping = "datum" ;
+ someData:geometry = "geometry_container" ;
+// global attributes:
+ :featureType = "timeSeries" ;
+data:
+ time = 1, 2, 3, 4 ;
+ lat = 30, 50 ;
+ lon = 10, 60 ;
+ someData =
+ 1, 2, 3, 4,
+ 1, 2, 3, 4 ;
+ x = 30, 50;
+ y = 10, 60;
+}
\ No newline at end of file
diff --git a/test/geometry_lookup.jl b/test/geometry_lookup.jl
new file mode 100644
index 000000000..9b4f33314
--- /dev/null
+++ b/test/geometry_lookup.jl
@@ -0,0 +1,76 @@
+using NaturalEarth
+using Test
+
+using Rasters, DimensionalData
+using Rasters.Lookups
+import Proj
+import GeometryOps as GO, GeoInterface as GI
+using Extents
+
+@testset "construction" begin
+    # fetch country polygons from Natural Earth
+    country_polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry
+    # create a GeometryLookup from the country polygons
+    gl = GeometryLookup(country_polygons)
+    @test crs(gl) == EPSG(4326)
+    @test all(splat(GO.equals), zip(val(gl), country_polygons))
+end
+
+@testset "reprojecting a GeometryLookup" begin
+    # fetch country polygons from Natural Earth
+    country_polygons = NaturalEarth.naturalearth("admin_0_countries", 110)
+    # create a DimVector over the country polygons
+    dv = Raster(rand(Dim{:Geometry}(GeometryLookup(country_polygons))))
+    # reproject the full vector data cube (here a one-dimensional vector of geometries)
+    target_crs = ProjString("+proj=wintri +type=crs")
+    reprojected_via_rasters = val(dims(reproject(target_crs, dv), Dim{:Geometry}))
+    reprojected_via_geometryops = GO.reproject(country_polygons; source_crs = EPSG(4326), target_crs = target_crs)
+    # test that the reprojected geometries are the same
+    @test all(splat(GO.equals), zip(
+        reprojected_via_rasters, # reproject the vdc, get the geometries from it
+        reprojected_via_geometryops # reproject the geometries directly
+    )
+    )
+end
+
+# shared 2D raster used by the tree-type tests below
+ras2d = Raster(
+    rand(
+        X(Sampled(-180:179, sampling = Intervals(Start()))),
+        Y(Sampled(-90:89, sampling = Intervals(Start()))),
+    );
+    crs = EPSG(4326)
+)
+
+@testset "Different tree types" begin
+    country_polygons = NaturalEarth.naturalearth("admin_0_countries", 110)
+    country_lookup = GeometryLookup(country_polygons)
+    country_lookup_notree = GeometryLookup(country_polygons; tree = nothing)
+    country_lookup_flat_notree = GeometryLookup(country_polygons; tree = GO.SpatialTreeInterface.FlatNoTree)
+
+    results = zonal(sum, ras2d; of = country_polygons, threaded = false)
+
+    results_regular = zonal(sum, ras2d; of = country_lookup)
+    results_notree = zonal(sum, ras2d; of = country_lookup_notree)
+    results_flat_notree = zonal(sum, ras2d; of = country_lookup_flat_notree)
+
+    @testset "Zonal" begin
+        @test results_regular isa Raster
+        @test hasdim(results_regular, Geometry)
+        @test lookup(results_regular, Geometry) isa GeometryLookup
+        @test val(lookup(results_regular, Geometry)) == country_lookup
+        @test val(lookup(results_regular, Geometry)) == country_polygons.geometry
+
+        @test results == results_regular
+        @test results_regular == results_notree
+        @test results_regular == results_flat_notree
+    end
+
+    @testset "Selectors" begin
+        @testset "Touches with an extent contained in a geometry" begin
+            usa_extent = Extent(X = (-104, -103), Y = (44, 45)) # squarely within the USA
+            # broken, possibly due to an upstream GeometryOps issue
+            @test_broken only(DimensionalData.dims2indices(results_regular, Geometry(Touches(usa_extent)))) == findfirst(==("USA"), country_polygons.ADM0_A3)
+        end
+    end
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 9b897b63f..b7678c441 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -29,6 +29,7 @@ end
 @time @safetestset "warp" begin include("warp.jl") end
 @time @safetestset "resample" begin include("resample.jl") end
 @time @safetestset "cellarea" begin include("cellarea.jl") end
+@time @safetestset "geometrylookups" begin include("geometry_lookup.jl") end
 @time @safetestset "sources" begin include("sources/sources.jl") end
 @time @safetestset "commondatamodel" begin include("sources/commondatamodel.jl") end
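
The new tests exercise the GeometryLookup workflow end to end: build a lookup from a vector of geometries, put it on a Geometry dimension, run zonal statistics against a regular raster, and reproject the result. Below is a minimal usage sketch of that workflow, assuming the constructors and keyword signatures shown in test/geometry_lookup.jl (GeometryLookup(geoms), zonal(f, raster; of = lookup), reproject(crs, raster)); the NaturalEarth "admin_0_countries" dataset is the same one the tests use.

using Rasters, DimensionalData
using Rasters.Lookups
using NaturalEarth

# Country polygons become the lookup of a Geometry dimension.
countries = NaturalEarth.naturalearth("admin_0_countries", 110)
geoms = GeometryLookup(countries)

# A simple global raster with one value per 1-degree cell.
ras = Raster(
    rand(
        X(Sampled(-180:179, sampling = Intervals(Start()))),
        Y(Sampled(-90:89, sampling = Intervals(Start()))),
    );
    crs = EPSG(4326),
)

# Zonal statistics over the lookup return a Raster indexed by Geometry,
# so per-country results keep their geometries attached.
sums = zonal(sum, ras; of = geoms)

# The Geometry dimension can then be reprojected like any other lookup.
sums_wintri = reproject(ProjString("+proj=wintri +type=crs"), sums)

Because the per-geometry results come back as a Raster with a GeometryLookup, the same selectors (e.g. Touches on an Extent) and reprojection machinery used for gridded dimensions apply to the vector dimension as well.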