From de5548a2b4d474f1e6f488c6c954fbd9901a5b15 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 26 Nov 2024 20:10:45 +0100 Subject: [PATCH 01/11] faster line extract --- src/methods/burning/line.jl | 102 +++++++++++++++++---------------- src/methods/burning/polygon.jl | 14 ++--- src/methods/extract.jl | 50 +++++++++++++++- 3 files changed, 109 insertions(+), 57 deletions(-) diff --git a/src/methods/burning/line.jl b/src/methods/burning/line.jl index 9155479bb..2584d888e 100644 --- a/src/methods/burning/line.jl +++ b/src/methods/burning/line.jl @@ -1,33 +1,52 @@ # _burn_lines! # Fill a raster with `fill` where pixels touch lines in a geom # Separated for a type stability function barrier -function _burn_lines!(B::AbstractRaster, geom; fill=true, kw...) - _check_intervals(B) +function _burn_lines!(B::AbstractRaster, geom; fill=true, verbose=false, kw...) + _check_intervals(B, verbose) B1 = _prepare_for_burning(B) - return _burn_lines!(B1, geom, fill) + + xdim, ydim = dims(B, DEFAULT_POINT_ORDER) + regular = map((xdim, ydim)) do d + # @assert (parent(lookup(d)) isa Array) + lookup(d) isa AbstractSampled && span(d) isa Regular + end + msg = """ + Can only fill lines where dimensions have `Regular` lookups. + Consider using `boundary=:center`, reprojecting the crs, + or make an issue in Rasters.jl on github if you need this to work. + """ + all(regular) || throw(ArgumentError(msg)) + + # For arbitrary dimension indexing + + _burn_lines!(dims(B), geom) do D + @inbounds B[D] = fill + end end -_burn_lines!(B, geom, fill) = - _burn_lines!(B, GI.geomtrait(geom), geom, fill) -function _burn_lines!(B::AbstractArray, ::Union{GI.MultiLineStringTrait}, geom, fill) +_burn_lines!(f::F, dims::Tuple, geom) where F<:Function = + _burn_lines!(f, dims, GI.geomtrait(geom), geom) +function _burn_lines!( + f::F, dims::Tuple, ::Union{GI.MultiLineStringTrait}, geom +) where F<:Function n_on_line = 0 for linestring in GI.getlinestring(geom) - n_on_line += _burn_lines!(B, linestring, fill) + n_on_line += _burn_lines!(f, dims, linestring) end return n_on_line end function _burn_lines!( - B::AbstractArray, ::Union{GI.MultiPolygonTrait,GI.PolygonTrait}, geom, fill -) + f::F, dims::Tuple, ::Union{GI.MultiPolygonTrait,GI.PolygonTrait}, geom +) where F<:Function n_on_line = 0 for ring in GI.getring(geom) - n_on_line += _burn_lines!(B, ring, fill) + n_on_line += _burn_lines!(f, dims, ring) end return n_on_line end function _burn_lines!( - B::AbstractArray, ::GI.AbstractCurveTrait, linestring, fill -) + f::F, dims::Tuple, ::GI.AbstractCurveTrait, linestring +) where F<:Function isfirst = true local firstpoint, laststop n_on_line = 0 @@ -46,19 +65,19 @@ function _burn_lines!( stop=(x=GI.x(point), y=GI.y(point)), ) laststop = line.stop - n_on_line += _burn_line!(B, line, fill) + n_on_line += _burn_line!(f, dims, line) end return n_on_line end function _burn_lines!( - B::AbstractArray, t::GI.LineTrait, line, fill -) + f::F, dims::Tuple, t::GI.LineTrait, line +) where F<:Function p1, p2 = GI.getpoint(t, line) line1 = ( start=(x=GI.x(p1), y=GI.y(p1)), stop=(x=GI.x(p2), y=GI.y(p2)), ) - return _burn_line!(B, line1, fill) + return _burn_line!(f, dims, line1) end # _burn_line! @@ -67,25 +86,18 @@ end # Burns a single line into a raster with value where pixels touch a line # # TODO: generalise to Irregular spans? -function _burn_line!(A::AbstractRaster, line, fill) - xdim, ydim = dims(A, DEFAULT_POINT_ORDER) - regular = map((xdim, ydim)) do d - @assert (parent(lookup(d)) isa Array) - lookup(d) isa AbstractSampled && span(d) isa Regular - end - msg = """ - Can only fill lines where dimensions have `Regular` lookups. - Consider using `boundary=:center`, reprojecting the crs, - or make an issue in Rasters.jl on github if you need this to work. - """ - all(regular) || throw(ArgumentError(msg)) +function _burn_line!(f::Function, dims::Tuple, line::NamedTuple) + xdim, ydim = dims + + @assert xdim isa XDim + @assert ydim isa YDim @assert order(xdim) == order(ydim) == Lookups.ForwardOrdered() @assert locus(xdim) == locus(ydim) == Lookups.Center() - raster_x_step = abs(step(span(A, X))) - raster_y_step = abs(step(span(A, Y))) + raster_x_step = abs(step(span(xdim))) + raster_y_step = abs(step(span(ydim))) raster_x_offset = @inbounds xdim[1] - raster_x_step / 2 # Shift from center to start of pixel raster_y_offset = @inbounds ydim[1] - raster_y_step / 2 @@ -117,15 +129,14 @@ function _burn_line!(A::AbstractRaster, line, fill) # Int starting points for the line. +1 converts to julia indexing j, i = trunc(Int, relstart.x) + 1, trunc(Int, relstart.y) + 1 # Int - # For arbitrary dimension indexing - dimconstructors = map(DD.basetypeof, (xdim, ydim)) + n_on_line = 0 if manhattan_distance == 0 - D = map((d, o) -> d(o), dimconstructors, (j, i)) - if checkbounds(Bool, A, D...) - @inbounds A[D...] = fill + D = map(rebuild, dims, (j, i)) + if checkbounds(Bool, DimIndices(dims), D...) + f(D) + n_on_line += 1 end - n_on_line = 1 return n_on_line end @@ -153,18 +164,17 @@ function _burn_line!(A::AbstractRaster, line, fill) n_on_line = 0 countx = county = 0 - # Int steps to move allong the line step_j = signbit(diff_x) * -2 + 1 step_i = signbit(diff_y) * -2 + 1 # Travel one grid cell at a time. Start at zero for the current cell for _ in 0:manhattan_distance - D = map((d, o) -> d(o), dimconstructors, (j, i)) - if checkbounds(Bool, A, D...) - @inbounds A[D...] = fill + D = map(rebuild, dims, (j, i)) + if checkbounds(Bool, DimIndices(dims), D...) + f(D) + n_on_line += 1 end - # Only move in either X or Y coordinates, not both. if abs(max_x) < abs(max_y) max_x += delta_x @@ -176,12 +186,6 @@ function _burn_line!(A::AbstractRaster, line, fill) county +=1 end end - return n_on_line -end -function _burn_line!(A::AbstractRaster, line, fill, order::Tuple{Vararg{Dimension}}) - msg = """" - Converting a `:line` geometry to raster is currently only implemented for 2d lines. - Make a Rasters.jl github issue if you need this for more dimensions. - """ - throw(ArgumentError(msg)) + + return n_on_line end diff --git a/src/methods/burning/polygon.jl b/src/methods/burning/polygon.jl index 2def91428..3fffe83d1 100644 --- a/src/methods/burning/polygon.jl +++ b/src/methods/burning/polygon.jl @@ -24,14 +24,14 @@ function _burn_polygon!(B::AbstractDimArray, trait, geom; # Lines n_on_line = 0 if boundary !== :center - _check_intervals(B, boundary) + _check_intervals(B, boundary, verbose) if boundary === :touches - if _check_intervals(B, boundary) + if _check_intervals(B, boundary, verbose) # Add line pixels n_on_line = _burn_lines!(B, geom; fill)::Int end elseif boundary === :inside - if _check_intervals(B, boundary) + if _check_intervals(B, boundary, verbose) # Remove line pixels n_on_line = _burn_lines!(B, geom; fill=!fill)::Int end @@ -135,9 +135,9 @@ end const INTERVALS_INFO = "makes more sense on `Intervals` than `Points` and will have more correct results. You can construct dimensions with a `X(values; sampling=Intervals(Center()))` to achieve this" -@noinline _check_intervals(B) = - _chki(B) ? true : (@info "burning lines $INTERVALS_INFO"; false) -@noinline _check_intervals(B, boundary) = - _chki(B) ? true : (@info "`boundary=:$boundary` $INTERVALS_INFO"; false) +@noinline _check_intervals(B, verbose::Bool) = + _chki(B) ? true : (verbose && @info "burning lines $INTERVALS_INFO"; false) +@noinline _check_intervals(B, boundary, verbose::Bool) = + _chki(B) ? true : (verbose && @info "`boundary=:$boundary` $INTERVALS_INFO"; false) _chki(B) = all(map(s -> s isa Intervals, sampling(dims(B)))) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index ef41b04e7..30c0116eb 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -71,6 +71,7 @@ function extract end ) end +# TODO use a GeometryOpsCore method like `applyreduce` here? function _extract(A::RasterStackOrArray, geom::Missing, names, kw...) T = _rowtype(A, geom; names, kw...) [_maybe_add_fields(T, map(_ -> missing, names), missing, missing)] @@ -91,7 +92,7 @@ function _extract(A::RasterStackOrArray, ::Nothing, data; # We need to split out points from other geoms # TODO this will fail with mixed point/geom vectors if trait1 isa GI.PointTrait - rows = Vector{T}(undef, length(geoms)) + rows = Vector{T}(undef, length(geoms)) if istrue(skipmissing) j = 1 for i in eachindex(geoms) @@ -172,6 +173,47 @@ function _extract(A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom; end _extract(::Type{T}, A::RasterStackOrArray, trait::GI.PointTrait, point; skipmissing, kw...) where T = _extract_point(T, A, point, skipmissing; kw...) +# function _extract(A::RasterStackOrArray, t::GI.AbstractLineStringTrait, geom; +# names, skipmissing, kw... +# ) +# # Make a raster mask of the geometry +# ods = otherdims(A, DEFAULT_POINT_ORDER) +# p = first(GI.getpoint(geom)) +# tuplepoint = if GI.is3d(geom) +# GI.x(p), GI.y(p), GI.z(p) +# else +# GI.x(p), GI.y(p) +# end +# template = view(A, Touches(GI.extent(geom))) +# offset = CartesianIndex(map(x -> first(x) - 1, parentindices(parent(template)))) +# B = _init_bools(template, BitArray; kw...) +# T = _rowtype(A, tuplepoint; names, skipmissing, kw...) +# # Add a row for each pixel that is `true` in the mask +# rows = _direct_extract(T, B, geom, offset, names, skipmissing) + +# # Maybe skip missing rows +# if istrue(skipmissing) +# return collect(Base.skipmissing(rows)) +# else +# return collect(rows) +# end +# end + +# Skip defining a Bool template for all kinds of lines, +# and just push result rows directly to a vector. +function _direct_extract(::Type{T}, B::AbstractRaster, geom, offset, names, skipmissing) where T + rows = T[] + _burn_lines!(dims(B), geom) do D + # Make sure we only hit this pixel once + B[D] && return nothing + B[D] = true + I = Tuple(CartesianIndex(map(val, D)) + offset) + row = _missing_or_fields(T, B, I, names, skipmissing) + push!(rows, row) + return nothing + end + return rows +end function _missing_or_fields(::Type{T}, A, I, names, skipmissing) where T props = _prop_nt(A, I, names) @@ -192,13 +234,16 @@ _ismissingval(mvs::NamedTuple, props::NamedTuple{K}) where K = _ismissingval(mv, props::NamedTuple) = any(x -> ismissing(x) || x === mv, props) _ismissingval(mv, prop) = (mv === prop) +# We always return NamedTuple @inline _prop_nt(st::AbstractRasterStack, I, ::NamedTuple{K}) where K = st[I...][K] @inline _prop_nt(A::AbstractRaster, I, ::NamedTuple{K}) where K = NamedTuple{K}((A[I...],)) # Extract a single point +# Missing point to remove @inline function _extract_point(::Type, x::RasterStackOrArray, point::Missing, skipmissing::_True; kw...) return missing end +# Missing point to keep @inline function _extract_point(::Type{T}, x::RasterStackOrArray, point::Missing, skipmissing::_False; names, kw... ) where T @@ -207,6 +252,7 @@ end I = missing return _maybe_add_fields(T, props, geom, I) end +# Normal point with missing / out of bounds data removed @inline function _extract_point(::Type{T}, x::RasterStackOrArray, point, skipmissing::_True; dims, names::NamedTuple{K}, atol=nothing, kw... ) where {T,K} @@ -240,6 +286,7 @@ end end end end +# Normal point with missing / out of bounds data kept with `missing` fields @inline function _extract_point(::Type{T}, x::RasterStackOrArray, point, skipmissing::_False; dims, names::NamedTuple{K}, atol=nothing, kw... ) where {T,K} @@ -272,6 +319,7 @@ end end # Maybe add optional fields +# It is critically important for performance that this is type stable Base.@assume_effects :total function _maybe_add_fields(::Type{T}, props::NamedTuple, point, I)::T where {T<:NamedTuple{K}} where K if :geometry in K :index in K ? merge((; geometry=point, index=I), props) : merge((; geometry=point), props) From 52d131daefc9dfa5b3b1cd1ddb2a1edc6404e63a Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 1 Dec 2024 03:31:11 +0100 Subject: [PATCH 02/11] performance --- Project.toml | 4 +- src/methods/burning/allocs.jl | 24 +- src/methods/burning/array_init.jl | 23 +- src/methods/burning/geometry.jl | 26 +- src/methods/burning/line.jl | 44 +-- src/methods/extract.jl | 475 ++++++++++++++++++------------ src/methods/mask.jl | 6 +- src/methods/rasterize.jl | 15 +- src/methods/shared_docstrings.jl | 13 +- src/utils.jl | 11 +- test/methods.jl | 188 +----------- test/runtests.jl | 5 +- 12 files changed, 392 insertions(+), 442 deletions(-) diff --git a/Project.toml b/Project.toml index a7ba6b713..984572e75 100644 --- a/Project.toml +++ b/Project.toml @@ -58,8 +58,8 @@ CommonDataModel = "0.2.3, 0.3" ConstructionBase = "1" CoordinateTransformations = "0.6.2" DataFrames = "1" -DimensionalData = "0.28.2" -DiskArrays = "0.3, 0.4" +DimensionalData = "0.29.0" +DiskArrays = "0.4" Extents = "0.1" FillArrays = "0.12, 0.13, 1" Flatten = "0.4" diff --git a/src/methods/burning/allocs.jl b/src/methods/burning/allocs.jl index 5d916cf76..7956e266f 100644 --- a/src/methods/burning/allocs.jl +++ b/src/methods/burning/allocs.jl @@ -11,22 +11,36 @@ function Allocs(buffer) return Allocs(buffer, edges, scratch, crossings) end -function _burning_allocs(x; nthreads=_nthreads(), threaded=true, kw...) - dims = commondims(x, DEFAULT_POINT_ORDER) +function _burning_allocs(x; + nthreads=_nthreads(), + threaded=true, + burncheck_metadata=Metadata(), + kw... +) if threaded - [Allocs(_init_bools(dims; metadata=Metadata())) for _ in 1:nthreads] + if isnothing(x) + [Allocs(nothing) for _ in 1:nthreads] + else + dims = commondims(x, DEFAULT_POINT_ORDER) + [Allocs(_init_bools(dims; metadata=deepcopy(burncheck_metadata))) for _ in 1:nthreads] + end else - Allocs(_init_bools(dims; metadata=Metadata())) + if isnothing(x) + Allocs() + else + dims = commondims(x, DEFAULT_POINT_ORDER) + Allocs(_init_bools(dims; metadata=burncheck_metadata)) + end end end _get_alloc(allocs::Vector{<:Allocs}) = _get_alloc(allocs[Threads.threadid()]) _get_alloc(allocs::Allocs) = allocs - # TODO include these in Allocs _alloc_burnchecks(n::Int) = fill(false, n) _alloc_burnchecks(x::AbstractArray) = _alloc_burnchecks(length(x)) + function _set_burnchecks(burnchecks, metadata::Metadata{<:Any,<:Dict}, verbose) metadata["missed_geometries"] = .!burnchecks verbose && _burncheck_info(burnchecks) diff --git a/src/methods/burning/array_init.jl b/src/methods/burning/array_init.jl index 10ddd5937..8cc8a6200 100644 --- a/src/methods/burning/array_init.jl +++ b/src/methods/burning/array_init.jl @@ -1,14 +1,19 @@ -# Like `create` but without disk writes, mostly for Bool/Union{Missing,Boo}, +# Like `create` but without disk writes, mostly for Bool or Union{Missing,Bool}, # and uses `similar` where possible # TODO merge this with `create` somehow _init_bools(to; kw...) = _init_bools(to, BitArray; kw...) _init_bools(to, T::Type; kw...) = _init_bools(to, T, nothing; kw...) -_init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) -_init_bools(to::AbstractRasterStack, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) -_init_bools(to::AbstractRaster, T::Type, data; kw...) = _init_bools(to, dims(to), T, data; kw...) -_init_bools(to::Extents.Extent, T::Type, data; kw...) = _init_bools(to, _extent2dims(to; kw...), T, data; kw...) -_init_bools(to::DimTuple, T::Type, data; kw...) = _init_bools(to, to, T, data; kw...) +_init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = + _init_bools(dims(first(to)), T, data; kw...) +_init_bools(to::AbstractRasterStack, T::Type, data; kw...) = + _init_bools(first(to), dims(to), T, data; kw...) +_init_bools(to::AbstractRaster, T::Type, data; kw...) = + _init_bools(to, dims(to), T, data; kw...) +_init_bools(to::Extents.Extent, T::Type, data; kw...) = + _init_bools(to, _extent2dims(to; kw...), T, data; kw...) +_init_bools(to::DimTuple, T::Type, data; kw...) = + _init_bools(to, to, T, data; kw...) function _init_bools(to::Nothing, T::Type, data; geometrycolumn=nothing,kw...) # Get the extent of the geometries ext = _extent(data; geometrycolumn, kw...) @@ -38,7 +43,7 @@ function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; missingval::Bool=fal end function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; missingval=false, metadata=NoMetadata(), kw...) where T # Use an `Array` - data = fill!(Raster{T}(undef, dims), missingval) + data = fill!(Raster{T}(undef, dims), missingval) return rebuild(data; missingval, metadata) end @@ -53,7 +58,9 @@ function _prepare_for_burning(B, locus=Center()) end # Convert to Array if its not one already -_lookup_as_array(d::Dimension) = parent(lookup(d)) isa Array ? d : modify(Array, d) +_lookup_as_array(x) = setdims(x, _lookup_as_array(dims(x))) +_lookup_as_array(dims::Tuple) = map(_lookup_as_array, dims) +_lookup_as_array(d::Dimension) = parent(lookup(d)) isa Array ? d : modify(Array, d) function _forward_ordered(B) reduce(dims(B); init=B) do A, d diff --git a/src/methods/burning/geometry.jl b/src/methods/burning/geometry.jl index fa61c1f24..2ee27211b 100644 --- a/src/methods/burning/geometry.jl +++ b/src/methods/burning/geometry.jl @@ -22,10 +22,11 @@ function _burn_geometry!(B::AbstractRaster, trait::Nothing, data; lock=Threads.SpinLock(), verbose=true, progress=true, - threaded=true, + threaded=false, fill=true, - allocs=_burning_allocs(B; threaded), geometrycolumn=nothing, + burncheck_metadata=Metadata(), + allocs=_burning_allocs(B; threaded, burncheck_metadata), kw... )::Bool geoms = _get_geometries(data, geometrycolumn) @@ -36,8 +37,8 @@ function _burn_geometry!(B::AbstractRaster, trait::Nothing, data; geom = _getgeom(geoms, i) ismissing(geom) && return nothing a = _get_alloc(allocs) - B1 = a.buffer - burnchecks[i] = _burn_geometry!(B1, geom; fill, allocs=a, lock, kw...) + buffer = a.buffer + burnchecks[i] = _burn_geometry!(buffer, geom; fill, allocs=a, lock, kw...) return nothing end if fill @@ -99,14 +100,17 @@ function _burn_geometry!(B::AbstractRaster, ::GI.AbstractGeometryTrait, geom; return false end # Take a view of the geometry extent - B1 = view(B, Touches(geomextent)) - buf1 = _init_bools(B1; missingval=false) + V = view(B, Touches(geomextent)) + buffer = _init_bools(V; missingval=false) # Burn the polygon into the buffer - allocs = isnothing(allocs) ? Allocs(B) : allocs - hasburned = _burn_polygon!(buf1, geom; shape, geomextent, allocs, boundary, kw...) - @inbounds for i in eachindex(B1) - if buf1[i] - B1[i] = fill + # We allocate a new bitarray for the view for performance + # and always fill with `true`. + allocs = isnothing(allocs) ? Allocs(nothing) : allocs + hasburned = _burn_polygon!(buffer, geom; shape, geomextent, allocs, boundary, kw...) + # We then transfer burned `true` values to B via the V view + @inbounds for i in eachindex(V) + if buffer[i] + V[i] = fill end end else diff --git a/src/methods/burning/line.jl b/src/methods/burning/line.jl index 2584d888e..6dc8c0baf 100644 --- a/src/methods/burning/line.jl +++ b/src/methods/burning/line.jl @@ -1,7 +1,9 @@ # _burn_lines! # Fill a raster with `fill` where pixels touch lines in a geom # Separated for a type stability function barrier -function _burn_lines!(B::AbstractRaster, geom; fill=true, verbose=false, kw...) +function _burn_lines!( + B::AbstractRaster, geom; fill=true, verbose=false, kw... +) _check_intervals(B, verbose) B1 = _prepare_for_burning(B) @@ -19,34 +21,34 @@ function _burn_lines!(B::AbstractRaster, geom; fill=true, verbose=false, kw...) # For arbitrary dimension indexing - _burn_lines!(dims(B), geom) do D + _burn_lines!(identity, dims(B), geom) do D @inbounds B[D] = fill end end -_burn_lines!(f::F, dims::Tuple, geom) where F<:Function = - _burn_lines!(f, dims, GI.geomtrait(geom), geom) +_burn_lines!(f::F, c::C, dims::Tuple, geom) where {F<:Function,C<:Function} = + _burn_lines!(f, c, dims, GI.geomtrait(geom), geom) function _burn_lines!( - f::F, dims::Tuple, ::Union{GI.MultiLineStringTrait}, geom -) where F<:Function + f::F, c::C, dims::Tuple, ::Union{GI.MultiLineStringTrait}, geom +) where {F<:Function,C<:Function} n_on_line = 0 for linestring in GI.getlinestring(geom) - n_on_line += _burn_lines!(f, dims, linestring) + n_on_line += _burn_lines!(f, c, dims, linestring) end return n_on_line end function _burn_lines!( - f::F, dims::Tuple, ::Union{GI.MultiPolygonTrait,GI.PolygonTrait}, geom -) where F<:Function + f::F, c::C, dims::Tuple, ::Union{GI.MultiPolygonTrait,GI.PolygonTrait}, geom +) where {F<:Function,C<:Function} n_on_line = 0 for ring in GI.getring(geom) - n_on_line += _burn_lines!(f, dims, ring) + n_on_line += _burn_lines!(f, c, dims, ring) end return n_on_line end function _burn_lines!( - f::F, dims::Tuple, ::GI.AbstractCurveTrait, linestring -) where F<:Function + f::F, c::C, dims::Tuple, ::GI.AbstractCurveTrait, linestring +) where {F<:Function,C<:Function} isfirst = true local firstpoint, laststop n_on_line = 0 @@ -65,19 +67,19 @@ function _burn_lines!( stop=(x=GI.x(point), y=GI.y(point)), ) laststop = line.stop - n_on_line += _burn_line!(f, dims, line) + n_on_line += _burn_line!(f, c, dims, line) end return n_on_line end function _burn_lines!( - f::F, dims::Tuple, t::GI.LineTrait, line -) where F<:Function + f::F, c::C, dims::Tuple, t::GI.LineTrait, line +) where {F<:Function,C<:Function} p1, p2 = GI.getpoint(t, line) line1 = ( start=(x=GI.x(p1), y=GI.y(p1)), stop=(x=GI.x(p2), y=GI.y(p2)), ) - return _burn_line!(f, dims, line1) + return _burn_line!(f, c, dims, line1) end # _burn_line! @@ -87,8 +89,9 @@ end # # TODO: generalise to Irregular spans? -function _burn_line!(f::Function, dims::Tuple, line::NamedTuple) +function _burn_line!(f::Function, c::Function, dims::Tuple, line::NamedTuple) xdim, ydim = dims + di = DimIndices(dims) @assert xdim isa XDim @assert ydim isa YDim @@ -130,10 +133,13 @@ function _burn_line!(f::Function, dims::Tuple, line::NamedTuple) j, i = trunc(Int, relstart.x) + 1, trunc(Int, relstart.y) + 1 # Int n_on_line = 0 + + # inform m of number of runs of `f` + c(manhattan_distance + 1) if manhattan_distance == 0 D = map(rebuild, dims, (j, i)) - if checkbounds(Bool, DimIndices(dims), D...) + if checkbounds(Bool, di, D...) f(D) n_on_line += 1 end @@ -171,7 +177,7 @@ function _burn_line!(f::Function, dims::Tuple, line::NamedTuple) # Travel one grid cell at a time. Start at zero for the current cell for _ in 0:manhattan_distance D = map(rebuild, dims, (j, i)) - if checkbounds(Bool, DimIndices(dims), D...) + if checkbounds(Bool, di, D...) f(D) n_on_line += 1 end diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 30c0116eb..51fa1e316 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -1,9 +1,80 @@ +# Object to hold extract keywords +struct Extractor{T,P,K,N<:NamedTuple{K},G,I,S,F} + names::N + geometry::G + index::I + skipmissing::S + flatten::F +end +function Extractor(x, data; + name::NTuple{<:Any,Symbol}, + skipmissing, + flatten, + geometry, + index, + kw... +) + nt = NamedTuple{name}(name) + g = _booltype(geometry) + i = _booltype(index) + sm = _booltype(skipmissing) + f = _booltype(flatten) + T = _geom_rowtype(x, data; geometry=g, index=i, names=nt, skipmissing=sm) + P = _proptype(x; skipmissing=sm, names=nt) + Extractor{T,P,name,typeof(nt),typeof(g),typeof(i),typeof(sm),typeof(f)}(nt, g, i, sm, f) +end + +Base.eltype(::Extractor{T}) where T = T + +mutable struct LineRefs{T} + i::Int + prev::CartesianIndex{2} + rows::Vector{T} + function LineRefs{T}() where T + i = 1 + prev = CartesianIndex((typemin(Int), typemin(Int))) + new{T}(i, prev, Vector{T}()) + end +end + +function _initialise!(lr::LineRefs) + lr.i = 1 + lr.prev = CartesianIndex((typemin(Int), typemin(Int))) +end + +_geom_rowtype(A, geom; kw...) = + _geom_rowtype(A, GI.trait(geom), geom; kw...) +_geom_rowtype(A, ::Nothing, geoms; kw...) = + _geom_rowtype(A, first(geoms); kw...) +_geom_rowtype(A, ::GI.AbstractGeometryTrait, geom; kw...) = + _geom_rowtype(A, first(GI.getpoint(geom)); kw...) +function _geom_rowtype(A, ::GI.AbstractPointTrait, p; kw...) + tuplepoint = if GI.is3d(p) + (GI.x(p), GI.y(p), GI.z(p)) + else + (GI.x(p), GI.y(p)) + end + return _rowtype(A, tuplepoint; kw...) +end + +# skipinvalid: can G and I be missing. skipmissing: can nametypes be missing +function _proptype(x; + skipmissing, names::NamedTuple{K}, kw... +) where K + NamedTuple{K,Tuple{_nametypes(x, names, skipmissing)...}} +end + """ - extract(x, data; kw...) + extract(x, geometries; kw...) + +Extracts the value of `Raster` or `RasterStack` for the passed in geometries, +returning an `Vector{NamedTuple}` with properties for `:geometry` and `Raster` +or `RasterStack` layer values. -Extracts the value of `Raster` or `RasterStack` at given points, returning -an iterable of `NamedTuple` with properties for `:geometry` and raster or -stack layer values. +For lines, linestrings and linear rings points are extracted for each pixel that +the line touches. + +For polygons, all cells witih centers covered by the polygon are returned. Note that if objects have more dimensions than the length of the point tuples, sliced arrays or stacks will be returned instead of single values. @@ -15,13 +86,17 @@ $DATA_ARGUMENT # Keywords -- `geometry`: include `:geometry` in returned `NamedTuple`, `true` by default. -- `index`: include `:index` of the `CartesianIndex` in returned `NamedTuple`, `false` by default. -- `name`: a `Symbol` or `Tuple` of `Symbol` corresponding to layer/s of a `RasterStack` to extract. All layers by default. +- `geometry`: include `:geometry` column in rows. `true` by default. +- `index`: include `:index` of the `CartesianIndex` in rows, `false` by default. +- `name`: a `Symbol` or `Tuple` of `Symbol` corresponding to layer/s of a `RasterStack` to extract. + All layers are extracted by default. - `skipmissing`: skip missing points automatically. +- `flatten`: flatten extracted points from multiple geometries into a single + vector. `true` by default. Unmixed point geometries are always flattened. - `atol`: a tolerance for floating point lookup values for when the `Lookup` contains `Points`. `atol` is ignored for `Intervals`. --$GEOMETRYCOLUMN_KEYWORD +$BOUNDARY_KEYWORD +$GEOMETRYCOLUMN_KEYWORD # Example @@ -50,201 +125,237 @@ extract(st, obs; skipmissing=true) (geometry = (0.32, 40.24), bio1 = 16.321388f0, bio3 = 41.659454f0, bio5 = 30.029825f0, bio7 = 25.544561f0, bio12 = 480.0f0) ``` -Note: passing in arrays, geometry collections or feature collections +Note: passing in arrays, geometry collections or feature collections containing a mix of points and other geometries has undefined results. """ function extract end @inline function extract(x::RasterStackOrArray, data; - names=_names(x), name=names, skipmissing=false, geometry=true, index=false, geometrycolumn=nothing, kw... + geometrycolumn=nothing, + names::NTuple{<:Any,Symbol}=_names(x), + name::NTuple{<:Any,Symbol}=names, + skipmissing=false, + flatten=true, + geometry=true, + index=false, + kw... ) n = DD._astuple(name) - _extract(x, data; - dims=DD.dims(x, DEFAULT_POINT_ORDER), - names=NamedTuple{n}(n), - # These keywords are converted to _True/_False for type stability later on - # The @inline above helps constant propagation of the Bools - geometry=_booltype(geometry), - index=_booltype(index), - skipmissing=_booltype(skipmissing), - geometrycolumn, - kw... - ) + g, g1 = if GI.isgeometry(data) + data, data + elseif GI.isfeature(data) + g = GI.geometry(data) + g, g + else + gs = _get_geometries(data, geometrycolumn) + gs, first(Base.skipmissing(gs)) + end + + xp = _prepare_for_burning(x) + e = Extractor(xp, g1; name, skipmissing, flatten, geometry, index) + return _extract(xp, g, e; kw...) end # TODO use a GeometryOpsCore method like `applyreduce` here? -function _extract(A::RasterStackOrArray, geom::Missing, names, kw...) - T = _rowtype(A, geom; names, kw...) - [_maybe_add_fields(T, map(_ -> missing, names), missing, missing)] +function _extract(A::RasterStackOrArray, geom::Missing, e; kw...) + return if istrue(e.skipmissing) + T[] + else + T[_maybe_add_fields(e, map(_ -> missing, e.names), missing, missing)] + end end -function _extract(A::RasterStackOrArray, geom; names, kw...) - _extract(A, GI.geomtrait(geom), geom; names, kw...) +function _extract(A::RasterStackOrArray, geom, e; kw...) + _extract(A, GI.geomtrait(geom), geom, e; kw...) end -function _extract(A::RasterStackOrArray, ::Nothing, data; - names, skipmissing, geometrycolumn, kw... -) - geoms = _get_geometries(data, geometrycolumn) - T = _rowtype(A, eltype(geoms); names, skipmissing, kw...) +function _extract(A::RasterStackOrArray, ::Nothing, geoms::AbstractArray, e::Extractor{T}; + threaded=false, progress=true, kw... +) where T # Handle empty / all missing cases (length(geoms) > 0 && any(!ismissing, geoms)) || return T[] - - geom1 = first(Base.skipmissing(geoms)) + + geom1 = first(skipmissing(geoms)) trait1 = GI.trait(geom1) - # We need to split out points from other geoms - # TODO this will fail with mixed point/geom vectors + # We split out points from other geoms for performance if trait1 isa GI.PointTrait + allpoints = true + i = 1 rows = Vector{T}(undef, length(geoms)) - if istrue(skipmissing) - j = 1 + if istrue(e.skipmissing) + i = 1 for i in eachindex(geoms) g = geoms[i] ismissing(g) && continue - e = _extract_point(T, A, g, skipmissing; names, kw...) - if !ismissing(e) - rows[j] = e - j += 1 - end - nothing + geomtrait(g) isa GI.PointTrait || break + i += _extract_point!(rows, A, g, e, i; kw...) end - deleteat!(rows, j:length(rows)) + deleteat!(rows, i:length(rows)) else - for i in eachindex(geoms) + _run(1:length(geoms)) do i g = geoms[i] - rows[i] = _extract_point(T, A, g, skipmissing; names, kw...)::T - nothing + geomtrait(g) isa GI.PointTrait || return nothing + _extract_point!(rows, A, g, e, i; kw...)::T + return nothing end end - return rows - else - # This will be a list of vectors, we need to flatten it into one - rows = Iterators.flatten(_extract(A, g; names, skipmissing, kw...) for g in geoms) - if istrue(skipmissing) - return collect(_skip_missing_rows(rows, _missingval_or_missing(A), names)) + # If we found a non-point geometry, + # ignore these rows and start again generically + allpoints && return rows + end + + return if istrue(e.flatten) + if threaded + thread_rows = [T[] for _ in 1:Threads.nthreads()] + # thread_line_refs = [LineRefs{eltype(e)}() for _ in 1:Threads.nthreads()] + _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i + id = Threads.threadid() + # line_refs = thread_line_refs[id] + rows = _extract(A, geoms[i], e; kw...) + append!(thread_rows[id], rows) + end + l = sum(map(length, thread_rows)) + rows = Vector{T}(undef, l) + i = 1 + for rs in thread_rows + for r in rs + rows[i] = r + i += 1 + end + end + rows else - return collect(rows) + line_refs = LineRefs{eltype(e)}() + rows_ref = Ref{Vector{T}}() + _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i + rows_ref[] = _extract(A, geoms[i], e; line_refs, kw...) + end + rows_ref[] + end + else + row_vecs = Vector{Vector{T}}(undef, length(geoms)) + _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i + row_vecs[i] = _extract(A, geoms[i], e; kw...) end + row_vecs end end -function _extract(A::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...) - _extract(A, GI.geometry(feature); kw...) +function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom, e; kw...) + n = GI.npoint(geom) + rows = _init_rows(e, n) + for p in GI.getpoint(geom) + i += _extract_point!(rows, A, e, p, i; kw...) + end + if istrue(skipmissing) + # Remove excees rows where missing + deleteat!(rows, i:length(rows)) + end + return rows end -function _extract(A::RasterStackOrArray, ::GI.FeatureCollectionTrait, fc; kw...) - # Fall back to the Array/iterator method for feature collections - _extract(A, [GI.geometry(f) for f in GI.getfeature(fc)]; kw...) +function _extract(A::RasterStackOrArray, ::GI.PointTrait, p, e; kw...) + _extract_point!(rows, A, e, p, length(rows); kw...) end -function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; - skipmissing, kw... -) - T = _rowtype(A, GI.getpoint(geom, 1); names, skipmissing, kw...) - rows = (_extract_point(T, A, p, skipmissing; kw...) for p in GI.getpoint(geom)) - return skipmissing isa _True ? collect(_skip_missing_rows(rows, _missingval_or_missing(A), names)) : collect(rows) -end -function _extract(A::RasterStackOrArray, ::GI.PointTrait, geom; - skipmissing, kw... -) - T = _rowtype(A, geom; names, skipmissing, kw...) - _extract_point(T, A, geom, skipmissing; kw...) +@noinline function _extract( + A::RasterStackOrArray, trait::GI.AbstractLineStringTrait, geom, e::Extractor{T}; + line_refs=LineRefs{T}(), + kw... +) where T + _initialise!(line_refs) + offset = CartesianIndex((0, 0)) + dp = DimPoints(A) + function _length_callback(n) + rows = line_refs.rows + resize!(rows, n + line_refs.i - 1) + end + + _burn_lines!(_length_callback, dims(A), geom) do D + I = CartesianIndex(map(val, D)) + # Avoid duplicates from adjacent line segments + line_refs.prev == I && return nothing + line_refs.prev = I + # Make sure we only hit this pixel once + # D is always inbounds + line_refs.i += _maybe_set_row!(line_refs.rows, e.skipmissing, e, A, dp, offset, I, line_refs.i) + return nothing + end + rows = line_refs.rows + deleteat!(rows, line_refs.i+1:length(rows)) + return rows end -function _extract(A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom; - names, skipmissing, kw... +@noinline function _extract( + A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom, e; kw... ) # Make a raster mask of the geometry - template = view(A, Touches(GI.extent(geom))) ods = otherdims(A, DEFAULT_POINT_ORDER) + template = view(A, Touches(GI.extent(geom))) if length(ods) > 0 - template = view(template, map(d -> rebuild(d, firstindex(d)), ods)) - end - p = first(GI.getpoint(geom)) - tuplepoint = if GI.is3d(geom) - GI.x(p), GI.y(p), GI.z(p) - else - GI.x(p), GI.y(p) + template = view(template, map(d -> rebuild(d, firstindex(d)), ods)) end - T = _rowtype(A, tuplepoint; names, skipmissing, kw...) - B = boolmask(geom; to=template, kw...) - offset = CartesianIndex(map(x -> first(x) - 1, parentindices(parent(template)))) + B = boolmask(geom; to=template, burncheck_metadata=NoMetadata(), kw...) + n = count(B) + offset = _offset(template) + dp = DimPoints(A) + i = 1 + rows = _init_rows(e, n) # Add a row for each pixel that is `true` in the mask - rows = (_missing_or_fields(T, A, Tuple(I + offset), names, skipmissing) for I in CartesianIndices(B) if B[I]) - # Maybe skip missing rows - if istrue(skipmissing) - return collect(Base.skipmissing(rows)) - else - return collect(rows) - end -end -_extract(::Type{T}, A::RasterStackOrArray, trait::GI.PointTrait, point; skipmissing, kw...) where T = - _extract_point(T, A, point, skipmissing; kw...) -# function _extract(A::RasterStackOrArray, t::GI.AbstractLineStringTrait, geom; -# names, skipmissing, kw... -# ) -# # Make a raster mask of the geometry -# ods = otherdims(A, DEFAULT_POINT_ORDER) -# p = first(GI.getpoint(geom)) -# tuplepoint = if GI.is3d(geom) -# GI.x(p), GI.y(p), GI.z(p) -# else -# GI.x(p), GI.y(p) -# end -# template = view(A, Touches(GI.extent(geom))) -# offset = CartesianIndex(map(x -> first(x) - 1, parentindices(parent(template)))) -# B = _init_bools(template, BitArray; kw...) -# T = _rowtype(A, tuplepoint; names, skipmissing, kw...) -# # Add a row for each pixel that is `true` in the mask -# rows = _direct_extract(T, B, geom, offset, names, skipmissing) - -# # Maybe skip missing rows -# if istrue(skipmissing) -# return collect(Base.skipmissing(rows)) -# else -# return collect(rows) -# end -# end - -# Skip defining a Bool template for all kinds of lines, -# and just push result rows directly to a vector. -function _direct_extract(::Type{T}, B::AbstractRaster, geom, offset, names, skipmissing) where T - rows = T[] - _burn_lines!(dims(B), geom) do D - # Make sure we only hit this pixel once - B[D] && return nothing - B[D] = true - I = Tuple(CartesianIndex(map(val, D)) + offset) - row = _missing_or_fields(T, B, I, names, skipmissing) - push!(rows, row) - return nothing + for I in CartesianIndices(B) + B[I] || continue + i += _maybe_set_row!(rows, e.skipmissing, e, A, dp, offset, I, i) end + # Cleanup + deleteat!(rows, i:length(rows)) return rows end -function _missing_or_fields(::Type{T}, A, I, names, skipmissing) where T - props = _prop_nt(A, I, names) - if istrue(skipmissing) && _ismissingval(A, props) - missing +Base.@assume_effects :foldable function _maybe_set_row!( + rows, skipmissing::_True, e, A, dp, offset, I, i; + props=_prop_nt(e, A, I) +) + return if !_ismissingval(A, props) + _maybe_set_row!(rows, _False(), e, A, dp, offset, I; props) else - geom = DimPoints(A)[I...] - _maybe_add_fields(T, props, geom, I) + 0 end end +Base.@assume_effects :foldable function _maybe_set_row!( + rows::Vector{T}, skipmissing::_False, e, A, dp, offset, I, i; + props=_prop_nt(e, A, I) +) where T + Ioff = I + offset + geom = dp[Ioff] + i <= length(rows) || @show i length(rows) + rows[i] = _maybe_add_fields(T, props, geom, Tuple(Ioff)) + return 1 +end -_ismissingval(A::Union{Raster,RasterStack}, props) = +_init_rows(e::Extractor{T}, n=0) where T = Vector{T}(undef, n) + +Base.@assume_effects :foldable _ismissingval(A::Union{Raster,RasterStack}, props)::Bool = _ismissingval(missingval(A), props) -_ismissingval(A::Union{Raster,RasterStack}, props::NamedTuple) = +Base.@assume_effects :foldable _ismissingval(A::Union{Raster,RasterStack}, props::NamedTuple)::Bool = _ismissingval(missingval(A), props) -_ismissingval(mvs::NamedTuple, props::NamedTuple{K}) where K = - any(k -> ismissing(props[k]) || props[k] === mvs[k], K) -_ismissingval(mv, props::NamedTuple) = any(x -> ismissing(x) || x === mv, props) -_ismissingval(mv, prop) = (mv === prop) +Base.@assume_effects :foldable _ismissingval(mvs::NamedTuple, props::NamedTuple)::Bool = + any(DD.unrolled_map((p, mv) -> ismissing(p) || p === mv, props, mvs)) +Base.@assume_effects :foldable _ismissingval(mv, props::NamedTuple)::Bool = + any(DD.unrolled_map(p -> ismissing(p) || p === mv, props)) +Base.@assume_effects :foldable _ismissingval(mv, prop)::Bool = (mv === prop) # We always return NamedTuple -@inline _prop_nt(st::AbstractRasterStack, I, ::NamedTuple{K}) where K = st[I...][K] -@inline _prop_nt(A::AbstractRaster, I, ::NamedTuple{K}) where K = NamedTuple{K}((A[I...],)) +Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, st::AbstractRasterStack, I) where {P,K} = + P(st[K][I])::P +Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, st::AbstractRasterStack{K}, I) where {P,K} = + P(st[I])::P +Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, A::AbstractRaster, I) where {P,K} = + P((A[I],))::P # Extract a single point # Missing point to remove -@inline function _extract_point(::Type, x::RasterStackOrArray, point::Missing, skipmissing::_True; kw...) - return missing +@inline function _extract_point!( + rows::Vector{T}, x::RasterStackOrArray, skipmissing::_True, point::Missing, i; + kw... +) where T + return 0 end # Missing point to keep -@inline function _extract_point(::Type{T}, x::RasterStackOrArray, point::Missing, skipmissing::_False; +@inline function _extract_point!( + rows::Vector{T}, x::RasterStackOrArray, skipmissing::_False, point::Missing, i; names, kw... ) where T props = map(_ -> missing, names) @@ -253,50 +364,56 @@ end return _maybe_add_fields(T, props, geom, I) end # Normal point with missing / out of bounds data removed -@inline function _extract_point(::Type{T}, x::RasterStackOrArray, point, skipmissing::_True; - dims, names::NamedTuple{K}, atol=nothing, kw... +@inline function _extract_point!( + rows::Vector{T}, x::RasterStackOrArray, skipmissing::_True, point, i; + dims=DD.dims(x, DEFAULT_POINT_ORDER), + names::NamedTuple{K}, + atol=nothing, + kw... ) where {T,K} # Get the actual dimensions available in the object coords = map(DD.commondims(x, dims)) do d _dimcoord(d, point) end # If any are coordinates missing, also return missing for everything - if any(map(ismissing, coords)) - return missing - else + if !any(map(ismissing, coords)) selector_dims = map(dims, coords) do d, c _at_or_contains(d, c, atol) end selectors = map(val, DD.dims(selector_dims, DD.dims(x))) # Check the selector is in bounds / actually selectable I = DD.selectindices(DD.dims(x, dims), selectors; err=_False())::Union{Nothing,Tuple{Int,Int}} - if isnothing(I) - return missing - else + if !isnothing(I) D = map(rebuild, selector_dims, I) - if x isa Raster + if x isa Raster prop = x[D] - _ismissingval(missingval(x), prop) && return missing + _ismissingval(missingval(x), prop) && return 0 props = NamedTuple{K,Tuple{eltype(x)}}((prop,)) else props = x[D][K] - _ismissingval(missingval(x), props) && return missing + _ismissingval(missingval(x), props) && return 0 end - return _maybe_add_fields(T, props, point, I) + rows[i] = _maybe_add_fields(T, props, point, I) + return 1 end end + return 0 end # Normal point with missing / out of bounds data kept with `missing` fields -@inline function _extract_point(::Type{T}, x::RasterStackOrArray, point, skipmissing::_False; - dims, names::NamedTuple{K}, atol=nothing, kw... +@inline function _extract_point!( + row::Vector{T}, x::RasterStackOrArray, skipmissing::_False, point, i; + dims=DD.dims(x, DEFAULT_POINT_ORDER), + names::NamedTuple{K}, + atol=nothing, + kw... ) where {T,K} # Get the actual dimensions available in the object coords = map(DD.commondims(x, dims)) do d _dimcoord(d, point) end # If any are coordinates missing, also return missing for everything - if any(map(ismissing, coords)) - return _maybe_add_fields(T, map(_ -> missing, names), missing, missing) + rows[i] = if any(map(ismissing, coords)) + _maybe_add_fields(T, map(_ -> missing, names), missing, missing) else selector_dims = map(dims, coords) do d, c _at_or_contains(d, c, atol) @@ -305,22 +422,25 @@ end # Check the selector is in bounds / actually selectable I = DD.selectindices(DD.dims(x, dims), selectors; err=_False())::Union{Nothing,Tuple{Int,Int}} if isnothing(I) - return _maybe_add_fields(T, map(_ -> missing, names), point, missing) + _maybe_add_fields(T, map(_ -> missing, names), point, missing) else D = map(rebuild, selector_dims, I) - props = if x isa Raster + props = if x isa Raster NamedTuple{K,Tuple{eltype(x)}}((x[D],)) else NamedTuple(x[D])[K] end - return _maybe_add_fields(T, props, point, I) + _maybe_add_fields(T, props, point, I) end end + return 1 end # Maybe add optional fields # It is critically important for performance that this is type stable -Base.@assume_effects :total function _maybe_add_fields(::Type{T}, props::NamedTuple, point, I)::T where {T<:NamedTuple{K}} where K +Base.@assume_effects :total function _maybe_add_fields( + ::Type{T}, props::NamedTuple, point, I +)::T where {T<:NamedTuple{K}} where K if :geometry in K :index in K ? merge((; geometry=point, index=I), props) : merge((; geometry=point), props) else @@ -328,18 +448,5 @@ Base.@assume_effects :total function _maybe_add_fields(::Type{T}, props::NamedTu end |> T end -@inline _skip_missing_rows(rows, ::Missing, names) = - Iterators.filter(row -> !any(ismissing, row), rows) -@inline _skip_missing_rows(rows, missingval, names) = - Iterators.filter(row -> !any(x -> ismissing(x) || x === missingval, row), rows) -@inline function _skip_missing_rows(rows, missingval::NamedTuple, names::NamedTuple{K}) where K - # first check if all fields are equal - if so just call with the first value - if Base.allequal(missingval) == 1 - return _skip_missing_rows(rows, first(missingval), names) - else - Iterators.filter(rows) do row - # rows may or may not contain a :geometry field, so map over keys instead - !any(key -> ismissing(row[key]) || row[key] === missingval[key], K) - end - end -end +_offset(template) = + CartesianIndex(map(x -> first(x) - 1, parent(template).indices)) diff --git a/src/methods/mask.jl b/src/methods/mask.jl index 55b0f3732..a85de7ba3 100644 --- a/src/methods/mask.jl +++ b/src/methods/mask.jl @@ -28,7 +28,8 @@ $SUFFIX_KEYWORD These can be used when `with` is a GeoInterface.jl compatible object: -$SHAPE_KEYWORDS +$SHAPE_KEYWORD +$BOUNDARY_KEYWORD $GEOMETRYCOLUMN_KEYWORD # Example @@ -336,7 +337,8 @@ function boolmask!(dest::AbstractRaster, data; lock=nothing, progress=true, threaded=false, - allocs=_burning_allocs(dest; threaded), + burncheck_metadata=Metadata(), + allocs=_burning_allocs(dest; threaded, burncheck_metadata), geometrycolumn=nothing, kw... ) diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index 0b1640b0e..7bc69a1a4 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -312,15 +312,10 @@ const RASTERIZE_KEYWORDS = """ - `op`: A reducing function that accepts two values and returns one, like `min` to `minimum`. For common methods this will be assigned for you, or is not required. But you can use it instead of a `reducer` as it will usually be faster. -- `shape`: force `data` to be treated as `:polygon`, `:line` or `:point`, where possible - Points can't be treated as lines or polygons, and lines may not work as polygons, but - an attempt will be made. -- `geometrycolumn`: `Symbol` to manually select the column the geometries are in - when `data` is a Tables.jl compatible table, or a tuple of `Symbol` for columns of - point coordinates. -- `progress`: show a progress bar, `true` by default, `false` to hide.. -- `verbose`: print information and warnings when there are problems with the rasterisation. - `true` by default. +$GEOM_KEYWORDS +$GEOMETRYCOLUMN_KEYWORD +$PROGRESS_KEYWORD +$VERBOSE_KEYWORD $THREADED_KEYWORD - `threadsafe`: specify that custom `reducer` and/or `op` functions are thread-safe, in that the order of operation or blocking does not matter. For example, @@ -356,8 +351,6 @@ $RASTERIZE_ARGUMENTS These are detected automatically from `data` where possible. -$GEOMETRYCOLUMN_KEYWORD -$GEOM_KEYWORDS $RASTERIZE_KEYWORDS $FILENAME_KEYWORD $SUFFIX_KEYWORD diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 443808750..f2a690488 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -18,15 +18,15 @@ const CRS_KEYWORD = """ - `crs`: a `crs` which will be attached to the resulting raster when `to` not passed or is an `Extent`. Otherwise the crs from `to` is used. """ - -const SHAPE_KEYWORDS = """ -- `shape`: Force `data` to be treated as `:polygon`, `:line` or `:point` geometries. - using points or lines as polygons may have unexpected results. +const BOUNDARY_KEYWORD = """ - `boundary`: for polygons, include pixels where the `:center` is inside the polygon, where the polygon `:touches` the pixel, or that are completely `:inside` the polygon. The default is `:center`. """ - +const SHAPE_KEYWORD = """ +- `shape`: Force `data` to be treated as `:polygon`, `:line` or `:point` geometries. + using points or lines as polygons may have unexpected results. +""" const THREADED_KEYWORD = """ - `threaded`: run operations in parallel, `false` by default. In some circumstances `threaded` can give large speedups over single-threaded operation. This can be true for complicated @@ -41,7 +41,8 @@ $TO_KEYWORD $RES_KEYWORD $SIZE_KEYWORD $CRS_KEYWORD -$SHAPE_KEYWORDS +$BOUNDARY_KEYWORD +$SHAPE_KEYWORD """ const DATA_ARGUMENT = """ diff --git a/src/utils.jl b/src/utils.jl index e069755c3..dc2184dc5 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -249,7 +249,7 @@ _warn_disk() = @warn "Disk-based objects may be very slow here. User `read` firs _filenotfound_error(filename) = throw(ArgumentError("file \"$filename\" not found")) -_progress(args...; kw...) = ProgressMeter.Progress(args...; color=:blue, barlen=50, kw...) +_progress(args...; kw...) = ProgressMeter.Progress(args...; dt=0.1, color=:blue, barlen=50, kw...) # Function barrier for splatted vector broadcast @noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) @@ -376,7 +376,7 @@ istrue(::_False) = false _rowtype(x, g, args...; kw...) = _rowtype(x, typeof(g), args...; kw...) function _rowtype( x, ::Type{G}, i::Type{I} = typeof(size(x)); - geometry, index, skipmissing, skipinvalid = skipmissing, names, kw... + geometry, index, skipmissing, skipinvalid=skipmissing, names, kw... ) where {G, I} _G = istrue(skipinvalid) ? nonmissingtype(G) : G _I = istrue(skipinvalid) ? I : Union{Missing, I} @@ -385,7 +385,6 @@ function _rowtype( NamedTuple{keys,types} end - function _rowtypes( x, ::Type{G}, ::Type{I}, geometry::_True, index::_True, skipmissing, names::NamedTuple{Names} ) where {G,I,Names} @@ -407,8 +406,10 @@ function _rowtypes( Tuple{_nametypes(x, names, skipmissing)...} end -@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, skipmissing::_True) where {T,Names} = (nonmissingtype(T),) -@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, skipmissing::_False) where {T,Names} = (Union{Missing,T},) +@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, sm::_True) where {T,Names} = + (nonmissingtype(T),) +@inline _nametypes(::Raster{T}, ::NamedTuple{Names}, sm::_False) where {T,Names} = + (Union{Missing,T},) # This only compiles away when generated @generated function _nametypes( ::RasterStack{<:Any,T}, ::NamedTuple{PropNames}, skipmissing::_True diff --git a/test/methods.jl b/test/methods.jl index 324edaa8f..b3047989b 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -364,192 +364,6 @@ end [(9.0, 0.1) (9.0, 0.2); (10.0, 0.1) (10.0, 0.2)]) end -createpoint(args...) = ArchGDAL.createpoint(args...) - -@testset "extract" begin - dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) - rast = Raster(Union{Int,Missing}[1 2; 3 4], dimz; name=:test, missingval=missing) - rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) - rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing) - mypoints = [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3), (10.0, 0.2)] - table = (geometry=mypoints, foo=zeros(4)) - st = RasterStack(rast, rast2) - @testset "from Raster" begin - # Tuple points - ex = extract(rast, mypoints) - T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} - @test eltype(ex) == T - @test all(ex .=== T[ - (geometry = missing, test = missing) - (geometry = (9.0, 0.1), test=1) - (geometry = (9.0, 0.2), test=2) - (geometry = (10.0, 0.3), test=missing) - (geometry = (10.0, 0.2), test=4) - ]) - ex = extract(rast_m, mypoints; skipmissing=true) - T = @NamedTuple{geometry::Tuple{Float64, Float64}, test::Int64} - @test eltype(ex) == T - @test all(ex .=== T[(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)]) - ex = extract(rast_m, mypoints; skipmissing=true, geometry=false) - T = @NamedTuple{test::Int64} - @test eltype(ex) == T - @test all(ex .=== T[(test = 1,), (test = 2,)]) - @test all(extract(rast_m, mypoints; skipmissing=true, geometry=false, index=true) .=== [ - (index = (1, 1), test = 1,) - (index = (1, 2), test = 2,) - ]) - # NamedTuple (reversed) points - tests a Table that iterates over points - T = @NamedTuple{geometry::Union{@NamedTuple{Y::Float64,X::Float64}},test::Union{Missing,Int64}} - @test all(extract(rast, [(Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) .=== T[ - (geometry = (Y = 0.1, X = 9.0), test = 1) - (geometry = (Y = 0.2, X = 10.0), test = 4) - (geometry = (Y = 0.3, X = 10.0), test = missing) - ]) - # Vector points - @test all(extract(rast, [[9.0, 0.1], [10.0, 0.2]]) .== [ - (geometry = [9.0, 0.1], test = 1) - (geometry = [10.0, 0.2], test = 4) - ]) - # Extract a polygon - p = ArchGDAL.createpolygon([[[8.0, 0.0], [11.0, 0.0], [11.0, 0.4], [8.0, 0.0]]]) - T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},test::Union{Missing,Int64}} - @test all(extract(rast_m, p) .=== T[ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - (geometry = (10.0, 0.2), test = missing) - ]) - # Extract a vector of polygons - ex = extract(rast_m, [p, p]) - @test eltype(ex) == T - @test all(ex .=== T[ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - (geometry = (10.0, 0.2), test = missing) - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - (geometry = (10.0, 0.2), test = missing) - ]) - # Test all the keyword combinations - @test all(extract(rast_m, p) .=== T[ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - (geometry = (10.0, 0.2), test = missing) - ]) - T = @NamedTuple{test::Union{Missing,Int64}} - @test all(extract(rast_m, p; geometry=false) .=== T[ - (test = 1,) - (test = 3,) - (test = missing,) - ]) - T = @NamedTuple{index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} - @test all(extract(rast_m, p; geometry=false, index=true) .=== T[ - (index = (1, 1), test = 1) - (index = (2, 1), test = 3) - (index = (2, 2), test = missing) - ]) - T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} - @test all(extract(rast_m, p; index=true) .=== T[ - (geometry = (9.0, 0.1), index = (1, 1), test = 1) - (geometry = (10.0, 0.1), index = (2, 1), test = 3) - (geometry = (10.0, 0.2), index = (2, 2), test = missing) - ]) - @test extract(rast_m, p; skipmissing=true) == [ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.1), test = 3) - ] - @test extract(rast_m, p; skipmissing=true, geometry=false) == [ - (test = 1,) - (test = 3,) - ] - @test extract(rast_m, p; skipmissing=true, geometry=false, index=true) == [ - (index = (1, 1), test = 1) - (index = (2, 1), test = 3) - ] - @test extract(rast_m, p; skipmissing=true, index=true) == [ - (geometry = (9.0, 0.1), index = (1, 1), test = 1) - (geometry = (10.0, 0.1), index = (2, 1), test = 3) - ] - @test extract(rast2, p; skipmissing=true) == [ - (geometry = (10.0, 0.1), test2 = 7) - (geometry = (10.0, 0.2), test2 = 8) - ] - # Empty geoms - @test extract(rast, []) == NamedTuple{(:geometry, :test),Tuple{Missing,Missing}}[] - @test extract(rast, []; geometry=false) == NamedTuple{(:test,),Tuple{Missing}}[] - # Missing coord errors - @test_throws ArgumentError extract(rast, [(0.0, missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) - @test_throws ArgumentError extract(rast, [(9.0, 0.1), (0.0, missing), (9.0, 0.2), (10.0, 0.3)]) - @test_throws ArgumentError extract(rast, [(X=0.0, Y=missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) - end - - @testset "with table" begin - T = @NamedTuple{geometry::Union{Missing, Tuple{Float64, Float64}}, test::Union{Missing, Int64}} - @test all(extract(rast, table) .=== T[ - (geometry = missing, test = missing) - (geometry = (9.0, 0.1), test = 1) - (geometry = (9.0, 0.2), test = 2) - (geometry = (10.0, 0.3), test = missing) - (geometry = (10.0, 0.2), test = 4) - ]) - @test extract(rast, table; skipmissing=true) == [ - (geometry = (9.0, 0.1), test = 1) - (geometry = (9.0, 0.2), test = 2) - (geometry = (10.0, 0.2), test = 4) - ] - @test extract(rast, table; skipmissing=true, geometry=false) == [ - (test = 1,) - (test = 2,) - (test = 4,) - ] - @test extract(rast, table; skipmissing=true, geometry=false, index=true) == [ - (index = (1, 1), test = 1,) - (index = (1, 2), test = 2,) - (index = (2, 2), test = 4,) - ] - - @test_throws ArgumentError extract(rast, (foo = zeros(4),)) - end - - @testset "from stack" begin - T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64},test2::Union{Missing,Int64}} - @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) .=== T[ - (geometry = missing, test = missing, test2 = missing) - (geometry = (9.0, 0.1), test = 1, test2 = 5) - (geometry = (10.0, 0.2), test = 4, test2 = 8) - (geometry = (10.0, 0.3), test = missing, test2 = missing) - ]) - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true) == [ - (geometry = (10.0, 0.2), test = 4, test2 = 8) - ] - @test extract(st2, [missing, (2, 2), (2, 1)]; skipmissing=true) == [ - (geometry = (2, 1), a = 7.0, b = 2.0) - ] - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) == [ - (test = 4, test2 = 8) - ] - T = @NamedTuple{index::Union{Missing, Tuple{Int,Int}}, test::Union{Missing, Int64}, test2::Union{Missing, Int64}} - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) == T[ - (index = (2, 2), test = 4, test2 = 8) - ] - # Subset with `names` - T = @NamedTuple{geometry::Union{Missing, Tuple{Float64, Float64}}, test2::Union{Missing, Int64}} - @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test2,)) .=== T[ - (geometry = missing, test2 = missing) - (geometry = (9.0, 0.1), test2 = 5) - (geometry = (10.0, 0.2), test2 = 8) - (geometry = (10.0, 0.3), test2 = missing) - ]) - # Subset with `names` and `skipmissing` with mixed missingvals - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test2,), skipmissing=true) == [ - (geometry = (10.0, 0.2), test2 = 8) - ] - @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test,), skipmissing=true) == [ - (geometry = (9.0, 0.1), test = 1) - (geometry = (10.0, 0.2), test = 4) - ] - end -end - @testset "trim, crop, extend" begin A = [missing missing missing missing 2.0 0.5 @@ -755,4 +569,4 @@ test = rebuild(ga; name = :test) @test_throws "strictly positive" Rasters.sample(StableRNG(123), test, 3, skipmissing = true, replace = false) @test_throws "Cannot draw" Rasters.sample(StableRNG(123), test, 5, replace = false) -end \ No newline at end of file +end diff --git a/test/runtests.jl b/test/runtests.jl index 589bf472d..2080da64c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,15 +11,16 @@ if VERSION >= v"1.9.0" @time @safetestset "extensions" begin include("extensions.jl") end end -@time @safetestset "methods" begin include("methods.jl") end @time @safetestset "array" begin include("array.jl") end @time @safetestset "stack" begin include("stack.jl") end @time @safetestset "series" begin include("series.jl") end @time @safetestset "utils" begin include("utils.jl") end @time @safetestset "set" begin include("set.jl") end +@time @safetestset "adapt" begin include("adapt.jl") end +@time @safetestset "methods" begin include("methods.jl") end @time @safetestset "aggregate" begin include("aggregate.jl") end @time @safetestset "rasterize" begin include("rasterize.jl") end -@time @safetestset "adapt" begin include("adapt.jl") end +@time @safetestset "extract" begin include("extract.jl") end @time @safetestset "reproject" begin include("reproject.jl") end @time @safetestset "warp" begin include("warp.jl") end @time @safetestset "cellarea" begin include("cellarea.jl") end From bc6ac09c6ace9370e77946f081148581dc8b2d05 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 1 Dec 2024 03:31:30 +0100 Subject: [PATCH 03/11] extract in a separate file --- test/extract.jl | 314 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 test/extract.jl diff --git a/test/extract.jl b/test/extract.jl new file mode 100644 index 000000000..d79ff5ad0 --- /dev/null +++ b/test/extract.jl @@ -0,0 +1,314 @@ +using Rasters, Test, DataFrames, Extents +import GeoInterface as GI +using Rasters.Lookups, Rasters.Dimensions + +include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) + +dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) +rast = Raster(Union{Int,Missing}[1 2; 3 4], dimz; name=:test, missingval=missing) +rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) +rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing) +st = RasterStack(rast, rast2) + +points = [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3), (10.0, 0.2)] +poly = GI.Polygon([[(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]]) +linestring = GI.LineString([(8.0, 0.0), (11.0, 0.0), (11.0, 0.4)]) +line = GI.Line([(8.0, 0.0), (12.0, 4.0)]) +table = (geometry=points, foo=zeros(4)) + +@testset "Points" begin + @testset "From Raster" begin + # Tuple points + ex = extract(rast, points) + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64}} + @test eltype(ex) == T + @test all(ex .=== T[ + (geometry = missing, test = missing) + (geometry = (9.0, 0.1), test=1) + (geometry = (9.0, 0.2), test=2) + (geometry = (10.0, 0.3), test=missing) + (geometry = (10.0, 0.2), test=4) + ]) + ex = extract(rast_m, points; skipmissing=true) + T = @NamedTuple{geometry::Tuple{Float64, Float64}, test::Int64} + @test eltype(ex) == T + @test all(ex .=== T[(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)]) + ex = extract(rast_m, points; skipmissing=true, geometry=false) + T = @NamedTuple{test::Int64} + @test eltype(ex) == T + @test all(ex .=== T[(test = 1,), (test = 2,)]) + @test all(extract(rast_m, points; skipmissing=true, geometry=false, index=true) .=== [ + (index = (1, 1), test = 1,) + (index = (1, 2), test = 2,) + ]) + # NamedTuple (reversed) points - tests a Table that iterates over points + T = @NamedTuple{geometry::Union{@NamedTuple{Y::Float64,X::Float64}},test::Union{Missing,Int64}} + @test all(extract(rast, [(Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) .=== T[ + (geometry = (Y = 0.1, X = 9.0), test = 1) + (geometry = (Y = 0.2, X = 10.0), test = 4) + (geometry = (Y = 0.3, X = 10.0), test = missing) + ]) + # Vector points + @test all(extract(rast, [[9.0, 0.1], [10.0, 0.2]]) .== [ + (geometry = [9.0, 0.1], test = 1) + (geometry = [10.0, 0.2], test = 4) + ]) + end + + @testset "From RasterStack" begin + T = @NamedTuple{geometry::Union{Missing,Tuple{Float64,Float64}},test::Union{Missing,Int64},test2::Union{Missing,Int64}} + @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) .=== T[ + (geometry = missing, test = missing, test2 = missing) + (geometry = (9.0, 0.1), test = 1, test2 = 5) + (geometry = (10.0, 0.2), test = 4, test2 = 8) + (geometry = (10.0, 0.3), test = missing, test2 = missing) + ]) + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true) == [ + (geometry = (10.0, 0.2), test = 4, test2 = 8) + ] + @test extract(st2, [missing, (2, 2), (2, 1)]; skipmissing=true) == [ + (geometry = (2, 1), a = 7.0, b = 2.0) + ] + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) == [ + (test = 4, test2 = 8) + ] + T = @NamedTuple{index::Union{Missing, Tuple{Int,Int}}, test::Union{Missing, Int64}, test2::Union{Missing, Int64}} + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) == T[ + (index = (2, 2), test = 4, test2 = 8) + ] + # Subset with `names` + T = @NamedTuple{geometry::Union{Missing, Tuple{Float64, Float64}}, test2::Union{Missing, Int64}} + @test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test2,)) .=== T[ + (geometry = missing, test2 = missing) + (geometry = (9.0, 0.1), test2 = 5) + (geometry = (10.0, 0.2), test2 = 8) + (geometry = (10.0, 0.3), test2 = missing) + ]) + # Subset with `names` and `skipmissing` with mixed missingvals + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test2,), skipmissing=true) == [ + (geometry = (10.0, 0.2), test2 = 8) + ] + @test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; name=(:test,), skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.2), test = 4) + ] + end + + @testset "Errors" begin + # Missing coord errors + @test_throws ArgumentError extract(rast, [(0.0, missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) + @test_throws ArgumentError extract(rast, [(9.0, 0.1), (0.0, missing), (9.0, 0.2), (10.0, 0.3)]) + @test_throws ArgumentError extract(rast, [(X=0.0, Y=missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) + end +end + +@testset "Polygons" begin + # Extract a polygon + T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},test::Union{Missing,Int64}} + @test all(extract(rast_m, poly) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + # Test all the keyword combinations + @test all(extract(rast_m, poly) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + T = @NamedTuple{test::Union{Missing,Int64}} + @test all(extract(rast_m, poly; geometry=false) .=== T[ + (test = 1,) + (test = 3,) + (test = missing,) + ]) + T = @NamedTuple{index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} + @test all(extract(rast_m, poly; geometry=false, index=true) .=== T[ + (index = (1, 1), test = 1) + (index = (2, 1), test = 3) + (index = (2, 2), test = missing) + ]) + T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},index::Union{Missing,Tuple{Int,Int}},test::Union{Missing,Int64}} + @test all(extract(rast_m, poly; index=true) .=== T[ + (geometry = (9.0, 0.1), index = (1, 1), test = 1) + (geometry = (10.0, 0.1), index = (2, 1), test = 3) + (geometry = (10.0, 0.2), index = (2, 2), test = missing) + ]) + @test extract(rast_m, poly; skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + ] + @test extract(rast_m, poly; skipmissing=true, geometry=false) == [ + (test = 1,) + (test = 3,) + ] + @test extract(rast_m, poly; skipmissing=true, geometry=false, index=true) == [ + (index = (1, 1), test = 1) + (index = (2, 1), test = 3) + ] + @test extract(rast_m, poly; skipmissing=true, index=true) == [ + (geometry = (9.0, 0.1), index = (1, 1), test = 1) + (geometry = (10.0, 0.1), index = (2, 1), test = 3) + ] + @test extract(rast2, poly; skipmissing=true) == [ + (geometry = (10.0, 0.1), test2 = 7) + (geometry = (10.0, 0.2), test2 = 8) + ] + T = @NamedTuple{geometry::Union{Tuple{Float64,Float64}},test::Union{Missing,Int64}} + @test all(extract(rast_m, poly) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + + @testset "Vector of polygons" begin + ex = extract(rast_m, [poly, poly, poly]) + @test eltype(ex) == T + @test all(ex .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + end + + @testset "LineString" begin + @test extract(rast, linestring) == T[ + (geometry=(9.0, 0.1), test=1) + (geometry=(10.0, 0.1), test=3) + ] + end +end + +@testset "Extract a linestring" begin + T = @NamedTuple{geometry::Tuple{Float64,Float64},test::Union{Missing,Int64}} + Tsm = @NamedTuple{geometry::Tuple{Float64,Float64},test::Int64} + + @test all(extract(rast, l) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) +end + +@testset "Table" begin + T = @NamedTuple{geometry::Union{Missing, Tuple{Float64, Float64}}, test::Union{Missing, Int64}} + @test all(extract(rast, table) .=== T[ + (geometry = missing, test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.3), test = missing) + (geometry = (10.0, 0.2), test = 4) + ]) + @test extract(rast, table; skipmissing=true) == [ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.2), test = 4) + ] + @test extract(rast, table; skipmissing=true, geometry=false) == [ + (test = 1,) + (test = 2,) + (test = 4,) + ] + @test extract(rast, table; skipmissing=true, geometry=false, index=true) == [ + (index = (1, 1), test = 1,) + (index = (1, 2), test = 2,) + (index = (2, 2), test = 4,) + ] + @test_throws ArgumentError extract(rast, (foo = zeros(4),)) +end + +@testset "Empty geoms" begin + @test extract(rast, []) == NamedTuple{(:geometry, :test),Tuple{Missing,Missing}}[] + @test extract(rast, []; geometry=false) == NamedTuple{(:test,),Tuple{Missing}}[] +end + +#= =# +using BenchmarkTools +using ProfileView + +dimz = (X(5.0:0.1:15.0; sampling=Intervals(Start())), Y(-0.1:0.01:0.5; sampling=Intervals(Start()))) +rast_m = Raster(rand([missing, 1, 2], dimz); name=:test, missingval=missing) +rast = Raster(rand(Int, dimz); name=:test2, missingval=5) +st = RasterStack((a=rast, b=rast, c=Float32.(rast))) + +ks = ntuple(x -> gensym(), 100) +layers = NamedTuple{ks}(ntuple(x -> rast2, length(ks))) +st100 = RasterStack(layers) + +poly = GI.Polygon([[(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]]) +linestring = GI.LineString([(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]) +line = GI.Line([(8.0, 0.0), (12.0, 4.0)]) +polys = [poly for i in 1:2] +lines = [line for i in 1:2] +linestrings = [linestring for i in 1:2] + +extract(rast, linestring) +extract(rast, poly) +@time extract(rast, lines; geometry=false, threaded=true, flatten=true) +@time extract(st, lines; geometry=false, threaded=true, flatten=false) +@time extract(st, lines; geometry=false, threaded=false, flatten=false) |> length +@time extract(rast_m, lines; geometry=false, threaded=true, flatten=false) |> length +@time extract(st, lines; geometry=false, threaded=false, flatten=false) |> length +@time extract(st100, lines; geometry=false, threaded=false, flatten=false) |> length +@time extract(st100, lines; geometry=false, threaded=true, flatten=false) |> length +@time extract(rast, lines; geometry=false, threaded=true, flatten=true) +@time extract(rast, lines; threaded=true, flatten=true, progress=false) +@time extract(rast, lines; threaded=false, flatten=false, progress=false); +@benchmark extract(rast, lines; threaded=false, flatten=true) +@benchmark extract(rast, lines; threaded=true, flatten=true) +@benchmark extract(rast, lines; threaded=false, flatten=false) +@benchmark extract(rast, lines; threaded=true, flatten=false) +@profview +@time extract(rast, polys; flatten=false); +extract(rast, polys; flatten=true); +extract(st100, linestring) +extract(st100, poly) + +@b extract(rast2, linestring) +@b extract(rast, linestring) +@b extract(rast, poly) +@b extract(rast, polies) +@b extract(rast, polies; flatten=false) +@b extract(st26, linestring) +@b extract(st26, poly) + + +f(rast, ls, n; skipmissing=true) = for _ in 1:n extract(rast, ls; skipmissing) end +@profview f(rast2, polies, 10000; skipmissing=false) +@profview f(rast2, poly, 10000; skipmissing=false) +@profview f(rast, poly, 10000; skipmissing=false) +@profview f(rast2, linestring, 10000; skipmissing=false) +@profview f(rast, linestring, 10000; skipmissing=false) +@profview f(rast2, linestring, 10000) +@profview f(st, linestring, 100000; skipmissing=false) +@profview f(st, linestring, 100000; skipmissing=true) +@profview f(st2, linestring, 100000) +@profview f(st26, linestring, 1000) +@profview f(st26, line, 1000) +@profview f(st26, poly, 10000; skipmissing=true) +@profview f(st26, linestring, 1000; skipmissing=true) +@profview f(st26, linestring, 1000; skipmissing=false) + +# Demo plots +using GLMakie + +Makie.plot(rast; colormap=:Reds) +Makie.plot!(polys) +points = getindex.(extract(rast, poly; index=true, flatten=true), :geometry) +Makie.scatter!(points; color=(:yellow, 0.5)) + +Makie.plot(rast; colormap=:Reds) +Makie.plot!(polys) +points = getindex.(extract(rast, poly; index=true, flatten=true, boundary=:touches), :geometry) +Makie.scatter!(points; color=(:yellow, 0.5)) + +Makie.plot(rast; colormap=:Reds) +Makie.plot!(linestring; color=:violet, linewidth=5) +points = getindex.(extract(rast, linestring; index=true), :geometry) +Makie.scatter!(points; color=:yellow) + + +#= =# From 869a9ab4b14f573e624255993028bc3ae8b82be8 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 1 Dec 2024 15:41:45 +0100 Subject: [PATCH 04/11] flatten --- src/methods/extract.jl | 73 ++++++++++++++++++------------------------ test/extract.jl | 39 +++++++++++----------- 2 files changed, 53 insertions(+), 59 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 51fa1e316..3c9df3df5 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -201,40 +201,23 @@ function _extract(A::RasterStackOrArray, ::Nothing, geoms::AbstractArray, e::Ext allpoints && return rows end - return if istrue(e.flatten) - if threaded - thread_rows = [T[] for _ in 1:Threads.nthreads()] - # thread_line_refs = [LineRefs{eltype(e)}() for _ in 1:Threads.nthreads()] - _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i - id = Threads.threadid() - # line_refs = thread_line_refs[id] - rows = _extract(A, geoms[i], e; kw...) - append!(thread_rows[id], rows) - end - l = sum(map(length, thread_rows)) - rows = Vector{T}(undef, l) - i = 1 - for rs in thread_rows - for r in rs - rows[i] = r - i += 1 - end - end - rows - else - line_refs = LineRefs{eltype(e)}() - rows_ref = Ref{Vector{T}}() - _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i - rows_ref[] = _extract(A, geoms[i], e; line_refs, kw...) + row_vecs = Vector{Vector{T}}(undef, length(geoms)) + _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i + row_vecs[i] = _extract(A, geoms[i], e; kw...) + end + if istrue(e.flatten) + n = sum(map(length, row_vecs)) + rows = Vector{T}(undef, n) + i = 1 + for rs in row_vecs + for r in rs + rows[i] = r + i += 1 end - rows_ref[] end + return rows else - row_vecs = Vector{Vector{T}}(undef, length(geoms)) - _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i - row_vecs[i] = _extract(A, geoms[i], e; kw...) - end - row_vecs + return row_vecs end end function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom, e; kw...) @@ -283,14 +266,9 @@ end A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom, e; kw... ) # Make a raster mask of the geometry - ods = otherdims(A, DEFAULT_POINT_ORDER) - template = view(A, Touches(GI.extent(geom))) - if length(ods) > 0 - template = view(template, map(d -> rebuild(d, firstindex(d)), ods)) - end - B = boolmask(geom; to=template, burncheck_metadata=NoMetadata(), kw...) + dims, offset = _template(A, geom) + B = boolmask(geom; to=dims, burncheck_metadata=NoMetadata(), kw...) n = count(B) - offset = _offset(template) dp = DimPoints(A) i = 1 rows = _init_rows(e, n) @@ -304,6 +282,22 @@ end return rows end +function _template(x, geom) + ods = otherdims(x, DEFAULT_POINT_ORDER) + # Build a dummy raster in case this is a stack + # views are just easier on an array than dims + t1 = Raster(FillArrays.Zeros(size(x)), dims(x)) + t2 = if length(ods) > 0 + view(t1, map(d -> rebuild(d, firstindex(d)), ods)) + else + t1 + end + I = dims2indices(dims(t2), Touches(GI.extent(geom))) + t3 = view(t2, I...) + offset = CartesianIndex(map(first, I)) + return dims(t3), offset +end + Base.@assume_effects :foldable function _maybe_set_row!( rows, skipmissing::_True, e, A, dp, offset, I, i; props=_prop_nt(e, A, I) @@ -447,6 +441,3 @@ Base.@assume_effects :total function _maybe_add_fields( :index in K ? merge((; index=I), props) : props end |> T end - -_offset(template) = - CartesianIndex(map(x -> first(x) - 1, parent(template).indices)) diff --git a/test/extract.jl b/test/extract.jl index d79ff5ad0..37638b149 100644 --- a/test/extract.jl +++ b/test/extract.jl @@ -49,10 +49,10 @@ table = (geometry=points, foo=zeros(4)) (geometry = (Y = 0.3, X = 10.0), test = missing) ]) # Vector points - @test all(extract(rast, [[9.0, 0.1], [10.0, 0.2]]) .== [ + @test extract(rast, [[9.0, 0.1], [10.0, 0.2]] == [ (geometry = [9.0, 0.1], test = 1) (geometry = [10.0, 0.2], test = 4) - ]) + ] end @testset "From RasterStack" begin @@ -225,9 +225,13 @@ end @test extract(rast, []; geometry=false) == NamedTuple{(:test,),Tuple{Missing}}[] end -#= =# +#= using BenchmarkTools using ProfileView +using Rasters +using DataFrames +import GeoInterface as GI +using Rasters.Lookups, Rasters.Dimensions dimz = (X(5.0:0.1:15.0; sampling=Intervals(Start())), Y(-0.1:0.01:0.5; sampling=Intervals(Start()))) rast_m = Raster(rand([missing, 1, 2], dimz); name=:test, missingval=missing) @@ -235,28 +239,28 @@ rast = Raster(rand(Int, dimz); name=:test2, missingval=5) st = RasterStack((a=rast, b=rast, c=Float32.(rast))) ks = ntuple(x -> gensym(), 100) -layers = NamedTuple{ks}(ntuple(x -> rast2, length(ks))) -st100 = RasterStack(layers) +st100 = RasterStack(NamedTuple{ks}(ntuple(x -> rast, length(ks)))) poly = GI.Polygon([[(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]]) linestring = GI.LineString([(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]) line = GI.Line([(8.0, 0.0), (12.0, 4.0)]) -polys = [poly for i in 1:2] -lines = [line for i in 1:2] -linestrings = [linestring for i in 1:2] +polys = [poly for i in 1:10000] +lines = [line for i in 1:100000] +linestrings = [linestring for i in 1:100000] extract(rast, linestring) -extract(rast, poly) -@time extract(rast, lines; geometry=false, threaded=true, flatten=true) -@time extract(st, lines; geometry=false, threaded=true, flatten=false) -@time extract(st, lines; geometry=false, threaded=false, flatten=false) |> length +extract(rast, polys; threaded=false) +@time extract(rast, polys; geometry=false, threaded=true, flatten=true) |> length +@time extract(rast, lines; geometry=false, threaded=true, flatten=true) |> length +@time extract(st, polys; geometry=false, threaded=true, flatten=true) |> length +@time extract(st, lines; geometry=false, threaded=true, flatten=true) |> length + @time extract(rast_m, lines; geometry=false, threaded=true, flatten=false) |> length -@time extract(st, lines; geometry=false, threaded=false, flatten=false) |> length @time extract(st100, lines; geometry=false, threaded=false, flatten=false) |> length @time extract(st100, lines; geometry=false, threaded=true, flatten=false) |> length -@time extract(rast, lines; geometry=false, threaded=true, flatten=true) -@time extract(rast, lines; threaded=true, flatten=true, progress=false) -@time extract(rast, lines; threaded=false, flatten=false, progress=false); +@time extract(rast, lines; geometry=false, threaded=true, flatten=true) |> length +@time extract(rast, lines; threaded=true, flatten=true, progress=false) |> length +@time extract(rast, lines; threaded=false, flatten=false, progress=false) |> length @benchmark extract(rast, lines; threaded=false, flatten=true) @benchmark extract(rast, lines; threaded=true, flatten=true) @benchmark extract(rast, lines; threaded=false, flatten=false) @@ -310,5 +314,4 @@ Makie.plot!(linestring; color=:violet, linewidth=5) points = getindex.(extract(rast, linestring; index=true), :geometry) Makie.scatter!(points; color=:yellow) - -#= =# +=# From 5c2e582efb09e7cfe03a2bf117db13101f220ac6 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 1 Dec 2024 19:27:43 +0100 Subject: [PATCH 05/11] bugfixes --- src/methods/burning/line.jl | 1 + src/methods/extract.jl | 69 ++++++++++------ test/extract.jl | 161 +++++++++++++++++++++++++++++++----- 3 files changed, 185 insertions(+), 46 deletions(-) diff --git a/src/methods/burning/line.jl b/src/methods/burning/line.jl index 6dc8c0baf..63d3d154c 100644 --- a/src/methods/burning/line.jl +++ b/src/methods/burning/line.jl @@ -91,6 +91,7 @@ end function _burn_line!(f::Function, c::Function, dims::Tuple, line::NamedTuple) xdim, ydim = dims + @show xdim ydim di = DimIndices(dims) @assert xdim isa XDim diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 3c9df3df5..b48e7ae4a 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -180,16 +180,29 @@ function _extract(A::RasterStackOrArray, ::Nothing, geoms::AbstractArray, e::Ext i = 1 rows = Vector{T}(undef, length(geoms)) if istrue(e.skipmissing) - i = 1 - for i in eachindex(geoms) - g = geoms[i] - ismissing(g) && continue - geomtrait(g) isa GI.PointTrait || break - i += _extract_point!(rows, A, g, e, i; kw...) + if threaded + nonmissing = Vector{Bool}(undef, length(geoms)) + _run(1:length(geoms), threaded, progress, "Extracting points...") do i + g = geoms[i] + geomtrait(g) isa GI.PointTrait || return nothing + nonmissing[i] = _extract_point!(rows, A, g, e, i; kw...)::T + return nothing + end + # This could use less memory if we reuse `rows` and shorten it + rows = rows[nonmissing] + else + j = Ref(1) + # For non-threaded be more memory efficient + _run(1:length(geoms), threaded, progress, "Extracting points...") do _ + g = geoms[j[]] + geomtrait(g) isa GI.PointTrait || return nothing + j[] += _extract_point!(rows, A, g, e, i; kw...)::T + return nothing + end + deleteat!(rows, j[]:length(rows)) end - deleteat!(rows, i:length(rows)) else - _run(1:length(geoms)) do i + _run(1:length(geoms), threaded, progress, "Extracting points...") do i g = geoms[i] geomtrait(g) isa GI.PointTrait || return nothing _extract_point!(rows, A, g, e, i; kw...)::T @@ -241,14 +254,18 @@ end kw... ) where T _initialise!(line_refs) - offset = CartesianIndex((0, 0)) + # Subset/offst is not really needed for line buring + # But without it we can get different fp errors + # to polygons and eng up with lines in different + # places when they are right on the line. + dims, offset = _template(A, geom) dp = DimPoints(A) function _length_callback(n) rows = line_refs.rows resize!(rows, n + line_refs.i - 1) end - _burn_lines!(_length_callback, dims(A), geom) do D + _burn_lines!(_length_callback, dims, geom) do D I = CartesianIndex(map(val, D)) # Avoid duplicates from adjacent line segments line_refs.prev == I && return nothing @@ -259,7 +276,7 @@ end return nothing end rows = line_refs.rows - deleteat!(rows, line_refs.i+1:length(rows)) + deleteat!(rows, line_refs.i:length(rows)) return rows end @noinline function _extract( @@ -294,18 +311,18 @@ function _template(x, geom) end I = dims2indices(dims(t2), Touches(GI.extent(geom))) t3 = view(t2, I...) - offset = CartesianIndex(map(first, I)) + offset = CartesianIndex(map(i -> first(i) - 1, I)) return dims(t3), offset end Base.@assume_effects :foldable function _maybe_set_row!( rows, skipmissing::_True, e, A, dp, offset, I, i; - props=_prop_nt(e, A, I) ) - return if !_ismissingval(A, props) - _maybe_set_row!(rows, _False(), e, A, dp, offset, I; props) - else + props = _prop_nt(e, A, I) + return if _ismissingval(A, props) 0 + else + _maybe_set_row!(rows, _False(), e, A, dp, offset, I, i; props) end end Base.@assume_effects :foldable function _maybe_set_row!( @@ -314,7 +331,6 @@ Base.@assume_effects :foldable function _maybe_set_row!( ) where T Ioff = I + offset geom = dp[Ioff] - i <= length(rows) || @show i length(rows) rows[i] = _maybe_add_fields(T, props, geom, Tuple(Ioff)) return 1 end @@ -333,11 +349,11 @@ Base.@assume_effects :foldable _ismissingval(mv, prop)::Bool = (mv === prop) # We always return NamedTuple Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, st::AbstractRasterStack, I) where {P,K} = - P(st[K][I])::P + st[K][I] Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, st::AbstractRasterStack{K}, I) where {P,K} = - P(st[I])::P + st[I] Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, A::AbstractRaster, I) where {P,K} = - P((A[I],))::P + NamedTuple{K}((A[I],)) # Extract a single point # Missing point to remove @@ -345,7 +361,7 @@ Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, A::AbstractRaste rows::Vector{T}, x::RasterStackOrArray, skipmissing::_True, point::Missing, i; kw... ) where T - return 0 + return false end # Missing point to keep @inline function _extract_point!( @@ -355,7 +371,8 @@ end props = map(_ -> missing, names) geom = missing I = missing - return _maybe_add_fields(T, props, geom, I) + rows[i] = _maybe_add_fields(T, props, geom, I) + return true end # Normal point with missing / out of bounds data removed @inline function _extract_point!( @@ -381,17 +398,17 @@ end D = map(rebuild, selector_dims, I) if x isa Raster prop = x[D] - _ismissingval(missingval(x), prop) && return 0 + _ismissingval(missingval(x), prop) && return false props = NamedTuple{K,Tuple{eltype(x)}}((prop,)) else props = x[D][K] - _ismissingval(missingval(x), props) && return 0 + _ismissingval(missingval(x), props) && return false end rows[i] = _maybe_add_fields(T, props, point, I) - return 1 + return true end end - return 0 + return false end # Normal point with missing / out of bounds data kept with `missing` fields @inline function _extract_point!( diff --git a/test/extract.jl b/test/extract.jl index 37638b149..302618d1e 100644 --- a/test/extract.jl +++ b/test/extract.jl @@ -12,8 +12,8 @@ st = RasterStack(rast, rast2) points = [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3), (10.0, 0.2)] poly = GI.Polygon([[(8.0, 0.0), (11.0, 0.0), (11.0, 0.4), (8.0, 0.0)]]) -linestring = GI.LineString([(8.0, 0.0), (11.0, 0.0), (11.0, 0.4)]) -line = GI.Line([(8.0, 0.0), (12.0, 4.0)]) +linestring = GI.LineString([(8.0, 0.0), (9.5, 0.0), (10.0, 0.4)]) +line = GI.Line([(8.0, 0.0), (12.0, 0.4)]) table = (geometry=points, foo=zeros(4)) @testset "Points" begin @@ -173,24 +173,132 @@ end (geometry = (10.0, 0.2), test = missing) ]) end - - @testset "LineString" begin - @test extract(rast, linestring) == T[ - (geometry=(9.0, 0.1), test=1) - (geometry=(10.0, 0.1), test=3) - ] - end end @testset "Extract a linestring" begin T = @NamedTuple{geometry::Tuple{Float64,Float64},test::Union{Missing,Int64}} Tsm = @NamedTuple{geometry::Tuple{Float64,Float64},test::Int64} + linestrings = [linestring, linestring] + fc = GI.FeatureCollection(map(GI.Feature, [linestring, linestring])) + + # Single LineString + @test extract(rast, linestring) isa Vector{T} + @test all(extract(rast_m, linestring) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + @test all(extract(rast_m, linestring; skipmissing=true) .=== Tsm[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + ]) - @test all(extract(rast, l) .=== T[ + # Multiple linstrings + # Test all combinations of skipmissing, flatten and threaded + @test extract(rast_m, linestrings; skipmissing=true) isa Vector{Tsm} + @test extract(rast_m, fc; skipmissing=true) == + extract(rast_m, fc; skipmissing=true, threaded=true) == + extract(rast_m, linestrings; skipmissing=true) == + extract(rast_m, linestrings; skipmissing=true, threaded=true) == Tsm[ (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + ] + + @test extract(rast_m, linestrings; skipmissing=false) isa Vector{T} + @test all( + extract(rast_m, fc; skipmissing=false) .=== + extract(rast_m, fc; skipmissing=false, threaded=true) .=== + extract(rast_m, linestrings; skipmissing=false) .=== + extract(rast_m, linestrings; skipmissing=false, threaded=true) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) + ]) + + @test extract(rast_m, linestrings; skipmissing=true, flatten=false) isa Vector{Vector{Tsm}} + @test extract(rast_m, linestrings; skipmissing=true, flatten=false) == + extract(rast_m, linestrings; skipmissing=true, flatten=false, threaded=true) == Vector{Tsm}[ + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], + ] + + # Nested Vector holding missing needs special handling to check equality + @test extract(rast_m, linestrings; skipmissing=false, flatten=false) isa Vector{Vector{T}} + ref = Vector{T}[ + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)] , + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], + ] + matching(a, b) = all(map(===, a, b)) + @test all(map(matching, extract(rast_m, linestrings; skipmissing=false, flatten=false, threaded=false), ref)) + @test all(map(matching, extract(rast_m, linestrings; skipmissing=false, flatten=false, threaded=true), ref)) +end + +@testset "Extract a line" begin + T = @NamedTuple{geometry::Tuple{Float64,Float64},test::Union{Missing,Int64}} + Tsm = @NamedTuple{geometry::Tuple{Float64,Float64},test::Int64} + lines = [line, line] + fc = GI.FeatureCollection(map(GI.Feature, [line, line])) + + # Single LineString + @test extract(rast, line) isa Vector{T} + @test all(extract(rast_m, line) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) (geometry = (10.0, 0.2), test = missing) ]) + @test all(extract(rast_m, line; skipmissing=true) .=== Tsm[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + ]) + + # Multiple linstrings + # Test all combinations of skipmissing, flatten and threaded + @test extract(rast_m, lines; skipmissing=true) isa Vector{Tsm} + @test extract(rast_m, fc; skipmissing=true) == + extract(rast_m, fc; skipmissing=true, threaded=true) == + extract(rast_m, lines; skipmissing=true) == + extract(rast_m, lines; skipmissing=true, threaded=true) == Tsm[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + ] + + @test extract(rast_m, lines; skipmissing=false) isa Vector{T} + @test all( + extract(rast_m, fc; skipmissing=false) .=== + extract(rast_m, fc; skipmissing=false, threaded=true) .=== + extract(rast_m, lines; skipmissing=false) .=== + extract(rast_m, lines; skipmissing=false, threaded=true) .=== T[ + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (9.0, 0.2), test = 2) + (geometry = (10.0, 0.2), test = missing) + ]) + + @test extract(rast_m, lines; skipmissing=true, flatten=false) isa Vector{Vector{Tsm}} + @test extract(rast_m, lines; skipmissing=true, flatten=false) == + extract(rast_m, lines; skipmissing=true, flatten=false, threaded=true) == Vector{Tsm}[ + [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)], + [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)], + ] + + # Nested Vector holding missing needs special handling to check equality + @test extract(rast_m, lines; skipmissing=false, flatten=false) isa Vector{Vector{T}} + ref = Vector{T}[ + [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2), (geometry = (10.0, 0.2), test = missing)] , + [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2), (geometry = (10.0, 0.2), test = missing)], + ] + matching(a, b) = all(map(===, a, b)) + @test all(map(matching, extract(rast_m, lines; skipmissing=false, flatten=false, threaded=false), ref)) + @test all(map(matching, extract(rast_m, lines; skipmissing=false, flatten=false, threaded=true), ref)) end @testset "Table" begin @@ -226,8 +334,11 @@ end end #= +Some benchmark and plotting code + using BenchmarkTools using ProfileView +using Chairmarks using Rasters using DataFrames import GeoInterface as GI @@ -237,7 +348,6 @@ dimz = (X(5.0:0.1:15.0; sampling=Intervals(Start())), Y(-0.1:0.01:0.5; sampling= rast_m = Raster(rand([missing, 1, 2], dimz); name=:test, missingval=missing) rast = Raster(rand(Int, dimz); name=:test2, missingval=5) st = RasterStack((a=rast, b=rast, c=Float32.(rast))) - ks = ntuple(x -> gensym(), 100) st100 = RasterStack(NamedTuple{ks}(ntuple(x -> rast, length(ks)))) @@ -279,7 +389,6 @@ extract(st100, poly) @b extract(st26, linestring) @b extract(st26, poly) - f(rast, ls, n; skipmissing=true) = for _ in 1:n extract(rast, ls; skipmissing) end @profview f(rast2, polies, 10000; skipmissing=false) @profview f(rast2, poly, 10000; skipmissing=false) @@ -296,22 +405,34 @@ f(rast, ls, n; skipmissing=true) = for _ in 1:n extract(rast, ls; skipmissing) e @profview f(st26, linestring, 1000; skipmissing=true) @profview f(st26, linestring, 1000; skipmissing=false) + + # Demo plots using GLMakie +using GeoInterfaceMakie + +Makie.plot(rast; colormap=:Reds) +Makie.plot!(rast[Touches(GI.extent(first(polys)))] ; colormap=:Greens) +Makie.plot!(polys; alpha=0.2) +ps = getindex.(extract(rast, poly; index=true, flatten=true), :geometry) +Makie.scatter!(ps; color=:yellow) Makie.plot(rast; colormap=:Reds) -Makie.plot!(polys) -points = getindex.(extract(rast, poly; index=true, flatten=true), :geometry) -Makie.scatter!(points; color=(:yellow, 0.5)) +Makie.plot!(rast[Touches(GI.extent(first(polys)))] ; colormap=:Greens) +Makie.plot!(poly; alpha=0.7) +ps = getindex.(extract(rast, poly; index=true, flatten=true, boundary=:touches), :geometry); +Makie.scatter!(ps; color=:pink) Makie.plot(rast; colormap=:Reds) -Makie.plot!(polys) -points = getindex.(extract(rast, poly; index=true, flatten=true, boundary=:touches), :geometry) -Makie.scatter!(points; color=(:yellow, 0.5)) +Makie.plot!(rast[Touches(GI.extent(first(polys)))] ; colormap=:Greens) +Makie.plot!(polys; alpha=0.7) +ps = getindex.(extract(rast, poly; index=true, flatten=true, boundary=:inside), :geometry) +Makie.scatter!(ps; color=:pink) Makie.plot(rast; colormap=:Reds) +Makie.plot!(rast[Touches(GI.extent(first(polys)))] ; colormap=:Greens) Makie.plot!(linestring; color=:violet, linewidth=5) -points = getindex.(extract(rast, linestring; index=true), :geometry) -Makie.scatter!(points; color=:yellow) +ps = getindex.(extract(rast, linestring; index=true), :geometry); +Makie.scatter!(ps; color=:yellow) =# From 9ab06aa69fa818895117dceb677e87bc01173103 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 1 Dec 2024 19:41:54 +0100 Subject: [PATCH 06/11] some more comments --- src/methods/burning/array_init.jl | 2 +- src/methods/burning/line.jl | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/methods/burning/array_init.jl b/src/methods/burning/array_init.jl index 8cc8a6200..5f130b992 100644 --- a/src/methods/burning/array_init.jl +++ b/src/methods/burning/array_init.jl @@ -7,7 +7,7 @@ _init_bools(to, T::Type; kw...) = _init_bools(to, T, nothing; kw...) _init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = _init_bools(dims(first(to)), T, data; kw...) _init_bools(to::AbstractRasterStack, T::Type, data; kw...) = - _init_bools(first(to), dims(to), T, data; kw...) + _init_bools(dims(to), dims(to), T, data; kw...) _init_bools(to::AbstractRaster, T::Type, data; kw...) = _init_bools(to, dims(to), T, data; kw...) _init_bools(to::Extents.Extent, T::Type, data; kw...) = diff --git a/src/methods/burning/line.jl b/src/methods/burning/line.jl index 63d3d154c..193f208b1 100644 --- a/src/methods/burning/line.jl +++ b/src/methods/burning/line.jl @@ -1,6 +1,6 @@ # _burn_lines! # Fill a raster with `fill` where pixels touch lines in a geom -# Separated for a type stability function barrier +# Usually `fill` is `true` of `false` function _burn_lines!( B::AbstractRaster, geom; fill=true, verbose=false, kw... ) @@ -19,8 +19,7 @@ function _burn_lines!( """ all(regular) || throw(ArgumentError(msg)) - # For arbitrary dimension indexing - + # Set indices of B as `fill` when a cell is found to burn. _burn_lines!(identity, dims(B), geom) do D @inbounds B[D] = fill end @@ -87,8 +86,11 @@ end # Line-burning algorithm # Burns a single line into a raster with value where pixels touch a line # +# Function `f` does the actual work when passed a Dimension Tuple of a pixel to burn, +# and `c` is an initialisation callback that is passed the maximyum +# number of times `f` will be called. It may be called less than that. +# # TODO: generalise to Irregular spans? - function _burn_line!(f::Function, c::Function, dims::Tuple, line::NamedTuple) xdim, ydim = dims @show xdim ydim @@ -135,7 +137,8 @@ function _burn_line!(f::Function, c::Function, dims::Tuple, line::NamedTuple) n_on_line = 0 - # inform m of number of runs of `f` + # Pass of number of runs of `f` to callback `c` + # This can help with e.g. allocating a vector c(manhattan_distance + 1) if manhattan_distance == 0 From 55b5e7cbbb857336bf5be5694ee5e09c861f7c5a Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 1 Dec 2024 22:24:04 +0100 Subject: [PATCH 07/11] more perf tweaks --- src/methods/burning/line.jl | 1 - src/methods/extract.jl | 130 +++++++++++++++++++++++++----------- src/utils.jl | 4 ++ test/extract.jl | 50 ++++++++------ 4 files changed, 125 insertions(+), 60 deletions(-) diff --git a/src/methods/burning/line.jl b/src/methods/burning/line.jl index 193f208b1..c5d973dbe 100644 --- a/src/methods/burning/line.jl +++ b/src/methods/burning/line.jl @@ -93,7 +93,6 @@ end # TODO: generalise to Irregular spans? function _burn_line!(f::Function, c::Function, dims::Tuple, line::NamedTuple) xdim, ydim = dims - @show xdim ydim di = DimIndices(dims) @assert xdim isa XDim diff --git a/src/methods/extract.jl b/src/methods/extract.jl index b48e7ae4a..31f10a14b 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -6,7 +6,7 @@ struct Extractor{T,P,K,N<:NamedTuple{K},G,I,S,F} skipmissing::S flatten::F end -function Extractor(x, data; +Base.@constprop :aggressive @inline function Extractor(x, data; name::NTuple{<:Any,Symbol}, skipmissing, flatten, @@ -19,13 +19,18 @@ function Extractor(x, data; i = _booltype(index) sm = _booltype(skipmissing) f = _booltype(flatten) - T = _geom_rowtype(x, data; geometry=g, index=i, names=nt, skipmissing=sm) + Extractor(x, data, nt, g, i, sm, f) +end +function Extractor(x, data, nt::N, g::G, i::I, sm::S, f::F) where {N<:NamedTuple{K},G,I,S,F} where K P = _proptype(x; skipmissing=sm, names=nt) - Extractor{T,P,name,typeof(nt),typeof(g),typeof(i),typeof(sm),typeof(f)}(nt, g, i, sm, f) + T = _geom_rowtype(x, data; geometry=g, index=i, names=nt, skipmissing=sm) + Extractor{T,P,K,N,G,I,S,F}(nt, g, i, sm, f) end Base.eltype(::Extractor{T}) where T = T +# This mutable object is passed into closures as +# fields are type-stable in clusores but variables are not mutable struct LineRefs{T} i::Int prev::CartesianIndex{2} @@ -67,12 +72,12 @@ end """ extract(x, geometries; kw...) -Extracts the value of `Raster` or `RasterStack` for the passed in geometries, +Extracts the value of `Raster` or `RasterStack` for the passed in geometries, returning an `Vector{NamedTuple}` with properties for `:geometry` and `Raster` or `RasterStack` layer values. For lines, linestrings and linear rings points are extracted for each pixel that -the line touches. +the line touches. For polygons, all cells witih centers covered by the polygon are returned. @@ -86,13 +91,18 @@ $DATA_ARGUMENT # Keywords -- `geometry`: include `:geometry` column in rows. `true` by default. -- `index`: include `:index` of the `CartesianIndex` in rows, `false` by default. -- `name`: a `Symbol` or `Tuple` of `Symbol` corresponding to layer/s of a `RasterStack` to extract. - All layers are extracted by default. +- `geometry`: include a `:geometry` field in rows, which will be a + tuple point. Either the original point for points or the pixel + center point for line and polygon extract. `true` by default. +- `index`: include `:index` field of extracted points in rows, `false` by default. +- `name`: a `Symbol` or `Tuple` of `Symbol` corresponding to layer/s + of a `RasterStack` to extract. All layers are extracted by default. - `skipmissing`: skip missing points automatically. -- `flatten`: flatten extracted points from multiple geometries into a single - vector. `true` by default. Unmixed point geometries are always flattened. +- `flatten`: flatten extracted points from multiple + geometries into a single vector. `true` by default. + Unmixed point geometries are always flattened. + Flattening is slow and single threaded, `flatten=false` may be a + large performance improvement in combination with `threaded=true`. - `atol`: a tolerance for floating point lookup values for when the `Lookup` contains `Points`. `atol` is ignored for `Intervals`. $BOUNDARY_KEYWORD @@ -129,7 +139,7 @@ Note: passing in arrays, geometry collections or feature collections containing a mix of points and other geometries has undefined results. """ function extract end -@inline function extract(x::RasterStackOrArray, data; +Base.@constprop :aggressive @inline function extract(x::RasterStackOrArray, data; geometrycolumn=nothing, names::NTuple{<:Any,Symbol}=_names(x), name::NTuple{<:Any,Symbol}=names, @@ -214,26 +224,40 @@ function _extract(A::RasterStackOrArray, ::Nothing, geoms::AbstractArray, e::Ext allpoints && return rows end - row_vecs = Vector{Vector{T}}(undef, length(geoms)) - _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i - row_vecs[i] = _extract(A, geoms[i], e; kw...) + + row_vecs = Vector{Vector{T}}(undef, length(geoms)) + if threaded + thread_line_refs = [LineRefs{T}() for _ in 1:Threads.nthreads()] + _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i + line_refs = thread_line_refs[Threads.threadid()] + row_vecs[i] = _extract(A, geoms[i], e; line_refs, kw...) + end + else + line_refs = LineRefs{T}() + _run(1:length(geoms), threaded, progress, "Extracting geometries...") do i + row_vecs[i] = _extract(A, geoms[i], e; line_refs, kw...) + end end + + # TODO this is a bit slow and only on one thread if istrue(e.flatten) n = sum(map(length, row_vecs)) - rows = Vector{T}(undef, n) - i = 1 - for rs in row_vecs - for r in rs - rows[i] = r - i += 1 + out_rows = Vector{T}(undef, n) + k::Int = 1 + for row_vec in row_vecs + for row in row_vec + out_rows[k] = row + k += 1 end end - return rows + return out_rows else return row_vecs end end -function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom, e; kw...) +function _extract( + A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom, e::Extractor{T}; kw... +)::Vector{T} where T n = GI.npoint(geom) rows = _init_rows(e, n) for p in GI.getpoint(geom) @@ -246,21 +270,22 @@ function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom, e; return rows end function _extract(A::RasterStackOrArray, ::GI.PointTrait, p, e; kw...) - _extract_point!(rows, A, e, p, length(rows); kw...) + rows = _init_rows(e, 1) + _extract_point!(rows, A, e, p, 1; kw...) end @noinline function _extract( A::RasterStackOrArray, trait::GI.AbstractLineStringTrait, geom, e::Extractor{T}; - line_refs=LineRefs{T}(), + line_refs=LineRefs{T}(), kw... -) where T +)::Vector{T} where T _initialise!(line_refs) # Subset/offst is not really needed for line buring # But without it we can get different fp errors - # to polygons and eng up with lines in different + # to polygons and eng up with lines in different # places when they are right on the line. dims, offset = _template(A, geom) dp = DimPoints(A) - function _length_callback(n) + function _length_callback(n) rows = line_refs.rows resize!(rows, n + line_refs.i - 1) end @@ -280,8 +305,8 @@ end return rows end @noinline function _extract( - A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom, e; kw... -) + A::RasterStackOrArray, t::GI.AbstractGeometryTrait, geom, e::Extractor{T}; kw... +)::Vector{T} where T # Make a raster mask of the geometry dims, offset = _template(A, geom) B = boolmask(geom; to=dims, burncheck_metadata=NoMetadata(), kw...) @@ -318,8 +343,8 @@ end Base.@assume_effects :foldable function _maybe_set_row!( rows, skipmissing::_True, e, A, dp, offset, I, i; ) - props = _prop_nt(e, A, I) - return if _ismissingval(A, props) + props = _prop_nt(e, A, I, skipmissing) + return if ismissing(props) 0 else _maybe_set_row!(rows, _False(), e, A, dp, offset, I, i; props) @@ -327,7 +352,7 @@ Base.@assume_effects :foldable function _maybe_set_row!( end Base.@assume_effects :foldable function _maybe_set_row!( rows::Vector{T}, skipmissing::_False, e, A, dp, offset, I, i; - props=_prop_nt(e, A, I) + props=_prop_nt(e, A, I, skipmissing) ) where T Ioff = I + offset geom = dp[Ioff] @@ -348,12 +373,39 @@ Base.@assume_effects :foldable _ismissingval(mv, props::NamedTuple)::Bool = Base.@assume_effects :foldable _ismissingval(mv, prop)::Bool = (mv === prop) # We always return NamedTuple -Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, st::AbstractRasterStack, I) where {P,K} = - st[K][I] -Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, st::AbstractRasterStack{K}, I) where {P,K} = - st[I] -Base.@assume_effects :foldable _prop_nt(::Extractor{<:Any,P,K}, A::AbstractRaster, I) where {P,K} = - NamedTuple{K}((A[I],)) +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, st::AbstractRasterStack, I, sm::_False +)::P where {P,K} + P(st[K][I]) +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, st::AbstractRasterStack{K}, I, sm::_False +)::P where {P,K} + P(st[I]) +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, A::AbstractRaster, I, sm::_False +)::P where {P,K} + P(NamedTuple{K}((A[I],))) +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, st::AbstractRasterStack, I, sm::_True +)::Union{P,Missing} where {P,K} + x = st[K][I] + _ismissingval(A, x) ? missing : x::P +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, st::AbstractRasterStack{K}, I, sm::_True +)::Union{P,Missing} where {P,K} + x = st[I] + _ismissingval(st, x) ? missing : x::P +end +Base.@assume_effects :foldable function _prop_nt( + ::Extractor{<:Any,P,K}, A::AbstractRaster, I, sm::_True +)::Union{P,Missing} where {P,K} + x = A[I] + _ismissingval(A, x) ? missing : NamedTuple{K}((x,))::P +end # Extract a single point # Missing point to remove diff --git a/src/utils.jl b/src/utils.jl index dc2184dc5..b7e5d6e18 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -368,7 +368,11 @@ _names(A::AbstractRaster) = (Symbol(name(A)),) _names(A::AbstractRasterStack) = keys(A) using DimensionalData.Lookups: _True, _False + +_booltype(::_True) = _True() +_booltype(::_False) = _False() _booltype(x) = x ? _True() : _False() + istrue(::_True) = true istrue(::_False) = false diff --git a/test/extract.jl b/test/extract.jl index 302618d1e..dd20e030b 100644 --- a/test/extract.jl +++ b/test/extract.jl @@ -339,6 +339,7 @@ Some benchmark and plotting code using BenchmarkTools using ProfileView using Chairmarks +using Cthulhu using Rasters using DataFrames import GeoInterface as GI @@ -363,9 +364,13 @@ extract(rast, polys; threaded=false) @time extract(rast, polys; geometry=false, threaded=true, flatten=true) |> length @time extract(rast, lines; geometry=false, threaded=true, flatten=true) |> length @time extract(st, polys; geometry=false, threaded=true, flatten=true) |> length -@time extract(st, lines; geometry=false, threaded=true, flatten=true) |> length +@time extract(st, lines; geometry=false, threaded=false, flatten=true) |> length @time extract(rast_m, lines; geometry=false, threaded=true, flatten=false) |> length +@time extract(rast, lines; geometry=false, threaded=true, flatten=false) |> length +@time extract(rast_m, lines; skipmissing=true, geometry=false, threaded=true, flatten=false) |> length +@time extract(rast, lines; skipmissing=true, geometry=false, threaded=true, flatten=false) |> length +@time extract(st, lines; skipmissing=true, geometry=false, threaded=true, flatten=false) |> length @time extract(st100, lines; geometry=false, threaded=false, flatten=false) |> length @time extract(st100, lines; geometry=false, threaded=true, flatten=false) |> length @time extract(rast, lines; geometry=false, threaded=true, flatten=true) |> length @@ -375,35 +380,40 @@ extract(rast, polys; threaded=false) @benchmark extract(rast, lines; threaded=true, flatten=true) @benchmark extract(rast, lines; threaded=false, flatten=false) @benchmark extract(rast, lines; threaded=true, flatten=false) -@profview @time extract(rast, polys; flatten=false); extract(rast, polys; flatten=true); extract(st100, linestring) extract(st100, poly) -@b extract(rast2, linestring) -@b extract(rast, linestring) -@b extract(rast, poly) -@b extract(rast, polies) -@b extract(rast, polies; flatten=false) -@b extract(st26, linestring) -@b extract(st26, poly) - +@profview extract(rast, lines) +@profview extract(rast, lines; threaded=true) +@profview extract(rast, linestrings) +@profview extract(rast_m, linestrings) +@profview extract(rast, polys) +@profview extract(rast, polys; flatten=false) +@profview extract(st, linestring) +@profview extract(st, poly) + +# Profile running one function a lot. +# People will do this f(rast, ls, n; skipmissing=true) = for _ in 1:n extract(rast, ls; skipmissing) end -@profview f(rast2, polies, 10000; skipmissing=false) -@profview f(rast2, poly, 10000; skipmissing=false) +@profview f(rast, polies, 10; skipmissing=false) @profview f(rast, poly, 10000; skipmissing=false) -@profview f(rast2, linestring, 10000; skipmissing=false) +@profview f(rast_m, poly, 10000; skipmissing=false) @profview f(rast, linestring, 10000; skipmissing=false) -@profview f(rast2, linestring, 10000) +@profview f(rast_m, linestring, 10000; skipmissing=false) +@profview f(st, linestring, 10000; skipmissing=false) +@profview f(rast, linestring, 10000) @profview f(st, linestring, 100000; skipmissing=false) @profview f(st, linestring, 100000; skipmissing=true) -@profview f(st2, linestring, 100000) -@profview f(st26, linestring, 1000) -@profview f(st26, line, 1000) -@profview f(st26, poly, 10000; skipmissing=true) -@profview f(st26, linestring, 1000; skipmissing=true) -@profview f(st26, linestring, 1000; skipmissing=false) +@profview f(st100, linestring, 1000) +@profview f(st100, line, 1000) +@profview f(st100, poly, 10000; skipmissing=true) + +@profview f(st100, linestring, 1000; skipmissing=true) +@profview f(st100, linestring, 1000; skipmissing=false) +@profview f(st100, linestring, 1000; names=skipmissing=false) +keys(st100)[1:50] From aa266a288126b3e50cdc4a617294e7d8182b977c Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 1 Dec 2024 22:25:47 +0100 Subject: [PATCH 08/11] bugfix --- test/extract.jl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/test/extract.jl b/test/extract.jl index dd20e030b..c9668d03a 100644 --- a/test/extract.jl +++ b/test/extract.jl @@ -178,8 +178,8 @@ end @testset "Extract a linestring" begin T = @NamedTuple{geometry::Tuple{Float64,Float64},test::Union{Missing,Int64}} Tsm = @NamedTuple{geometry::Tuple{Float64,Float64},test::Int64} - linestrings = [linestring, linestring] - fc = GI.FeatureCollection(map(GI.Feature, [linestring, linestring])) + linestrings = [linestring, linestring, linestring] + fc = GI.FeatureCollection(map(GI.Feature, linestrings)) # Single LineString @test extract(rast, linestring) isa Vector{T} @@ -204,6 +204,8 @@ end (geometry = (10.0, 0.1), test = 3) (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) ] @test extract(rast_m, linestrings; skipmissing=false) isa Vector{T} @@ -218,6 +220,9 @@ end (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) (geometry = (10.0, 0.2), test = missing) + (geometry = (9.0, 0.1), test = 1) + (geometry = (10.0, 0.1), test = 3) + (geometry = (10.0, 0.2), test = missing) ]) @test extract(rast_m, linestrings; skipmissing=true, flatten=false) isa Vector{Vector{Tsm}} @@ -225,6 +230,7 @@ end extract(rast_m, linestrings; skipmissing=true, flatten=false, threaded=true) == Vector{Tsm}[ [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], ] # Nested Vector holding missing needs special handling to check equality @@ -232,6 +238,7 @@ end ref = Vector{T}[ [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)] , [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], + [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], ] matching(a, b) = all(map(===, a, b)) @test all(map(matching, extract(rast_m, linestrings; skipmissing=false, flatten=false, threaded=false), ref)) @@ -242,7 +249,7 @@ end T = @NamedTuple{geometry::Tuple{Float64,Float64},test::Union{Missing,Int64}} Tsm = @NamedTuple{geometry::Tuple{Float64,Float64},test::Int64} lines = [line, line] - fc = GI.FeatureCollection(map(GI.Feature, [line, line])) + fc = GI.FeatureCollection(map(GI.Feature, lines)) # Single LineString @test extract(rast, line) isa Vector{T} From 41b4f2126acbc61c20672d6d84bd6b18a2e7f634 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 8 Dec 2024 19:40:25 +0100 Subject: [PATCH 09/11] fix extract ambiguities --- src/methods/extract.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 31f10a14b..5cc8e10a2 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -166,14 +166,14 @@ Base.@constprop :aggressive @inline function extract(x::RasterStackOrArray, data end # TODO use a GeometryOpsCore method like `applyreduce` here? -function _extract(A::RasterStackOrArray, geom::Missing, e; kw...) +function _extract(A::RasterStackOrArray, geom::Missing, e; kw...) where T return if istrue(e.skipmissing) T[] else T[_maybe_add_fields(e, map(_ -> missing, e.names), missing, missing)] end end -function _extract(A::RasterStackOrArray, geom, e; kw...) +function _extract(A::RasterStackOrArray, geom, e; kw...) where T _extract(A, GI.geomtrait(geom), geom, e; kw...) end function _extract(A::RasterStackOrArray, ::Nothing, geoms::AbstractArray, e::Extractor{T}; From 6d1d98fe8e21c084649f98b5fc3870980503c1b9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 8 Dec 2024 20:20:04 +0100 Subject: [PATCH 10/11] more ambiguity --- src/methods/extract.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index 5cc8e10a2..c3af50fd5 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -166,14 +166,14 @@ Base.@constprop :aggressive @inline function extract(x::RasterStackOrArray, data end # TODO use a GeometryOpsCore method like `applyreduce` here? -function _extract(A::RasterStackOrArray, geom::Missing, e; kw...) where T +function _extract(A::RasterStackOrArray, geom::Missing, e; kw...) return if istrue(e.skipmissing) T[] else T[_maybe_add_fields(e, map(_ -> missing, e.names), missing, missing)] end end -function _extract(A::RasterStackOrArray, geom, e; kw...) where T +function _extract(A::RasterStackOrArray, geom, e; kw...) _extract(A, GI.geomtrait(geom), geom, e; kw...) end function _extract(A::RasterStackOrArray, ::Nothing, geoms::AbstractArray, e::Extractor{T}; @@ -269,7 +269,7 @@ function _extract( end return rows end -function _extract(A::RasterStackOrArray, ::GI.PointTrait, p, e; kw...) +function _extract(A::RasterStackOrArray, ::GI.PointTrait, p, e::Extractor{T}; kw...) where T rows = _init_rows(e, 1) _extract_point!(rows, A, e, p, 1; kw...) end From e746713f7ef164ac5ad73d78f87f8f6468be0f3b Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Mon, 9 Dec 2024 23:47:54 +0900 Subject: [PATCH 11/11] open all rasters in `extract` too --- src/methods/extract.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index c3af50fd5..ca2ba6752 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -159,10 +159,11 @@ Base.@constprop :aggressive @inline function extract(x::RasterStackOrArray, data gs = _get_geometries(data, geometrycolumn) gs, first(Base.skipmissing(gs)) end - - xp = _prepare_for_burning(x) - e = Extractor(xp, g1; name, skipmissing, flatten, geometry, index) - return _extract(xp, g, e; kw...) + open(x) do xo + xp = _prepare_for_burning(xo) + e = Extractor(xp, g1; name, skipmissing, flatten, geometry, index) + return _extract(xp, g, e; kw...) + end end # TODO use a GeometryOpsCore method like `applyreduce` here?