From 913c1b3c0d11289b787eb0cbc02779fbaf193f2f Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 23 Apr 2025 10:19:40 +0200 Subject: [PATCH 01/43] Fix outbounds line burn (#951) * fix out of bounds line burning * no show * omg --- src/methods/burning/line.jl | 18 +++++++++--------- test/extract.jl | 21 +++++++-------------- test/methods.jl | 1 + 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/methods/burning/line.jl b/src/methods/burning/line.jl index 24f924f6c..f960eddf1 100644 --- a/src/methods/burning/line.jl +++ b/src/methods/burning/line.jl @@ -115,23 +115,23 @@ function _burn_line!(f::Function, c::Function, dims::Tuple, line::NamedTuple) # Ray/Slope calculations # Straight distance to the first vertical/horizontal grid boundaries if relstop.x > relstart.x - xoffset = trunc(relstart.x) - relstart.x + 1 - xmoves = trunc(Int, relstop.x) - trunc(Int, relstart.x) + xoffset = floor(relstart.x) - relstart.x + 1 + xmoves = floor(Int, relstop.x) - floor(Int, relstart.x) else - xoffset = relstart.x - trunc(relstart.x) - xmoves = trunc(Int, relstart.x) - trunc(Int, relstop.x) + xoffset = relstart.x - floor(relstart.x) + xmoves = floor(Int, relstart.x) - floor(Int, relstop.x) end if relstop.y > relstart.y - yoffset = trunc(relstart.y) - relstart.y + 1 - ymoves = trunc(Int, relstop.y) - trunc(Int, relstart.y) + yoffset = floor(relstart.y) - relstart.y + 1 + ymoves = floor(Int, relstop.y) - floor(Int, relstart.y) else - yoffset = relstart.y - trunc(relstart.y) - ymoves = trunc(Int, relstart.y) - trunc(Int, relstop.y) + yoffset = relstart.y - floor(relstart.y) + ymoves = floor(Int, relstart.y) - floor(Int, relstop.y) end manhattan_distance = xmoves + ymoves # Int starting points for the line. +1 converts to julia indexing - j, i = trunc(Int, relstart.x) + 1, trunc(Int, relstart.y) + 1 # Int + j, i = floor(Int, relstart.x) + 1, floor(Int, relstart.y) + 1 # Int n_on_line = 0 diff --git a/test/extract.jl b/test/extract.jl index cc6d08a02..349ec6594 100644 --- a/test/extract.jl +++ b/test/extract.jl @@ -211,12 +211,10 @@ end # Single LineString @test extract(rast, linestring) isa Vector{T} @test all(extract(rast_m, linestring) .=== T[ - (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) (geometry = (10.0, 0.2), test = missing) ]) @test all(extract(rast_m, linestring; skipmissing=true) .=== Tsm[ - (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) ]) @@ -227,11 +225,8 @@ end extract(rast_m, fc; skipmissing=true, threaded=true) == extract(rast_m, linestrings; skipmissing=true) == extract(rast_m, linestrings; skipmissing=true, threaded=true) == Tsm[ - (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) - (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) - (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) ] @@ -241,13 +236,10 @@ end extract(rast_m, fc; skipmissing=false, threaded=true) .=== extract(rast_m, linestrings; skipmissing=false) .=== extract(rast_m, linestrings; skipmissing=false, threaded=true) .=== T[ - (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) (geometry = (10.0, 0.2), test = missing) - (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) (geometry = (10.0, 0.2), test = missing) - (geometry = (9.0, 0.1), test = 1) (geometry = (10.0, 0.1), test = 3) (geometry = (10.0, 0.2), test = missing) ]) @@ -255,21 +247,22 @@ end @test extract(rast_m, linestrings; skipmissing=true, flatten=false) isa Vector{Vector{Tsm}} @test extract(rast_m, linestrings; skipmissing=true, flatten=false) == extract(rast_m, linestrings; skipmissing=true, flatten=false, threaded=true) == Vector{Tsm}[ - [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], - [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], - [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3)], + [(geometry = (10.0, 0.1), test = 3)], + [(geometry = (10.0, 0.1), test = 3)], + [(geometry = (10.0, 0.1), test = 3)], ] # Nested Vector holding missing needs special handling to check equality @test extract(rast_m, linestrings; skipmissing=false, flatten=false) isa Vector{Vector{T}} ref = Vector{T}[ - [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)] , - [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], - [(geometry = (9.0, 0.1), test = 1), (geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], + [(geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)] , + [(geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], + [(geometry = (10.0, 0.1), test = 3), (geometry = (10.0, 0.2), test = missing)], ] matching(a, b) = all(map(===, a, b)) @test all(map(matching, extract(rast_m, linestrings; skipmissing=false, flatten=false, threaded=false), ref)) @test all(map(matching, extract(rast_m, linestrings; skipmissing=false, flatten=false, threaded=true), ref)) + end @testset "Extract a line" begin diff --git a/test/methods.jl b/test/methods.jl index 864719a2f..8722a1701 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -364,6 +364,7 @@ end @test !zonal(x -> x isa Raster, rast; of=polygon, skipmissing=true) @test zonal(x -> x isa Raster, rast; of=polygon, skipmissing=false) end + end @testset "zonal return missing" begin From 7574a8f14802f39b8afe43ddbb54844e90836994 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 23 Apr 2025 10:47:33 +0200 Subject: [PATCH 02/43] bump patch version to 0.14.4 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 392bf01df..210753fee 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Rasters" uuid = "a3a2b9e3-a471-40c9-b274-f788e487c689" authors = ["Rafael Schouten "] -version = "0.14.3" +version = "0.14.4" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From f1c4b75d70ab5401648a4876eeadc2b0ae7d022b Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Thu, 24 Apr 2025 11:55:22 +0300 Subject: [PATCH 03/43] use the correct `@example` label --- docs/src/tutorials/spatial_mean.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorials/spatial_mean.md b/docs/src/tutorials/spatial_mean.md index a52fa3cbf..c3cc51606 100644 --- a/docs/src/tutorials/spatial_mean.md +++ b/docs/src/tutorials/spatial_mean.md @@ -147,7 +147,7 @@ As a next step, we would like to know how precipitation will change in Chile unt To start, we define a simple function that takes an SSP (socioeconomic scenario) and a GCM (climate model) as input, and return the appropriate climate data. -````@example zonal +````@example cellarea using Dates getfutureprec(ssp, gcm) = Raster(WorldClim{Future{Climate, CMIP6, gcm, ssp}}, :prec, date = Date(2090)) ```` From a99c80070e0d59ddf3ffe1c0366d0d38b144de18 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 3 May 2025 19:59:50 +0200 Subject: [PATCH 04/43] start cdm read overhaul --- Project.toml | 4 +- src/geometry_lookup/io.jl | 174 ++++++++++++--------------------- src/geometry_lookup/nc_io.jl | 29 +++++- src/sources/commondatamodel.jl | 167 ++++++++++++++++++++----------- src/stack.jl | 8 +- 5 files changed, 203 insertions(+), 179 deletions(-) diff --git a/Project.toml b/Project.toml index d25c8f06c..9cdf74d0e 100644 --- a/Project.toml +++ b/Project.toml @@ -71,8 +71,8 @@ GeoDataFrames = "0.3" GeoFormatTypes = "0.4" GeoInterface = "1.0" GeometryBasics = "0.4" -GeometryOps = "0.1.19" -GeometryOpsCore = "0.1.1" +GeometryOps = "0.1" +GeometryOpsCore = "0.1" Makie = "0.20, 0.21, 0.22" Missings = "0.4, 1" NCDatasets = "0.13, 0.14" diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index 4986f99db..0a0accf5f 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -88,7 +88,6 @@ function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geo current_ring_index = 1 for (i, geom) in enumerate(geoms) - this_geom_npoints = GI.npoint(geom) # Bear in mind, that the last point (which == first point) # of the linear ring is removed when encoding, so not included @@ -153,48 +152,9 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension{<: GeometryLookup}) end =# - -function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, ds, geometry_container_attribs; crs = nothing) - # First of all, we assert certain things about the geometry container and what it has. - @assert haskey(ds, geometry_container_attribs["node_count"]) - node_count_var = ds[geometry_container_attribs["node_count"]] - # only(CDM.dimnames(node_count_var)) != u_dim_name && throw(ArgumentError("node_count variable $u_dim_name does not match the unknown dimension $u_dim_name")) - - # Load and create all the data we need. - node_count = collect(node_count_var) - node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) - - # We can take a fast path for polygons, if we know that there are no multipart polygons. - if !haskey(geometry_container_attribs, "part_node_count") - node_count_stops = cumsum(node_count) - node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] - return map(node_count_starts, node_count_stops) do start, stop - GI.Polygon([GI.LinearRing(node_coordinates[start:stop]; crs)]; crs) - end - end - - - part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) - interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) - - # First, we assemble all the rings. That's the slightly complex part. - # After rings are assembled, we assemble the polygons and multipolygons from the rings. - - # Initialize variables for ring assembly - start = 1 - stop = part_node_count[1] - rings = [node_coordinates[start:stop]] - push!(rings[end], node_coordinates[start]) - - # Assemble all rings - for i in 2:length(part_node_count) - start = stop + 1 - stop = start + part_node_count[i] - 1 - push!(rings, node_coordinates[start:stop]) - # Ensure rings are closed by adding the first point at the end - push!(rings[end], node_coordinates[start]) - end - +function _geometry_cf_decode(::GI.MultiPolygonTrait, ds, geometry; crs=nothing) + rings = _split_inner_geoms(geometry) + interior_ring = geometry[:interior_ring] # Now, we proceed to assemble the polygons and multipolygons from the rings. # TODO: no better way to get the tuple type, at least for now. _lr = GI.LinearRing([(0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0)]; crs) @@ -206,12 +166,10 @@ function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, ds, for (geom_idx, total_nodes) in enumerate(node_count) # Find all rings that belong to this polygon polygon_rings = Tuple{typeof(_lr), Int8}[] - accumulated_nodes = 0 - while current_ring <= length(part_node_count) && accumulated_nodes < total_nodes + while current_ring <= length(part_node_count) ring = rings[current_ring] push!(polygon_rings, (GI.LinearRing(ring; crs), interior_ring[current_ring])) - accumulated_nodes += part_node_count[current_ring] current_ring += 1 end @@ -241,45 +199,13 @@ function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, ds, # Create multipolygon from all polygons geoms[geom_idx] = GI.MultiPolygon(polygons; crs) end - return geoms - end +function _geometry_cf_decode(::GI.MultiLineStringTrait, ds, geometry_container_attribs; crs=nothing) + node_count = geometry[:node_count] + lines = _split_inner_geoms(geometry) - - -function _geometry_cf_decode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, ds, geometry_container_attribs; crs = nothing) - @assert haskey(ds, geometry_container_attribs["node_count"]) - node_count_var = ds[geometry_container_attribs["node_count"]] - - # Load and create all the data we need. - node_count = collect(node_count_var) - node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) - - # we can use a fast path for lines, if we know that there are no multipart lines. - if !haskey(geometry_container_attribs, "part_node_count") - node_count_stops = cumsum(node_count) - node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] - return GI.LineString.(getindex.((node_coordinates,), (:).(node_count_starts, node_count_stops)); crs) - end - - # otherwise, we need to decode the multipart lines. - part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) - - # Initialize variables for line assembly - start = 1 - stop = part_node_count[1] - lines = [node_coordinates[start:stop]] - - # Assemble all lines - for i in 2:length(part_node_count) - start = stop + 1 - stop = start + part_node_count[i] - 1 - push!(lines, node_coordinates[start:stop]) - end - - # Now assemble the multilinestrings - _ls = GI.LineString(node_coordinates[1:2]; crs) + _ls = GI.LineString(lines[1]; crs) _mls = GI.MultiLineString([_ls]; crs) geoms = Vector{typeof(_mls)}(undef, length(node_count)) @@ -287,16 +213,11 @@ function _geometry_cf_decode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait current_line = 1 for (geom_idx, total_nodes) in enumerate(node_count) # Find all lines that belong to this multilinestring - multilinestring_lines = typeof(_ls)[] - accumulated_nodes = 0 - - while current_line <= length(part_node_count) && accumulated_nodes < total_nodes - line = lines[current_line] - push!(multilinestring_lines, GI.LineString(line; crs)) - accumulated_nodes += part_node_count[current_line] + multilinestring_lines = Vector{typeof(_ls)}(undef, total_nodes) + for i in 1:total_nodes + multilinestring_lines[i] = GI.LineString(lines[current_line]; crs) current_line += 1 end - # Create multilinestring from all lines geoms[geom_idx] = GI.MultiLineString(multilinestring_lines; crs) end @@ -304,31 +225,56 @@ function _geometry_cf_decode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait return geoms end -function _geometry_cf_decode(::Union{GI.PointTrait, GI.MultiPointTrait}, ds, geometry_container_attribs; crs = nothing) - - node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) - # We can take a fast path for points, if we know that there are no multipoints - if haskey(geometry_container_attribs, "node_count") - @assert haskey(ds, geometry_container_attribs["node_count"]) - node_count_var = ds[geometry_container_attribs["node_count"]] - node_count = collect(node_count_var) - # The code below could be a fast path, but we don't want - # to arbitrarily change the output type of the decoder. - # MultiPoints should always roundtrip and write as multipoints. - # if !all(==(1), node_count) - # do nothing - # else - # return a fast path - # end - # we have multipoints - node_count_stops = cumsum(node_count) - node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] - return map(node_count_starts, node_count_stops) do start, stop - GI.MultiPoint(node_coordinates[start:stop]; crs) - end +function _geometry_cf_decode(::GI.PointTrait, ds, geometry; crs=nothing) + # Just wrap raw coordinates as Points + return GI.Point.(geometry[:node_coordinates]; crs) +end +function _geometry_cf_decode(::GI.LineStringTrait, ds, geometry_container_attribs; crs=nothing) + node_count, node_coordinates = geometry[:node_count], geometry[:node_coordinates] + # Split coordinates to separate LineStrings by node count ranges + return map(_node_ranges(node_count)) do range + GI.LineString(node_coordinates[range]; crs) + end +end +function _geometry_cf_decode(::GI.MultiPointTrait, ds, geometry; crs = nothing) + node_count, node_coordinates = geometry[:node_count], geometry[:node_coordinates] + # Split coordinates to separate MultiPoint by node count ranges + return map(_node_ranges(node_count)) do range + GI.MultiPoint(node_coordinates[range]; crs) + end +end +function _geometry_cf_decode(::GI.PolygonTrait, ds, geometry; crs=nothing) + node_count, node_coordinates = geometry[:node_count], geometry[:node_coordinates] + # Split coordinates to separate single-ring Polygons by node count ranges + return map(_node_ranges(node_count)) do range + GI.Polygon([GI.LinearRing(node_coordinates[range]; crs)]; crs) end +end - # finally, if we have no node count, or all node counts are 1, we just return the points - return GI.Point.(node_coordinates; crs) +function _node_ranges(node_count) + ranges = Vector{UnitRange{Int}}(undef, length(node_count)) + for i in eachindex(ranges) + ranges[i] = i == 1 ? (1:node_count[i]) : (node_count[i-1]:node_count[i]) + end + return ranges +end + +function _split_inner_geoms(geometry) + part_node_count = geometry[:part_node_count] + node_coordinates = geometry[:node_coordinates] + # Initialize variables for ring assembly + start = 1 + stop = part_node_count[1] + rings = [node_coordinates[start:stop]] + push!(rings[end], node_coordinates[start]) + # Assemble all rings + for i in 2:length(part_node_count) + start = stop + 1 + stop = start + part_node_count[i] - 1 + push!(rings, node_coordinates[start:stop]) + # Ensure rings are closed by adding the first point at the end + push!(rings[end], node_coordinates[start]) + end + return rings end \ No newline at end of file diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl index d5ebc7c20..47484e7ac 100644 --- a/src/geometry_lookup/nc_io.jl +++ b/src/geometry_lookup/nc_io.jl @@ -7,10 +7,31 @@ using Test import Rasters import GeoInterface as GI -#= + +ds = NCDataset("C:\\Users\\rafael.schouten\\Downloads\\i.nc") var = ds["someData"] +keys(ds) +v = CDM.variable(ds, :lon) +CDM.dimnames(ds) +CDM.dimnames(v) +CDM.attribnames(var) +CDM.attrib(var, "coordinates") +map(CDM.keys(ds)) do v + var = CDM.variable(ds, v) + @show v + v => CDM.dimnames(var) + # all(CDM.dimnames(var)) do d + # @show d + # d in CDM.dimnames(ds) + # end ? v => :Alligned : v => :Unalligned +end + +geom = CDM.variable(ds, :geometry_container) +geom.attrib +coords = split(geom.attrib["coordinates"], ' ') +x = _read_geometry(ds, :geometry_container) |> pairs -knowndims = Rasters._dims(var) +knowndims = Rasters._dims(var) unknowndims_idxs = findall(Rasters.isnolookup ∘ Rasters.lookup, knowndims) @@ -47,6 +68,10 @@ throw(ArgumentError("We only support polygon geometry types at this time, got $g geoms = Rasters._geometry_cf_decode(GI.PolygonTrait(), ds, geometry_container_attribs) encoded = Rasters._geometry_cf_encode(GI.PolygonTrait(), geoms) +node_count = collect(nggode_count_var) +node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) +part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) +interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) @test encoded.node_coordinates_x == ds["x"] @test encoded.node_coordinates_y == ds["y"] diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 07fb842ed..2b42f6915 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -109,54 +109,81 @@ haslayers(::CDMsource) = true defaultcrs(::CDMsource) = EPSG(4326) defaultmappedcrs(::CDMsource) = EPSG(4326) -function _nondimnames(ds) - dimnames = CDM.dimnames(ds) - toremove = if "bnds" in dimnames - dimnames = setdiff(dimnames, ("bnds",)) - boundsnames = String[] - for k in dimnames - var = ds[k] - attr = CDM.attribs(var) - if haskey(attr, "bounds") - push!(boundsnames, attr["bounds"]) - end - end - union(dimnames, boundsnames)::Vector{String} - else - collect(dimnames)::Vector{String} - end - # Maybe this should be fixed in ZarrDatasets but it works with this patch. - nondim = collect(setdiff(keys(ds), toremove)) - return nondim -end - -function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) - nondim = _nondimnames(ds) - grid_mapping = String[] - vars = map(k -> CDM.variable(ds, k), nondim) +# Get the names of all data layers, removing +# coordinate, bounds, and geometry variables etc +function _layer_names(ds) + names = keys(ds) + return _layer_names(names, CDM.attribs.(CDM.variable.(ds, names))) +end +function _layer_names(var_names, attribs) + bounds_names = String[] + geometry_names = String[] + grid_mapping_names = String[] + coordinate_names = String[] + node_coordinate_names = String[] + for n, attr in zip(var_names, attribs) + # Coordinate variables + haskey(attr, "coordinates") && setdiff(coordinate_names, _cdm_coordinates(attr)) + haskey(attr, "node_coordinates") && setdiff(node_coordinate_names, _cdm_node_coordinates(attr)) + # Bounds variables + haskey(attr, "bounds") && push!(bounds_names, attr["bounds"]) + # Geometry variables + haskey(attr, "geometry") && push!(geometry_names, attr["geometry"]) + # Grid mapping variables + haskey(attr, "grid_mapping") && push!(grid_mapping_names, attr["grid_mapping"]) + end + return setdiff(varnames, bounds_names, geometry_names, grid_mapping_names, coordinate_names, node_coordinate_names) +end + +_cdm_coordinates(attr) = collect(split(attr["coordinates"], ' ')) +_cdm_node_coordinates(attr) = collect(split(attr["node_coordinates"], ' ')) + +function _classify_dataset(ds::AbstractDataset, names::NoKW=nokw, group::NoKW=nokw) + dim_names = CDM.dimnames(ds) + var_names = keys(ds) + vars = map(k -> CDM.variable(ds, k), var_names) attrs = map(CDM.attribs, vars) - for attr in attrs - if haskey(attr, "grid_mapping") - push!(grid_mapping, attr["grid_mapping"]) + layer_names = _layer_names(var_names, vars, attrs) + + internal_dim_names = String[] + output_dim_names = setdiff(dim_names, internal_dim_names) + + bounds_dict = Dict{String,Array}() + dim_dict = Dict{String,Dimension}() + geometry_dict = Dict{String,Dimension}() + output_layers_vec = Vector{CDM.AbstractVariable}(undef, length(layer_names)) + output_attrs_vec = Vector{CDM.AbstractVariable}(undef, length(layer_names)) + layer_dimensions_vec = Vector{Vector{String}}(undef, length(layer_names)) + for (i, name) in enumerate(layer_names) + v = findfirst(isequal(name), var_names) + output_layers_vec[i] = var = vars[v] + output_attrs_vec[i] = attr = attrs[v] + coordinates = _cdm_coordinates(attr) + layer_dimensions_vec[i] = dimensions = CDM.dimnames(var) + geometry_key = if haskey(attr, "geometry") + attr["geometry"] + end + grid_mapping_key = if haskey(attr, "grid_mapping") + attr["grid_mapping"] + end + for dimname in dimensions + get(dimdict, dimname) do + _cdmdim(ds, dimname, geometry_dict, grid_mapping_dict, geometry_key, grid_mapping_key) + end end end - bitinds = map(!in(grid_mapping), nondim) - (; - names=nondim[bitinds], - vars=vars[bitinds], - attrs=attrs[bitinds], - ) + layerdims_vec = _layerdims(ds; layers, dimdict) + (; layers, dims, layerdims_vec, layermetadata_vec) end -_layers(ds::AbstractDataset, names, ::NoKW) = - _layers(ds, collect(names), nokw) -function _layers(ds::AbstractDataset, names::Vector, ::NoKW) +function _classify_dataset(ds::AbstractDataset, names::Vector, group::NoKW) vars = map(n -> CDM.variable(ds, n), names) attrs = map(CDM.attribs, vars) (; names, vars, attrs) end -function _layers(ds::AbstractDataset, names, group) - _layers(ds.group[group], names, nokw) -end +_classify_dataset(ds::AbstractDataset, names, ::NoKW) = + _classify_dataset(ds, collect(names), nokw) +_classify_dataset(ds::AbstractDataset, names, group) = + _classify_dataset(ds.group[group], names, nokw) function _dims(var::AbstractVariable{<:Any,N}, crs=nokw, mappedcrs=nokw) where N dimnames = CDM.dimnames(var) @@ -167,13 +194,6 @@ end _metadata(var::AbstractVariable; attr=CDM.attribs(var)) = _metadatadict(sourcetrait(var), attr) -function _dimdict(ds::AbstractDataset, crs=nokw, mappedcrs=nokw) - dimdict = Dict{String,Dimension}() - for dimname in CDM.dimnames(ds) - dimdict[dimname] = _cdmdim(ds, dimname, crs, mappedcrs) - end - return dimdict -end function _dims(ds::AbstractDataset, dimdict::Dict) map(CDM.dimnames(ds)) do dimname dimdict[dimname] @@ -210,8 +230,7 @@ end # TODO don't load all keys here with _layers _name_or_firstname(ds::AbstractDataset, name) = Symbol(name) -function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=nokw) - names = _nondimnames(ds) +function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=_layer_names(ds)) if length(names) > 0 return Symbol(first(names)) else @@ -219,7 +238,15 @@ function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=nokw) end end -function _cdmdim(ds, dimname::Key, crs=nokw, mappedcrs=nokw) +function _cdmdim(ds, dimname::Key, grid_mapping_dict, geometry_dict, grid_mapping_name, geom_name) + get(geometry_dict, geom_name) do + _cdm_geometry(ds, geom_name) + end + crs = if !isnothing(grid_mapping_name) + get(grid_mapping_dict, grid_mapping_name) do + _cdm_grid_mapping(ds, grid_mapping_name) + end + end if haskey(ds, dimname) var = ds[dimname] D = _cdmdimtype(CDM.attribs(var), dimname) @@ -269,7 +296,7 @@ function _cdmdimtype(attrib, dimname) end # _cdmlookup -# Generate a `Lookup` from a nCDM dim. +# Generate a `Lookup` from a CDM dim. function _cdmlookup(ds::AbstractDataset, dimname, D::Type, crs, mappedcrs) var = ds[dimname] index = Missings.disallowmissing(var[:]) @@ -285,14 +312,13 @@ end # We need to include `Missing` in unions in case `_FillValue` is used # on coordinate variables in a file and propagates here. function _cdmlookup( - ds::AbstractDataset, var, attr, dimname, - D::Type, index::AbstractArray{<:Union{Missing,Number,Dates.AbstractTime}}, + ds::AbstractDataset, var, attr, dimname, D::Type, + index::AbstractArray{<:Union{Missing,Number,Dates.AbstractTime}}, metadata, crs, mappedcrs ) # Assume the locus is at the center of the cell if boundaries aren't provided. # http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries order = LA.orderof(index) - var = ds[dimname] # Detect lat/lon span, sampling = if eltype(index) <: Union{Missing,Dates.AbstractTime} _cdmperiod(index, metadata) @@ -303,17 +329,18 @@ function _cdmlookup( # This is important for things like rasterizatin and conversion # to gdal to be easy, and selectors are faster. # TODO are there any possible floating point errors from this? - if haskey(CDM.attribs(var), "bounds") + if haskey(attr, "bounds") span, sampling = if isregular(span) span, Intervals(Center()) else - boundskey = var.attrib["bounds"] + boundskey = attr["bounds"] boundsmatrix = Array(ds[boundskey]) locus = if mapreduce(==, &, view(boundsmatrix, 1, :), index) Start() elseif mapreduce(==, &, view(boundsmatrix, 2, :), index) End() else + # TODO better checks here Center() end Explicit(boundsmatrix), Intervals(locus) @@ -607,3 +634,31 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension) CDM.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib=attrib) return nothing end + +function _classify_variables(ds::AbstractDataset) +end + + +function _read_geometry(ds, key) + geom = CDM.variable(ds, key) + attrib = CDM.attribs(geom) + # Node counts in variable with name in "node_count" attribute + geometry_type = attrib["geometry_type"] + node_count_var = CDM.variable(ds, attrib["node_count"]) + part_node_count_var = CDM.variable(ds, attrib["part_node_count"]) + interior_ring_var = CDM.variable(ds, attrib["interior_ring"]) + grid_mapping_var = CDM.variable(ds, attrib["grid_mapping"]) + node_coordinate_names = split(attrib["node_coordinates"], ' ') + node_coordinate_vars = map(c -> CDM.variable(ds, c), node_coordinate_names) + + node_count = collect(node_count_var) + part_node_count = collect(part_node_count_var) + dimension = CDM.dimnames(node_count_var)[1] + internal_dimensions = union(CDM.dimnames(part_node_count_var), CDM.dimnames(interior_ring_var), CDM.dimnames.(node_coordinate_vars)...) + interior_ring = Bool.(collect(interior_ring_var)) + node_coordinates = map(node_coordinate_names, node_coordinate_vars) do c, var + c => collect(var) + end + grid_mapping = grid_mapping_var.attrib + (; dimension, internal_dimensions, geometry_type, node_count, part_node_count, interior_ring, node_coordinates, grid_mapping) +end \ No newline at end of file diff --git a/src/stack.jl b/src/stack.jl index d738a90c5..716b1d722 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -471,13 +471,11 @@ function RasterStack(ds; ) check_multilayer_dataset(ds) scaled, missingval = _raw_check(raw, scaled, missingval, verbose) - layers = _layers(ds, name, group) # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims` - dimdict = _dimdict(ds, crs, mappedcrs) + layers, dimsdict, layerdims_vec, layermetata_vec = _classify_dataset(ds, name, group) + dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims_vec) refdims = isnokw(refdims) || isnothing(refdims) ? () : refdims metadata = isnokw(metadata) ? _metadata(ds) : metadata - layerdims_vec = isnokw(layerdims) ? _layerdims(ds; layers, dimdict) : layerdims - dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims_vec) layermetadata_vec = if isnokw(layermetadata) _layermetadata(ds; layers) else @@ -500,7 +498,7 @@ function RasterStack(ds; else map(layers.vars, layermetadata_vec, mod_vec) do var, md, mod modvar = _maybe_modify(var, mod) - checkmem && _checkobjmem(modvar) + yer checkmem && _checkobjmem(modvar) Array(modvar) end |> NT end From 6ac1b339886d5dab6095790a21b9fb9116d75059 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 3 May 2025 14:04:35 -0400 Subject: [PATCH 05/43] Fix multilinestring load --- src/geometry_lookup/io.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index 0a0accf5f..e8a925b70 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -153,7 +153,7 @@ end =# function _geometry_cf_decode(::GI.MultiPolygonTrait, ds, geometry; crs=nothing) - rings = _split_inner_geoms(geometry) + rings = _split_inner_geoms(geometry; autoclose = true) interior_ring = geometry[:interior_ring] # Now, we proceed to assemble the polygons and multipolygons from the rings. # TODO: no better way to get the tuple type, at least for now. @@ -203,7 +203,7 @@ function _geometry_cf_decode(::GI.MultiPolygonTrait, ds, geometry; crs=nothing) end function _geometry_cf_decode(::GI.MultiLineStringTrait, ds, geometry_container_attribs; crs=nothing) node_count = geometry[:node_count] - lines = _split_inner_geoms(geometry) + lines = _split_inner_geoms(geometry; autoclose = false) _ls = GI.LineString(lines[1]; crs) _mls = GI.MultiLineString([_ls]; crs) @@ -259,7 +259,7 @@ function _node_ranges(node_count) return ranges end -function _split_inner_geoms(geometry) +function _split_inner_geoms(geometry; autoclose = false) part_node_count = geometry[:part_node_count] node_coordinates = geometry[:node_coordinates] # Initialize variables for ring assembly @@ -274,7 +274,7 @@ function _split_inner_geoms(geometry) stop = start + part_node_count[i] - 1 push!(rings, node_coordinates[start:stop]) # Ensure rings are closed by adding the first point at the end - push!(rings[end], node_coordinates[start]) + autoclose && push!(rings[end], node_coordinates[start]) end return rings end \ No newline at end of file From 87944b0aee81f771bfe1771834ad57defc5f6dbe Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 3 May 2025 15:20:14 -0400 Subject: [PATCH 06/43] Fix everything for CDM geom lookup + add tests --- src/geometry_lookup/io.jl | 31 ++++--- src/geometry_lookup/nc_io.jl | 147 ++++++++++++++++++++++----------- src/sources/commondatamodel.jl | 109 ++++++++++++++++++++---- test/geometry_lookup.jl | 101 ++++++++++++++++++++++ 4 files changed, 312 insertions(+), 76 deletions(-) diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index e8a925b70..bad711637 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -154,23 +154,28 @@ end function _geometry_cf_decode(::GI.MultiPolygonTrait, ds, geometry; crs=nothing) rings = _split_inner_geoms(geometry; autoclose = true) + node_count = geometry[:node_count] + part_node_count = geometry[:part_node_count] interior_ring = geometry[:interior_ring] # Now, we proceed to assemble the polygons and multipolygons from the rings. # TODO: no better way to get the tuple type, at least for now. - _lr = GI.LinearRing([(0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0)]; crs) + _lr = GI.LinearRing(first(rings); crs) _p = GI.Polygon([_lr]; crs) _mp = GI.MultiPolygon([_p]; crs) geoms = Vector{typeof(_mp)}(undef, length(node_count)) + # Assemble multipolygons current_ring = 1 for (geom_idx, total_nodes) in enumerate(node_count) # Find all rings that belong to this polygon - polygon_rings = Tuple{typeof(_lr), Int8}[] + polygon_rings = Tuple{typeof(_lr), Int}[] - while current_ring <= length(part_node_count) + n_points_added = 0 + while current_ring <= length(rings) && n_points_added < total_nodes ring = rings[current_ring] push!(polygon_rings, (GI.LinearRing(ring; crs), interior_ring[current_ring])) current_ring += 1 + n_points_added += length(ring) end # Create polygons from rings @@ -195,13 +200,13 @@ function _geometry_cf_decode(::GI.MultiPolygonTrait, ds, geometry; crs=nothing) if !isnothing(current_exterior) push!(polygons, GI.Polygon([current_exterior, current_holes...]; crs)) end - + @infiltrate # Create multipolygon from all polygons geoms[geom_idx] = GI.MultiPolygon(polygons; crs) end return geoms end -function _geometry_cf_decode(::GI.MultiLineStringTrait, ds, geometry_container_attribs; crs=nothing) +function _geometry_cf_decode(::GI.MultiLineStringTrait, ds, geometry; crs=nothing) node_count = geometry[:node_count] lines = _split_inner_geoms(geometry; autoclose = false) @@ -213,10 +218,13 @@ function _geometry_cf_decode(::GI.MultiLineStringTrait, ds, geometry_container_a current_line = 1 for (geom_idx, total_nodes) in enumerate(node_count) # Find all lines that belong to this multilinestring - multilinestring_lines = Vector{typeof(_ls)}(undef, total_nodes) - for i in 1:total_nodes - multilinestring_lines[i] = GI.LineString(lines[current_line]; crs) + multilinestring_lines = typeof(_ls)[] + nodes_added = 0 + while nodes_added < total_nodes + line = lines[current_line] + push!(multilinestring_lines, GI.LineString(line; crs)) current_line += 1 + nodes_added += length(line) end # Create multilinestring from all lines geoms[geom_idx] = GI.MultiLineString(multilinestring_lines; crs) @@ -229,7 +237,7 @@ function _geometry_cf_decode(::GI.PointTrait, ds, geometry; crs=nothing) # Just wrap raw coordinates as Points return GI.Point.(geometry[:node_coordinates]; crs) end -function _geometry_cf_decode(::GI.LineStringTrait, ds, geometry_container_attribs; crs=nothing) +function _geometry_cf_decode(::GI.LineStringTrait, ds, geometry; crs=nothing) node_count, node_coordinates = geometry[:node_count], geometry[:node_coordinates] # Split coordinates to separate LineStrings by node count ranges return map(_node_ranges(node_count)) do range @@ -252,9 +260,10 @@ function _geometry_cf_decode(::GI.PolygonTrait, ds, geometry; crs=nothing) end function _node_ranges(node_count) + cum_node_count = cumsum(node_count) ranges = Vector{UnitRange{Int}}(undef, length(node_count)) for i in eachindex(ranges) - ranges[i] = i == 1 ? (1:node_count[i]) : (node_count[i-1]:node_count[i]) + ranges[i] = i == 1 ? (1:node_count[i]) : ((cum_node_count[i-1]+1):cum_node_count[i]) end return ranges end @@ -266,7 +275,7 @@ function _split_inner_geoms(geometry; autoclose = false) start = 1 stop = part_node_count[1] rings = [node_coordinates[start:stop]] - push!(rings[end], node_coordinates[start]) + autoclose && push!(rings[end], node_coordinates[start]) # Assemble all rings for i in 2:length(part_node_count) diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl index 47484e7ac..beb392fc6 100644 --- a/src/geometry_lookup/nc_io.jl +++ b/src/geometry_lookup/nc_io.jl @@ -49,54 +49,7 @@ has_geometry = haskey(CDM.attribs(var), "geometry") if !has_geometry throw(ArgumentError("Variable $u_dim_name does not have a geometry attribute")) end -=# -# generate dataset -output_path = tempname() * ".nc" -testfile = "multipolygons.ncgen" -run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) -ds = NCDataset(output_path) -geometry_container_varname = CDM.attribs(var)["geometry"] -geometry_container_var = ds[geometry_container_varname] - -geometry_container_attribs = CDM.attribs(geometry_container_var) - -haskey(geometry_container_attribs, "geometry_type") && -geometry_container_attribs["geometry_type"] == "polygon" || -throw(ArgumentError("We only support polygon geometry types at this time, got $geometry_type")) - -geoms = Rasters._geometry_cf_decode(GI.PolygonTrait(), ds, geometry_container_attribs) - -encoded = Rasters._geometry_cf_encode(GI.PolygonTrait(), geoms) -node_count = collect(nggode_count_var) -node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) -part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) -interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) - -@test encoded.node_coordinates_x == ds["x"] -@test encoded.node_coordinates_y == ds["y"] -@test encoded.node_count == ds["node_count"] -@test encoded.part_node_count == ds["part_node_count"] -@test encoded.interior_ring == ds["interior_ring"] - -# lines now - -output_path = tempname() * ".nc" -testfile = "lines.ncgen" -run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) -ds = NCDataset(output_path) -var = ds["someData"] -geometry_container_varname = CDM.attribs(var)["geometry"] -geometry_container_var = ds[geometry_container_varname] -geometry_container_attribs = CDM.attribs(geometry_container_var) -geoms = Rasters._geometry_cf_decode(GI.LineStringTrait(), ds, geometry_container_attribs) - -encoded = Rasters._geometry_cf_encode(GI.LineStringTrait(), geoms) - -@test encoded.node_coordinates_x == ds["x"] -@test encoded.node_coordinates_y == ds["y"] -@test encoded.node_count == ds["node_count"] -@test !hasproperty(encoded, :part_node_count) output_path = tempname() * ".nc" @@ -152,4 +105,102 @@ encoded = Rasters._geometry_cf_encode(GI.PointTrait(), geoms) @test encoded.node_coordinates_x == ds["x"] @test encoded.node_coordinates_y == ds["y"] -@test !hasproperty(encoded, :node_count) \ No newline at end of file +@test !hasproperty(encoded, :node_count) + + + + +function _compile_ncgen(f, testfilename; rasterspath = Base.pathof("Rasters")) + output_path = tempname() * ".nc" + run(`$(NCDatasets.NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(rasterspath)), "test", "data", testfilename))`) + ds = NCDatasets.NCDataset(output_path) + f(ds) + close(ds) + rm(output_path) + return +end + +function _compile_ncgen(testfilename; rasterspath = Base.pathof("Rasters")) + output_path = tempname() * ".nc" + run(`$(NCDatasets.NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(rasterspath)), "test", "data", testfilename))`) + ds = NCDatasets.NCDataset(output_path) + return ds +end + +import NCDatasets +import NCDatasets.CommonDataModel as CDM +import GeoInterface as GI, GeometryOps as GO +using GeoFormatTypes + +rasterspath = joinpath(dirname(dirname(dirname(@__FILE__))), "src", "Rasters.jl") + +_compile_ncgen("multipolygons.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test length(geoms) == 2 + @test GI.nring.(geoms) == [3, 1] + + encoded = _geometry_cf_encode(GI.PolygonTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test encoded.node_count == ds["node_count"] + @test encoded.part_node_count == ds["part_node_count"] + @test encoded.interior_ring == ds["interior_ring"] +end + +_compile_ncgen("lines.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test all(g -> GI.trait(g) isa GI.LineStringTrait, geoms) + @test length(geoms) == 2 + + encoded = _geometry_cf_encode(GI.LineStringTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test encoded.node_count == ds["node_count"] + @test !hasproperty(encoded, :part_node_count) +end + +_compile_ncgen("points.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test length(geoms) == 2 + + encoded = _geometry_cf_encode(GI.PointTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test !hasproperty(encoded, :node_count) +end + + +_compile_ncgen("multilines.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test all(g -> GI.trait(g) isa GI.MultiLineStringTrait, geoms) + @test length(geoms) == 2 + + encoded = _geometry_cf_encode(GI.MultiLineStringTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test encoded.node_count == ds["node_count"] + @test encoded.part_node_count == ds["part_node_count"] + @test length(encoded) == 4 +end + + +_compile_ncgen("multipoints.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test all(g -> GI.trait(g) isa GI.MultiPointTrait, geoms) + @test length(geoms) == 2 + + encoded = _geometry_cf_encode(GI.MultiPointTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test encoded.node_count == ds["node_count"] +end diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 2b42f6915..8e1fafee7 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -640,25 +640,100 @@ end function _read_geometry(ds, key) - geom = CDM.variable(ds, key) - attrib = CDM.attribs(geom) - # Node counts in variable with name in "node_count" attribute + var = CDM.variable(ds, key) + var_attrib = CDM.attribs(var) + geom_key = var_attrib["geometry"] + geom_var = CDM.variable(ds, geom_key) + attrib = CDM.attribs(geom_var) geometry_type = attrib["geometry_type"] - node_count_var = CDM.variable(ds, attrib["node_count"]) - part_node_count_var = CDM.variable(ds, attrib["part_node_count"]) - interior_ring_var = CDM.variable(ds, attrib["interior_ring"]) - grid_mapping_var = CDM.variable(ds, attrib["grid_mapping"]) + node_coordinate_names = split(attrib["node_coordinates"], ' ') node_coordinate_vars = map(c -> CDM.variable(ds, c), node_coordinate_names) - - node_count = collect(node_count_var) - part_node_count = collect(part_node_count_var) - dimension = CDM.dimnames(node_count_var)[1] - internal_dimensions = union(CDM.dimnames(part_node_count_var), CDM.dimnames(interior_ring_var), CDM.dimnames.(node_coordinate_vars)...) - interior_ring = Bool.(collect(interior_ring_var)) - node_coordinates = map(node_coordinate_names, node_coordinate_vars) do c, var - c => collect(var) + # Map the node coordinates to a vector of n-tuples. + node_coordinates = map(tuple, node_coordinate_vars...) + # There are three types of geometries: + # - point (can be point or multipoint depending on presence of node_count) + # - line (can be linestring or multilinestring depending on presence of part_node_count) + # - polygon (can be poly or multipoly depending on presence of part_node_count, multipoly also needs interior_ring) + # This is the meat of the function, that parses the geometry and returns a named tuple + trait, geom_parsing_args = if geometry_type == "point" + if haskey(attrib, "node_count") # actually multipoint + (GI.MultiPointTrait(), (; node_coordinates, node_count = CDM.variable(ds, attrib["node_count"]))) + else # regular points + (GI.PointTrait(), (; node_coordinates)) + end + elseif geometry_type == "line" + if haskey(attrib, "part_node_count") # actually multilinestring + (GI.MultiLineStringTrait(), (; + node_coordinates, + node_count = CDM.variable(ds, attrib["node_count"]), + part_node_count = CDM.variable(ds, attrib["part_node_count"]) + )) + else # regular linestring + (GI.LineStringTrait(), (; + node_coordinates, + node_count = CDM.variable(ds, attrib["node_count"]) + )) + end + elseif geometry_type == "polygon" + if haskey(attrib, "part_node_count") # actually multipoly + if haskey(attrib, "interior_ring") # this will usually be the case + println("hello") + (GI.MultiPolygonTrait(), (; + node_coordinates, + node_count = CDM.variable(ds, attrib["node_count"]), + part_node_count = CDM.variable(ds, attrib["part_node_count"]), + interior_ring = CDM.variable(ds, attrib["interior_ring"]) + )) + else # no interior ring, but part node count is present + # so, we need to assert that part_node_count has the same length as node_count + # in which case, interior_ring is always false by definition. + node_count = CDM.variable(ds, attrib["node_count"]) + part_node_count = CDM.variable(ds, attrib["part_node_count"]) + + if length(node_count) != length(part_node_count) + throw(ArgumentError(""" + When parsing geometry lookup for CDM variable $key, + the length of `node_count` ($(length(node_count))) and + `part_node_count` ($(length(part_node_count))) must be the same. + """)) + end + (GI.PolygonTrait(), (; node_coordinates, node_count)) + end + else # regular polygon + (GI.PolygonTrait(), (; node_coordinates, node_count = CDM.variable(ds, attrib["node_count"]))) + end + else + throw(ArgumentError(""" + When parsing geometry lookup for CDM variable $key, + the geometry type is $geometry_type, + but we could not figure out what that is! + Allowed geometry types are: `point`, `line`, `polygon`. + + If you need something else, please open a Github issue! + """)) + end + + crs = if haskey(attrib, "grid_mapping") + grid_mapping_var = CDM.variable(ds, attrib["grid_mapping"]) + grid_mapping_attrib = CDM.attribs(grid_mapping_var) + if haskey(grid_mapping_attrib, "crs_wkt") + WellKnownText2(GeoFormatTypes.CRS(), grid_mapping_attrib["crs_wkt"]) + elseif haskey(grid_mapping_attrib, "spatial_epsg") + EPSG(parse(Int, grid_mapping_attrib["spatial_epsg"])) + elseif haskey(grid_mapping_attrib, "proj4string") + ProjString(grid_mapping_attrib["proj4string"]) + else + nothing # unparseable - TODO get CFProjections.jl to do it. + end + else + nothing end - grid_mapping = grid_mapping_var.attrib - (; dimension, internal_dimensions, geometry_type, node_count, part_node_count, interior_ring, node_coordinates, grid_mapping) + + # return the geometry directly + # TODO: parse coordinate names, to get dimensions + # depends on what utils / luts we have elsewhere. + return _geometry_cf_decode(trait, ds, geom_parsing_args; crs) + + # (; dimension, internal_dimensions, geometry_type, node_count, part_node_count, interior_ring, node_coordinates, grid_mapping) end \ No newline at end of file diff --git a/test/geometry_lookup.jl b/test/geometry_lookup.jl index 9b4f33314..2122e422a 100644 --- a/test/geometry_lookup.jl +++ b/test/geometry_lookup.jl @@ -7,6 +7,8 @@ import Proj import GeometryOps as GO, GeoInterface as GI using Extents +import NCDatasets + @testset "construction" begin # fetch land polygons from Natural Earth country_polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry @@ -73,4 +75,103 @@ ras2d = Raster( @test_broken only(DimensionalData.dims2indices(results_regular, Geometry(Touches(usa_extent)))) == findfirst(==("USA"), country_polygons.ADM0_A3) end end +end + + +function _compile_ncgen(f, testfilename; rasterspath = Base.pathof("Rasters")) + output_path = tempname() * ".nc" + run(`$(NCDatasets.NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(rasterspath)), "test", "data", testfilename))`) + ds = NCDatasets.NCDataset(output_path) + f(ds) + close(ds) + rm(output_path) + return +end + +function _compile_ncgen(testfilename; rasterspath = Base.pathof("Rasters")) + output_path = tempname() * ".nc" + run(`$(NCDatasets.NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(rasterspath)), "test", "data", testfilename))`) + ds = NCDatasets.NCDataset(output_path) + return ds +end + +@testset "Geometry lookup IO internals" begin + @testset "Multipolygons" begin + _compile_ncgen("multipolygons.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test length(geoms) == 2 + @test GI.nring.(geoms) == [3, 1] + + encoded = _geometry_cf_encode(GI.PolygonTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test encoded.node_count == ds["node_count"] + @test encoded.part_node_count == ds["part_node_count"] + @test encoded.interior_ring == ds["interior_ring"] + end + end + + @testset "Lines" begin + _compile_ncgen("lines.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test all(g -> GI.trait(g) isa GI.LineStringTrait, geoms) + @test length(geoms) == 2 + + encoded = _geometry_cf_encode(GI.LineStringTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test encoded.node_count == ds["node_count"] + @test !hasproperty(encoded, :part_node_count) + end + end + + @testset "Points" begin + _compile_ncgen("points.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test length(geoms) == 2 + + encoded = _geometry_cf_encode(GI.PointTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test !hasproperty(encoded, :node_count) + end + end + + @testset "MultiLines" begin + _compile_ncgen("multilines.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test all(g -> GI.trait(g) isa GI.MultiLineStringTrait, geoms) + @test length(geoms) == 2 + + encoded = _geometry_cf_encode(GI.MultiLineStringTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test encoded.node_count == ds["node_count"] + @test encoded.part_node_count == ds["part_node_count"] + @test length(encoded) == 4 + end + end + + @testset "MultiPoints" begin + _compile_ncgen("multipoints.ncgen"; rasterspath) do ds + geoms = _read_geometry(ds, "someData") + + @test all(g -> GI.trait(g) isa GI.MultiPointTrait, geoms) + @test length(geoms) == 2 + + encoded = _geometry_cf_encode(GI.MultiPointTrait(), geoms) + + @test encoded.node_coordinates_x == ds["x"] + @test encoded.node_coordinates_y == ds["y"] + @test encoded.node_count == ds["node_count"] + end + end end \ No newline at end of file From 57b0a6430fbcb074e93d36e109d8437d6c0879c9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 4 May 2025 03:12:46 +0200 Subject: [PATCH 07/43] more cf read / write --- src/geometry_lookup/io.jl | 27 ++- src/sources/commondatamodel.jl | 413 +++++++++++++++++++-------------- 2 files changed, 257 insertions(+), 183 deletions(-) diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index bad711637..891940ebe 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -6,8 +6,9 @@ Encode functions will always return a named tuple with the standard function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) if all(x -> GI.trait(x) isa GI.PointTrait, geoms) return (; - node_coordinates_x = GI.x.(geoms), - node_coordinates_y = GI.y.(geoms), + geometry_type = "point", + x = GI.x.(geoms), + y = GI.y.(geoms), ) end # else: we have some multipolygons in here @@ -27,8 +28,9 @@ function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) foreach(identity, flattener) return (; - node_coordinates_x = flat_xs, - node_coordinates_y = flat_ys, + geometry_type = "line", + x = flat_xs, + y = flat_ys, node_count = GI.npoint.(geoms) ) end @@ -50,27 +52,29 @@ function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait # iterate over flattener to populate the arrays, # without allocating an extra array. foreach(identity, flattener) + attribs = Dict{String,Any}("geometry_type" => "linestring") # If all geoms are linestrings, we can take a fast path. if all(x -> GI.trait(x) isa GI.LineStringTrait, geoms) return (; - node_coordinates_x = flat_xs, - node_coordinates_y = flat_ys, + geometry_type = "line", + x = flat_xs, + y = flat_ys, node_count = GI.npoint.(geoms) ) end # Otherwise, we need to encode part_node_count for multilinestrings. return (; - node_coordinates_x = flat_xs, - node_coordinates_y = flat_ys, + geometry_type = "line", + x = flat_xs, + y = flat_ys, part_node_count = collect(GO.flatten(GI.npoint, GI.LineStringTrait, geoms)), node_count = GI.npoint.(geoms) ) end function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) - ngeoms = length(geoms) nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0, threaded = false) n_points_per_geom_vec = GI.npoint.(geoms) @@ -130,8 +134,9 @@ function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geo # node_coordinates_x and node_coordinates_y are the coordinates of the nodes of the rings. # but cf encodes them as a space separated string. That's the only difference. return (; - node_coordinates_x = xs, - node_coordinates_y = ys, + geometry_type = "polygon", + x = xs, + y = ys, node_count = node_count_vec, part_node_count = part_node_count_vec, interior_ring = interior_ring_vec diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 8e1fafee7..fb13b2012 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -2,8 +2,28 @@ const CDM = CommonDataModel const CDMallowedType = Union{Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float32,Float64,Char,String} +const CF_DEGREES_NORTH = ("degrees_north", "degree_north", "degree_N", "degrees_N", "degreeN", "degreesN") +const CF_DEGREES_EAST = ("degrees_east", "degree_east", "degree_E", "degrees_E", "degreeE", "degreesE") + const UNNAMED_FILE_KEY = "unnamed" +const CDM_AXIS_MAP = Dict( + "X" => X, + "Y" => Y, + "Z" => Z, + "T" => Ti, +) + +const CDM_STANDARD_NAME_MAP = Dict( + "longitude" => X, + "latitude" => Y, + "height" => Z, + "depth" => Z, + "time" => Ti, +) + +# These are mostly a hack for when +# standard_name or axis are missing const CDM_DIM_MAP = Dict( "lat" => Y, "latitude" => Y, @@ -21,20 +41,6 @@ const CDM_DIM_MAP = Dict( "band" => Band, ) -const CDM_AXIS_MAP = Dict( - "X" => X, - "Y" => Y, - "Z" => Z, - "T" => Ti, -) - -const CDM_STANDARD_NAME_MAP = Dict( - "longitude" => X, - "latitude" => Y, - "depth" => Z, - "time" => Ti, -) - # `Source`` from variables and datasets sourcetrait(var::CDM.CFVariable) = sourcetrait(var.var) # Dataset constructor from `Source` @@ -109,19 +115,67 @@ haslayers(::CDMsource) = true defaultcrs(::CDMsource) = EPSG(4326) defaultmappedcrs(::CDMsource) = EPSG(4326) +function _dims(var::AbstractVariable{<:Any,N}, crs=nokw, mappedcrs=nokw) where N + dimnames = CDM.dimnames(var) + ntuple(Val(N)) do i + _cdm_dim(CDM.dataset(var), dimnames[i], crs, mappedcrs) + end +end +_metadata(var::AbstractVariable; attr=CDM.attribs(var)) = + _metadatadict(sourcetrait(var), attr) + +function _dims(ds::AbstractDataset, dimdict::Dict) + map(CDM.dimnames(ds)) do dimname + dimdict[dimname] + end |> Tuple +end +_metadata(ds::AbstractDataset; attr=CDM.attribs(ds)) = + _metadatadict(sourcetrait(ds), attr) +function _layerdims(ds::AbstractDataset; layers, dimdict) + map(layers.vars) do var + map(CDM.dimnames(var)) do dimname + basedims(dimdict[dimname]) + end |> Tuple + end +end +function _layermetadata(ds::AbstractDataset; layers) + map(layers.attrs) do attr + md = _metadatadict(sourcetrait(ds), attr) + if haskey(attr, "grid_mapping") + if haskey(ds, attr["grid_mapping"]) + md["grid_mapping"] = Dict(CDM.attribs(ds[attr["grid_mapping"]])) + else + global_attrs = CDM.attribs(ds) + if haskey(global_attrs, attr["grid_mapping"]) + md["grid_mapping"] = global_attrs["grid_mapping"] + end + end + end + md + end +end + + +# Utils ######################################################################## + # Get the names of all data layers, removing # coordinate, bounds, and geometry variables etc function _layer_names(ds) names = keys(ds) - return _layer_names(names, CDM.attribs.(CDM.variable.(ds, names))) + return _layer_names(ds, names, Dict{String,Any}(), Dict{String,Any}()) end -function _layer_names(var_names, attribs) +function _layer_names(ds, var_names, vars_dict, attrs_dict) bounds_names = String[] geometry_names = String[] grid_mapping_names = String[] coordinate_names = String[] node_coordinate_names = String[] - for n, attr in zip(var_names, attribs) + # Sometimes coordinates are not included in var attributes + # So we check standard names to know vars are lookups and not data + standard_names = String[] + for n in zip(var_names) + get!(() -> CDM.variable(ds, n), vars_dict, n) + get!(() -> CDM.attribs(vars_dict[n]), attrs_dict, n) # Coordinate variables haskey(attr, "coordinates") && setdiff(coordinate_names, _cdm_coordinates(attr)) haskey(attr, "node_coordinates") && setdiff(node_coordinate_names, _cdm_node_coordinates(attr)) @@ -131,103 +185,115 @@ function _layer_names(var_names, attribs) haskey(attr, "geometry") && push!(geometry_names, attr["geometry"]) # Grid mapping variables haskey(attr, "grid_mapping") && push!(grid_mapping_names, attr["grid_mapping"]) + # Common Standard names - very likely coordinates + haskey(attr, "standard_name") && attr["standard_name"] in keys(CDM_STANDARD_NAME_MAP) && + push!(standard_names, n) end - return setdiff(varnames, bounds_names, geometry_names, grid_mapping_names, coordinate_names, node_coordinate_names) + dim_names = CDM.dimnames(ds) + # if all(n -> n in var_names, dim_names) + # end + return setdiff(varnames, bounds_names, geometry_names, grid_mapping_names, coordinate_names, node_coordinate_names, standard_names) end _cdm_coordinates(attr) = collect(split(attr["coordinates"], ' ')) _cdm_node_coordinates(attr) = collect(split(attr["node_coordinates"], ' ')) -function _classify_dataset(ds::AbstractDataset, names::NoKW=nokw, group::NoKW=nokw) - dim_names = CDM.dimnames(ds) +function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) var_names = keys(ds) - vars = map(k -> CDM.variable(ds, k), var_names) - attrs = map(CDM.attribs, vars) - layer_names = _layer_names(var_names, vars, attrs) + vars_dict = Dict{String,Any}() + attrs_dict = Dict{String,Any}() + layer_names = isnokw(names) ? _layer_names(ds, var_names, vars_dict, attrs_dict) : names - internal_dim_names = String[] - output_dim_names = setdiff(dim_names, internal_dim_names) - - bounds_dict = Dict{String,Array}() dim_dict = Dict{String,Dimension}() - geometry_dict = Dict{String,Dimension}() + geometry_dict = Dict{String,Any}() + grid_mapping_dict = Dict{String,Any}() output_layers_vec = Vector{CDM.AbstractVariable}(undef, length(layer_names)) - output_attrs_vec = Vector{CDM.AbstractVariable}(undef, length(layer_names)) - layer_dimensions_vec = Vector{Vector{String}}(undef, length(layer_names)) - for (i, name) in enumerate(layer_names) - v = findfirst(isequal(name), var_names) - output_layers_vec[i] = var = vars[v] - output_attrs_vec[i] = attr = attrs[v] - coordinates = _cdm_coordinates(attr) - layer_dimensions_vec[i] = dimensions = CDM.dimnames(var) + output_attrs_vec = Vector{CDM.Attributes}(undef, length(layer_names)) + + # Loop over the layers we want to load as rasters + # As we go, we add dimensions to dim_dict to be used in other layers + for (i, var_name) in enumerate(layer_names) + var = get(() -> CDM.variable(ds, var_name), vars_dict, var_name) + attr = get(() -> CDM.attribs(var), attrs_dict, var_name) + coord_names = _cdm_coordinates(attr) + layer_dimnames_vec[i] = dimnames = CDM.dimnames(var) + attr = attrs_dict[var_name] geometry_key = if haskey(attr, "geometry") attr["geometry"] end grid_mapping_key = if haskey(attr, "grid_mapping") attr["grid_mapping"] end - for dimname in dimensions - get(dimdict, dimname) do - _cdmdim(ds, dimname, geometry_dict, grid_mapping_dict, geometry_key, grid_mapping_key) - end + # Loop over the dimensions of this layer, adding missing dims to dim_dict + for dimname in dimnames + haskey(dim_dict, dimname) && continue + dim_dict[dimname] = _cdm_dim( + ds, vars_dict, attrs_dict, geometry_dict, grid_mapping_dict, geometry_key, grid_mapping_key, coord_names, dimname + ) end + output_layers_vec[i] = var + output_attrs_vec[i] = attr end - layerdims_vec = _layerdims(ds; layers, dimdict) + # TODO output (; layers, dims, layerdims_vec, layermetadata_vec) end -function _classify_dataset(ds::AbstractDataset, names::Vector, group::NoKW) - vars = map(n -> CDM.variable(ds, n), names) - attrs = map(CDM.attribs, vars) - (; names, vars, attrs) -end -_classify_dataset(ds::AbstractDataset, names, ::NoKW) = - _classify_dataset(ds, collect(names), nokw) -_classify_dataset(ds::AbstractDataset, names, group) = - _classify_dataset(ds.group[group], names, nokw) +_organise_dataset(ds::AbstractDataset, names, group) = + _organise_dataset(ds.group[group], names, nokw) -function _dims(var::AbstractVariable{<:Any,N}, crs=nokw, mappedcrs=nokw) where N - dimnames = CDM.dimnames(var) - ntuple(Val(N)) do i - _cdmdim(CDM.dataset(var), dimnames[i], crs, mappedcrs) - end -end -_metadata(var::AbstractVariable; attr=CDM.attribs(var)) = - _metadatadict(sourcetrait(var), attr) - -function _dims(ds::AbstractDataset, dimdict::Dict) - map(CDM.dimnames(ds)) do dimname - dimdict[dimname] - end |> Tuple -end -_metadata(ds::AbstractDataset; attr=CDM.attribs(ds)) = - _metadatadict(sourcetrait(ds), attr) -function _layerdims(ds::AbstractDataset; layers, dimdict) - map(layers.vars) do var - map(CDM.dimnames(var)) do dimname - basedims(dimdict[dimname]) - end |> Tuple +function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, grid_mapping_dict, geometry_key, grid_mapping_key, coord_names, dimname) + crs = if !isnothing(grid_mapping_key) + get(grid_mapping_dict, grid_mapping_key) do + _cdm_grid_mapping(ds, grid_mapping_key) + end end -end -function _layermetadata(ds::AbstractDataset; layers) - map(layers.attrs) do attr - md = _metadatadict(sourcetrait(ds), attr) - if haskey(attr, "grid_mapping") - if haskey(ds, attr["grid_mapping"]) - md["grid_mapping"] = Dict(CDM.attribs(ds[attr["grid_mapping"]])) - else - global_attrs = CDM.attribs(ds) - if haskey(global_attrs, attr["grid_mapping"]) - md["grid_mapping"] = global_attrs["grid_mapping"] - end - end + if haskey(ds, dimname) + # The dimension has a matching variable + dim_attrs = attrs_dict[dimname] + D = _cdm_dimtype(dim_attrs, dimname) + lookup = _cdm_lookup(ds, dimname, D, dim_attrs) + D(lookup) + else + if !isnothing(geometry_key) + geom = get(geometry_dict, geometry_key) do + _cdm_geometry(ds, geometry_key) + end + # Check that geometry node coordinates are for this dimension + is_geometry_dimension = all(_cdm_node_coordinates(geom)) do nc + dimname in CDM.dimnames(vars_dict[nc]) + end + if is_geometry_dimension + D = _cdm_dimtype(NoMetadata(), d) + lookup = _cdm_geometry_lookup(vars_dicts, attrs_dict, geom, dimname) + return D(lookup) + end + end + # No matching variable. Get all the coordinates for this dimension + dimension_coord_names = filter(coordnames) do coordname + dimnames = CDM.dimnames(vars_dict[coordname]) + length(dimnames) == 1 && only(dimnames) == dimname + end + # If there are coordinates for this dimension + if !isempty(dimension_coord_names) + # TODO rotations + dims = map(dimension_coord_names) do d + dim_attrs = attrs_dict[d] + D = _cdm_dimtype(dim_attrs, d) + lookup = _cdm_lookup(ds, d, D, dim_attrs) + D(lookup) + end + D = _cdm_dimtype(NoMetadata(), dimname) + # Use a MergedLookup with all dimensions + mergedims(dims, dims => D) + else + # The var doesn't exist. Maybe its `complex` or some other marker, + # so make it a custom `Dim` with `NoLookup` + lookup = NoLookup(Base.OneTo(len)) + D = _cdm_dimtype(NoMetadata(), dimname) + D(lookup) end - md end end - -# Utils ######################################################################## - # TODO don't load all keys here with _layers _name_or_firstname(ds::AbstractDataset, name) = Symbol(name) function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=_layer_names(ds)) @@ -238,51 +304,18 @@ function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=_laye end end -function _cdmdim(ds, dimname::Key, grid_mapping_dict, geometry_dict, grid_mapping_name, geom_name) - get(geometry_dict, geom_name) do - _cdm_geometry(ds, geom_name) - end - crs = if !isnothing(grid_mapping_name) - get(grid_mapping_dict, grid_mapping_name) do - _cdm_grid_mapping(ds, grid_mapping_name) - end - end - if haskey(ds, dimname) - var = ds[dimname] - D = _cdmdimtype(CDM.attribs(var), dimname) - lookup = _cdmlookup(ds, dimname, D, crs, mappedcrs) - return D(lookup) - else - # The var doesn't exist. Maybe its `complex` or some other marker, - # so make it a custom `Dim` with `NoLookup` - len = _cdmfinddimlen(ds, dimname) - len === nothing && _unuseddimerror(dimname) - lookup = NoLookup(Base.OneTo(len)) - D = _cdmdimtype(NoMetadata(), dimname) - return D(lookup) - end -end - -function _cdmfinddimlen(ds, dimname) - for name in keys(ds) - var = ds[name] - dimnames = CDM.dimnames(var) - if dimname in dimnames - return size(var)[findfirst(==(dimname), dimnames)] - end - end - return nothing -end - # Find the matching dimension constructor. If its an unknown name # use the generic Dim with the dim name as type parameter -function _cdmdimtype(attrib, dimname) +function _cdm_dimtype(attrib, dimname) + # First use the axis - X/Y/Z/T as the + # closest match to DimensionalData standard dimension names if haskey(attrib, "axis") k = attrib["axis"] if haskey(CDM_AXIS_MAP, k) return CDM_AXIS_MAP[k] end end + # Then use standard_name - latitude/longitude/height/time etc if haskey(attrib, "standard_name") k = attrib["standard_name"] if haskey(CDM_STANDARD_NAME_MAP, k) @@ -295,35 +328,33 @@ function _cdmdimtype(attrib, dimname) return DD.basetypeof(DD.name2dim(Symbol(dimname))) end -# _cdmlookup +# _cdm_lookup # Generate a `Lookup` from a CDM dim. -function _cdmlookup(ds::AbstractDataset, dimname, D::Type, crs, mappedcrs) +function _cdm_lookup(ds::AbstractDataset, attr, dimname, D::Type, attr) var = ds[dimname] - index = Missings.disallowmissing(var[:]) - attr = CDM.attribs(var) + data = Missings.disallowmissing(var[:]) metadata = _metadatadict(sourcetrait(ds), attr) - return _cdmlookup(ds, var, attr, dimname, D, index, metadata, crs, mappedcrs) + return _cdm_lookup(data, ds, var, attr, dimname, D, metadata) end # For unknown types we just make a Categorical lookup -function _cdmlookup(ds::AbstractDataset, var, attr, dimname, D::Type, index::AbstractArray, metadata, crs, mappedcrs) - Categorical(index; order=Unordered(), metadata=metadata) +function _cdm_lookup(data::AbstractArray, ds::AbstractDataset, var, attr, dimname, D::Type, metadata) + Categorical(data; order=Unordered(), metadata=metadata) end # For Number and AbstractTime we generate order/span/sampling # We need to include `Missing` in unions in case `_FillValue` is used # on coordinate variables in a file and propagates here. -function _cdmlookup( - ds::AbstractDataset, var, attr, dimname, D::Type, - index::AbstractArray{<:Union{Missing,Number,Dates.AbstractTime}}, - metadata, crs, mappedcrs +function _cdm_lookup( + data::AbstractArray{<:Union{Missing,Number,Dates.AbstractTime}}, + ds::AbstractDataset, var, attr, dimname, D::Type, metadata, ) # Assume the locus is at the center of the cell if boundaries aren't provided. # http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries order = LA.orderof(index) # Detect lat/lon span, sampling = if eltype(index) <: Union{Missing,Dates.AbstractTime} - _cdmperiod(index, metadata) + _cdm_period(index, metadata) else - _cdmspan(index, order) + _cdm_span(index, order) end # We only use Explicit if the span is not Regular # This is important for things like rasterizatin and conversion @@ -346,41 +377,35 @@ function _cdmlookup( Explicit(boundsmatrix), Intervals(locus) end end - - # We cant yet check CF standards crs, but we can at least check for units in lat/lon - if isnokw(mappedcrs) && get(metadata, "units", "") in ("degrees_north", "degrees_east") - mappedcrs = EPSG(4326) - end - # Additionally, crs and mappedcrs should be identical for Regular lookups - if isnokw(crs) && span isa Regular - crs = mappedcrs - end - return _cdmlookup(D, index, order, span, sampling, metadata, crs, mappedcrs) + return _cdm_lookup(data, D, order, span, sampling, metadata) end # For X and Y use a Mapped <: AbstractSampled lookup -function _cdmlookup( - D::Type{<:Union{<:XDim,<:YDim}}, index, order::Order, span, sampling, metadata, crs, mappedcrs +function _cdm_lookup( + data, D::Type{<:Union{<:XDim,<:YDim}}, order::Order, span, sampling, metadata ) - # If the index is regularly spaced and there is no crs - # then there is probably just one crs - the mappedcrs - crs = if isnokw(crs) && span isa Regular + # If units are degrees north/east, use EPSG:4326 as the mapped crs + units = get(metadata, "units", "") + mappedcrs = if isnokw(mappedcrs) && (units in CF_DEGREES_NORTH || units in CF_DEGREES_EAST) + EPSG(4326) + end + # Additionally, crs and mappedcrs should be identical for Regular lookups + crs = if span isa Regular mappedcrs - else - crs end dim = DD.basetypeof(D)() - return Mapped(index; order, span, sampling, metadata, crs, mappedcrs, dim) + return Mapped(data; order, span, sampling, metadata, crs, mappedcrs, dim) end # Band dims have a Categorical lookup, with order -function _cdmlookup(D::Type{<:Band}, index, order::Order, span, sampling, metadata, crs, mappedcrs) - Categorical(index, order, metadata) +# This is not CF, just for consistency with GDAL +function _cdm_lookup(data, D::Type{<:Band}, order::Order, span, sampling, metadata) + Categorical(data, order, metadata) end -# Otherwise use a regular Sampled lookup -function _cdmlookup(D::Type, index, order::Order, span, sampling, metadata, crs, mappedcrs) - Sampled(index, order, span, sampling, metadata) +# Otherwise use a Sampled lookup +function _cdm_lookup(data, D::Type, order::Order, span, sampling, metadata) + Sampled(data, order, span, sampling, metadata) end -function _cdmspan(index, order) +function _cdm_span(index, order) # Handle a length 1 index length(index) == 1 && return Regular(zero(eltype(index))), Points() @@ -414,7 +439,7 @@ function _cdmspan(index, order) end # delta_t and ave_period are not CF standards, but CDC -function _cdmperiod(index, metadata::Metadata{<:CDMsource}) +function _cdm_period(index, metadata::Metadata{<:CDMsource}) if haskey(metadata, "delta_t") period = _parse_period(metadata["delta_t"]) period isa Nothing || return Regular(period), Points() @@ -472,9 +497,9 @@ _cdm_set_axis_attrib!(atr, dim::Z) = (atr["axis"] = "Z"; atr["standard_name"] = _cdm_set_axis_attrib!(atr, dim::Ti) = (atr["axis"] = "T"; atr["standard_name"] = "time") _cdm_set_axis_attrib!(atr, dim) = nothing -_cdmshiftlocus(dim::Dimension) = _cdmshiftlocus(lookup(dim), dim) -_cdmshiftlocus(::Lookup, dim::Dimension) = dim -function _cdmshiftlocus(lookup::AbstractSampled, dim::Dimension) +_cdm_shiftlocus(dim::Dimension) = _cdmshiftlocus(lookup(dim), dim) +_cdm_shiftlocus(::Lookup, dim::Dimension) = dim +function _cdm_shiftlocus(lookup::AbstractSampled, dim::Dimension) # TODO improve this if span(lookup) isa Regular && sampling(lookup) isa Intervals # We cant easily shift a DateTime value @@ -590,7 +615,7 @@ function writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T,N var = CDM.defVar(ds, key, eltype, dimnames; attrib, chunksizes, kw...) if write - m = _maybe_modify(var.var, mod) + m = _maybe_modify(parent(var), mod) # Write with a DiskArays.jl broadcast m .= A # Apply `f` while the variable is open @@ -614,10 +639,54 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension) dimname = lowercase(string(DD.name(dim))) haskey(ds.dim, dimname) && return nothing CDM.defDim(ds, dimname, length(dim)) - lookup(dim) isa NoLookup && return nothing - - # Shift index before conversion to Mapped - dim = _cdmshiftlocus(dim) + _def_lookup_var!(ds, dim, dimname) +end +_def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:AbstractNoLookup}, dimname) = nothing +function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:ArrayLookup}, dimname) + # TODO what do we call the geometry variable, what if more than 1 + return nothing +end +function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:GeometryLookup}, dimname) + geom = _geometry_cf_encode(parent(lookup(dim))) + # Define base geometry attributes + attrib = Dict{String,Any}( + "geometry_type" => geom.geometry_type, + "node_coordinates" => "x y", + ) + if haskey(geom, :node_count) + # More nodes than dimension length: define a node dimension + node_dimname = dimname * "_node" + CDM.defVar(ds, "node_count", geom.node_count, dimname) + CDM.defDim(ds, node_dimname, length(geom.x)) + attrib["node_count"] = dimname * "_node_count" + else + # Node count is dimension length + node_dimame = dimname + end + if haskey(geom, :part_node_count) + # We also have geometry parts + part_dimname = dimname * "part" + part_varname = dimname * "_part_node_count" + CDM.defDim(ds, part_dimname, length(geom.part_node_count)) + CDM.defVar(ds, part_varname, geom.part_node_count, part_dimname) + attrib["part_node_count"] = part_varname + end + if haskey(geom, :interior_ring) + # And interior rings + ring_varname = dimname * "_interior_ring" + CDM.defVar(ds, ring_varname, geom.interior_ring, part_dimname) + attrib["interior_ring"] = ring_varname + end + # Define coordinate variables + CDM.defVar(ds, "x", geom.x, node_dimame) + CDM.defVar(ds, "y", geom.y, node_dimame) + # And the geometry container + CDM.defVar(ds, "geometry_container", nothing, (), attrib) + return nothing +end +function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:Union{Sampled,AbstractCategorical}}, dimname) + # Shift lookup before conversion to Mapped + dim = _cdm_shiftlocus(dim) if dim isa Y || dim isa X dim = convertlookup(Mapped, dim) end @@ -631,7 +700,7 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension) push!(attrib, "bounds" => boundskey) CDM.defVar(ds, boundskey, bounds, ("bnds", dimname)) end - CDM.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib=attrib) + CDM.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib) return nothing end From 768685db9d0133796c5643910822c5605c9a5da3 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sun, 4 May 2025 07:09:19 -0400 Subject: [PATCH 08/43] namespace coord names in geometry lookup, but only if necessary --- src/sources/commondatamodel.jl | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index fb13b2012..129ca732c 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -647,11 +647,23 @@ function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:ArrayLookup}, di return nothing end function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:GeometryLookup}, dimname) - geom = _geometry_cf_encode(parent(lookup(dim))) + l = lookup(dim) + geom = _geometry_cf_encode(parent(l)) # Define base geometry attributes + + coord_names_raw = map(dims(l)) do d + string(DD.name(d)) + end + + coord_names = if any(n -> haskey(ds, n), coord_names_raw) + (dimname,) .* coord_names_raw + else + coord_names_raw + end + attrib = Dict{String,Any}( "geometry_type" => geom.geometry_type, - "node_coordinates" => "x y", + "node_coordinates" => join(coord_names, " "), ) if haskey(geom, :node_count) # More nodes than dimension length: define a node dimension @@ -678,8 +690,9 @@ function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:GeometryLookup}, attrib["interior_ring"] = ring_varname end # Define coordinate variables - CDM.defVar(ds, "x", geom.x, node_dimame) - CDM.defVar(ds, "y", geom.y, node_dimame) + CDM.defVar(ds, coord_names[1], geom.x, node_dimame) + CDM.defVar(ds, coord_names[2], geom.y, node_dimame) + # TODO: add z and m, if present # And the geometry container CDM.defVar(ds, "geometry_container", nothing, (), attrib) return nothing @@ -747,7 +760,6 @@ function _read_geometry(ds, key) elseif geometry_type == "polygon" if haskey(attrib, "part_node_count") # actually multipoly if haskey(attrib, "interior_ring") # this will usually be the case - println("hello") (GI.MultiPolygonTrait(), (; node_coordinates, node_count = CDM.variable(ds, attrib["node_count"]), From 0b7e7e6dfb3ef1c74c20c85c864fac316518d40d Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 4 May 2025 18:05:18 +0200 Subject: [PATCH 09/43] read geom working --- src/Rasters.jl | 7 - src/geometry_lookup/geometry_lookup.jl | 47 +--- src/geometry_lookup/io.jl | 100 +++---- src/geometry_lookup/lookups.jl | 80 +++--- src/sources/commondatamodel.jl | 356 ++++++++++++------------- src/stack.jl | 20 +- 6 files changed, 289 insertions(+), 321 deletions(-) diff --git a/src/Rasters.jl b/src/Rasters.jl index afd5db110..aaf8c65f1 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -1,12 +1,5 @@ module Rasters -# Use the README as the module docs -@doc let - # path = joinpath(dirname(@__DIR__), "README.md") - # include_dependency(path) - # read(path, String) -end Rasters - using Dates # Load first to fix StaticArrays invalidations diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index 762bf451c..54a2bbb0c 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -1,15 +1,18 @@ """ GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) - A lookup type for geometry dimensions in vector data cubes. -`GeometryLookup` provides efficient spatial indexing and lookup for geometries using an STRtree (Sort-Tile-Recursive tree). -It is used as the lookup type for geometry dimensions in vector data cubes, enabling fast spatial queries and operations. +`GeometryLookup` provides efficient spatial indexing and lookup for +geometries using an STRtree (Sort-Tile-Recursive tree). -It spans the dimensions given to it in `dims`, as well as the dimension it's wrapped in - you would construct a DimArray with a GeometryLookup -like `DimArray(data, Geometry(GeometryLookup(data, dims)))`. Here, `Geometry` is a dimension - but selectors in X and Y will also eventually work! +It is used as the lookup type for geometry dimensions in vector +data cubes, enabling fast spatial queries and operations. +It spans the dimensions given to it in `dims`, as well as the dimension + it's wrapped in - you would construct a DimArray with a GeometryLookup +like `DimArray(data, Geometry(GeometryLookup(data, dims)))`. +Here, `Geometry` is a dimension - but selectors in X and Y will also work! # Examples @@ -29,18 +32,15 @@ dv = rand(Geometry(polygon_lookup)) # select the polygon with the centroid of the 88th polygon dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] # true ``` - """ -struct GeometryLookup{T, A <: AbstractVector{T}, D, M <: GO.Manifold, Tree, CRS} <: DD.Dimensions.MultiDimensionalLookup{T} +struct GeometryLookup{T,A<:AbstractVector{T},D,M<:GO.Manifold,Tree,CRS} <: DD.Dimensions.MultiDimensionalLookup{T} manifold::M data::A tree::Tree dims::D crs::CRS end - function GeometryLookup(data, dims=(X(), Y()); geometrycolumn=nothing, crs=nokw, tree=nokw) - # First, retrieve the geometries - from a table, vector of geometries, etc. geometries = _get_geometries(data, geometrycolumn) geometries = Missings.disallowmissing(geometries) @@ -101,7 +101,8 @@ This is broadly standard except for the `rebuild` method, which is used to updat =# DD.dims(l::GeometryLookup) = l.dims -DD.dims(d::DD.Dimension{<: GeometryLookup}) = val(d).dims +# This has to return itself +# DD.dims(d::DD.Dimension{<:GeometryLookup}) = dims(val(d)) DD.order(::GeometryLookup) = Lookups.Unordered() DD.parent(lookup::GeometryLookup) = lookup.data # TODO: format for geometry lookup @@ -143,27 +144,7 @@ function DD.rebuild( crs end - new_manifold = if isnokw(manifold) - lookup.manifold - else - manifold - end - - GeometryLookup(new_manifold, Missings.disallowmissing(data), new_tree, dims, new_crs) -end - + new_manifold = isnokw(manifold) ? lookup.manifold : manifold -# total_area_of_intersection = 0.0 -# current_area_of_intersection = 0.0 -# last_point = nothing -# apply_with_signal(trait, geom) do subgeom, state -# if state == :start -# total_area_of_intersection += current_area_of_intersection -# current_area_of_intersection = 0.0 -# last_point = nothing -# elseif state == :continue -# # shoelace formula for this point -# elseif state == :end -# # finish off the shoelace formula -# end -# end \ No newline at end of file + return GeometryLookup(new_manifold, Missings.disallowmissing(data), new_tree, dims, new_crs) +end \ No newline at end of file diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index 891940ebe..7b9e115f0 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -1,9 +1,7 @@ -#= -# Geometry encoding and decoding from CF conventions - -Encode functions will always return a named tuple with the standard -=# -function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) +# Geometry encoding and decoding from CF conventions (7.5) +_cf_geometry_encode(geoms) = _cf_geometry_encode(GI.trait(first(geoms)), geoms) +_cf_geometry_encode(trait, geoms) = throw(ArgumentError("Geometry trait $trait not currently handled by Rasters")) +function _cf_geometry_encode(::Union{GI.PointTrait,GI.MultiPointTrait}, geoms) if all(x -> GI.trait(x) isa GI.PointTrait, geoms) return (; geometry_type = "point", @@ -34,9 +32,7 @@ function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) node_count = GI.npoint.(geoms) ) end - - -function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) +function _cf_geometry_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) # There is a fast path without encoding part_node_count if all geoms are linestrings. npoints = sum(GI.npoint, geoms) flat_xs = Vector{Float64}(undef, npoints) @@ -73,8 +69,7 @@ function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait node_count = GI.npoint.(geoms) ) end - -function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) +function _cf_geometry_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) ngeoms = length(geoms) nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0, threaded = false) n_points_per_geom_vec = GI.npoint.(geoms) @@ -107,7 +102,6 @@ function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geo ys[current_xy_index] = GI.y(point) current_xy_index += 1 end - # Main.@infiltrate part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring)-1 interior_ring_vec[current_ring_index] = 0 current_ring_index += 1 @@ -143,25 +137,24 @@ function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geo ) end -#= -function _def_dim_var!(ds::AbstractDataset, dim::Dimension{<: GeometryLookup}) - dimname = lowercase(string(DD.name(dim))) - haskey(ds.dim, dimname) && return nothing - CDM.defDim(ds, dimname, length(dim)) - lookup(dim) isa NoLookup && return nothing - attribdict = _attribdict(NoMetadata()) - - - CDM.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib=attrib) - +# CF standards Geometry decoding (7.5) +# `geometry` is the CF attributes dict from the variable linked to a 'geometry" attribute. +# `ds` is any CommonDataModel.AbstractDataset +function _cf_geometry_decode(ds::AbstractDataset, geometry; kw...) + geometry_type = geometry["geometry_type"] + trait = if geometry_type == "point" + haskey(geometry, "node_count") ? GI.MultiPointTrait() : GI.PointTrait() + elseif geometry_type == "line" + haskey(geometry, "part_node_count") ? GI.MultiLineStringTrait() : GI.LineStringTrait() + elseif geometry_type == "polygon" + haskey(geometry, "part_node_count") ? GI.MultiPolygonTrait() : GI.PolygonTrait() + end + return _cf_geometry_decode(trait, ds, geometry; kw...) end -=# - -function _geometry_cf_decode(::GI.MultiPolygonTrait, ds, geometry; crs=nothing) - rings = _split_inner_geoms(geometry; autoclose = true) - node_count = geometry[:node_count] - part_node_count = geometry[:part_node_count] - interior_ring = geometry[:interior_ring] +function _cf_geometry_decode(::GI.MultiPolygonTrait, ds, geometry; crs=nothing) + rings = _split_inner_geoms(ds, geometry; autoclose=true) + node_count = _cf_node_count(ds, geometry) + interior_ring = _cf_interior_ring(ds, geometry) # Now, we proceed to assemble the polygons and multipolygons from the rings. # TODO: no better way to get the tuple type, at least for now. _lr = GI.LinearRing(first(rings); crs) @@ -205,15 +198,14 @@ function _geometry_cf_decode(::GI.MultiPolygonTrait, ds, geometry; crs=nothing) if !isnothing(current_exterior) push!(polygons, GI.Polygon([current_exterior, current_holes...]; crs)) end - @infiltrate # Create multipolygon from all polygons geoms[geom_idx] = GI.MultiPolygon(polygons; crs) end return geoms end -function _geometry_cf_decode(::GI.MultiLineStringTrait, ds, geometry; crs=nothing) - node_count = geometry[:node_count] - lines = _split_inner_geoms(geometry; autoclose = false) +function _cf_geometry_decode(::GI.MultiLineStringTrait, ds, geometry; crs=nothing) + node_count = _cf_node_count(ds, geometry) + lines = _split_inner_geoms(ds, geometry; autoclose = false) _ls = GI.LineString(lines[1]; crs) _mls = GI.MultiLineString([_ls]; crs) @@ -237,27 +229,30 @@ function _geometry_cf_decode(::GI.MultiLineStringTrait, ds, geometry; crs=nothin return geoms end - -function _geometry_cf_decode(::GI.PointTrait, ds, geometry; crs=nothing) +function _cf_geometry_decode(::GI.PointTrait, ds, geometry; crs=nothing) + node_coordinates = _cf_node_coordinates(ds, geometry) # Just wrap raw coordinates as Points - return GI.Point.(geometry[:node_coordinates]; crs) + return GI.Point.(node_coordinates; crs) end -function _geometry_cf_decode(::GI.LineStringTrait, ds, geometry; crs=nothing) - node_count, node_coordinates = geometry[:node_count], geometry[:node_coordinates] +function _cf_geometry_decode(::GI.LineStringTrait, ds, geometry; crs=nothing) + node_count = _cf_node_count(ds, geometry) + node_coordinates = _cf_node_coordinates(ds, geometry) # Split coordinates to separate LineStrings by node count ranges return map(_node_ranges(node_count)) do range GI.LineString(node_coordinates[range]; crs) end end -function _geometry_cf_decode(::GI.MultiPointTrait, ds, geometry; crs = nothing) - node_count, node_coordinates = geometry[:node_count], geometry[:node_coordinates] +function _cf_geometry_decode(::GI.MultiPointTrait, ds, geometry; crs = nothing) + node_count = _cf_node_count(ds, geometry) + node_coordinates = _cf_node_coordinates(ds, geometry) # Split coordinates to separate MultiPoint by node count ranges return map(_node_ranges(node_count)) do range GI.MultiPoint(node_coordinates[range]; crs) end end -function _geometry_cf_decode(::GI.PolygonTrait, ds, geometry; crs=nothing) - node_count, node_coordinates = geometry[:node_count], geometry[:node_coordinates] +function _cf_geometry_decode(::GI.PolygonTrait, ds, geometry; crs=nothing) + node_count = _cf_node_count(ds, geometry) + node_coordinates = _cf_node_coordinates(ds, geometry) # Split coordinates to separate single-ring Polygons by node count ranges return map(_node_ranges(node_count)) do range GI.Polygon([GI.LinearRing(node_coordinates[range]; crs)]; crs) @@ -273,9 +268,9 @@ function _node_ranges(node_count) return ranges end -function _split_inner_geoms(geometry; autoclose = false) - part_node_count = geometry[:part_node_count] - node_coordinates = geometry[:node_coordinates] +function _split_inner_geoms(ds, geometry; autoclose=false) + part_node_count = _cf_part_node_count(ds, geometry) + node_coordinates = _cf_node_coordinates(ds, geometry) # Initialize variables for ring assembly start = 1 stop = part_node_count[1] @@ -291,4 +286,15 @@ function _split_inner_geoms(geometry; autoclose = false) autoclose && push!(rings[end], node_coordinates[start]) end return rings -end \ No newline at end of file +end + +function _cf_node_coordinates(ds, geometry) + coords = map(_cf_node_coordinate_names(geometry)) do coordname + collect(CDM.variable(ds, coordname)) + end + return collect(zip(coords...)) +end +_cf_node_coordinate_names(geometry) = split(geometry["node_coordinates"], ' ') +_cf_node_count(ds, geometry) = collect(CDM.variable(ds, geometry["node_count"])) +_cf_part_node_count(ds, geometry) = collect(CDM.variable(ds, geometry["part_node_count"])) +_cf_interior_ring(ds, geometry) = collect(CDM.variable(ds, geometry["interior_ring"])) \ No newline at end of file diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl index 3d63ea93e..885f49d8d 100644 --- a/src/geometry_lookup/lookups.jl +++ b/src/geometry_lookup/lookups.jl @@ -8,13 +8,15 @@ We need to define methods that take selectors and convert them to extents, then =# # Bounds - get the bounds of the lookup -Lookups.bounds(lookup::GeometryLookup) = if isempty(lookup.data) - Extents.Extent(NamedTuple{DD.name.(lookup.dims)}(ntuple(2) do i; (nothing, nothing); end)) -else - if isnothing(lookup.tree) - mapreduce(GI.extent, Extents.union, lookup.data) +function Lookups.bounds(lookup::GeometryLookup) + if isempty(lookup.data) + Extents.Extent(NamedTuple{DD.name.(lookup.dims)}(ntuple(2) do i; (nothing, nothing); end)) else - GI.extent(lookup.tree) + if isnothing(lookup.tree) + mapreduce(GI.extent, Extents.union, lookup.data) + else + GI.extent(lookup.tree) + end end end @@ -32,39 +34,19 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where end function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) - i = findfirst(x -> all(map(Lookups._matches, sel, x)), lookup) + i = findfirst(x -> all(map(Dimensions._matches, sel, x)), lookup) isnothing(i) && _coord_not_found_error(sel) return i else - return [Lookups._matches(sel, x) for x in lookup] + return [Dimensions._matches(sel, x) for x in lookup] end end - -# Selector implementations that use geometry (like Contains, Touches, etc.) -""" - _maybe_get_candidates(lookup, selector_extent) - -Get the candidates for the selector extent. If the selector extent is disjoint from the tree rootnode extent, return an error. -""" -function _maybe_get_candidates(lookup::GeometryLookup, selector_extent) - tree = lookup.tree - isnothing(tree) && return 1:length(lookup) - Extents.disjoint(GI.extent(tree), selector_extent) && return Int[] - potential_candidates = GO.SpatialTreeInterface.query( - tree, - Base.Fix1(Extents.intersects, selector_extent) - ) - isempty(potential_candidates) && return Int[] - return potential_candidates -end - function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) potential_candidates = _maybe_get_candidates(lookup, sel_ext) filter(potential_candidates) do candidate GO.contains(lookup.data[candidate], val(sel)) end end - function Lookups.selectindices(lookup::GeometryLookup, sel::At) if GI.trait(val(sel)) isa GI.PointTrait Lookups.selectindices(lookup, (At(GI.x(val(sel))), At(GI.y(val(sel))))) @@ -72,7 +54,6 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::At) Lookups.at(lookup, sel) end end - function Lookups.selectindices(lookup::GeometryLookup, sel::Near) geom = val(sel) @assert GI.isgeometry(geom) @@ -90,8 +71,6 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::Near) # this depends on LibGEOS being installed. end - - function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) sel_ext = GI.extent(val(sel)) potential_candidates = _maybe_get_candidates(lookup, sel_ext) @@ -99,27 +78,31 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) GO.intersects(lookup.data[candidate], val(sel)) end end - -function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{ <: Touches}, Union{ <: Touches}}) +function Lookups.selectindices( + lookup::GeometryLookup, + (xs, ys)::Tuple{Union{<:Touches}, Union{<:Touches}} +) target_ext = Extents.Extent(X = (first(xs), last(xs)), Y = (first(ys), last(ys))) potential_candidates = _maybe_get_candidates(lookup, target_ext) return filter(potential_candidates) do candidate GO.intersects(lookup.data[candidate], target_ext) end end - - -function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: DD.IntervalSets.ClosedInterval}, Union{<: DD.IntervalSets.ClosedInterval}}) +function Lookups.selectindices( + lookup::GeometryLookup, + (xs, ys)::Tuple{Union{<:DD.IntervalSets.ClosedInterval},Union{<:DD.IntervalSets.ClosedInterval}} +) target_ext = Extents.Extent(X = extrema(xs), Y = extrema(ys)) potential_candidates = _maybe_get_candidates(lookup, target_ext) filter(potential_candidates) do candidate GO.covers(target_ext, lookup.data[candidate]) end end - -function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: At, <: Contains, <: Real}, Union{<: At, <: Contains, <: Real}}) +function Lookups.selectindices( + lookup::GeometryLookup, + (xs, ys)::Tuple{Union{<:At,<:Contains}, Union{<:At,<:Contains}} +) xval, yval = val(xs), val(ys) - lookup_ext = Lookups.bounds(lookup) if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] @@ -155,3 +138,22 @@ _val_or_nothing(::Nothing) = nothing _val_or_nothing(d::DD.Dimension) = val(d) +# Selector implementations that use geometry (like Contains, Touches, etc.) +""" + _maybe_get_candidates(lookup, selector_extent) + +Get the candidates for the selector extent. If the selector extent is disjoint from the tree rootnode extent, return an error. +""" +function _maybe_get_candidates(lookup::GeometryLookup, selector_extent) + tree = lookup.tree + isnothing(tree) && return 1:length(lookup) + Extents.disjoint(GI.extent(tree), selector_extent) && return Int[] + potential_candidates = GO.SpatialTreeInterface.query( + tree, + Base.Fix1(Extents.intersects, selector_extent) + ) + isempty(potential_candidates) && return Int[] + return potential_candidates +end + + diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 129ca732c..ad213c647 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -112,8 +112,6 @@ filekey(ds::AbstractDataset, name::Union{String,Symbol}) = Symbol(name) filekey(ds::AbstractDataset, name) = _name_or_firstname(ds, name) cleanreturn(A::AbstractVariable) = Array(A) haslayers(::CDMsource) = true -defaultcrs(::CDMsource) = EPSG(4326) -defaultmappedcrs(::CDMsource) = EPSG(4326) function _dims(var::AbstractVariable{<:Any,N}, crs=nokw, mappedcrs=nokw) where N dimnames = CDM.dimnames(var) @@ -124,11 +122,6 @@ end _metadata(var::AbstractVariable; attr=CDM.attribs(var)) = _metadatadict(sourcetrait(var), attr) -function _dims(ds::AbstractDataset, dimdict::Dict) - map(CDM.dimnames(ds)) do dimname - dimdict[dimname] - end |> Tuple -end _metadata(ds::AbstractDataset; attr=CDM.attribs(ds)) = _metadatadict(sourcetrait(ds), attr) function _layerdims(ds::AbstractDataset; layers, dimdict) @@ -138,8 +131,8 @@ function _layerdims(ds::AbstractDataset; layers, dimdict) end |> Tuple end end -function _layermetadata(ds::AbstractDataset; layers) - map(layers.attrs) do attr +function _layermetadata(ds::AbstractDataset; attrs) + map(attrs) do attr md = _metadatadict(sourcetrait(ds), attr) if haskey(attr, "grid_mapping") if haskey(ds, attr["grid_mapping"]) @@ -165,128 +158,204 @@ function _layer_names(ds) return _layer_names(ds, names, Dict{String,Any}(), Dict{String,Any}()) end function _layer_names(ds, var_names, vars_dict, attrs_dict) + # Keep everything separate for clarity bounds_names = String[] geometry_names = String[] grid_mapping_names = String[] coordinate_names = String[] + units_metadata_names = String[] node_coordinate_names = String[] # Sometimes coordinates are not included in var attributes # So we check standard names to know vars are lookups and not data standard_names = String[] - for n in zip(var_names) - get!(() -> CDM.variable(ds, n), vars_dict, n) - get!(() -> CDM.attribs(vars_dict[n]), attrs_dict, n) + for n in var_names + var = get!(() -> CDM.variable(ds, n), vars_dict, n) + attr = get!(() -> CDM.attribs(vars_dict[n]), attrs_dict, n) # Coordinate variables haskey(attr, "coordinates") && setdiff(coordinate_names, _cdm_coordinates(attr)) haskey(attr, "node_coordinates") && setdiff(node_coordinate_names, _cdm_node_coordinates(attr)) # Bounds variables haskey(attr, "bounds") && push!(bounds_names, attr["bounds"]) # Geometry variables - haskey(attr, "geometry") && push!(geometry_names, attr["geometry"]) + if haskey(attr, "geometry") + geometry_container_name = attr["geometry"] + push!(geometry_names, geometry_container_name) + geometry_attr = get!(attrs_dict, geometry_container_name) do + var = get!(vars_dict, geometry_container_name) do + CDM.variable(ds, geometry_container_name) + end + CDM.attribs(var) + end + haskey(geometry_attr, "node_coordinates") && push!(geometry_names, _cf_node_coordinate_names(geometry_attr)...) + haskey(geometry_attr, "node_count") && push!(geometry_names, geometry_attr["node_count"]) + haskey(geometry_attr, "part_node_count") && push!(geometry_names, geometry_attr["part_node_count"]) + haskey(geometry_attr, "interior_ring") && push!(geometry_names, geometry_attr["interior_ring"]) + end # Grid mapping variables haskey(attr, "grid_mapping") && push!(grid_mapping_names, attr["grid_mapping"]) + # Units metadata variables + haskey(attr, "units_metadata") && push!(units_metadata_names, attr["units_metadata"]) # Common Standard names - very likely coordinates haskey(attr, "standard_name") && attr["standard_name"] in keys(CDM_STANDARD_NAME_MAP) && push!(standard_names, n) end dim_names = CDM.dimnames(ds) - # if all(n -> n in var_names, dim_names) - # end - return setdiff(varnames, bounds_names, geometry_names, grid_mapping_names, coordinate_names, node_coordinate_names, standard_names) + return setdiff(var_names, dim_names, bounds_names, geometry_names, grid_mapping_names, coordinate_names, units_metadata_names, node_coordinate_names, standard_names) end -_cdm_coordinates(attr) = collect(split(attr["coordinates"], ' ')) -_cdm_node_coordinates(attr) = collect(split(attr["node_coordinates"], ' ')) +function _cdm_coordinates(attr) + if haskey(attr, "coordinates") + String.(split(attr["coordinates"], ' ')) + else + String[] + end +end +function _cdm_node_coordinates(attr) + if haskey(attr, "node_coordinates") + String.(split(attr["node_coordinates"], ' ')) + else + String[] + end +end +#= Here we convert the surface of the CF standard that we support +into DimensionalData.jl objects +Unhandled +- ancillary variables 3.4 (connection ignored) +- flag values 3.5 (ignored, raw data used - could use CategoricalArrays here?) +- parametric vertical coordinates 4.3.3 (ignored) +=# function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) + # Start with the names of all variables var_names = keys(ds) + # Define vars and attrs dicts so these are only loaded once vars_dict = Dict{String,Any}() attrs_dict = Dict{String,Any}() + geometry_dict = Dict{String,Any}() + crs_dict = Dict{String,Any}() + # Define a dimensions dict so these are only generated once + dim_dict = Dict{String,Dimension}() + # Get the layer names to target layer_names = isnokw(names) ? _layer_names(ds, var_names, vars_dict, attrs_dict) : names + layer_dimnames_vec = Vector{Tuple}(undef, length(layer_names)) - dim_dict = Dict{String,Dimension}() - geometry_dict = Dict{String,Any}() - grid_mapping_dict = Dict{String,Any}() - output_layers_vec = Vector{CDM.AbstractVariable}(undef, length(layer_names)) - output_attrs_vec = Vector{CDM.Attributes}(undef, length(layer_names)) + # Define output data and metadata vars + output_layers_vec = Vector{AbstractArray}(undef, length(layer_names)) + output_layerdims_vec = Vector{Tuple}(undef, length(layer_names)) + output_attrs_vec = Vector{Any}(undef, length(layer_names)) # Loop over the layers we want to load as rasters # As we go, we add dimensions to dim_dict to be used in other layers for (i, var_name) in enumerate(layer_names) + # Get the variable and its attributes var = get(() -> CDM.variable(ds, var_name), vars_dict, var_name) attr = get(() -> CDM.attribs(var), attrs_dict, var_name) - coord_names = _cdm_coordinates(attr) + # Get its dimensions layer_dimnames_vec[i] = dimnames = CDM.dimnames(var) - attr = attrs_dict[var_name] + # Get its coordinates + coord_names = _cdm_coordinates(attr) + # Check for a geometry variable (CF 7.5) geometry_key = if haskey(attr, "geometry") attr["geometry"] end - grid_mapping_key = if haskey(attr, "grid_mapping") - attr["grid_mapping"] + # Check for a grid mapping variable + crs = if haskey(attr, "grid_mapping") + grid_mapping_key = attr["grid_mapping"] + get!(() -> _cf_crs(ds, grid_mapping_key), crs_dict, grid_mapping_key) end # Loop over the dimensions of this layer, adding missing dims to dim_dict - for dimname in dimnames - haskey(dim_dict, dimname) && continue - dim_dict[dimname] = _cdm_dim( - ds, vars_dict, attrs_dict, geometry_dict, grid_mapping_dict, geometry_key, grid_mapping_key, coord_names, dimname - ) + output_layerdims_vec[i] = map(dimnames) do dimname + dim = get!(dim_dict, dimname) do + _cdm_dim( + ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_names, dimname, crs + ) + end + basedims(dim) end + # Add this layer to the output output_layers_vec[i] = var output_attrs_vec[i] = attr end # TODO output - (; layers, dims, layerdims_vec, layermetadata_vec) + return (; + names_vec=layer_names, + layers_vec=output_layers_vec, + layerdims_vec=output_layerdims_vec, + layermetadata_vec=output_attrs_vec, + dim_dict, + ) end _organise_dataset(ds::AbstractDataset, names, group) = _organise_dataset(ds.group[group], names, nokw) -function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, grid_mapping_dict, geometry_key, grid_mapping_key, coord_names, dimname) - crs = if !isnothing(grid_mapping_key) - get(grid_mapping_dict, grid_mapping_key) do - _cdm_grid_mapping(ds, grid_mapping_key) +function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_names, dimname, crs) + if !isnothing(geometry_key) + geometry_attrs = get!(attrs_dict, geometry_key) do + CDM.attribs(CDM.variable(ds, geometry_key)) + end + # Check that the geometry is for this dimension + is_geometry_dimension = if haskey(geometry_attrs, "node_count") + # Multi-part geometries + dimname in CDM.dimnames(vars_dict[geometry_attrs["node_count"]]) + else + # Points + dimname in CDM.dimnames(vars_dict[geometry_attrs["node_coordinates"]]) + end + geoms = get!(geometry_dict, geometry_key) do + _cf_geometry_decode(ds, geometry_attrs) + end + if is_geometry_dimension + dimension_coord_names = filter(coord_names) do coordname + dimnames = CDM.dimnames(vars_dict[coordname]) + length(dimnames) == 1 && only(dimnames) == dimname + end + dims = map(dimension_coord_names) do d + dim_attrs = attrs_dict[d] + Dn = _cdm_dimtype(dim_attrs, d) + Dn() + end |> Tuple + lookup = GeometryLookup(geoms, dims; crs) + D = Geometry + return D(lookup) end end if haskey(ds, dimname) # The dimension has a matching variable dim_attrs = attrs_dict[dimname] D = _cdm_dimtype(dim_attrs, dimname) - lookup = _cdm_lookup(ds, dimname, D, dim_attrs) + lookup = _cdm_lookup(ds, dim_attrs, dimname, D) D(lookup) else - if !isnothing(geometry_key) - geom = get(geometry_dict, geometry_key) do - _cdm_geometry(ds, geometry_key) - end - # Check that geometry node coordinates are for this dimension - is_geometry_dimension = all(_cdm_node_coordinates(geom)) do nc - dimname in CDM.dimnames(vars_dict[nc]) - end - if is_geometry_dimension - D = _cdm_dimtype(NoMetadata(), d) - lookup = _cdm_geometry_lookup(vars_dicts, attrs_dict, geom, dimname) - return D(lookup) - end - end - # No matching variable. Get all the coordinates for this dimension - dimension_coord_names = filter(coordnames) do coordname + # No matching variable was found, so we have something more complicated + # Get all the coordinates for this dimension + dimension_coord_names = filter(coord_names) do coordname dimnames = CDM.dimnames(vars_dict[coordname]) length(dimnames) == 1 && only(dimnames) == dimname end - # If there are coordinates for this dimension - if !isempty(dimension_coord_names) - # TODO rotations + if length(dimension_coord_names) == 1 + # If there is only one coordinate, just use it as the dimension + # This should rarely happen + linked_dimname = dimension_coord_names[1] + dim_attrs = attrs_dict[linked_dimname] + D = _cdm_dimtype(dim_attrs, linked_dimname) + lookup = _cdm_lookup(ds, dim_attrs, linked_dimname, D, crs) + D(lookup) + elseif length(dimension_coord_names) > 1 + # Generate a MergedLookup for multi-coordinate dimensions dims = map(dimension_coord_names) do d dim_attrs = attrs_dict[d] D = _cdm_dimtype(dim_attrs, d) - lookup = _cdm_lookup(ds, d, D, dim_attrs) + lookup = _cdm_lookup(ds, dim_attrs, d, D, crs) D(lookup) end D = _cdm_dimtype(NoMetadata(), dimname) + merged_data = collect(zip(lookup(dims)...)) # Use a MergedLookup with all dimensions - mergedims(dims, dims => D) + D(MergedLookup(merged_data, dims)) else - # The var doesn't exist. Maybe its `complex` or some other marker, - # so make it a custom `Dim` with `NoLookup` + # A matching variable doesn't exist. + # Its a "discrete axis" (4.5) so NoLookup + len = CDM.dim(ds, dimname) lookup = NoLookup(Base.OneTo(len)) D = _cdm_dimtype(NoMetadata(), dimname) D(lookup) @@ -330,9 +399,9 @@ end # _cdm_lookup # Generate a `Lookup` from a CDM dim. -function _cdm_lookup(ds::AbstractDataset, attr, dimname, D::Type, attr) +function _cdm_lookup(ds::AbstractDataset, attr, dimname, D::Type) var = ds[dimname] - data = Missings.disallowmissing(var[:]) + data = Missings.disallowmissing(collect(var)) metadata = _metadatadict(sourcetrait(ds), attr) return _cdm_lookup(data, ds, var, attr, dimname, D, metadata) end @@ -349,12 +418,12 @@ function _cdm_lookup( ) # Assume the locus is at the center of the cell if boundaries aren't provided. # http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries - order = LA.orderof(index) + order = LA.orderof(data) # Detect lat/lon - span, sampling = if eltype(index) <: Union{Missing,Dates.AbstractTime} - _cdm_period(index, metadata) + span, sampling = if eltype(data) <: Union{Missing,Dates.AbstractTime} + _cdm_period(data, metadata) else - _cdm_span(index, order) + _cdm_span(data, order) end # We only use Explicit if the span is not Regular # This is important for things like rasterizatin and conversion @@ -366,9 +435,9 @@ function _cdm_lookup( else boundskey = attr["bounds"] boundsmatrix = Array(ds[boundskey]) - locus = if mapreduce(==, &, view(boundsmatrix, 1, :), index) + locus = if mapreduce(==, &, view(boundsmatrix, 1, :), data) Start() - elseif mapreduce(==, &, view(boundsmatrix, 2, :), index) + elseif mapreduce(==, &, view(boundsmatrix, 2, :), data) End() else # TODO better checks here @@ -405,31 +474,31 @@ function _cdm_lookup(data, D::Type, order::Order, span, sampling, metadata) Sampled(data, order, span, sampling, metadata) end -function _cdm_span(index, order) - # Handle a length 1 index - length(index) == 1 && return Regular(zero(eltype(index))), Points() +function _cdm_span(data, order) + # Handle a length 1 data + length(data) == 1 && return Regular(zero(eltype(data))), Points() - step = if eltype(index) <: AbstractFloat + step = if eltype(data) <: AbstractFloat # Calculate step, avoiding as many floating point errors as possible - st = Base.step(Base.range(Float64(first(index)), Float64(last(index)); length = length(index))) - st_rd = round(st, digits = Base.floor(Int,-log10(eps(eltype(index))))) # round to nearest digit within machine epsilon - isapprox(st_rd, st; atol = eps(eltype(index))) ? st_rd : st # keep the rounded number if it is very close to the original + st = Base.step(Base.range(Float64(first(data)), Float64(last(data)); length = length(data))) + st_rd = round(st, digits = Base.floor(Int,-log10(eps(eltype(data))))) # round to nearest digit within machine epsilon + isapprox(st_rd, st; atol = eps(eltype(data))) ? st_rd : st # keep the rounded number if it is very close to the original else - index[2] - index[1] + data[2] - data[1] end - for i in 2:length(index)-1 + for i in 2:length(data)-1 # If any step sizes don't match, its Irregular - if !(index[i+1] - index[i] ≈ step) - bounds = if length(index) > 1 - beginhalfcell = abs((index[2] - index[1]) * 0.5) - endhalfcell = abs((index[end] - index[end-1]) * 0.5) + if !(data[i+1] - data[i] ≈ step) + bounds = if length(data) > 1 + beginhalfcell = abs((data[2] - data[1]) * 0.5) + endhalfcell = abs((data[end] - data[end-1]) * 0.5) if LA.isrev(order) - index[end] - endhalfcell, index[1] + beginhalfcell + data[end] - endhalfcell, data[1] + beginhalfcell else - index[1] - beginhalfcell, index[end] + endhalfcell + data[1] - beginhalfcell, data[end] + endhalfcell end else - index[1], index[1] + data[1], data[1] end return Irregular(bounds), Points() end @@ -439,7 +508,7 @@ function _cdm_span(index, order) end # delta_t and ave_period are not CF standards, but CDC -function _cdm_period(index, metadata::Metadata{<:CDMsource}) +function _cdm_period(data, metadata::Metadata{<:CDMsource}) if haskey(metadata, "delta_t") period = _parse_period(metadata["delta_t"]) period isa Nothing || return Regular(period), Points() @@ -647,8 +716,12 @@ function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:ArrayLookup}, di return nothing end function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:GeometryLookup}, dimname) +<<<<<<< HEAD l = lookup(dim) geom = _geometry_cf_encode(parent(l)) +======= + geom = _geometry_cf_encode(lookup(dim)) +>>>>>>> d5ad3c0 (read geom working) # Define base geometry attributes coord_names_raw = map(dims(l)) do d @@ -713,108 +786,21 @@ function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:Union{Sampled,Ab push!(attrib, "bounds" => boundskey) CDM.defVar(ds, boundskey, bounds, ("bnds", dimname)) end - CDM.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib) + CDM.defVar(ds, dimname, collect(lookup(dim)), (dimname,); attrib) return nothing end - -function _classify_variables(ds::AbstractDataset) -end - - -function _read_geometry(ds, key) - var = CDM.variable(ds, key) - var_attrib = CDM.attribs(var) - geom_key = var_attrib["geometry"] - geom_var = CDM.variable(ds, geom_key) - attrib = CDM.attribs(geom_var) - geometry_type = attrib["geometry_type"] - - node_coordinate_names = split(attrib["node_coordinates"], ' ') - node_coordinate_vars = map(c -> CDM.variable(ds, c), node_coordinate_names) - # Map the node coordinates to a vector of n-tuples. - node_coordinates = map(tuple, node_coordinate_vars...) - # There are three types of geometries: - # - point (can be point or multipoint depending on presence of node_count) - # - line (can be linestring or multilinestring depending on presence of part_node_count) - # - polygon (can be poly or multipoly depending on presence of part_node_count, multipoly also needs interior_ring) - # This is the meat of the function, that parses the geometry and returns a named tuple - trait, geom_parsing_args = if geometry_type == "point" - if haskey(attrib, "node_count") # actually multipoint - (GI.MultiPointTrait(), (; node_coordinates, node_count = CDM.variable(ds, attrib["node_count"]))) - else # regular points - (GI.PointTrait(), (; node_coordinates)) - end - elseif geometry_type == "line" - if haskey(attrib, "part_node_count") # actually multilinestring - (GI.MultiLineStringTrait(), (; - node_coordinates, - node_count = CDM.variable(ds, attrib["node_count"]), - part_node_count = CDM.variable(ds, attrib["part_node_count"]) - )) - else # regular linestring - (GI.LineStringTrait(), (; - node_coordinates, - node_count = CDM.variable(ds, attrib["node_count"]) - )) - end - elseif geometry_type == "polygon" - if haskey(attrib, "part_node_count") # actually multipoly - if haskey(attrib, "interior_ring") # this will usually be the case - (GI.MultiPolygonTrait(), (; - node_coordinates, - node_count = CDM.variable(ds, attrib["node_count"]), - part_node_count = CDM.variable(ds, attrib["part_node_count"]), - interior_ring = CDM.variable(ds, attrib["interior_ring"]) - )) - else # no interior ring, but part node count is present - # so, we need to assert that part_node_count has the same length as node_count - # in which case, interior_ring is always false by definition. - node_count = CDM.variable(ds, attrib["node_count"]) - part_node_count = CDM.variable(ds, attrib["part_node_count"]) - - if length(node_count) != length(part_node_count) - throw(ArgumentError(""" - When parsing geometry lookup for CDM variable $key, - the length of `node_count` ($(length(node_count))) and - `part_node_count` ($(length(part_node_count))) must be the same. - """)) - end - (GI.PolygonTrait(), (; node_coordinates, node_count)) - end - else # regular polygon - (GI.PolygonTrait(), (; node_coordinates, node_count = CDM.variable(ds, attrib["node_count"]))) - end - else - throw(ArgumentError(""" - When parsing geometry lookup for CDM variable $key, - the geometry type is $geometry_type, - but we could not figure out what that is! - Allowed geometry types are: `point`, `line`, `polygon`. - - If you need something else, please open a Github issue! - """)) - end - crs = if haskey(attrib, "grid_mapping") - grid_mapping_var = CDM.variable(ds, attrib["grid_mapping"]) - grid_mapping_attrib = CDM.attribs(grid_mapping_var) - if haskey(grid_mapping_attrib, "crs_wkt") - WellKnownText2(GeoFormatTypes.CRS(), grid_mapping_attrib["crs_wkt"]) - elseif haskey(grid_mapping_attrib, "spatial_epsg") - EPSG(parse(Int, grid_mapping_attrib["spatial_epsg"])) - elseif haskey(grid_mapping_attrib, "proj4string") - ProjString(grid_mapping_attrib["proj4string"]) - else - nothing # unparseable - TODO get CFProjections.jl to do it. - end +_cf_crs(ds, grid_mapping_key::Nothing) = nothing +function _cf_crs(ds, grid_mapping_key::String) + grid_mapping_var = CDM.variable(ds, grid_mapping_key) + grid_mapping_attrib = CDM.attribs(grid_mapping_var) + if haskey(grid_mapping_attrib, "crs_wkt") + WellKnownText2(GeoFormatTypes.CRS(), grid_mapping_attrib["crs_wkt"]) + elseif haskey(grid_mapping_attrib, "spatial_epsg") + EPSG(parse(Int, grid_mapping_attrib["spatial_epsg"])) + elseif haskey(grid_mapping_attrib, "proj4string") + ProjString(grid_mapping_attrib["proj4string"]) else - nothing + nothing # unparseable - TODO get CFProjections.jl to do it. end - - # return the geometry directly - # TODO: parse coordinate names, to get dimensions - # depends on what utils / luts we have elsewhere. - return _geometry_cf_decode(trait, ds, geom_parsing_args; crs) - - # (; dimension, internal_dimensions, geometry_type, node_count, part_node_count, interior_ring, node_coordinates, grid_mapping) end \ No newline at end of file diff --git a/src/stack.jl b/src/stack.jl index 716b1d722..b3066350b 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -472,33 +472,33 @@ function RasterStack(ds; check_multilayer_dataset(ds) scaled, missingval = _raw_check(raw, scaled, missingval, verbose) # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims` - layers, dimsdict, layerdims_vec, layermetata_vec = _classify_dataset(ds, name, group) - dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims_vec) + (; names_vec, layers_vec, layerdims_vec, layermetadata_vec, dim_dict) = _organise_dataset(ds, name, group) + dims = _sort_by_layerdims(isnokw(dims) ? values(dim_dict) : dims, layerdims_vec) refdims = isnokw(refdims) || isnothing(refdims) ? () : refdims metadata = isnokw(metadata) ? _metadata(ds) : metadata layermetadata_vec = if isnokw(layermetadata) - _layermetadata(ds; layers) + _layermetadata(ds; attrs=layermetadata_vec) else layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) end - name = Tuple(map(Symbol, layers.names)) + name = Tuple(map(Symbol, names_vec)) NT = NamedTuple{name} missingval_vec = if missingval isa Pair _missingval_vec(missingval, name) else - layer_mvs = map(Rasters.missingval, layers.vars, layermetadata_vec) + layer_mvs = map(Rasters.missingval, layers_vec, layermetadata_vec) _missingval_vec(missingval, layer_mvs, name) end - eltype_vec = map(eltype, layers.vars) + eltype_vec = map(eltype, layers_vec) mod_vec = _stack_mods(eltype_vec, layermetadata_vec, missingval_vec; scaled, coerce) data = if lazy vars = ntuple(i -> layers.vars[i], length(name)) mods = ntuple(i -> mod_vec[i], length(name)) FileStack{typeof(source)}(ds, filename; name, group, mods, vars) else - map(layers.vars, layermetadata_vec, mod_vec) do var, md, mod + map(layers_vec, layermetadata_vec, mod_vec) do var, md, mod modvar = _maybe_modify(var, mod) - yer checkmem && _checkobjmem(modvar) + checkmem && _checkobjmem(modvar) Array(modvar) end |> NT end @@ -605,7 +605,7 @@ _name_error(f, names, layernames) = # order that applies without permutation, preferencing the layers # with most dimensions, and those that come first. # Intentionally not type-stable -function _sort_by_layerdims(dims, layerdims) +function _sort_by_layerdims(dims, layerdims::Vector) dimlist = union(layerdims) currentorder = nothing for i in length(dims):-1:1 @@ -614,7 +614,7 @@ function _sort_by_layerdims(dims, layerdims) currentorder = _merge_dimorder(ldims, currentorder) end end - return DD.dims(dims, currentorder) + return DD.dims(Tuple(dims), currentorder) end _merge_dimorder(neworder, ::Nothing) = neworder From ee68ede3bb2e955b22a1353f01a5ba2ffc4e2564 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 4 May 2025 20:39:53 +0200 Subject: [PATCH 10/43] round trip geometrylookup --- src/geometry_lookup/io.jl | 13 ++++- src/sources/commondatamodel.jl | 91 ++++++++++++++++++++++------------ 2 files changed, 71 insertions(+), 33 deletions(-) diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index 7b9e115f0..d791cdcb4 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -1,6 +1,6 @@ # Geometry encoding and decoding from CF conventions (7.5) _cf_geometry_encode(geoms) = _cf_geometry_encode(GI.trait(first(geoms)), geoms) -_cf_geometry_encode(trait, geoms) = throw(ArgumentError("Geometry trait $trait not currently handled by Rasters")) +_cf_geometry_encode(trait::GI.AbstractTrait, geoms) = throw(ArgumentError("Geometry trait $trait not currently handled by Rasters")) function _cf_geometry_encode(::Union{GI.PointTrait,GI.MultiPointTrait}, geoms) if all(x -> GI.trait(x) isa GI.PointTrait, geoms) return (; @@ -25,10 +25,13 @@ function _cf_geometry_encode(::Union{GI.PointTrait,GI.MultiPointTrait}, geoms) # without allocating an extra array. foreach(identity, flattener) + centroids = GO.centroid.(geoms) return (; geometry_type = "line", x = flat_xs, y = flat_ys, + lon = first.(centroids), + lat = last.(centroids), node_count = GI.npoint.(geoms) ) end @@ -51,11 +54,14 @@ function _cf_geometry_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait attribs = Dict{String,Any}("geometry_type" => "linestring") # If all geoms are linestrings, we can take a fast path. + centroids = GO.centroid.(geoms) if all(x -> GI.trait(x) isa GI.LineStringTrait, geoms) return (; geometry_type = "line", x = flat_xs, y = flat_ys, + lon = first.(centroids), + lat = last.(centroids), node_count = GI.npoint.(geoms) ) end @@ -65,6 +71,8 @@ function _cf_geometry_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait geometry_type = "line", x = flat_xs, y = flat_ys, + lon = first.(centroids), + lat = last.(centroids), part_node_count = collect(GO.flatten(GI.npoint, GI.LineStringTrait, geoms)), node_count = GI.npoint.(geoms) ) @@ -127,10 +135,13 @@ function _cf_geometry_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geo # The names in this named tuple are standard CF conventions. # node_coordinates_x and node_coordinates_y are the coordinates of the nodes of the rings. # but cf encodes them as a space separated string. That's the only difference. + centroids = GO.centroid.(geoms) return (; geometry_type = "polygon", x = xs, y = ys, + lon = first.(centroids), + lat = last.(centroids), node_count = node_count_vec, part_node_count = part_node_count_vec, interior_ring = interior_ring_vec diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index ad213c647..5062e77f1 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -41,6 +41,24 @@ const CDM_DIM_MAP = Dict( "band" => Band, ) +_cf_name(dim::Dimension) = lowercase(string(name(dim))) +_cf_name(dim::Ti) = "time" +_cf_name(dim::X) = "x" +_cf_name(dim::Y) = "y" +_cf_name(dim::Z) = "z" + +_cf_axis(dim::Dimension) = nothing +_cf_axis(dim::Ti) = "T" +_cf_axis(dim::X) = "X" +_cf_axis(dim::Y) = "Y" +_cf_axis(dim::Z) = "Z" + +_cf_shortname(dim::Dimension) = nothing +_cf_shortname(dim::Ti) = "time" +_cf_shortname(dim::X) = "lon" +_cf_shortname(dim::Y) = "lat" +_cf_shortname(dim::Z) = "height" + # `Source`` from variables and datasets sourcetrait(var::CDM.CFVariable) = sourcetrait(var.var) # Dataset constructor from `Source` @@ -254,14 +272,19 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) layer_dimnames_vec[i] = dimnames = CDM.dimnames(var) # Get its coordinates coord_names = _cdm_coordinates(attr) + # Remove coordinates from metadata + if haskey(attr, "coordinates") + delete!(attr, "coordinates") + end # Check for a geometry variable (CF 7.5) geometry_key = if haskey(attr, "geometry") - attr["geometry"] + pop!(attr, "geometry") end # Check for a grid mapping variable crs = if haskey(attr, "grid_mapping") - grid_mapping_key = attr["grid_mapping"] - get!(() -> _cf_crs(ds, grid_mapping_key), crs_dict, grid_mapping_key) + grid_mapping_key = pop!(attr, "grid_mapping") + crs = get!(() -> _cf_crs(ds, grid_mapping_key), crs_dict, grid_mapping_key) + crs end # Loop over the dimensions of this layer, adding missing dims to dim_dict output_layerdims_vec[i] = map(dimnames) do dimname @@ -299,7 +322,7 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_ dimname in CDM.dimnames(vars_dict[geometry_attrs["node_count"]]) else # Points - dimname in CDM.dimnames(vars_dict[geometry_attrs["node_coordinates"]]) + dimname in CDM.dimnames(vars_dict[first(_cf_node_coordinate_names(geometry_attrs))]) end geoms = get!(geometry_dict, geometry_key) do _cf_geometry_decode(ds, geometry_attrs) @@ -309,12 +332,7 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_ dimnames = CDM.dimnames(vars_dict[coordname]) length(dimnames) == 1 && only(dimnames) == dimname end - dims = map(dimension_coord_names) do d - dim_attrs = attrs_dict[d] - Dn = _cdm_dimtype(dim_attrs, d) - Dn() - end |> Tuple - lookup = GeometryLookup(geoms, dims; crs) + lookup = GeometryLookup(geoms, (X(), Y()); crs) D = Geometry return D(lookup) end @@ -566,7 +584,7 @@ _cdm_set_axis_attrib!(atr, dim::Z) = (atr["axis"] = "Z"; atr["standard_name"] = _cdm_set_axis_attrib!(atr, dim::Ti) = (atr["axis"] = "T"; atr["standard_name"] = "time") _cdm_set_axis_attrib!(atr, dim) = nothing -_cdm_shiftlocus(dim::Dimension) = _cdmshiftlocus(lookup(dim), dim) +_cdm_shiftlocus(dim::Dimension) = _cdm_shiftlocus(lookup(dim), dim) _cdm_shiftlocus(::Lookup, dim::Dimension) = dim function _cdm_shiftlocus(lookup::AbstractSampled, dim::Dimension) # TODO improve this @@ -674,14 +692,17 @@ function writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T,N attrib["_FillValue"] = missingval_pair[1] end - key = if isnokw(name) || string(name) == "" + varname = if isnokw(name) || string(name) == "" UNNAMED_FILE_KEY else string(name) end - dimnames = lowercase.(string.(map(Rasters.name, dims(A)))) - var = CDM.defVar(ds, key, eltype, dimnames; attrib, chunksizes, kw...) + if !isempty(dims(A, x -> lookup(x) isa GeometryLookup)) + attrib["geometry"] = "geometry_container" + end + dimnames = map(_cf_name, dims(A)) + var = CDM.defVar(ds, varname, eltype, dimnames; attrib, chunksizes, kw...) if write m = _maybe_modify(parent(var), mod) @@ -705,10 +726,11 @@ end _def_dim_var!(ds::AbstractDataset, A) = map(d -> _def_dim_var!(ds, d), dims(A)) function _def_dim_var!(ds::AbstractDataset, dim::Dimension) - dimname = lowercase(string(DD.name(dim))) + dimname = _cf_name(dim) haskey(ds.dim, dimname) && return nothing CDM.defDim(ds, dimname, length(dim)) _def_lookup_var!(ds, dim, dimname) + return nothing end _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:AbstractNoLookup}, dimname) = nothing function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:ArrayLookup}, dimname) @@ -716,18 +738,13 @@ function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:ArrayLookup}, di return nothing end function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:GeometryLookup}, dimname) -<<<<<<< HEAD l = lookup(dim) - geom = _geometry_cf_encode(parent(l)) -======= - geom = _geometry_cf_encode(lookup(dim)) ->>>>>>> d5ad3c0 (read geom working) + geom = _cf_geometry_encode(lookup(dim)) + @show dims(l) # Define base geometry attributes - coord_names_raw = map(dims(l)) do d - string(DD.name(d)) + _cf_name(d) end - coord_names = if any(n -> haskey(ds, n), coord_names_raw) (dimname,) .* coord_names_raw else @@ -741,33 +758,43 @@ function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:GeometryLookup}, if haskey(geom, :node_count) # More nodes than dimension length: define a node dimension node_dimname = dimname * "_node" - CDM.defVar(ds, "node_count", geom.node_count, dimname) + lon_varname = "lon" + lat_varname = "lat" + node_count_varname = dimname * "_node_count" CDM.defDim(ds, node_dimname, length(geom.x)) - attrib["node_count"] = dimname * "_node_count" + CDM.defVar(ds, node_count_varname, geom.node_count, (dimname,)) + CDM.defVar(ds, lon_varname, geom.lon, (dimname,); + attrib=["standard_name" => "longitude", "nodes" => coord_names[1]] + ) + CDM.defVar(ds, lat_varname, geom.lat, (dimname,); + attrib=["standard_name" => "latitude", "nodes" => coord_names[2]] + ) + attrib["node_count"] = node_count_varname + attrib["coordinates"] = "lat lon" else # Node count is dimension length - node_dimame = dimname + node_dimname = dimname end if haskey(geom, :part_node_count) # We also have geometry parts - part_dimname = dimname * "part" + part_dimname = dimname * "_part" part_varname = dimname * "_part_node_count" CDM.defDim(ds, part_dimname, length(geom.part_node_count)) - CDM.defVar(ds, part_varname, geom.part_node_count, part_dimname) + CDM.defVar(ds, part_varname, geom.part_node_count, (part_dimname,)) attrib["part_node_count"] = part_varname end if haskey(geom, :interior_ring) # And interior rings ring_varname = dimname * "_interior_ring" - CDM.defVar(ds, ring_varname, geom.interior_ring, part_dimname) + CDM.defVar(ds, ring_varname, geom.interior_ring, (part_dimname,)) attrib["interior_ring"] = ring_varname end # Define coordinate variables - CDM.defVar(ds, coord_names[1], geom.x, node_dimame) - CDM.defVar(ds, coord_names[2], geom.y, node_dimame) + CDM.defVar(ds, coord_names[1], geom.x, (node_dimname,)) + CDM.defVar(ds, coord_names[2], geom.y, (node_dimname,)) # TODO: add z and m, if present # And the geometry container - CDM.defVar(ds, "geometry_container", nothing, (), attrib) + CDM.defVar(ds, "geometry_container", fill(0), (); attrib) return nothing end function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:Union{Sampled,AbstractCategorical}}, dimname) From a798c72dd5a8870361424d689a71cb69cbbaa8e9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 4 May 2025 22:29:47 +0200 Subject: [PATCH 11/43] working Raster --- src/array.jl | 2 +- src/geometry_lookup/nc_io.jl | 158 +++++++++++--------------------- src/sources/commondatamodel.jl | 151 +++++++++++++++++++++++------- test/sources/commondatamodel.jl | 6 +- 4 files changed, 176 insertions(+), 141 deletions(-) diff --git a/src/array.jl b/src/array.jl index 7dbfa923a..d39cb0c5f 100644 --- a/src/array.jl +++ b/src/array.jl @@ -329,7 +329,7 @@ function Raster(filename::AbstractString; source=nokw, kw...) end::Raster end # Load a Raster from an opened Dataset -# We need the inner method for AbstractArray ambiguit +# We need the inner method for AbstractArray ambiguity Raster(ds; kw...) = _raster(ds; kw...) function _raster(ds; dims=nokw, diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl index beb392fc6..8440294ba 100644 --- a/src/geometry_lookup/nc_io.jl +++ b/src/geometry_lookup/nc_io.jl @@ -4,13 +4,15 @@ import NCDatasets.NetCDF_jll using Test -import Rasters +using Rasters import GeoInterface as GI - ds = NCDataset("C:\\Users\\rafael.schouten\\Downloads\\i.nc") -var = ds["someData"] -keys(ds) +st = RasterStack("C:\\Users\\rafael.schouten\\Downloads\\i.nc") +write("geomtest3.nc", st; force=true) +ds = NCDataset("geomtest3.nc") +RasterStack("geomtest3.nc") +Raster("geomtest3.nc") v = CDM.variable(ds, :lon) CDM.dimnames(ds) CDM.dimnames(v) @@ -49,7 +51,55 @@ has_geometry = haskey(CDM.attribs(var), "geometry") if !has_geometry throw(ArgumentError("Variable $u_dim_name does not have a geometry attribute")) end +=# + +# generate dataset +output_path = tempname() * ".nc" +testfile = "multipolygons.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) + +ds = NCDataset(output_path) +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] +geometry_container_attribs = CDM.attribs(geometry_container_var) + +haskey(geometry_container_attribs, "geometry_type") && +geometry_container_attribs["geometry_type"] == "polygon" || +throw(ArgumentError("We only support polygon geometry types at this time, got $geometry_type")) + +geoms = Rasters._geometry_cf_decode(GI.PolygonTrait(), ds, geometry_container_attribs) + +encoded = Rasters._geometry_cf_encode(GI.PolygonTrait(), geoms) +node_count = collect(nggode_count_var) +node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) +part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) +interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.node_count == ds["node_count"] +@test encoded.part_node_count == ds["part_node_count"] +@test encoded.interior_ring == ds["interior_ring"] + +# lines now + +output_path = tempname() * ".nc" +testfile = "lines.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] +geometry_container_attribs = CDM.attribs(geometry_container_var) +geoms = Rasters._geometry_cf_decode(GI.LineStringTrait(), ds, geometry_container_attribs) + +encoded = Rasters._geometry_cf_encode(GI.LineStringTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.node_count == ds["node_count"] +@test !hasproperty(encoded, :part_node_count) output_path = tempname() * ".nc" @@ -105,102 +155,4 @@ encoded = Rasters._geometry_cf_encode(GI.PointTrait(), geoms) @test encoded.node_coordinates_x == ds["x"] @test encoded.node_coordinates_y == ds["y"] -@test !hasproperty(encoded, :node_count) - - - - -function _compile_ncgen(f, testfilename; rasterspath = Base.pathof("Rasters")) - output_path = tempname() * ".nc" - run(`$(NCDatasets.NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(rasterspath)), "test", "data", testfilename))`) - ds = NCDatasets.NCDataset(output_path) - f(ds) - close(ds) - rm(output_path) - return -end - -function _compile_ncgen(testfilename; rasterspath = Base.pathof("Rasters")) - output_path = tempname() * ".nc" - run(`$(NCDatasets.NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(rasterspath)), "test", "data", testfilename))`) - ds = NCDatasets.NCDataset(output_path) - return ds -end - -import NCDatasets -import NCDatasets.CommonDataModel as CDM -import GeoInterface as GI, GeometryOps as GO -using GeoFormatTypes - -rasterspath = joinpath(dirname(dirname(dirname(@__FILE__))), "src", "Rasters.jl") - -_compile_ncgen("multipolygons.ncgen"; rasterspath) do ds - geoms = _read_geometry(ds, "someData") - - @test length(geoms) == 2 - @test GI.nring.(geoms) == [3, 1] - - encoded = _geometry_cf_encode(GI.PolygonTrait(), geoms) - - @test encoded.node_coordinates_x == ds["x"] - @test encoded.node_coordinates_y == ds["y"] - @test encoded.node_count == ds["node_count"] - @test encoded.part_node_count == ds["part_node_count"] - @test encoded.interior_ring == ds["interior_ring"] -end - -_compile_ncgen("lines.ncgen"; rasterspath) do ds - geoms = _read_geometry(ds, "someData") - - @test all(g -> GI.trait(g) isa GI.LineStringTrait, geoms) - @test length(geoms) == 2 - - encoded = _geometry_cf_encode(GI.LineStringTrait(), geoms) - - @test encoded.node_coordinates_x == ds["x"] - @test encoded.node_coordinates_y == ds["y"] - @test encoded.node_count == ds["node_count"] - @test !hasproperty(encoded, :part_node_count) -end - -_compile_ncgen("points.ncgen"; rasterspath) do ds - geoms = _read_geometry(ds, "someData") - - @test length(geoms) == 2 - - encoded = _geometry_cf_encode(GI.PointTrait(), geoms) - - @test encoded.node_coordinates_x == ds["x"] - @test encoded.node_coordinates_y == ds["y"] - @test !hasproperty(encoded, :node_count) -end - - -_compile_ncgen("multilines.ncgen"; rasterspath) do ds - geoms = _read_geometry(ds, "someData") - - @test all(g -> GI.trait(g) isa GI.MultiLineStringTrait, geoms) - @test length(geoms) == 2 - - encoded = _geometry_cf_encode(GI.MultiLineStringTrait(), geoms) - - @test encoded.node_coordinates_x == ds["x"] - @test encoded.node_coordinates_y == ds["y"] - @test encoded.node_count == ds["node_count"] - @test encoded.part_node_count == ds["part_node_count"] - @test length(encoded) == 4 -end - - -_compile_ncgen("multipoints.ncgen"; rasterspath) do ds - geoms = _read_geometry(ds, "someData") - - @test all(g -> GI.trait(g) isa GI.MultiPointTrait, geoms) - @test length(geoms) == 2 - - encoded = _geometry_cf_encode(GI.MultiPointTrait(), geoms) - - @test encoded.node_coordinates_x == ds["x"] - @test encoded.node_coordinates_y == ds["y"] - @test encoded.node_count == ds["node_count"] -end +@test !hasproperty(encoded, :node_count) \ No newline at end of file diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 5062e77f1..819c5e27a 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -183,27 +183,28 @@ function _layer_names(ds, var_names, vars_dict, attrs_dict) coordinate_names = String[] units_metadata_names = String[] node_coordinate_names = String[] + quantization_names = String[] + climatology_names = String[] # Sometimes coordinates are not included in var attributes # So we check standard names to know vars are lookups and not data standard_names = String[] for n in var_names - var = get!(() -> CDM.variable(ds, n), vars_dict, n) - attr = get!(() -> CDM.attribs(vars_dict[n]), attrs_dict, n) + var = _get_var!(ds, vars_dict, n) + attr = _get_attr!(ds, vars_dict, attrs_dict, n) # Coordinate variables haskey(attr, "coordinates") && setdiff(coordinate_names, _cdm_coordinates(attr)) haskey(attr, "node_coordinates") && setdiff(node_coordinate_names, _cdm_node_coordinates(attr)) # Bounds variables haskey(attr, "bounds") && push!(bounds_names, attr["bounds"]) - # Geometry variables + # Quantization variables (8.4.2) + haskey(attr, "quantization") && push!(quantization_names, attr["quantization"]) + # Climatology variables (7.4) + haskey(attr, "climatology") && push!(climatology_names, attr["climatology"]) + # Geometry variables (7.5) if haskey(attr, "geometry") geometry_container_name = attr["geometry"] push!(geometry_names, geometry_container_name) - geometry_attr = get!(attrs_dict, geometry_container_name) do - var = get!(vars_dict, geometry_container_name) do - CDM.variable(ds, geometry_container_name) - end - CDM.attribs(var) - end + geometry_attr = _get_attr!(ds, vars_dict, attrs_dict, geometry_container_name) haskey(geometry_attr, "node_coordinates") && push!(geometry_names, _cf_node_coordinate_names(geometry_attr)...) haskey(geometry_attr, "node_count") && push!(geometry_names, geometry_attr["node_count"]) haskey(geometry_attr, "part_node_count") && push!(geometry_names, geometry_attr["part_node_count"]) @@ -218,7 +219,18 @@ function _layer_names(ds, var_names, vars_dict, attrs_dict) push!(standard_names, n) end dim_names = CDM.dimnames(ds) - return setdiff(var_names, dim_names, bounds_names, geometry_names, grid_mapping_names, coordinate_names, units_metadata_names, node_coordinate_names, standard_names) + return setdiff(var_names, + dim_names, + bounds_names, + quantization_names, + climatology_names, + geometry_names, + grid_mapping_names, + coordinate_names, + units_metadata_names, + node_coordinate_names, + standard_names + ) end function _cdm_coordinates(attr) @@ -283,8 +295,7 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) # Check for a grid mapping variable crs = if haskey(attr, "grid_mapping") grid_mapping_key = pop!(attr, "grid_mapping") - crs = get!(() -> _cf_crs(ds, grid_mapping_key), crs_dict, grid_mapping_key) - crs + get!(() -> _cf_crs(ds, grid_mapping_key), crs_dict, grid_mapping_key) end # Loop over the dimensions of this layer, adding missing dims to dim_dict output_layerdims_vec[i] = map(dimnames) do dimname @@ -313,16 +324,14 @@ _organise_dataset(ds::AbstractDataset, names, group) = function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_names, dimname, crs) if !isnothing(geometry_key) - geometry_attrs = get!(attrs_dict, geometry_key) do - CDM.attribs(CDM.variable(ds, geometry_key)) - end + geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) # Check that the geometry is for this dimension is_geometry_dimension = if haskey(geometry_attrs, "node_count") # Multi-part geometries - dimname in CDM.dimnames(vars_dict[geometry_attrs["node_count"]]) + dimname in CDM.dimnames(_get_var!(ds, vars_dict, geometry_attrs["node_count"])) else # Points - dimname in CDM.dimnames(vars_dict[first(_cf_node_coordinate_names(geometry_attrs))]) + dimname in CDM.dimnames(_get_var!(ds, vars_dict, first(_cf_node_coordinate_names(geometry_attrs)))) end geoms = get!(geometry_dict, geometry_key) do _cf_geometry_decode(ds, geometry_attrs) @@ -339,29 +348,29 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_ end if haskey(ds, dimname) # The dimension has a matching variable - dim_attrs = attrs_dict[dimname] + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) D = _cdm_dimtype(dim_attrs, dimname) - lookup = _cdm_lookup(ds, dim_attrs, dimname, D) + lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) D(lookup) else # No matching variable was found, so we have something more complicated # Get all the coordinates for this dimension dimension_coord_names = filter(coord_names) do coordname - dimnames = CDM.dimnames(vars_dict[coordname]) + dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) length(dimnames) == 1 && only(dimnames) == dimname end if length(dimension_coord_names) == 1 # If there is only one coordinate, just use it as the dimension # This should rarely happen linked_dimname = dimension_coord_names[1] - dim_attrs = attrs_dict[linked_dimname] + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, linked_dimname) D = _cdm_dimtype(dim_attrs, linked_dimname) lookup = _cdm_lookup(ds, dim_attrs, linked_dimname, D, crs) D(lookup) elseif length(dimension_coord_names) > 1 # Generate a MergedLookup for multi-coordinate dimensions dims = map(dimension_coord_names) do d - dim_attrs = attrs_dict[d] + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, d) D = _cdm_dimtype(dim_attrs, d) lookup = _cdm_lookup(ds, dim_attrs, d, D, crs) D(lookup) @@ -383,7 +392,8 @@ end # TODO don't load all keys here with _layers _name_or_firstname(ds::AbstractDataset, name) = Symbol(name) -function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=_layer_names(ds)) +function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}) + names = _layer_names(ds) if length(names) > 0 return Symbol(first(names)) else @@ -417,14 +427,14 @@ end # _cdm_lookup # Generate a `Lookup` from a CDM dim. -function _cdm_lookup(ds::AbstractDataset, attr, dimname, D::Type) +function _cdm_lookup(ds::AbstractDataset, attr, dimname, D::Type, crs) var = ds[dimname] data = Missings.disallowmissing(collect(var)) metadata = _metadatadict(sourcetrait(ds), attr) - return _cdm_lookup(data, ds, var, attr, dimname, D, metadata) + return _cdm_lookup(data, ds, var, attr, dimname, D, metadata, crs) end # For unknown types we just make a Categorical lookup -function _cdm_lookup(data::AbstractArray, ds::AbstractDataset, var, attr, dimname, D::Type, metadata) +function _cdm_lookup(data::AbstractArray, ds::AbstractDataset, var, attr, dimname, D::Type, metadata, crs) Categorical(data; order=Unordered(), metadata=metadata) end # For Number and AbstractTime we generate order/span/sampling @@ -432,7 +442,7 @@ end # on coordinate variables in a file and propagates here. function _cdm_lookup( data::AbstractArray{<:Union{Missing,Number,Dates.AbstractTime}}, - ds::AbstractDataset, var, attr, dimname, D::Type, metadata, + ds::AbstractDataset, var, attr, dimname, D::Type, metadata, crs ) # Assume the locus is at the center of the cell if boundaries aren't provided. # http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries @@ -464,31 +474,31 @@ function _cdm_lookup( Explicit(boundsmatrix), Intervals(locus) end end - return _cdm_lookup(data, D, order, span, sampling, metadata) + return _cdm_lookup(data, D, order, span, sampling, metadata, crs) end # For X and Y use a Mapped <: AbstractSampled lookup function _cdm_lookup( - data, D::Type{<:Union{<:XDim,<:YDim}}, order::Order, span, sampling, metadata + data, D::Type{<:Union{<:XDim,<:YDim}}, order::Order, span, sampling, metadata, crs ) # If units are degrees north/east, use EPSG:4326 as the mapped crs units = get(metadata, "units", "") - mappedcrs = if isnokw(mappedcrs) && (units in CF_DEGREES_NORTH || units in CF_DEGREES_EAST) + mappedcrs = if (units in CF_DEGREES_NORTH || units in CF_DEGREES_EAST) EPSG(4326) end # Additionally, crs and mappedcrs should be identical for Regular lookups - crs = if span isa Regular - mappedcrs + if isnokwornothing(crs) && span isa Regular + crs = mappedcrs end dim = DD.basetypeof(D)() return Mapped(data; order, span, sampling, metadata, crs, mappedcrs, dim) end # Band dims have a Categorical lookup, with order # This is not CF, just for consistency with GDAL -function _cdm_lookup(data, D::Type{<:Band}, order::Order, span, sampling, metadata) +function _cdm_lookup(data, D::Type{<:Band}, order::Order, span, sampling, metadata, crs) Categorical(data, order, metadata) end # Otherwise use a Sampled lookup -function _cdm_lookup(data, D::Type, order::Order, span, sampling, metadata) +function _cdm_lookup(data, D::Type, order::Order, span, sampling, metadata, crs) Sampled(data, order, span, sampling, metadata) end @@ -830,4 +840,77 @@ function _cf_crs(ds, grid_mapping_key::String) else nothing # unparseable - TODO get CFProjections.jl to do it. end +end + +_get_var!(ds, dict, key) = get!(() -> CDM.variable(ds, key), dict, key) +_get_attr!(ds, var_dict, attr_dict, key) = + get!(attr_dict, key) do + var = _get_var!(ds, var_dict, key) + CDM.attribs(var) + end + +# TODO just the core of this function here +function _raster(ds::CDM.AbstractDataset; + dims=nokw, + refdims=(), + name=nokw, + group=nokw, + filename=filename(ds), + metadata=nokw, + missingval=nokw, + crs=nokw, + mappedcrs=nokw, + source=sourcetrait(ds), + replace_missing=nokw, + coerce=convert, + scaled::Union{Bool,NoKW}=nokw, + verbose::Bool=true, + write::Bool=false, + lazy::Bool=false, + dropband::Bool=true, + checkmem::Bool=CHECKMEM[], + raw::Bool=false, + mod=nokw, + f=identity, +)::Raster + _maybe_warn_replace_missing(replace_missing) + # `raw` option will ignore `scaled` and `missingval` + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) + # TODO use a clearer name than filekey + name1 = filekey(ds, name) + (; names_vec, layers_vec, layerdims_vec, layermetadata_vec, dim_dict) = + _organise_dataset(ds, [string(name1)], group) + var = layers_vec[1] + # Detect the source from filename + # Open the dataset and variable specified by `name`, at `group` level if provided + # At this level we do not apply `mod`. + metadata_out = isnokw(metadata) ? Dict(layermetadata_vec[1]) : metadata + missingval_out = _read_missingval_pair(var, metadata_out, missingval) + pop!(metadata_out, "_FillValue", nothing) + pop!(metadata_out, "missing_value", nothing) + # Generate mod for scaling + mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval_out; scaled, coerce) : mod + # Define or load the data array + data_out = if lazy + # Define a lay FileArray + FileArray{typeof(source)}(var, filename; + name=name1, group, mod, write + ) + else + modvar = _maybe_modify(var, mod) + # Check the data will fit in memory + checkmem && _checkobjmem(modvar) + # Move the modified array to memory + Array(modvar) + end + # Generate dims + dims_out = isnokw(dims) ? DD.dims(Tuple(values(dim_dict)), layerdims_vec[1]) : format(dims, data_out) + # Return the data to the parent function + mv_outer = _outer_missingval(mod) + # Use name or an empty Symbol + name_out = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) + # Define the raster + raster = Raster(data_out, dims_out, refdims, name_out, metadata_out, missingval_out) + # Maybe drop a single band dimension + return _maybe_drop_single_band(raster, dropband, lazy) end \ No newline at end of file diff --git a/test/sources/commondatamodel.jl b/test/sources/commondatamodel.jl index 0cda80779..c48bebecd 100644 --- a/test/sources/commondatamodel.jl +++ b/test/sources/commondatamodel.jl @@ -3,14 +3,14 @@ import Rasters: ForwardOrdered, ReverseOrdered, Regular @testset "step" begin # test if regular indices are correctly rounded f32_indices = range(0.075f0, 10.075f0; step = 0.05f0) |> collect - @test Rasters._cdmspan(f32_indices, ForwardOrdered())[1] === Regular(0.05) + @test Rasters._cdm_span(f32_indices, ForwardOrdered())[1] === Regular(0.05) f32_indices_rev = range(10.075f0, 0.075f0; step = -0.05f0) |> collect - @test Rasters._cdmspan(f32_indices_rev, ReverseOrdered())[1] === Regular(-0.05) + @test Rasters._cdm_span(f32_indices_rev, ReverseOrdered())[1] === Regular(-0.05) # test if regular indices are not rounded when they should not indices_one_third = range(0, 10; length = 31) |> collect - @test Rasters._cdmspan(indices_one_third, ForwardOrdered())[1] === Regular(1/3) + @test Rasters._cdm_span(indices_one_third, ForwardOrdered())[1] === Regular(1/3) # test when reading a file ras = Raster(rand(X(f32_indices), Y(indices_one_third))) From ed7ca3b8c91d614cad33e932e655d8f5e03b8a17 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 5 May 2025 01:46:04 +0200 Subject: [PATCH 12/43] add cf test files --- src/sources/commondatamodel.jl | 153 +++++++++++------- src/stack.jl | 8 +- .../2_1_string_variable_representation.ncgen | 10 ++ test/cf/3_1_units_metadata.ncgen | 15 ++ test/cf/5_10_british_national_grid.ncgen | 51 ++++++ test/cf/5_11_wgs84_crs_wkt.ncgen | 18 +++ .../cf/5_14_scalar_coordinate_variables.ncgen | 32 ++++ ...ith_independent_coordinate_variables.ncgen | 24 +++ ..._pole_and_scalar_coordinate_variable.ncgen | 36 +++++ ...5_1_independent_coordinate_variables.ncgen | 23 +++ ...two_dimensional_coordinate_variables.ncgen | 28 ++++ test/cf/5_3_reduced_horizontal_grid.ncgen | 19 +++ test/cf/5_6_rotated_pole_grid.ncgen | 33 ++++ test/cf/5_8_spherical_earth.ncgen | 16 ++ test/cf/5_9_wgs84.ncgen | 17 ++ test/cf/landsea.ncgen | 17 ++ test/sources/cf.jl | 28 ++++ 17 files changed, 471 insertions(+), 57 deletions(-) create mode 100644 test/cf/2_1_string_variable_representation.ncgen create mode 100644 test/cf/3_1_units_metadata.ncgen create mode 100644 test/cf/5_10_british_national_grid.ncgen create mode 100644 test/cf/5_11_wgs84_crs_wkt.ncgen create mode 100644 test/cf/5_14_scalar_coordinate_variables.ncgen create mode 100644 test/cf/5_15_domain_with_independent_coordinate_variables.ncgen create mode 100644 test/cf/5_16_domain_with_rotaed_pole_and_scalar_coordinate_variable.ncgen create mode 100644 test/cf/5_1_independent_coordinate_variables.ncgen create mode 100644 test/cf/5_2_two_dimensional_coordinate_variables.ncgen create mode 100644 test/cf/5_3_reduced_horizontal_grid.ncgen create mode 100644 test/cf/5_6_rotated_pole_grid.ncgen create mode 100644 test/cf/5_8_spherical_earth.ncgen create mode 100644 test/cf/5_9_wgs84.ncgen create mode 100644 test/cf/landsea.ncgen create mode 100644 test/sources/cf.jl diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 819c5e27a..e155ee372 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -192,8 +192,8 @@ function _layer_names(ds, var_names, vars_dict, attrs_dict) var = _get_var!(ds, vars_dict, n) attr = _get_attr!(ds, vars_dict, attrs_dict, n) # Coordinate variables - haskey(attr, "coordinates") && setdiff(coordinate_names, _cdm_coordinates(attr)) - haskey(attr, "node_coordinates") && setdiff(node_coordinate_names, _cdm_node_coordinates(attr)) + haskey(attr, "coordinates") && append!(coordinate_names, collect(_cdm_coordinates(attr))) + haskey(attr, "node_coordinates") && append!(node_coordinate_names, collect(_cdm_node_coordinates(attr))) # Bounds variables haskey(attr, "bounds") && push!(bounds_names, attr["bounds"]) # Quantization variables (8.4.2) @@ -210,8 +210,8 @@ function _layer_names(ds, var_names, vars_dict, attrs_dict) haskey(geometry_attr, "part_node_count") && push!(geometry_names, geometry_attr["part_node_count"]) haskey(geometry_attr, "interior_ring") && push!(geometry_names, geometry_attr["interior_ring"]) end - # Grid mapping variables - haskey(attr, "grid_mapping") && push!(grid_mapping_names, attr["grid_mapping"]) + # Grid mapping variables - there may be multiple + haskey(attr, "grid_mapping") && append!(grid_mapping_names, _grid_mapping_keys(attr["grid_mapping"])) # Units metadata variables haskey(attr, "units_metadata") && push!(units_metadata_names, attr["units_metadata"]) # Common Standard names - very likely coordinates @@ -310,7 +310,6 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) output_layers_vec[i] = var output_attrs_vec[i] = attr end - # TODO output return (; names_vec=layer_names, layers_vec=output_layers_vec, @@ -323,51 +322,76 @@ _organise_dataset(ds::AbstractDataset, names, group) = _organise_dataset(ds.group[group], names, nokw) function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_names, dimname, crs) - if !isnothing(geometry_key) - geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) - # Check that the geometry is for this dimension - is_geometry_dimension = if haskey(geometry_attrs, "node_count") - # Multi-part geometries - dimname in CDM.dimnames(_get_var!(ds, vars_dict, geometry_attrs["node_count"])) + # if !isnothing(geometry_key) + # geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) + # # Check that the geometry is for this dimension + # is_geometry_dimension = if haskey(geometry_attrs, "node_count") + # # Multi-part geometries + # dimname in CDM.dimnames(_get_var!(ds, vars_dict, geometry_attrs["node_count"])) + # else + # # Points + # dimname in CDM.dimnames(_get_var!(ds, vars_dict, first(_cf_node_coordinate_names(geometry_attrs)))) + # end + # geoms = get!(geometry_dict, geometry_key) do + # _cf_geometry_decode(ds, geometry_attrs) + # end + # if is_geometry_dimension + # dimension_coord_names = filter(coord_names) do coordname + # dimnames = CDM.dimnames(vars_dict[coordname]) + # length(dimnames) == 1 && only(dimnames) == dimname + # end + # lookup = GeometryLookup(geoms, (X(), Y()); crs) + # D = Geometry + # return D(lookup) + # end + # end + if haskey(ds, dimname) && (isempty(coord_names) || dimname in coord_names) + # The dimension has a matching variable, and it's one of the coordinates + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) + D = _cdm_dimtype(dim_attrs, dimname) + lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) + return D(lookup) + end + # No matching variable was found, so we have something more complicated + # Get all the coordinates for this dimension + dimension_coord_names = filter(coord_names) do coordname + coord_dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) + @show coordname coord_dimnames + dimname in coord_dimnames + end + @show dimension_coord_names + if length(dimension_coord_names) == 1 + # If there is only one coordinate, just use it as the dimension + # This should rarely happen + linked_dimname = dimension_coord_names[1] + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, linked_dimname) + dim_var = _get_var!(ds, vars_dict, linked_dimname) + D = _cdm_dimtype(dim_attrs, linked_dimname) + if ndims(dim_var) > 1 && eltype(dim_var) <: Char + chars = collect(dim_var) + # Join char dimension as String, stripping null terminators + strings = strip.(join.(eachslice(chars; dims=2)), '\0') + lookup = Categorical(strings; order=Unordered()) + return D(lookup) else - # Points - dimname in CDM.dimnames(_get_var!(ds, vars_dict, first(_cf_node_coordinate_names(geometry_attrs)))) - end - geoms = get!(geometry_dict, geometry_key) do - _cf_geometry_decode(ds, geometry_attrs) - end - if is_geometry_dimension - dimension_coord_names = filter(coord_names) do coordname - dimnames = CDM.dimnames(vars_dict[coordname]) - length(dimnames) == 1 && only(dimnames) == dimname - end - lookup = GeometryLookup(geoms, (X(), Y()); crs) - D = Geometry + lookup = _cdm_lookup(ds, dim_attrs, linked_dimname, D, crs) return D(lookup) end end - if haskey(ds, dimname) - # The dimension has a matching variable - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) - D = _cdm_dimtype(dim_attrs, dimname) - lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) - D(lookup) - else - # No matching variable was found, so we have something more complicated - # Get all the coordinates for this dimension - dimension_coord_names = filter(coord_names) do coordname - dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) - length(dimnames) == 1 && only(dimnames) == dimname + if length(dimension_coord_names) > 1 + dim_vars = map(dimension_coord_names) do d + _get_var!(ds, vars_dict, d) end - if length(dimension_coord_names) == 1 - # If there is only one coordinate, just use it as the dimension - # This should rarely happen - linked_dimname = dimension_coord_names[1] - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, linked_dimname) - D = _cdm_dimtype(dim_attrs, linked_dimname) - lookup = _cdm_lookup(ds, dim_attrs, linked_dimname, D, crs) - D(lookup) - elseif length(dimension_coord_names) > 1 + is_multidimensional = any(v -> length(CDM.dimnames(v)) > 1, dim_vars) + @show is_multidimensional + if is_multidimensional + # Rotations + # For now we don't use ArrayLookup + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) + D = _cdm_dimtype(dim_attrs, dimname) + lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) + return D(lookup) + else # Generate a MergedLookup for multi-coordinate dimensions dims = map(dimension_coord_names) do d dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, d) @@ -376,18 +400,17 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_ D(lookup) end D = _cdm_dimtype(NoMetadata(), dimname) - merged_data = collect(zip(lookup(dims)...)) + merged_data = collect(zip(DD.lookup.(dims)...)) # Use a MergedLookup with all dimensions - D(MergedLookup(merged_data, dims)) - else - # A matching variable doesn't exist. - # Its a "discrete axis" (4.5) so NoLookup - len = CDM.dim(ds, dimname) - lookup = NoLookup(Base.OneTo(len)) - D = _cdm_dimtype(NoMetadata(), dimname) - D(lookup) + return D(MergedLookup(merged_data, dims)) end end + # A matching variable doesn't exist. + # Its a "discrete axis" (4.5) so NoLookup + len = CDM.dim(ds, dimname) + lookup = NoLookup(Base.OneTo(len)) + D = _cdm_dimtype(NoMetadata(), dimname) + D(lookup) end # TODO don't load all keys here with _layers @@ -750,7 +773,6 @@ end function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:GeometryLookup}, dimname) l = lookup(dim) geom = _cf_geometry_encode(lookup(dim)) - @show dims(l) # Define base geometry attributes coord_names_raw = map(dims(l)) do d _cf_name(d) @@ -829,6 +851,11 @@ end _cf_crs(ds, grid_mapping_key::Nothing) = nothing function _cf_crs(ds, grid_mapping_key::String) + # TODO handle multi-crs + if occursin(' ', grid_mapping_key) + _split_grid_mapping_key(grid_mapping_key)[1][1] + end + haskey(ds, grid_mapping_key) || return nothing grid_mapping_var = CDM.variable(ds, grid_mapping_key) grid_mapping_attrib = CDM.attribs(grid_mapping_var) if haskey(grid_mapping_attrib, "crs_wkt") @@ -842,6 +869,22 @@ function _cf_crs(ds, grid_mapping_key::String) end end +function _split_grid_mapping_key(grid_mapping_key::String) + items = split(grid_mapping_key, ' ') + starts = findall(contains(':'), items) + stops = append!(starts[2:end] .- 1, length(items)) + map(starts, stops) do s, p + items[s][1:end-1] => items[s+1:p] + end +end +function _grid_mapping_keys(grid_mapping_key::String) + if occursin(' ', grid_mapping_key) + first.(_split_grid_mapping_key(grid_mapping_key)) + else + [grid_mapping_key] + end +end + _get_var!(ds, dict, key) = get!(() -> CDM.variable(ds, key), dict, key) _get_attr!(ds, var_dict, attr_dict, key) = get!(attr_dict, key) do diff --git a/src/stack.jl b/src/stack.jl index b3066350b..85933f4e1 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -492,7 +492,7 @@ function RasterStack(ds; eltype_vec = map(eltype, layers_vec) mod_vec = _stack_mods(eltype_vec, layermetadata_vec, missingval_vec; scaled, coerce) data = if lazy - vars = ntuple(i -> layers.vars[i], length(name)) + vars = ntuple(i -> layers_vec[i], length(name)) mods = ntuple(i -> mod_vec[i], length(name)) FileStack{typeof(source)}(ds, filename; name, group, mods, vars) else @@ -614,7 +614,11 @@ function _sort_by_layerdims(dims, layerdims::Vector) currentorder = _merge_dimorder(ldims, currentorder) end end - return DD.dims(Tuple(dims), currentorder) + if isnothing(currentorder) + return () + else + return DD.dims(Tuple(dims), currentorder) + end end _merge_dimorder(neworder, ::Nothing) = neworder diff --git a/test/cf/2_1_string_variable_representation.ncgen b/test/cf/2_1_string_variable_representation.ncgen new file mode 100644 index 000000000..5866f536f --- /dev/null +++ b/test/cf/2_1_string_variable_representation.ncgen @@ -0,0 +1,10 @@ +netcdf string_variable_representation { +dimensions: + strings = 30 ; + strlen = 10 ; +variables: + char char_variable(strings,strlen) ; + char_variable:long_name = "strings of type char" ; + string str_variable(strings) ; + str_variable:long_name = "strings of type string" ; +} \ No newline at end of file diff --git a/test/cf/3_1_units_metadata.ncgen b/test/cf/3_1_units_metadata.ncgen new file mode 100644 index 000000000..74e1bca5e --- /dev/null +++ b/test/cf/3_1_units_metadata.ncgen @@ -0,0 +1,15 @@ +netcdf units_metadata { +variables: + float Tonscale; + Tonscale:long_name="global-mean surface temperature"; + Tonscale:standard_name="surface_temperature"; + Tonscale:units="degC"; + Tonscale:units_metadata="temperature: on_scale"; + Tonscale:cell_methods="area: mean"; + float Tdifference; + Tdifference:long_name="change in global-mean surface temperature relative to pre-industrial"; + Tdifference:standard_name="surface_temperature"; + Tdifference:units="degC"; + Tdifference:units_metadata="temperature: difference"; + Tdifference:cell_methods="area: mean"; +} \ No newline at end of file diff --git a/test/cf/5_10_british_national_grid.ncgen b/test/cf/5_10_british_national_grid.ncgen new file mode 100644 index 000000000..682ce7d0a --- /dev/null +++ b/test/cf/5_10_british_national_grid.ncgen @@ -0,0 +1,51 @@ +netcdf british_national_grid { +dimensions: + z = 100; + y = 100000 ; + x = 100000 ; + variables: + double x(x) ; + x:standard_name = "projection_x_coordinate" ; + x:long_name = "Easting" ; + x:units = "m" ; + double y(y) ; + y:standard_name = "projection_y_coordinate" ; + y:long_name = "Northing" ; + y:units = "m" ; + double z(z) ; + z:standard_name = "height_above_reference_ellipsoid" ; + z:long_name = "height_above_osgb_newlyn_datum_masl" ; + z:units = "m" ; + double lat(y, x) ; + lat:standard_name = "latitude" ; + lat:units = "degrees_north" ; + double lon(y, x) ; + lon:standard_name = "longitude" ; + lon:units = "degrees_east" ; + float temp(z, y, x) ; + temp:standard_name = "air_temperature" ; + temp:units = "K" ; + temp:coordinates = "lat lon" ; + temp:grid_mapping = "crsOSGB: x y crsWGS84: lat lon" ; + float pres(z, y, x) ; + pres:standard_name = "air_pressure" ; + pres:units = "Pa" ; + pres:coordinates = "lat lon" ; + pres:grid_mapping = "crsOSGB: x y crsWGS84: lat lon" ; + int crsOSGB ; + crsOSGB:grid_mapping_name = "transverse_mercator"; + crsOSGB:semi_major_axis = 6377563.396 ; + crsOSGB:inverse_flattening = 299.3249646 ; + crsOSGB:longitude_of_prime_meridian = 0.0 ; + crsOSGB:latitude_of_projection_origin = 49.0 ; + crsOSGB:longitude_of_central_meridian = -2.0 ; + crsOSGB:scale_factor_at_central_meridian = 0.9996012717 ; + crsOSGB:false_easting = 400000.0 ; + crsOSGB:false_northing = -100000.0 ; + crsOSGB:unit = "metre" ; + int crsWGS84 ; + crsWGS84:grid_mapping_name = "latitude_longitude"; + crsWGS84:longitude_of_prime_meridian = 0.0 ; + crsWGS84:semi_major_axis = 6378137.0 ; + crsWGS84:inverse_flattening = 298.257223563 ; +} \ No newline at end of file diff --git a/test/cf/5_11_wgs84_crs_wkt.ncgen b/test/cf/5_11_wgs84_crs_wkt.ncgen new file mode 100644 index 000000000..65650401c --- /dev/null +++ b/test/cf/5_11_wgs84_crs_wkt.ncgen @@ -0,0 +1,18 @@ +netcdf wgs84_datum_crs_wkt { +dimensions: + lat = 18 ; + lon = 36 ; +variables: + double lat(lat) ; + double lon(lon) ; + float temp(lat, lon) ; + temp:long_name = "temperature" ; + temp:units = "K" ; + temp:grid_mapping = "crs" ; + int crs ; + crs:grid_mapping_name = "latitude_longitude"; + crs:longitude_of_prime_meridian = 0.0 ; + crs:semi_major_axis = 6378137.0 ; + crs:inverse_flattening = 298.257223563 ; + crs:crs_wkt = "GEODCRS[\"WGS 84\", DATUM[\"World Geodetic System 1984\", ELLIPSOID[\"WGS 84\",6378137,298.257223563, LENGTHUNIT[\"metre\",1.0]]], PRIMEM[\"Greenwich\",0], CS[ellipsoidal,3], AXIS[\"(lat)\",north,ANGLEUNIT[\"degree\",0.0174532925199433]], AXIS[\"(lon)\",east,ANGLEUNIT[\"degree\",0.0174532925199433]], AXIS[\"ellipsoidal height (h)\",up,LENGTHUNIT[\"metre\",1.0]]]" ; +} \ No newline at end of file diff --git a/test/cf/5_14_scalar_coordinate_variables.ncgen b/test/cf/5_14_scalar_coordinate_variables.ncgen new file mode 100644 index 000000000..3b02db0ca --- /dev/null +++ b/test/cf/5_14_scalar_coordinate_variables.ncgen @@ -0,0 +1,32 @@ +netcdf scalar_coordinate_variables { +dimensions: + lat = 180 ; + lon = 360 ; + time = UNLIMITED ; +variables: + double atime ; + atime:standard_name = "forecast_reference_time" ; + atime:units = "hours since 1999-01-01 00:00" ; + double time(time) ; + time:standard_name = "time" ; + time:units = "hours since 1999-01-01 00:00" ; + double lon(lon) ; + lon:long_name = "station longitude"; + lon:units = "degrees_east"; + double lat(lat) ; + lat:long_name = "station latitude" ; + lat:units = "degrees_north" ; + double p500 ; + p500:long_name = "pressure" ; + p500:units = "hPa" ; + p500:positive = "down" ; + float height(time,lat,lon) ; + height:long_name = "geopotential height" ; + height:standard_name = "geopotential_height" ; + height:units = "m" ; + height:coordinates = "atime p500" ; +data: + time = 6., 12., 18., 24. ; + atime = 0. ; + p500 = 500. ; +} \ No newline at end of file diff --git a/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen b/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen new file mode 100644 index 000000000..b6992d004 --- /dev/null +++ b/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen @@ -0,0 +1,24 @@ +netcdf domain_with_independent_coordinate_variables { +dimensions: + lat = 18 ; + lon = 36 ; + pres = 15 ; + time = 4 ; + +variables: + char domain ; + domain:dimensions = "time pres lat lon" ; + domain:long_name = "Domain with independent coordinate variables" ; + float lon(lon) ; + lon:long_name = "longitude" ; + lon:units = "degrees_east" ; + float lat(lat) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; + float pres(pres) ; + pres:long_name = "pressure" ; + pres:units = "hPa" ; + double time(time) ; + time:long_name = "time" ; + time:units = "days since 1990-1-1 0:0:0" ; +} \ No newline at end of file diff --git a/test/cf/5_16_domain_with_rotaed_pole_and_scalar_coordinate_variable.ncgen b/test/cf/5_16_domain_with_rotaed_pole_and_scalar_coordinate_variable.ncgen new file mode 100644 index 000000000..749e5cb83 --- /dev/null +++ b/test/cf/5_16_domain_with_rotaed_pole_and_scalar_coordinate_variable.ncgen @@ -0,0 +1,36 @@ +netcdf domain_with_rotated_pole_and_scalar_coordinate_variable { +dimensions: + rlon = 128 ; + rlat = 64 ; + lev = 18 ; +variables: + char domain ; + domain:dimensions = "lev rlat rlon" ; + domain:coordinates = "lon lat time" ; + domain:grid_mapping = "rotated_pole" ; + domain:long_name = "Domain with grid mapping and scalar coordinate" ; + char rotated_pole ; + rotated_pole:grid_mapping_name = "rotated_latitude_longitude" ; + rotated_pole:grid_north_pole_latitude = 32.5 ; + rotated_pole:grid_north_pole_longitude = 170. ; + double time ; + time:standard_name = "time" ; + time:units = "days since 2000-12-01 00:00" ; + float rlon(rlon) ; + rlon:long_name = "longitude in rotated pole grid" ; + rlon:units = "degrees" ; + rlon:standard_name = "grid_longitude" ; + float rlat(rlat) ; + rlat:long_name = "latitude in rotated pole grid" ; + rlat:units = "degrees" ; + rlat:standard_name = "grid_latitude" ; + float lev(lev) ; + lev:long_name = "pressure level" ; + lev:units = "hPa" ; + float lon(rlat,rlon) ; + lon:long_name = "longitude" ; + lon:units = "degrees_east" ; + float lat(rlat,rlon) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; +} \ No newline at end of file diff --git a/test/cf/5_1_independent_coordinate_variables.ncgen b/test/cf/5_1_independent_coordinate_variables.ncgen new file mode 100644 index 000000000..fe4f9ce53 --- /dev/null +++ b/test/cf/5_1_independent_coordinate_variables.ncgen @@ -0,0 +1,23 @@ +netcdf independent_coordinate_variables { +dimensions: + lat = 18 ; + lon = 36 ; + pres = 15 ; + time = 4 ; +variables: + float xwind(time,pres,lat,lon) ; + xwind:long_name = "zonal wind" ; + xwind:units = "m/s" ; + float lon(lon) ; + lon:long_name = "longitude" ; + lon:units = "degrees_east" ; + float lat(lat) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; + float pres(pres) ; + pres:long_name = "pressure" ; + pres:units = "hPa" ; + double time(time) ; + time:long_name = "time" ; + time:units = "days since 1990-1-1 0:0:0" ; +} \ No newline at end of file diff --git a/test/cf/5_2_two_dimensional_coordinate_variables.ncgen b/test/cf/5_2_two_dimensional_coordinate_variables.ncgen new file mode 100644 index 000000000..4318f7097 --- /dev/null +++ b/test/cf/5_2_two_dimensional_coordinate_variables.ncgen @@ -0,0 +1,28 @@ +netcdf two_dimensional_coordinate_variables { +dimensions: + xc = 128 ; + yc = 64 ; + lev = 18 ; +variables: + float T(lev,yc,xc) ; + T:long_name = "temperature" ; + T:units = "K" ; + T:coordinates = "lon lat" ; + float xc(xc) ; + xc:axis = "X" ; + xc:long_name = "x-coordinate in Cartesian system" ; + xc:units = "m" ; + float yc(yc) ; + yc:axis = "Y" ; + yc:long_name = "y-coordinate in Cartesian system" ; + yc:units = "m" ; + float lev(lev) ; + lev:long_name = "pressure level" ; + lev:units = "hPa" ; + float lon(yc,xc) ; + lon:long_name = "longitude" ; + lon:units = "degrees_east" ; + float lat(yc,xc) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; +} \ No newline at end of file diff --git a/test/cf/5_3_reduced_horizontal_grid.ncgen b/test/cf/5_3_reduced_horizontal_grid.ncgen new file mode 100644 index 000000000..9a6d229bf --- /dev/null +++ b/test/cf/5_3_reduced_horizontal_grid.ncgen @@ -0,0 +1,19 @@ +netcdf reduced_horizontal_grid { +dimensions: + londim = 128 ; + latdim = 64 ; + rgrid = 6144 ; +variables: + float PS(rgrid) ; + PS:long_name = "surface pressure" ; + PS:units = "Pa" ; + PS:coordinates = "lon lat" ; + float lon(rgrid) ; + lon:long_name = "longitude" ; + lon:units = "degrees_east" ; + float lat(rgrid) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; + int rgrid(rgrid); + rgrid:compress = "latdim londim"; +} \ No newline at end of file diff --git a/test/cf/5_6_rotated_pole_grid.ncgen b/test/cf/5_6_rotated_pole_grid.ncgen new file mode 100644 index 000000000..00386ce03 --- /dev/null +++ b/test/cf/5_6_rotated_pole_grid.ncgen @@ -0,0 +1,33 @@ +netcdf rotated_pole_grid { +dimensions: + rlon = 128 ; + rlat = 64 ; + lev = 18 ; +variables: + float T(lev,rlat,rlon) ; + T:long_name = "temperature" ; + T:units = "K" ; + T:coordinates = "lon lat" ; + T:grid_mapping = "rotated_pole" ; + char rotated_pole ; + rotated_pole:grid_mapping_name = "rotated_latitude_longitude" ; + rotated_pole:grid_north_pole_latitude = 32.5 ; + rotated_pole:grid_north_pole_longitude = 170. ; + float rlon(rlon) ; + rlon:long_name = "longitude in rotated pole grid" ; + rlon:units = "degrees" ; + rlon:standard_name = "grid_longitude"; + float rlat(rlat) ; + rlat:long_name = "latitude in rotated pole grid" ; + rlat:units = "degrees" ; + rlat:standard_name = "grid_latitude"; + float lev(lev) ; + lev:long_name = "pressure level" ; + lev:units = "hPa" ; + float lon(rlat,rlon) ; + lon:long_name = "longitude" ; + lon:units = "degrees_east" ; + float lat(rlat,rlon) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; +} \ No newline at end of file diff --git a/test/cf/5_8_spherical_earth.ncgen b/test/cf/5_8_spherical_earth.ncgen new file mode 100644 index 000000000..f708c891f --- /dev/null +++ b/test/cf/5_8_spherical_earth.ncgen @@ -0,0 +1,16 @@ +netcdf spherical_earth { +dimensions: + lat = 18 ; + lon = 36 ; +variables: + double lat(lat) ; + double lon(lon) ; + float temp(lat, lon) ; + temp:long_name = "temperature" ; + temp:units = "K" ; + temp:grid_mapping = "crs" ; + int crs ; + crs:grid_mapping_name = "latitude_longitude" ; + crs:semi_major_axis = 6371000.0 ; + crs:inverse_flattening = 0 ; +} \ No newline at end of file diff --git a/test/cf/5_9_wgs84.ncgen b/test/cf/5_9_wgs84.ncgen new file mode 100644 index 000000000..fad666aea --- /dev/null +++ b/test/cf/5_9_wgs84.ncgen @@ -0,0 +1,17 @@ +netcdf wgs84_datum { +dimensions: + lat = 18 ; + lon = 36 ; +variables: + double lat(lat) ; + double lon(lon) ; + float temp(lat, lon) ; + temp:long_name = "temperature" ; + temp:units = "K" ; + temp:grid_mapping = "crs" ; + int crs ; + crs:grid_mapping_name = "latitude_longitude"; + crs:longitude_of_prime_meridian = 0.0 ; + crs:semi_major_axis = 6378137.0 ; + crs:inverse_flattening = 298.257223563 ; +} \ No newline at end of file diff --git a/test/cf/landsea.ncgen b/test/cf/landsea.ncgen new file mode 100644 index 000000000..639f19516 --- /dev/null +++ b/test/cf/landsea.ncgen @@ -0,0 +1,17 @@ +netcdf landsea { +dimensions: + lat=73; + lon=96; + maxlen=20; + ls=2; +variables: + float surface_temperature(lat,lon); + surface_temperature:cell_methods="area: mean where land"; + float surface_upward_sensible_heat_flux(ls,lat,lon); + surface_upward_sensible_heat_flux:coordinates="land_sea"; + surface_upward_sensible_heat_flux:cell_methods="area: mean where land_sea"; + char land_sea(ls,maxlen); + land_sea:standard_name="area_type"; +data: + land_sea="land","sea"; +} \ No newline at end of file diff --git a/test/sources/cf.jl b/test/sources/cf.jl new file mode 100644 index 000000000..d11518f94 --- /dev/null +++ b/test/sources/cf.jl @@ -0,0 +1,28 @@ +using NearestNeighbors +# Build all ncgen files +cfdir = joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "cf") +test_paths = map(filter(endswith(".ncgen"), readdir(cfdir))) do input_name + input_path = joinpath(cfdir, input_name) + output_path = joinpath(cfdir, "..", "data", "cf", splitext(input_name)[1] * ".nc") + NetCDF_jll.ncgen() do exe + run(`$exe -k nc4 -b -o $output_path $input_path`) + end + output_path +end +rasters = map(test_paths) do test_path + name = splitext(basename(test_path))[1] + try + name => Raster(test_path; lazy=true) + catch + name => nothing + end +end +rasters[1][2] +rasterstacks = map(test_paths) do test_path + name = splitext(basename(test_path))[1] + name == "5_1_independent_coordinate_variables" && return name => nothing + name == "5_10_british_national_grid" && return name => nothing + println(name) + name => RasterStack(test_path; lazy=true) +end ; +read(last.(rasterstacks)[1].str_variable) \ No newline at end of file From 41ce164694105a26585eb92bc480d8a38ccec951 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 6 May 2025 17:23:22 +0200 Subject: [PATCH 13/43] add more cf examples --- Project.toml | 2 + src/Rasters.jl | 2 + src/show.jl | 21 +-- src/sources/commondatamodel.jl | 133 ++++++++++++------ src/stack.jl | 6 +- ...le_forecasts_from_a_single_analysis.ncgen} | 2 +- ...areas_for_a_sqpherical_geodesic_grid.ncgen | 29 ++++ ...a_domain_with_no_specific_dimensions.ncgen | 12 ++ ...ain_containing_a_timeseries_geometry.ncgen | 40 ++++++ ..._indexed_ragged_array_representation.ncgen | 39 +++++ .../6_1_2_taxon_names_and_identifyers.ncgen | 22 +++ ...ard_heat_transport_in_atlantic_ocean.ncgen | 22 +++ test/cf/6_2_model_level_numbers.ncgen | 14 ++ .../7_10_decadal_averages_for_january.ncgen | 21 +++ ...erature_for_each_hour_of_average_day.ncgen | 43 ++++++ ..._extreme_statistics_and_spell_length.ncgen | 27 ++++ ...r_of_typical_climatological_day copy.ncgen | 43 ++++++ ...y_maximum_daily_precipitation_totals.ncgen | 21 +++ test/cf/7_15_timeseries_with_geometry.ncgen | 51 +++++++ test/cf/7_16_polygons_with_holes.ncgen | 60 ++++++++ ...n-latitude-longitude_horizontal_grid.ncgen | 17 +++ ...etric_coordinate_variable_has_bounds.ncgen | 34 +++++ ..._areas_for_a_spherical_geodesic_grid.ncgen | 28 ++++ .../7_5_methods_applied_to_a_timeseries.ncgen | 27 ++++ ...7_6_surface_air_temperature_variance.ncgen | 19 +++ ...veraged_separately_over_land_and_sea.ncgen | 17 +++ test/cf/7_9_climatological_seasons.ncgen | 22 +++ test/sources/cf.jl | 27 ++-- 28 files changed, 736 insertions(+), 65 deletions(-) rename test/cf/{5_14_scalar_coordinate_variables.ncgen => 5_14_multiple_forecasts_from_a_single_analysis.ncgen} (94%) create mode 100644 test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen create mode 100644 test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen create mode 100644 test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen create mode 100644 test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen create mode 100644 test/cf/6_1_2_taxon_names_and_identifyers.ncgen create mode 100644 test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen create mode 100644 test/cf/6_2_model_level_numbers.ncgen create mode 100644 test/cf/7_10_decadal_averages_for_january.ncgen create mode 100644 test/cf/7_11_temperature_for_each_hour_of_average_day.ncgen create mode 100644 test/cf/7_12_extreme_statistics_and_spell_length.ncgen create mode 100644 test/cf/7_13_temperature_for_each_hour_of_typical_climatological_day copy.ncgen create mode 100644 test/cf/7_14_monthly_maximum_daily_precipitation_totals.ncgen create mode 100644 test/cf/7_15_timeseries_with_geometry.ncgen create mode 100644 test/cf/7_16_polygons_with_holes.ncgen create mode 100644 test/cf/7_2_cells_in_a_non-latitude-longitude_horizontal_grid.ncgen create mode 100644 test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen create mode 100644 test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen create mode 100644 test/cf/7_5_methods_applied_to_a_timeseries.ncgen create mode 100644 test/cf/7_6_surface_air_temperature_variance.ncgen create mode 100644 test/cf/7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea.ncgen create mode 100644 test/cf/7_9_climatological_seasons.ncgen diff --git a/Project.toml b/Project.toml index 9cdf74d0e..59063a9ef 100644 --- a/Project.toml +++ b/Project.toml @@ -22,6 +22,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" @@ -77,6 +78,7 @@ Makie = "0.20, 0.21, 0.22" Missings = "0.4, 1" NCDatasets = "0.13, 0.14" OffsetArrays = "1" +OrderedCollections = "1.8" Plots = "1" ProgressMeter = "1" Proj = "1.7.2" diff --git a/src/Rasters.jl b/src/Rasters.jl index aaf8c65f1..39b1e2504 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -17,6 +17,7 @@ import Adapt, GeometryOps, GeometryOpsCore, OffsetArrays, + OrderedCollections, ProgressMeter, Missings, Mmap, @@ -37,6 +38,7 @@ using DimensionalData: Name, NoName using .Dimensions: StandardIndices, DimTuple using .Lookups: LookupTuple +using OrderedCollections: OrderedDict using Statistics: mean using RecipesBase: @recipe, @series using Base: tail, @propagate_inbounds diff --git a/src/show.jl b/src/show.jl index af5fc6287..6b23594c5 100644 --- a/src/show.jl +++ b/src/show.jl @@ -22,30 +22,35 @@ function print_geo(io, mime, A; blockwidth) show(io, mime, missingval(A)) end printstyled(io, "\n extent: "; color=:light_black) - show(io, mime, Extents.extent(A)) + str = sprint(show, mime, Extents.extent(A)) + _print_restricted(io, str; blockwidth, used=10) if crs(A) !== nothing printstyled(io, "\n crs: "; color=:light_black) str = convert(String, crs(A)) - if length(str) > (blockwidth - 7) - print(io, str[1:min(blockwidth - 10, end)] * "...") - else - print(io, str) - end + _print_restricted(io, str; blockwidth, used=7) end if mappedcrs(A) !== nothing printstyled(io, "\n mappedcrs: "; color=:light_black) - print(io, convert(String, mappedcrs(A))) + str = convert(String, mappedcrs(A)) + _print_restricted(io, str; blockwidth, used=13) end if isdisk(A) fn = filename(A) if !isnothing(fn) && !(fn == "") printstyled(io, "\n filename: "; color=:light_black) - print(io, fn) + _print_restricted(io, fn; blockwidth, used=12) end end println(io) end +function _print_restricted(io, str; blockwidth, used) + if length(str) > (blockwidth - used) + print(io, str[1:min(blockwidth - used - 3, end)] * "...") + else + print(io, str) + end +end # Stack types can be enormous. Just use nameof(T) function Base.summary(io::IO, ser::AbstractRasterSeries{T,N}) where {T,N} diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index e155ee372..08d7ee5b3 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -7,6 +7,15 @@ const CF_DEGREES_EAST = ("degrees_east", "degree_east", "degree_E", "degrees_E", const UNNAMED_FILE_KEY = "unnamed" +const CDM_PERIODS = Dict( + "years" => Year, + "months" => Month, + "days" => Day, + "hours" => Hour, + "minutes" => Minute, + "seconds" => Second, +) + const CDM_AXIS_MAP = Dict( "X" => X, "Y" => Y, @@ -265,6 +274,8 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) crs_dict = Dict{String,Any}() # Define a dimensions dict so these are only generated once dim_dict = Dict{String,Dimension}() + # Refdims need a consistent order as it is not changed later + refdim_dict = OrderedDict{String,Dimension}() # Get the layer names to target layer_names = isnokw(names) ? _layer_names(ds, var_names, vars_dict, attrs_dict) : names layer_dimnames_vec = Vector{Tuple}(undef, length(layer_names)) @@ -301,11 +312,25 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) output_layerdims_vec[i] = map(dimnames) do dimname dim = get!(dim_dict, dimname) do _cdm_dim( - ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_names, dimname, crs + ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs ) end basedims(dim) end + # Find scalar coordinates to use as refdims (5.7) + for coord_name in coord_names + haskey(refdim_dict, coord_name) && continue + coord_var = _get_var!(ds, vars_dict, coord_name) + if ndims(coord_var) == 0 + coord_attr = _get_attr!(ds, vars_dict, attrs_dict, coord_name) + data = reshape(collect(coord_var), 1) + D = _cdm_dimtype(coord_attr, coord_name) + metadata = _metadatadict(sourcetrait(ds), attr) + lookup = _cdm_lookup(data, ds, coord_var, coord_attr, coord_name, D, metadata, nothing) + refdim = D(lookup) + refdim_dict[coord_name] = refdim + end + end # Add this layer to the output output_layers_vec[i] = var output_attrs_vec[i] = attr @@ -316,38 +341,59 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) layerdims_vec=output_layerdims_vec, layermetadata_vec=output_attrs_vec, dim_dict, + refdim_dict, ) end _organise_dataset(ds::AbstractDataset, names, group) = _organise_dataset(ds.group[group], names, nokw) -function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_names, dimname, crs) - # if !isnothing(geometry_key) - # geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) - # # Check that the geometry is for this dimension - # is_geometry_dimension = if haskey(geometry_attrs, "node_count") - # # Multi-part geometries - # dimname in CDM.dimnames(_get_var!(ds, vars_dict, geometry_attrs["node_count"])) - # else - # # Points - # dimname in CDM.dimnames(_get_var!(ds, vars_dict, first(_cf_node_coordinate_names(geometry_attrs)))) - # end - # geoms = get!(geometry_dict, geometry_key) do - # _cf_geometry_decode(ds, geometry_attrs) - # end - # if is_geometry_dimension - # dimension_coord_names = filter(coord_names) do coordname - # dimnames = CDM.dimnames(vars_dict[coordname]) - # length(dimnames) == 1 && only(dimnames) == dimname - # end - # lookup = GeometryLookup(geoms, (X(), Y()); crs) - # D = Geometry - # return D(lookup) +function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs) + if !isnothing(geometry_key) + geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) + # Check that the geometry is for this dimension + is_geometry_dimension = if haskey(geometry_attrs, "node_count") + # Multi-part geometries + dimname in CDM.dimnames(_get_var!(ds, vars_dict, geometry_attrs["node_count"])) + else + # Points + dimname in CDM.dimnames(_get_var!(ds, vars_dict, first(_cf_node_coordinate_names(geometry_attrs)))) + end + geoms = get!(geometry_dict, geometry_key) do + _cf_geometry_decode(ds, geometry_attrs) + end + if is_geometry_dimension + dimension_coord_names = filter(coord_names) do coordname + dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) + length(dimnames) == 1 && only(dimnames) == dimname + end + lookup = GeometryLookup(geoms, (X(), Y()); crs) + D = Geometry + return D(lookup) + end + end + # TODO Cycled climatology dims + # if haskey(dim_attrs, "climatology") + # climatology_bounds = _get_var(ds, vars_dict, dim_attrs["climatology"]) + # var = _get_var(ds, vars_dict, var_name) + # var_attib = _get_attr!(ds, vars_dict, attrs_dict, var_name) + # # We need to know if this is over years/months/days etc + # diff = map((second, minute, hour, day, month, year)) do f + # f(climatology_bounds[:, 2]) - f(climatology_bounds[:, 1]) # end + # # Find the smallest step + # i = findfirst(>(0), diff) + # step = (Second, Minute, Hour, Day, Month, Year)[i](diff[i]) + # # Find the total time span + # bounds = extrema(climatology_bounds) + # data = collect(dim_var) + # order = LA.orderof(data) + # sampling = Regular(step) + # span = Explicit(c) + # Cyclic(data; cycle, order, sampling, span, bounds) # end if haskey(ds, dimname) && (isempty(coord_names) || dimname in coord_names) - # The dimension has a matching variable, and it's one of the coordinates dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) + # The dimension has a matching variable, and it's one of the coordinates D = _cdm_dimtype(dim_attrs, dimname) lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) return D(lookup) @@ -356,34 +402,31 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_ # Get all the coordinates for this dimension dimension_coord_names = filter(coord_names) do coordname coord_dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) - @show coordname coord_dimnames dimname in coord_dimnames end - @show dimension_coord_names - if length(dimension_coord_names) == 1 - # If there is only one coordinate, just use it as the dimension - # This should rarely happen + if length(dimension_coord_names) > 1 linked_dimname = dimension_coord_names[1] dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, linked_dimname) dim_var = _get_var!(ds, vars_dict, linked_dimname) - D = _cdm_dimtype(dim_attrs, linked_dimname) if ndims(dim_var) > 1 && eltype(dim_var) <: Char chars = collect(dim_var) # Join char dimension as String, stripping null terminators - strings = strip.(join.(eachslice(chars; dims=2)), '\0') + strings = String.(strip.(join.(eachslice(chars; dims=2)), '\0')) lookup = Categorical(strings; order=Unordered()) + D = _cdm_dimtype(dim_attrs, linked_dimname) return D(lookup) - else + end + if length(dimension_coord_names) == 1 + # If there is only one coordinate, just use it as the dimension + # This should rarely happen + D = _cdm_dimtype(dim_attrs, linked_dimname) lookup = _cdm_lookup(ds, dim_attrs, linked_dimname, D, crs) return D(lookup) end - end - if length(dimension_coord_names) > 1 dim_vars = map(dimension_coord_names) do d _get_var!(ds, vars_dict, d) end is_multidimensional = any(v -> length(CDM.dimnames(v)) > 1, dim_vars) - @show is_multidimensional if is_multidimensional # Rotations # For now we don't use ArrayLookup @@ -402,7 +445,7 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, coord_ D = _cdm_dimtype(NoMetadata(), dimname) merged_data = collect(zip(DD.lookup.(dims)...)) # Use a MergedLookup with all dimensions - return D(MergedLookup(merged_data, dims)) + return D(MergedLookup(merged_data, Tuple(dims))) end end # A matching variable doesn't exist. @@ -453,8 +496,10 @@ end function _cdm_lookup(ds::AbstractDataset, attr, dimname, D::Type, crs) var = ds[dimname] data = Missings.disallowmissing(collect(var)) + # Length one variables may be zero dimensional + data = ndims(data) == 0 ? reshape(data, 1) : data metadata = _metadatadict(sourcetrait(ds), attr) - return _cdm_lookup(data, ds, var, attr, dimname, D, metadata, crs) + _cdm_lookup(data, ds, var, attr, dimname, D, metadata, crs) end # For unknown types we just make a Categorical lookup function _cdm_lookup(data::AbstractArray, ds::AbstractDataset, var, attr, dimname, D::Type, metadata, crs) @@ -853,7 +898,7 @@ _cf_crs(ds, grid_mapping_key::Nothing) = nothing function _cf_crs(ds, grid_mapping_key::String) # TODO handle multi-crs if occursin(' ', grid_mapping_key) - _split_grid_mapping_key(grid_mapping_key)[1][1] + _split_attribute(grid_mapping_key)[1][1] end haskey(ds, grid_mapping_key) || return nothing grid_mapping_var = CDM.variable(ds, grid_mapping_key) @@ -869,17 +914,18 @@ function _cf_crs(ds, grid_mapping_key::String) end end -function _split_grid_mapping_key(grid_mapping_key::String) - items = split(grid_mapping_key, ' ') +function _split_attribute(attribute::String) + items = split(attribute, ' ') starts = findall(contains(':'), items) stops = append!(starts[2:end] .- 1, length(items)) map(starts, stops) do s, p items[s][1:end-1] => items[s+1:p] end end + function _grid_mapping_keys(grid_mapping_key::String) if occursin(' ', grid_mapping_key) - first.(_split_grid_mapping_key(grid_mapping_key)) + first.(_split_attribute(grid_mapping_key)) else [grid_mapping_key] end @@ -895,7 +941,7 @@ _get_attr!(ds, var_dict, attr_dict, key) = # TODO just the core of this function here function _raster(ds::CDM.AbstractDataset; dims=nokw, - refdims=(), + refdims=nokw, name=nokw, group=nokw, filename=filename(ds), @@ -921,12 +967,13 @@ function _raster(ds::CDM.AbstractDataset; scaled, missingval = _raw_check(raw, scaled, missingval, verbose) # TODO use a clearer name than filekey name1 = filekey(ds, name) - (; names_vec, layers_vec, layerdims_vec, layermetadata_vec, dim_dict) = + (; names_vec, layers_vec, layerdims_vec, layermetadata_vec, dim_dict, refdim_dict) = _organise_dataset(ds, [string(name1)], group) var = layers_vec[1] # Detect the source from filename # Open the dataset and variable specified by `name`, at `group` level if provided # At this level we do not apply `mod`. + refdims = isnokw(refdims) ? Tuple(values(refdim_dict)) : refdims metadata_out = isnokw(metadata) ? Dict(layermetadata_vec[1]) : metadata missingval_out = _read_missingval_pair(var, metadata_out, missingval) pop!(metadata_out, "_FillValue", nothing) diff --git a/src/stack.jl b/src/stack.jl index 85933f4e1..a4746bf60 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -452,7 +452,7 @@ function RasterStack(ds; filename=filename(ds), source=nokw, dims=nokw, - refdims=(), + refdims=nokw, name=nokw, group=nokw, metadata=nokw, @@ -472,9 +472,9 @@ function RasterStack(ds; check_multilayer_dataset(ds) scaled, missingval = _raw_check(raw, scaled, missingval, verbose) # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims` - (; names_vec, layers_vec, layerdims_vec, layermetadata_vec, dim_dict) = _organise_dataset(ds, name, group) + (; names_vec, layers_vec, layerdims_vec, layermetadata_vec, dim_dict, refdim_dict) = _organise_dataset(ds, name, group) dims = _sort_by_layerdims(isnokw(dims) ? values(dim_dict) : dims, layerdims_vec) - refdims = isnokw(refdims) || isnothing(refdims) ? () : refdims + refdims = isnokwornothing(refdims) ? Tuple(values(refdim_dict)) : refdims metadata = isnokw(metadata) ? _metadata(ds) : metadata layermetadata_vec = if isnokw(layermetadata) _layermetadata(ds; attrs=layermetadata_vec) diff --git a/test/cf/5_14_scalar_coordinate_variables.ncgen b/test/cf/5_14_multiple_forecasts_from_a_single_analysis.ncgen similarity index 94% rename from test/cf/5_14_scalar_coordinate_variables.ncgen rename to test/cf/5_14_multiple_forecasts_from_a_single_analysis.ncgen index 3b02db0ca..cc421a4a0 100644 --- a/test/cf/5_14_scalar_coordinate_variables.ncgen +++ b/test/cf/5_14_multiple_forecasts_from_a_single_analysis.ncgen @@ -1,4 +1,4 @@ -netcdf scalar_coordinate_variables { +netcdf multiple_forecasts_from_a_single_analysis { dimensions: lat = 180 ; lon = 360 ; diff --git a/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen b/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen new file mode 100644 index 000000000..56c2aedd9 --- /dev/null +++ b/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen @@ -0,0 +1,29 @@ +netcdf a_domain_containing_cell_areas_for_a_spherical_geodesic_grid { +dimensions: + cell = 2562 ; // number of grid cells + time = 12 ; + nv = 6 ; // maximum number of cell vertices +variables: + char domain ; + domain:dimensions = "time cell" ; + domain:coordinates = "lon lat" ; + domain:cell_measures = "area: cell_area" ; + domain:long_name = "Domain with cell measures" ; + float lon(cell) ; + lon:long_name = "longitude" ; + lon:units = "degrees_east" ; + lon:bounds = "lon_vertices" ; + float lat(cell) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; + lat:bounds = "lat_vertices" ; + float time(time) ; + time:long_name = "time" ; + time:units = "days since 1979-01-01" ; + float cell_area(cell) ; + cell_area:long_name = "area of grid cell" ; + cell_area:standard_name = "cell_area" ; + cell_area:units = "m2" ; + float lon_vertices(cell, nv) ; + float lat_vertices(cell, nv) ; +} \ No newline at end of file diff --git a/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen b/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen new file mode 100644 index 000000000..e0af4c348 --- /dev/null +++ b/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen @@ -0,0 +1,12 @@ +netcdf a_domain_with_no_specific_dimensions { +dimensions: + +variables: + char domain ; + domain:dimensions = "" ; + domain:coordinates = "t" ; + domain:long_name = "Domain with no explicit dimensions" ; + double t ; + t:standard_name = "time" ; + t:units = "days since 2021-01-01" ; +} \ No newline at end of file diff --git a/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen b/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen new file mode 100644 index 000000000..3a6531fc5 --- /dev/null +++ b/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen @@ -0,0 +1,40 @@ +netcdf a_domain_containing_a_timeseries_geometry { +dimensions: + instance = 2 ; + node = 5 ; + time = 4 ; +variables: + char domain ; + domain:dimensions = "instance time" ; + domain:coordinates = "lat lon" ; + domain:grid_mapping = "datum" ; + domain:geometry = "geometry_container" ; + domain:long_name = "Domain with a geometry variable" ; + int time(time) ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + int datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:longitude_of_prime_meridian = 0.0 ; + datum:semi_major_axis = 6378137.0 ; + datum:inverse_flattening = 298.257223563 ; + int geometry_container ; + geometry_container:geometry_type = "line" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + int node_count(instance) ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; +} \ No newline at end of file diff --git a/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen b/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen new file mode 100644 index 000000000..c9aa4266c --- /dev/null +++ b/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen @@ -0,0 +1,39 @@ +netcdf 5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation { +dimensions: + station = 23 ; + obs = UNLIMITED ; + name_strlen = 23 ; +variables: + char domain ; + domain:dimensions = "obs" ; + domain:coordinates = "time lat lon alt station_name" ; + domain:long_name = "Domain with a discrete sampling geometry" ; + float lon(station) ; + lon:standard_name = "longitude" ; + lon:long_name = "station longitude" ; + lon:units = "degrees_east" ; + float lat(station) ; + lat:standard_name = "latitude" ; + lat:long_name = "station latitude" ; + lat:units = "degrees_north" ; + float alt(station) ; + alt:long_name = "vertical distance above the surface" ; + alt:standard_name = "height" ; + alt:units = "m" ; + alt:positive = "up" ; + alt:axis = "Z" ; + char station_name(station, name_strlen) ; + station_name:long_name = "station name" ; + station_name:cf_role = "timeseries_id" ; + int station_info(station) ; + station_info:long_name = "some kind of station info" ; + int stationIndex(obs) ; + stationIndex:long_name = "which station this obs is for" ; + stationIndex:instance_dimension = "station" ; + double time(obs) ; + time:standard_name = "time" ; + time:long_name = "time of measurement" ; + time:units = "days since 1970-01-01 00:00:00" ; +// global attributes: + :featureType = "timeSeries" ; +} \ No newline at end of file diff --git a/test/cf/6_1_2_taxon_names_and_identifyers.ncgen b/test/cf/6_1_2_taxon_names_and_identifyers.ncgen new file mode 100644 index 000000000..2552569e6 --- /dev/null +++ b/test/cf/6_1_2_taxon_names_and_identifyers.ncgen @@ -0,0 +1,22 @@ +netcdf taxon_names_and_identifiers { +dimensions: + time = 100 ; + string80 = 80 ; + taxon = 2 ; +variables: + float time(time); + time:standard_name = "time" ; + time:units = "days since 2019-01-01" ; + float abundance(time,taxon) ; + abundance:standard_name = "number_concentration_of_biological_taxon_in_sea_water" ; + abundance:coordinates = "taxon_lsid taxon_name" ; + char taxon_name(taxon,string80) ; + taxon_name:standard_name = "biological_taxon_name" ; + char taxon_lsid(taxon,string80) ; + taxon_lsid:standard_name = "biological_taxon_lsid" ; +data: + time = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100 ; + abundance = 0.12767112, 0.3077098, 0.98925537, 0.39649677, 0.7229494, 0.3014989, 0.322784, 0.29285616, 0.25662208, 0.117831886, 0.9196425, 0.11931813, 0.1264885, 0.5113293, 0.89028376, 0.72099334, 0.5193584, 0.92186993, 0.6823397, 0.007336974, 0.14244181, 0.6244344, 0.19768023, 0.44542485, 0.47169906, 0.3926025, 0.71931195, 0.30820572, 0.014529705, 0.30569047, 0.049314797, 0.31771177, 0.47943252, 0.37399697, 0.58730066, 0.9361459, 0.26961315, 0.19459844, 0.3907408, 0.37988317, 0.044382393, 0.42903543, 0.3377753, 0.37569588, 0.32771045, 0.8257987, 0.21826279, 0.79244465, 0.0014693141, 0.7483437, 0.9131109, 0.24773198, 0.8663353, 0.2083258, 0.6436682, 0.09799868, 0.5177543, 0.3281548, 0.6612102, 0.23962152, 0.28922492, 0.8494298, 0.67576736, 0.59139305, 0.5834294, 0.8025192, 0.5553859, 0.79422903, 0.30486482, 0.37530547, 0.43150902, 0.8500043, 0.47215062, 0.2889346, 0.7384416, 0.8205062, 0.9501508, 0.9709469, 0.81786615, 0.66687894, 0.13149667, 0.65388376, 0.04085481, 0.13509172, 0.6391254, 0.374403, 0.45044667, 0.5265882, 0.70337373, 0.8857227, 0.6504841, 0.70418215, 0.4939022, 0.46103948, 0.1943444, 0.21802491, 0.05313009, 0.64020365, 0.2583719, 0.5843945, 0.96909535, 0.8323412, 0.9642344, 0.06428528, 0.66111577, 0.5730727, 0.177504, 0.39409912, 0.13461357, 0.5491819, 0.2750708, 0.08223128, 0.81467575, 0.363693, 0.016909182, 0.85852563, 0.21432716, 0.32096034, 0.07471895, 0.9952607, 0.7406225, 0.6962306, 0.27097827, 0.026257396, 0.6612402, 0.6620196, 0.6634072, 0.59134126, 0.62234384, 0.6152409, 0.05558741, 0.46606535, 0.7694198, 0.13313901, 0.7848925, 0.67907256, 0.91177624, 0.20892179, 0.60586435, 0.4054733, 0.98118675, 0.38457257, 0.6779362, 0.9585549, 0.21006352, 0.43963993, 0.10247308, 0.6907456, 0.459664, 0.73598236, 0.57579696, 0.67871845, 0.5636268, 0.5426653, 0.015122354, 0.02252096, 0.799381, 0.68344504, 0.69040406, 0.7290883, 0.25275213, 0.28130805, 0.99878895, 0.25900894, 0.71899116, 0.441096, 0.9315588, 0.96927273, 0.07765621, 0.8100117, 0.45869052, 0.38682938, 0.9177527, 0.08623397, 0.18650591, 0.5276618, 0.41039044, 0.1895529, 0.75130725, 0.40061736, 0.2390588, 0.794543, 0.436378, 0.54052705, 0.6319656, 0.6019509, 0.74456435, 0.4374414, 0.8458282, 0.7159625, 0.4986195, 0.33087182, 0.5789556, 0.7105424, 0.01516813, 0.4926042, 0.6438956, 0.035016358, 0.86199844, 0.9942962 ; + taxon_name = "Calanus finmarchicus", "Calanus helgolandicus" ; + taxon_lsid = "urn:lsid:marinespecies.org:taxname:104464", "urn:lsid:marinespecies.org:taxname:104466" ; +} \ No newline at end of file diff --git a/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen b/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen new file mode 100644 index 000000000..ff2dba51a --- /dev/null +++ b/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen @@ -0,0 +1,22 @@ +netcdf northward_heat_transport_in_atlantic_ocean { +dimensions: + time = 20 ; + lat = 5 ; + lbl = 1 ; +variables: + float n_heat_transport(time,lat,lbl); + n_heat_transport:units="W"; + n_heat_transport:coordinates="geo_region"; + n_heat_transport:standard_name="northward_ocean_heat_transport"; + double time(time) ; + time:long_name = "time" ; + time:units = "days since 1990-1-1 0:0:0" ; + float lat(lat) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; + string geo_region(lbl) ; + geo_region:standard_name="region" ; +data: + geo_region = "atlantic_ocean" ; + lat = 10., 20., 30., 40., 50. ; +} \ No newline at end of file diff --git a/test/cf/6_2_model_level_numbers.ncgen b/test/cf/6_2_model_level_numbers.ncgen new file mode 100644 index 000000000..5da7dd211 --- /dev/null +++ b/test/cf/6_2_model_level_numbers.ncgen @@ -0,0 +1,14 @@ +netcdf model_level_numbers { +dimensions: + sigma=5; + lat=30; +variables: + float xwind(sigma,lat); + xwind:coordinates="model_level"; + float sigma(sigma); // physical height coordinate + sigma:long_name="sigma"; + sigma:positive="down"; + int model_level(sigma); // model level number at each height + model_level:long_name="model level number"; + model_level:positive="up"; +} \ No newline at end of file diff --git a/test/cf/7_10_decadal_averages_for_january.ncgen b/test/cf/7_10_decadal_averages_for_january.ncgen new file mode 100644 index 000000000..dd5264795 --- /dev/null +++ b/test/cf/7_10_decadal_averages_for_january.ncgen @@ -0,0 +1,21 @@ +netcdf decadal_averages_for_january { +dimensions: + time=3; + nv=2; + lat=10; // added + lon=10; // added +variables: + float precipitation(time,lat,lon); + precipitation:long_name="precipitation amount"; + precipitation:cell_methods="time: sum within years time: mean over years"; + precipitation:units="kg m-2"; + double time(time); + time:climatology="climatology_bounds"; + time:units="days since 1901-1-1"; + double climatology_bounds(time,nv); +data: // time coordinates translated to datetime format + time="1965-1-15", "1975-1-15", "1985-1-15" ; + climatology_bounds="1961-1-1", "1970-2-1", + "1971-1-1", "1980-2-1", + "1981-1-1", "1990-2-1" ; +} \ No newline at end of file diff --git a/test/cf/7_11_temperature_for_each_hour_of_average_day.ncgen b/test/cf/7_11_temperature_for_each_hour_of_average_day.ncgen new file mode 100644 index 000000000..1128af0a7 --- /dev/null +++ b/test/cf/7_11_temperature_for_each_hour_of_average_day.ncgen @@ -0,0 +1,43 @@ +netcdf temperature_for_each_hour_of_average_day { +dimensions: + time=4; + nv=2; + lat=10; // added + lon=10; // added +variables: + float temperature(time,lat,lon); + temperature:long_name="surface air temperature"; + temperature:cell_methods="time: mean within days time: mean over days"; + temperature:units="K"; + double time(time); + time:climatology="climatology_bounds"; + time:units="hours since 1997-4-1"; + double climatology_bounds(time,nv); +data: // time coordinates translated to datetime format + time="1997-4-1 0:30", "1997-4-1 1:30", "1997-4-1 2:30", "1997-4-1 3:30", "1997-4-1 4:30", "1997-4-1 5:30", "1997-4-1 6:30", "1997-4-1 7:30", "1997-4-1 8:30", "1997-4-1 9:30", "1997-4-1 10:30", "1997-4-1 11:30", "1997-4-1 12:30", "1997-4-1 13:30", "1997-4-1 14:30", "1997-4-1 15:30", "1997-4-1 16:30", "1997-4-1 17:30", "1997-4-1 18:30", "1997-4-1 19:30", "1997-4-1 20:30", "1997-4-1 21:30", "1997-4-1 22:30", "1997-4-1 23:30"; + climatology_bounds="1997-4-1 0:00", "1997-4-30 1:00", + "1997-4-1 1:00", "1997-4-30 2:00", + "1997-4-1 2:00", "1997-4-30 3:00", + "1997-4-1 3:00", "1997-4-30 4:00", + "1997-4-1 4:00", "1997-4-30 5:00", + "1997-4-1 5:00", "1997-4-30 6:00", + "1997-4-1 6:00", "1997-4-30 7:00", + "1997-4-1 7:00", "1997-4-30 8:00", + "1997-4-1 8:00", "1997-4-30 9:00", + "1997-4-1 9:00", "1997-4-30 10:00", + "1997-4-1 10:00", "1997-4-30 11:00", + "1997-4-1 11:00", "1997-4-30 12:00", + "1997-4-1 12:00", "1997-4-30 13:00", + "1997-4-1 13:00", "1997-4-30 14:00", + "1997-4-1 14:00", "1997-4-30 15:00", + "1997-4-1 15:00", "1997-4-30 16:00", + "1997-4-1 16:00", "1997-4-30 17:00", + "1997-4-1 17:00", "1997-4-30 18:00", + "1997-4-1 18:00", "1997-4-30 19:00", + "1997-4-1 19:00", "1997-4-30 20:00", + "1997-4-1 20:00", "1997-4-30 21:00", + "1997-4-1 21:00", "1997-4-30 22:00", + "1997-4-1 22:00", "1997-4-30 23:00", + "1997-4-1 23:00", "1997-4-30 24:00" ; + +} \ No newline at end of file diff --git a/test/cf/7_12_extreme_statistics_and_spell_length.ncgen b/test/cf/7_12_extreme_statistics_and_spell_length.ncgen new file mode 100644 index 000000000..5d9365bd2 --- /dev/null +++ b/test/cf/7_12_extreme_statistics_and_spell_length.ncgen @@ -0,0 +1,27 @@ +netcdf extreme_statistics_and_spell_length { +dimensions: + time=1; // added length 1 time dimension + lat=10; // added + lon=10; // added + nv=2; +variables: + float n1(time,lat,lon); + n1:standard_name="number_of_days_with_air_temperature_below_threshold"; + n1:coordinates="threshold time"; + n1:cell_methods="time: minimum within days time: sum over days"; + float n2(time,lat,lon); // added time dimension here + n2:standard_name="spell_length_of_days_with_air_temperature_below_threshold"; + n2:coordinates="threshold time"; + n2:cell_methods="time: minimum within days time: maximum over days"; + float threshold; + threshold:standard_name="air_temperature"; + threshold:units="degC"; + double time; + time:climatology="climatology_bounds"; + time:units="days since 2000-6-1"; + double climatology_bounds(time,nv); +data: // time coordinates translated to datetime format + time="2008-1-16 6:00"; + climatology_bounds="2007-12-1 6:00", "2008-3-1 6:00"; + threshold=0.; +} \ No newline at end of file diff --git a/test/cf/7_13_temperature_for_each_hour_of_typical_climatological_day copy.ncgen b/test/cf/7_13_temperature_for_each_hour_of_typical_climatological_day copy.ncgen new file mode 100644 index 000000000..f2eaa750a --- /dev/null +++ b/test/cf/7_13_temperature_for_each_hour_of_typical_climatological_day copy.ncgen @@ -0,0 +1,43 @@ +netcdf temperature_for_each_hour_of_typical_climatological_day { +dimensions: + time=24; + nv=2; + lat=10; // added + lon=10; // added +variables: + float temperature(time,lat,lon); + temperature:long_name="surface air temperature"; + temperature:cell_methods="time: mean within days", + "time: mean over days time: mean over years"; + temperature:units="K"; + double time(time); + time:climatology="climatology_bounds"; + time:units="days since 1961-1-1"; + double climatology_bounds(time,nv); +data: // time coordinates translated to datetime format + time="1961-4-1 0:30", "1961-4-1 1:30", "1961-4-1 2:30", "1961-4-1 3:30", "1961-4-1 4:30", "1961-4-1 5:30", "1961-4-1 6:30", "1961-4-1 7:30", "1961-4-1 8:30", "1961-4-1 9:30", "1961-4-1 10:30", "1961-4-1 11:30", "1961-4-1 12:30", "1961-4-1 13:30", "1961-4-1 14:30", "1961-4-1 15:30", "1961-4-1 16:30", "1961-4-1 17:30", "1961-4-1 18:30", "1961-4-1 19:30", "1961-4-1 20:30", "1961-4-1 21:30", "1961-4-1 22:30", "1961-4-1 23:30" ; + climatology_bounds="1961-4-1 0:00", "1990-4-30 1:00", + "1961-4-1 1:00", "1990-4-30 2:00", + "1961-4-1 2:00", "1990-4-30 3:00", + "1961-4-1 3:00", "1990-4-30 4:00", + "1961-4-1 4:00", "1990-4-30 5:00", + "1961-4-1 5:00", "1990-4-30 6:00", + "1961-4-1 6:00", "1990-4-30 7:00", + "1961-4-1 7:00", "1990-4-30 8:00", + "1961-4-1 8:00", "1990-4-30 9:00", + "1961-4-1 9:00", "1990-4-30 10:00", + "1961-4-1 10:00", "1990-4-30 11:00", + "1961-4-1 11:00", "1990-4-30 12:00", + "1961-4-1 12:00", "1990-4-30 13:00", + "1961-4-1 13:00", "1990-4-30 14:00", + "1961-4-1 14:00", "1990-4-30 15:00", + "1961-4-1 15:00", "1990-4-30 16:00", + "1961-4-1 16:00", "1990-4-30 17:00", + "1961-4-1 17:00", "1990-4-30 18:00", + "1961-4-1 18:00", "1990-4-30 19:00", + "1961-4-1 19:00", "1990-4-30 20:00", + "1961-4-1 20:00", "1990-4-30 21:00", + "1961-4-1 21:00", "1990-4-30 22:00", + "1961-4-1 22:00", "1990-4-30 23:00", + "1961-4-1 23:00", "1990-5-1 0:00" ; +} \ No newline at end of file diff --git a/test/cf/7_14_monthly_maximum_daily_precipitation_totals.ncgen b/test/cf/7_14_monthly_maximum_daily_precipitation_totals.ncgen new file mode 100644 index 000000000..66aed4f73 --- /dev/null +++ b/test/cf/7_14_monthly_maximum_daily_precipitation_totals.ncgen @@ -0,0 +1,21 @@ +netcdf monthly_maximum_daily_precipitation_totals { +dimensions: + time=3; + nv=2; + lat=10; // added + lon=10; // added +variables: + float precipitation(time,lat,lon); + precipitation:long_name="Accumulated precipitation"; + precipitation:cell_methods="time: sum within days time: maximum over days"; + precipitation:units="kg"; + double time(time); + time:climatology="climatology_bounds"; + time:units="days since 2000-6-1"; + double climatology_bounds(time,nv); +data: // time coordinates translated to datetime format + time="2000-6-16", "2000-7-16", "2000-8-16" ; + climatology_bounds="2000-6-1 6:00:00", "2000-7-1 6:00:00", + "2000-7-1 6:00:00", "2000-8-1 6:00:00", + "2000-8-1 6:00:00", "2000-9-1 6:00:00" ; +} \ No newline at end of file diff --git a/test/cf/7_15_timeseries_with_geometry.ncgen b/test/cf/7_15_timeseries_with_geometry.ncgen new file mode 100644 index 000000000..d080cb2c9 --- /dev/null +++ b/test/cf/7_15_timeseries_with_geometry.ncgen @@ -0,0 +1,51 @@ +netcdf timeseries_with_geometry { +dimensions: + instance = 2 ; + node = 5 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + int datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:longitude_of_prime_meridian = 0.0 ; + datum:semi_major_axis = 6378137.0 ; + datum:inverse_flattening = 298.257223563 ; + int geometry_container ; + geometry_container:geometry_type = "line" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + int node_count(instance) ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + lat = 30, 50 ; + lon = 10, 60 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + node_count = 3, 2 ; + x = 30, 10, 40, 50, 50 ; + y = 10, 30, 40, 60, 50 ; +} \ No newline at end of file diff --git a/test/cf/7_16_polygons_with_holes.ncgen b/test/cf/7_16_polygons_with_holes.ncgen new file mode 100644 index 000000000..25001beb3 --- /dev/null +++ b/test/cf/7_16_polygons_with_holes.ncgen @@ -0,0 +1,60 @@ +netcdf polygons_with_holes { +dimensions: + node = 12 ; + instance = 2 ; + part = 4 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + float geometry_container ; + geometry_container:geometry_type = "polygon" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + geometry_container:grid_mapping = "datum" ; + geometry_container:coordinates = "lat lon" ; + geometry_container:part_node_count = "part_node_count" ; + geometry_container:interior_ring = "interior_ring" ; + int node_count(instance) ; + int part_node_count(part) ; + int interior_ring(part) ; + float datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:semi_major_axis = 6378137. ; + datum:inverse_flattening = 298.257223563 ; + datum:longitude_of_prime_meridian = 0. ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + x = 20, 10, 0, 5, 10, 15, 20, 10, 0, 50, 40, 30 ; + y = 0, 15, 0, 5, 10, 5, 20, 35, 20, 0, 15, 0 ; + lat = 25, 7 ; + lon = 10, 40 ; + node_count = 9, 3 ; + part_node_count = 3, 3, 3, 3 ; + interior_ring = 0, 1, 0, 0 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; +} \ No newline at end of file diff --git a/test/cf/7_2_cells_in_a_non-latitude-longitude_horizontal_grid.ncgen b/test/cf/7_2_cells_in_a_non-latitude-longitude_horizontal_grid.ncgen new file mode 100644 index 000000000..cb24cbe3f --- /dev/null +++ b/test/cf/7_2_cells_in_a_non-latitude-longitude_horizontal_grid.ncgen @@ -0,0 +1,17 @@ +netcdf cells_in_a_non-latitude-longitude_horizontal_grid { +dimensions: + imax = 128; + jmax = 64; + nv = 4; +variables: + float lat(jmax,imax); + lat:long_name = "latitude"; + lat:units = "degrees_north"; + lat:bounds = "lat_bnds"; + float lon(jmax,imax); + lon:long_name = "longitude"; + lon:units = "degrees_east"; + lon:bounds = "lon_bnds"; + float lat_bnds(jmax,imax,nv); + float lon_bnds(jmax,imax,nv); +} \ No newline at end of file diff --git a/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen b/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen new file mode 100644 index 000000000..fa9f74166 --- /dev/null +++ b/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen @@ -0,0 +1,34 @@ +netcdf specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds { +dimensions: + eta=5; + lat=10; + lon=10; + nv=2; +variables: + float eta(eta) ; + eta:long_name = "eta at full levels" ; + eta:positive = "down" ; + eta:standard_name = " atmosphere_hybrid_sigma_pressure_coordinate" ; + eta:formula_terms = "a: A b: B ps: PS p0: P0" ; + eta:bounds="eta_bnds" ; + float eta_bnds(eta,nv) ; + eta_bnds:formula_terms = "a: A_bnds b: B_bnds ps: PS p0: P0" ; // This attribute is mandatory + float A(eta) ; + A:long_name = "'a' coefficient for vertical coordinate at full levels" ; + A:units = "Pa" ; + A:bounds = "A_bnds" ; // This attribute is included for the optional second method + float B(eta) ; + B:long_name = "'b' coefficient for vertical coordinate at full levels" ; + B:units = "1" ; + B:bounds = "B_bnds" ; // This attribute is included for the optional second method + float A_bnds(eta,nv) ; + float B_bnds(eta,nv) ; + float PS(lat, lon) ; + PS:units = "Pa" ; + float P0 ; + P0:units = "Pa" ; + float temp(eta, lat, lon) ; + temp:standard_name = "air_temperature" ; + temp:units = "K"; + temp:coordinates = "A B" ; // This attribute is included for the optional second method +} \ No newline at end of file diff --git a/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen b/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen new file mode 100644 index 000000000..e16b59491 --- /dev/null +++ b/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen @@ -0,0 +1,28 @@ +netcdf cell_areas_for_a_spherical_geodesic_grid { +dimensions: + cell = 2562 ; // number of grid cells + time = 12 ; + nv = 6 ; // maximum number of cell vertices +variables: + float PS(time,cell) ; + PS:units = "Pa" ; + PS:coordinates = "lon lat" ; + PS:cell_measures = "area: cell_area" ; + float lon(cell) ; + lon:long_name = "longitude" ; + lon:units = "degrees_east" ; + lon:bounds="lon_vertices" ; + float lat(cell) ; + lat:long_name = "latitude" ; + lat:units = "degrees_north" ; + lat:bounds="lat_vertices" ; + float time(time) ; + time:long_name = "time" ; + time:units = "days since 1979-01-01 0:0:0" ; + float cell_area(cell) ; + cell_area:long_name = "area of grid cell" ; + cell_area:standard_name="cell_area" ; + cell_area:units = "m2" ; + float lon_vertices(cell,nv) ; + float lat_vertices(cell,nv) ; +} \ No newline at end of file diff --git a/test/cf/7_5_methods_applied_to_a_timeseries.ncgen b/test/cf/7_5_methods_applied_to_a_timeseries.ncgen new file mode 100644 index 000000000..6de58d848 --- /dev/null +++ b/test/cf/7_5_methods_applied_to_a_timeseries.ncgen @@ -0,0 +1,27 @@ +netcdf methods_applied_to_a_timeseries { +dimensions: + time = UNLIMITED; // (5 currently) + station = 10; + nv = 2; +variables: + float pressure(time,station); + pressure:long_name = "pressure"; + pressure:units = "kPa"; + pressure:cell_methods = "time: point"; + float maxtemp(time,station); + maxtemp:long_name = "temperature"; + maxtemp:units = "K"; + maxtemp:cell_methods = "time: maximum"; + float ppn(time,station); + ppn:long_name = "depth of water-equivalent precipitation"; + ppn:units = "mm"; + ppn:cell_methods = "time: sum"; + double time(time); + time:long_name = "time"; + time:units = "hours since 1998-4-19 6:0:0"; + time:bounds = "time_bnds"; + double time_bnds(time,nv); +data: + time = 0., 12., 24., 36., 48.; + time_bnds = -12.,0., 0.,12., 12.,24., 24.,36., 36.,48.; +} \ No newline at end of file diff --git a/test/cf/7_6_surface_air_temperature_variance.ncgen b/test/cf/7_6_surface_air_temperature_variance.ncgen new file mode 100644 index 000000000..9e7104744 --- /dev/null +++ b/test/cf/7_6_surface_air_temperature_variance.ncgen @@ -0,0 +1,19 @@ +netcdf surface_air_temperature_variance { +dimensions: + lat=90; + lon=180; + time=1; + nv=2; +variables: + float TS_var(time,lat,lon); + TS_var:long_name="surface air temperature variance"; + TS_var:units="K2"; + TS_var:cell_methods="time: variance (interval: 1 hr comment: sampled instantaneously)"; + float time(time); + time:units="days since 1990-01-01 00:00:00"; + time:bounds="time_bnds"; + float time_bnds(time,nv); +data: + time=.5; + time_bnds=0.,1.; +} \ No newline at end of file diff --git a/test/cf/7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea.ncgen b/test/cf/7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea.ncgen new file mode 100644 index 000000000..3fb4364b3 --- /dev/null +++ b/test/cf/7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea.ncgen @@ -0,0 +1,17 @@ +netcdf mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea { +dimensions: + lat=73; + lon=96; + maxlen=20; + ls=2; +variables: + float surface_temperature(lat,lon); + surface_temperature:cell_methods="area: mean where land"; + float surface_upward_sensible_heat_flux(ls,lat,lon); + surface_upward_sensible_heat_flux:coordinates="land_sea"; + surface_upward_sensible_heat_flux:cell_methods="area: mean where land_sea"; + char land_sea(ls,maxlen); + land_sea:standard_name="area_type"; +data: + land_sea="land","sea"; +} \ No newline at end of file diff --git a/test/cf/7_9_climatological_seasons.ncgen b/test/cf/7_9_climatological_seasons.ncgen new file mode 100644 index 000000000..105dfec61 --- /dev/null +++ b/test/cf/7_9_climatological_seasons.ncgen @@ -0,0 +1,22 @@ +netcdf climatological_seasons { +dimensions: + time=4; + nv=2; + lat=10; // added + lon=10; // added +variables: + float temperature(time,lat,lon); + temperature:long_name="surface air temperature"; + temperature:cell_methods="time: minimum within years time: mean over years"; + temperature:units="K"; + double time(time); + time:climatology="climatology_bounds"; + time:units="days since 1960-1-1"; + double climatology_bounds(time,nv); +data: // time coordinates translated to datetime format + time="1960-4-16", "1960-7-16", "1960-10-16", "1961-1-16" ; + climatology_bounds="1960-3-1", "1990-6-1", + "1960-6-1", "1990-9-1", + "1960-9-1", "1990-12-1", + "1960-12-1", "1991-3-1" ; +} \ No newline at end of file diff --git a/test/sources/cf.jl b/test/sources/cf.jl index d11518f94..52668569f 100644 --- a/test/sources/cf.jl +++ b/test/sources/cf.jl @@ -1,3 +1,5 @@ +using Rasters, NCDatasets +using NCDatasets.NetCDF_jll using NearestNeighbors # Build all ncgen files cfdir = joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "cf") @@ -11,18 +13,23 @@ test_paths = map(filter(endswith(".ncgen"), readdir(cfdir))) do input_name end rasters = map(test_paths) do test_path name = splitext(basename(test_path))[1] - try - name => Raster(test_path; lazy=true) - catch - name => nothing - end + name == "5_1_independent_coordinate_variables" && return name => nothing + name => Raster(test_path; lazy=true) end -rasters[1][2] -rasterstacks = map(test_paths) do test_path +rasterstacks = map(enumerate(test_paths)) do (i, test_path) name = splitext(basename(test_path))[1] + println(i, " => ", name) name == "5_1_independent_coordinate_variables" && return name => nothing - name == "5_10_british_national_grid" && return name => nothing - println(name) name => RasterStack(test_path; lazy=true) end ; -read(last.(rasterstacks)[1].str_variable) \ No newline at end of file +st = last.(rasterstacks)[12] +st = last.(rasterstacks)[14] +refdims(st) +inds = findall(.!(isempty.(refdims.(last.(rasterstacks))))) +refdims(last.(rasterstacks[inds])[1]) + +for (k, r) in rasters + println(k) + display(r) + readline() +end \ No newline at end of file From 173eaa24693361e236a2a50b1308f8111c7fc47a Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 14 May 2025 11:32:50 -0400 Subject: [PATCH 14/43] Update make.jl --- docs/make.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index a398dc075..2737a4134 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -52,8 +52,7 @@ makedocs( # Enable logging to console again Logging.disable_logging(Logging.BelowMinLevel) -deploydocs(; repo="github.com/rafaqz/Rasters.jl", - target = "build", # this is where Vitepress stores its output +DocumenterVitepress.deploydocs(; repo="github.com/rafaqz/Rasters.jl", branch = "gh-pages", devbranch = "main", push_preview = true From 014a2a5f693a101e6df60a50e49156730c9ac7c2 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 14 May 2025 15:13:52 -0400 Subject: [PATCH 15/43] add my DV branch for now --- docs/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Project.toml b/docs/Project.toml index a23685342..0b4d22efa 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -32,3 +32,4 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [sources] SpeciesDistributionModels = {url = "https://github.com/tiemvanderdeure/SpeciesDistributionModels.jl/"} Rasters = { path = ".." } +DocumenterVitepress = { url = "https://github.com/LuxDL/DocumenterVitepress.jl", rev = "as/fix-no-config" } From f25285cb65af5a0c95ccc0b4b450458d14297323 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 14 May 2025 15:47:20 -0400 Subject: [PATCH 16/43] REMOVE BEFORE MERGING: draft=true for faster iteration --- docs/make.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/make.jl b/docs/make.jl index 2737a4134..75d555470 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -47,6 +47,7 @@ makedocs( source = "src", build = "build", warnonly=true, + draft = true ) # Enable logging to console again From fcc6e13625e0d6ea14fd4b019c74e1e518bafe49 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 14 May 2025 16:00:42 -0400 Subject: [PATCH 17/43] Update make.jl --- docs/make.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index 75d555470..95a5c1f6a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -43,7 +43,6 @@ makedocs( devbranch = "main", devurl = "dev"; ), - draft = false, source = "src", build = "build", warnonly=true, From 63c088c6fde793cf07626ada20991dea81e79400 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 14 May 2025 17:25:41 -0400 Subject: [PATCH 18/43] Update docs/Project.toml --- docs/Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 0b4d22efa..a23685342 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -32,4 +32,3 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [sources] SpeciesDistributionModels = {url = "https://github.com/tiemvanderdeure/SpeciesDistributionModels.jl/"} Rasters = { path = ".." } -DocumenterVitepress = { url = "https://github.com/LuxDL/DocumenterVitepress.jl", rev = "as/fix-no-config" } From 43622e95ed240ddc50beca2848a5643427617c79 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 17 May 2025 18:18:31 +0200 Subject: [PATCH 19/43] more cf --- src/create.jl | 1 + src/filearray.jl | 13 +++++ src/filestack.jl | 8 ++- src/lookup.jl | 35 ++++++++++--- src/modifieddiskarray.jl | 2 + src/sources/commondatamodel.jl | 50 +++++++++++++++--- src/stack.jl | 2 +- test/sources/cf.jl | 92 +++++++++++++++++++++++++--------- 8 files changed, 163 insertions(+), 40 deletions(-) diff --git a/src/create.jl b/src/create.jl index 77ec2a7c6..227360c41 100644 --- a/src/create.jl +++ b/src/create.jl @@ -248,6 +248,7 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; layerdims=nokw, layermetadata=nokw, f=identity, + lazy=false, kw... ) layerdims = isnokw(layerdims) ? map(_ -> basedims(dims), types) : layerdims diff --git a/src/filearray.jl b/src/filearray.jl index 0ea57470c..3bfead455 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -49,6 +49,19 @@ function FileArray{S}( T = _mod_eltype(var, mod) return FileArray{S,T,N}(filename, size(var); eachchunk, haschunks, mod, kw...) end +function FileArray{S}( + var::AbstractArray{Char,N}, filename; mod, kw... +) where {S<:CDMsource,N} + @show "here" + eachchunk = DA.eachchunk(var) + haschunks = DA.haschunks(var) + if Missings.nonmissingtype(eltype(mod)) isa AbstractString + return FileArray{S,eltype(mod),N-1}(filename, size(var); eachchunk, haschunks, mod, kw...) + else + T = _mod_eltype(var, mod) + return FileArray{S,T,N}(filename, size(var); eachchunk, haschunks, mod, kw...) + end +end # FileArray has S, T and N parameters not recoverable from fields ConstructionBase.constructorof(::Type{<:FileArray{S,T,N}}) where {S,T,N} = FileArray{S,T,N} diff --git a/src/filestack.jl b/src/filestack.jl index a4be989a1..29f216214 100644 --- a/src/filestack.jl +++ b/src/filestack.jl @@ -31,13 +31,16 @@ function FileStack{source}(ds::AbstractDataset, filename::AbstractString; mods, ) where {source,N} T = NamedTuple{name,Tuple{map(_mod_eltype, vars, mods)...}} - layersizes = map(size, vars) + layersizes = map(v -> _source_size(source(), v), vars) eachchunk = map(DiskArrays.eachchunk, vars) haschunks = map(DiskArrays.haschunks, vars) group = isnokw(group) ? nothing : group return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, mods, write) end +_source_size(::Source, v::AbstractArray) = size(v) +_source_size(::CDMsource, v::AbstractArray{Char}) = size(v)[2:end] + # FileStack has `S,Na,T` parameters that are not recoverable from fields. ConstructionBase.constructorof(::Type{<:FileStack{S,Na,T}}) where {S,Na,T} = FileStack{S,Na,T} @@ -61,7 +64,8 @@ function Base.getindex(fs::FileStack{S,Na,T}, name::Symbol) where {S,Na,T} haschunks = fs.haschunks[i] mod = fs.mods[i] N = length(size) - return FileArray{S,_itype(T, i),N}(filename(fs), size, name, fs.group, eachchunk, haschunks, mod, fs.write) + T1 = _itype(T, i) + return FileArray{S,T1,N}(filename(fs), size, name, fs.group, eachchunk, haschunks, mod, fs.write) end @inline _itype(::Type{<:NamedTuple{<:Any,T}}, i) where T = T.parameters[i] diff --git a/src/lookup.jl b/src/lookup.jl index cb4f5d4f3..f3027d784 100644 --- a/src/lookup.jl +++ b/src/lookup.jl @@ -127,7 +127,9 @@ Fields and behaviours are identical to [`Sampled`]($DDsampleddocs) with the addi The mapped dimension index will be used as for [`Sampled`]($DDsampleddocs), but to save in another format the underlying `crs` may be used to convert it. """ -struct Mapped{T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC<:Union{GeoFormat,Nothing},MC<:Union{GeoFormat,Nothing},D<:Union{AutoDim,Dimension}} <: AbstractProjected{T,O,Sp,Sa} +struct Mapped{ + T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC<:Union{GeoFormat,Nothing},MC<:Union{GeoFormat,Nothing},D<:Union{AutoDim,Dimension},Ds<:Union{Nothing,Tuple},DD +} <: AbstractProjected{T,O,Sp,Sa} data::A order::O span::Sp @@ -136,28 +138,47 @@ struct Mapped{T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC<:Union crs::PC mappedcrs::MC dim::D + dims::Ds + dimdata::DD end + +# dimdata = ( +# matrix +# tree +# idxvec +# distvec +# ) +# dims(lookup::Mapped) = lookup.dims +# dim(lookup::ArrayLookup) = lookup.dim +# matrix(l::ArrayLookup) = l.matrix +# tree(l::ArrayLookup) = l.tree + function Mapped(data=AutoValues(); order=AutoOrder(), span=AutoSpan(), sampling=AutoSampling(), metadata=NoMetadata(), crs::Union{GeoFormat,Nothing,NoKW}=nokw, mappedcrs::Union{GeoFormat,Nothing,NoKW}, - dim=AutoDim() + dim=AutoDim(), + dims=nothing, + mapdata=nothing, ) crs = crs isa NoKW ? nothing : crs mappedcrs = mappedcrs isa NoKW ? nothing : mappedcrs - Mapped(data, order, span, sampling, metadata, crs, mappedcrs, dim) + Mapped(data, order, span, sampling, metadata, crs, mappedcrs, dim, dims, mapdata) end function Mapped(l::Sampled; order=order(l), span=span(l), sampling=sampling(l), metadata=metadata(l), crs::Union{GeoFormat,Nothing}=nothing, mappedcrs::Union{GeoFormat,Nothing}, - dim=AutoDim() + dim=AutoDim(), + dims=nothing, + mapdata=nothing, ) - Mapped(parent(l), order, span, sampling, metadata, crs, mappedcrs, dim) + Mapped(parent(l), order, span, sampling, metadata, crs, mappedcrs, dim, dims, mapdata) end +DD.hasmultipledimensions(lookup::Mapped) = true GeoInterface.crs(lookup::Mapped) = lookup.crs mappedcrs(lookup::Mapped) = lookup.mappedcrs dim(lookup::Mapped) = lookup.dim @@ -198,6 +219,8 @@ function convertlookup(::Type{<:Mapped}, l::Projected) crs=crs(l), mappedcrs=mappedcrs(l), dim=dim(l), + dims=nothing, + mapdata=nothing, ) end function convertlookup(::Type{<:Projected}, l::Mapped) @@ -280,4 +303,4 @@ _projectedindex(lookup::Mapped, dim::Dimension) = _projectedindex(crs(lookup), l _projectedindex(crs::Nothing, lookup::Mapped, dim::Dimension) = error("No projection crs attached to $(name(dim)) dimension") _projectedindex(crs::GeoFormat, lookup::Mapped, dim::Dimension) = - reproject(mappedcrs(dim), crs, dim, index(dim)) + reproject(mappedcrs(dim), crs, dim, index(dim)) \ No newline at end of file diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 4628e37bb..3deb12362 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -66,6 +66,8 @@ end _maybe_modify(A::AbstractArray, mod::AbstractModifications) = ModifiedDiskArray(A, mod) _maybe_modify(A::AbstractArray, ::Nothing) = A +_maybe_modify(A::AbstractArray{Char}, mod::Mod{<:Union{Missing,String}}) = + ModifiedDiskArray(DiskCharToString(A), mod) filename(A::ModifiedDiskArray) = filename(parent(A)) missingval(A::ModifiedDiskArray) = A.missingval diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 08d7ee5b3..481d44c03 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -89,6 +89,7 @@ end # Mode to open file in - read or append openmode(write::Bool) = write ? "a" : "r" +missingval(var::AbstractArray, md::Metadata{<:CDMsource}) = missingval(md) missingval(var::CDM.AbstractVariable, md::Metadata{<:CDMsource}) = missingval(md) missingval(var::CDM.AbstractVariable, args...) = @@ -127,6 +128,25 @@ end _open(f, ::CDMsource, var::AbstractArray; mod=NoMod(), kw...) = cleanreturn(f(_maybe_modify(var, mod))) +# Converts Char arrays to String folowing CF chapter 2 +struct DiskCharToString{N,M} <: DiskArrays.AbstractDiskArray{String,N} + parent::AbstractArray{Char,M} +end +DiskCharToString(A::AbstractArray{Char,M}) where M = DiskCharToString{M-1,M}(A) + +Base.parent(A::DiskCharToString) = A.parent +Base.size(A::DiskCharToString) = size(parent(A))[2:end] +DiskArrays.haschunks(A::DiskCharToString) = DiskArrays.haschunks(parent(A)) +function DiskArrays.readblock!(A::DiskCharToString, dest, I...) + src = parent(A)[:, I...] + for I in CartesianIndices(dest) + dest[I] = _chars_to_string(view(src, :, I)) + end + return dest +end + +_chars_to_string(chars) = String(strip(join(chars), '\0')) + # This allows arbitrary group nesting _getgroup(ds, ::Union{Nothing,NoKW}) = ds _getgroup(ds, group::Union{Symbol,AbstractString}) = ds.group[String(group)] @@ -308,6 +328,10 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) grid_mapping_key = pop!(attr, "grid_mapping") get!(() -> _cf_crs(ds, grid_mapping_key), crs_dict, grid_mapping_key) end + if eltype(var) <: Char && ndims(var) > 1 + # Remove the char dimension + layer_dimnames_vec[i] = dimnames = dimnames[2:end] + end # Loop over the dimensions of this layer, adding missing dims to dim_dict output_layerdims_vec[i] = map(dimnames) do dimname dim = get!(dim_dict, dimname) do @@ -391,13 +415,13 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na # span = Explicit(c) # Cyclic(data; cycle, order, sampling, span, bounds) # end - if haskey(ds, dimname) && (isempty(coord_names) || dimname in coord_names) - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) - # The dimension has a matching variable, and it's one of the coordinates - D = _cdm_dimtype(dim_attrs, dimname) - lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) - return D(lookup) - end + # if haskey(ds, dimname) && (isempty(coord_names) || dimname in coord_names) + # dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) + # # The dimension has a matching variable, and it's one of the coordinates + # D = _cdm_dimtype(dim_attrs, dimname) + # lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) + # return D(lookup) + # end # No matching variable was found, so we have something more complicated # Get all the coordinates for this dimension dimension_coord_names = filter(coord_names) do coordname @@ -910,6 +934,12 @@ function _cf_crs(ds, grid_mapping_key::String) elseif haskey(grid_mapping_attrib, "proj4string") ProjString(grid_mapping_attrib["proj4string"]) else + if haskey(grid_mapping_attrib, "grid_mapping_name") + grid_mapping_name = grid_mapping_attrib["grid_mapping_name"] + if grid_mapping_name == "latitude_longitude" + return EPSG(4326) + end + end nothing # unparseable - TODO get CFProjections.jl to do it. end end @@ -938,6 +968,10 @@ _get_attr!(ds, var_dict, attr_dict, key) = CDM.attribs(var) end +_eltype(::Source, var) = eltype(var) +_eltype(::CDMsource, var::AbstractArray) = eltype(var) +_eltype(::CDMsource, var::AbstractArray{Char}) = String + # TODO just the core of this function here function _raster(ds::CDM.AbstractDataset; dims=nokw, @@ -979,7 +1013,7 @@ function _raster(ds::CDM.AbstractDataset; pop!(metadata_out, "_FillValue", nothing) pop!(metadata_out, "missing_value", nothing) # Generate mod for scaling - mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval_out; scaled, coerce) : mod + mod = isnokw(mod) ? _mod(_eltype(source, var), metadata_out, missingval_out; scaled, coerce) : mod # Define or load the data array data_out = if lazy # Define a lay FileArray diff --git a/src/stack.jl b/src/stack.jl index a4746bf60..92dbf7e7d 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -489,7 +489,7 @@ function RasterStack(ds; layer_mvs = map(Rasters.missingval, layers_vec, layermetadata_vec) _missingval_vec(missingval, layer_mvs, name) end - eltype_vec = map(eltype, layers_vec) + eltype_vec = map(l -> _eltype(source, l), layers_vec) mod_vec = _stack_mods(eltype_vec, layermetadata_vec, missingval_vec; scaled, coerce) data = if lazy vars = ntuple(i -> layers_vec[i], length(name)) diff --git a/test/sources/cf.jl b/test/sources/cf.jl index 52668569f..3ca715504 100644 --- a/test/sources/cf.jl +++ b/test/sources/cf.jl @@ -1,9 +1,13 @@ using Rasters, NCDatasets using NCDatasets.NetCDF_jll using NearestNeighbors +using OrderedCollections +using Rasters.Lookups +using Test + # Build all ncgen files cfdir = joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "cf") -test_paths = map(filter(endswith(".ncgen"), readdir(cfdir))) do input_name +example_paths = map(filter(endswith(".ncgen"), readdir(cfdir))) do input_name input_path = joinpath(cfdir, input_name) output_path = joinpath(cfdir, "..", "data", "cf", splitext(input_name)[1] * ".nc") NetCDF_jll.ncgen() do exe @@ -11,25 +15,67 @@ test_paths = map(filter(endswith(".ncgen"), readdir(cfdir))) do input_name end output_path end -rasters = map(test_paths) do test_path - name = splitext(basename(test_path))[1] - name == "5_1_independent_coordinate_variables" && return name => nothing - name => Raster(test_path; lazy=true) -end -rasterstacks = map(enumerate(test_paths)) do (i, test_path) - name = splitext(basename(test_path))[1] - println(i, " => ", name) - name == "5_1_independent_coordinate_variables" && return name => nothing - name => RasterStack(test_path; lazy=true) -end ; -st = last.(rasterstacks)[12] -st = last.(rasterstacks)[14] -refdims(st) -inds = findall(.!(isempty.(refdims.(last.(rasterstacks))))) -refdims(last.(rasterstacks[inds])[1]) - -for (k, r) in rasters - println(k) - display(r) - readline() -end \ No newline at end of file +# rasters = map(example_paths) do example_path +# name = splitext(basename(example_path))[1] +# name == "5_1_independent_coordinate_variables" && return name => nothing +# name => Raster(example_path; lazy=true) +# end +# rasterstacks = map(enumerate(example_paths)) do (i, example_path) +# name = splitext(basename(example_path))[1] +# println(i, " => ", name) +# name == "5_1_independent_coordinate_variables" && return name => nothing +# name => RasterStack(example_path; lazy=true) +# end ; +# st = last.(rasterstacks)[12] +# st = last.(rasterstacks)[14] +# refdims(st) +# inds = findall(.!(isempty.(refdims.(last.(rasterstacks))))) +# refdims(last.(rasterstacks[inds])[1]) + +# for (k, r) in rasters +# println(k) +# display(r) +# readline() +# end + +example_ids = replace.(strip.(first.(split.(basename.(example_paths), r"[A-Za-z]")), '_'), ("_" => ".",)) +examples = OrderedDict(example_ids .=> example_paths) + +@testset "2.1" begin + rast = RasterStack(examples["2.1"]) + @test rast.char_variable == rast.str_variable + # Also works lazily + @test read(RasterStack(examples["2.1"]; lazy=true).char_variable) == + read(RasterStack(examples["2.1"]; lazy=true)).char_variable == + rast.str_variable + # Fails due to DiskArrays.jl size checks during the comparison iteration + @test_broken RasterStack(examples["2.1"]; lazy=true).char_variable == rast.str_variable +end + +@testset "3.1" begin + rast = RasterStack(examples["3.1"]) + md_os = metadata(rast[:Tonscale]) + md_diff = metadata(rast[:Tdifference]) + @test md_os["units"] == md_diff["units"] == "degC" + @test md_os["cell_methods"] == md_diff["cell_methods"] == "area: mean" + @test md_os["standard_name"] == md_diff["standard_name"] == "surface_temperature" + + @test md_os["long_name"] == "global-mean surface temperature" + @test md_diff["long_name"] == "change in global-mean surface temperature relative to pre-industrial" + @test md_os["units_metadata"] == "temperature: on_scale" + @test md_diff["units_metadata"] == "temperature: difference" +end + +@testset "5.1" begin + rast = RasterStack(examples["5.1"]) + @test dims(rast) == (X(NoLookup(1:36)), Y(NoLookup(1:18)), Dim{:pres}(NoLookup(1:15)), Ti(NoLookup(1:4))) + @test metadata(rast.xwind)["long_name"] == "zonal wind" + @test metadata(rast.xwind)["units"] == "m/s" + # TODO add real vars to the file so there is also dimension metadata +end + +rast = RasterStack(examples["5.2"]) +rast = RasterStack(examples["5.3"]) +rast = RasterStack(examples["5.6"]) +rast = RasterStack(examples["5.8"]) + From e21dbeaa6368c96cbd80ad39da2f44ca66031437 Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Sun, 18 May 2025 01:14:18 +0300 Subject: [PATCH 20/43] warnonly=false and import packages (#954) --- docs/make.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index a398dc075..56ebe4228 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -3,6 +3,7 @@ using Documenter, Rasters, Plots, Logging, Statistics, Dates, import Makie, CairoMakie using DocumenterVitepress using Rasters.LookupArrays, Rasters.Dimensions +import Shapefile, DataFrames, NaturalEarth # to avoid precompilation in doctests # Don't output huge svgs for Makie plots CairoMakie.activate!(type = "png") @@ -46,7 +47,7 @@ makedocs( draft = false, source = "src", build = "build", - warnonly=true, + warnonly=false, ) # Enable logging to console again From 41e74363ef0e78f555bd14a3653b8759f69eded6 Mon Sep 17 00:00:00 2001 From: Alex Gardner Date: Thu, 22 May 2025 11:14:06 -0700 Subject: [PATCH 21/43] move logo to top right --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index be1eb1d86..c69fab9dc 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Rasters - + [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/rafaqz/Rasters.jl/blob/main/LICENSE) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://rafaqz.github.io/Rasters.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://rafaqz.github.io/Rasters.jl/dev) @@ -8,8 +8,6 @@ [![Aqua.jl Quality Assurance](https://img.shields.io/badge/Aquajl-%F0%9F%8C%A2-aqua.svg)](https://github.com/JuliaTesting/Aqua.jl) [![Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/Rasters&label=Downloads)](https://pkgs.genieframework.com?packages=Rasters) - - [Rasters.jl](https://rafaqz.github.io/Rasters.jl/dev) defines common types and methods for reading, writing and manipulating rasterized spatial data. From 030e3e44a84dfbb198a8e8eada5736422ab13512 Mon Sep 17 00:00:00 2001 From: Alex Gardner Date: Thu, 22 May 2025 11:15:09 -0700 Subject: [PATCH 22/43] add new aqua logo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c69fab9dc..3522c1af5 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://rafaqz.github.io/Rasters.jl/dev) [![CI](https://github.com/rafaqz/Rasters.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/rafaqz/Rasters.jl/actions/workflows/ci.yml) [![Codecov](https://codecov.io/gh/rafaqz/Rasters.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/rafaqz/Rasters.jl) -[![Aqua.jl Quality Assurance](https://img.shields.io/badge/Aquajl-%F0%9F%8C%A2-aqua.svg)](https://github.com/JuliaTesting/Aqua.jl) +[![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) [![Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/Rasters&label=Downloads)](https://pkgs.genieframework.com?packages=Rasters) [Rasters.jl](https://rafaqz.github.io/Rasters.jl/dev) defines common types and methods for reading, writing and From fddd0ebf42d756348d7116db7f31fe92b7e9a532 Mon Sep 17 00:00:00 2001 From: Alex Gardner Date: Thu, 22 May 2025 11:16:11 -0700 Subject: [PATCH 23/43] remove broken downloads --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 3522c1af5..00a11b421 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ [![CI](https://github.com/rafaqz/Rasters.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/rafaqz/Rasters.jl/actions/workflows/ci.yml) [![Codecov](https://codecov.io/gh/rafaqz/Rasters.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/rafaqz/Rasters.jl) [![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) -[![Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/Rasters&label=Downloads)](https://pkgs.genieframework.com?packages=Rasters) [Rasters.jl](https://rafaqz.github.io/Rasters.jl/dev) defines common types and methods for reading, writing and manipulating rasterized spatial data. From 64e40443d0135bbf591d983aa71b79e734aa5f10 Mon Sep 17 00:00:00 2001 From: Alex Gardner Date: Thu, 22 May 2025 11:41:28 -0700 Subject: [PATCH 24/43] modify text --- README.md | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 00a11b421..a93c196ca 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,19 @@ [![Codecov](https://codecov.io/gh/rafaqz/Rasters.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/rafaqz/Rasters.jl) [![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) -[Rasters.jl](https://rafaqz.github.io/Rasters.jl/dev) defines common types and methods for reading, writing and -manipulating rasterized spatial data. +[Rasters.jl](https://rafaqz.github.io/Rasters.jl/dev) is a powerful Julia package for working with spatial raster data. It provides a unified interface for reading, writing, and manipulating raster data. The package extends [DimensionalData.jl](https://rafaqz.github.io/DimensionalData.jl/dev/) to enable intuitive spatial indexing and manipulation of raster data. -These currently include raster arrays like GeoTIFF and NetCDF, R grd files, -multi-layered stacks, and multi-file series of arrays and stacks. +Key features: +- Support for multiple raster formats (e.g. GeoTIFF, NetCDF, GRD) +- Support for multi-layered stacks and multi-file series of arrays +- Lazy loading of large datasets +- Intuitive spatial indexing with named dimensions (X, Y, Time) +- Efficient handling of multi-layered stacks and time series +- Built-in support for coordinate reference systems (CRS) +- High-performance operations optimized for spatial data # Quick start + Install the package by typing: ```julia @@ -28,7 +34,7 @@ using Rasters Using `Rasters` to read GeoTiff or NetCDF files will output something similar to the following toy examples. This is possible because Rasters.jl extends -[DimensionalData.jl](https://github.com/rafaqz/DimensionalData.jl) so that +[DimensionalData.jl](https://rafaqz.github.io/DimensionalData.jl/dev/) so that spatial data can be indexed using named dimensions like `X`, `Y` and `Ti` (time) and e.g. spatial coordinates. @@ -38,7 +44,7 @@ lon, lat = X(25:1:30), Y(25:1:30) ti = Ti(DateTime(2001):Month(1):DateTime(2002)) ras = Raster(rand(lon, lat, ti)) # this generates random numbers with the dimensions given ``` -``` +```julia 6×6×13 Raster{Float64,3} with dimensions: X Sampled{Int64} 25:1:30 ForwardOrdered Regular Points, Y Sampled{Int64} 25:1:30 ForwardOrdered Regular Points, @@ -61,7 +67,7 @@ Rasters reduces its dependencies to keep the `using` time low. But, it means you have to manually load packages you need for each backend or additional functionality. -For example, to use the GDAL backend, and download RasterDataSources files, you now need to do: +For example, to use the GDAL backend, and download RasterDataSources files, you need to do: ```julia using Rasters, ArchGDAL, RasterDataSources @@ -77,7 +83,7 @@ Sources and packages needed: Other functionality in extensions: - Raster data downloads, like `Worldclim{Climate}`: `using RasterDataSources` - Makie plots: `using GLMakie` (opengl interactive) or `using CairoMakie` (print) etc. -- Coordinate transformations for gdal rasters: `using CoordinateTransformations` +- Coordinate transformations for GDAL rasters: `using CoordinateTransformations` ## Getting the `lookup` array from dimensions @@ -97,7 +103,7 @@ Selecting a time slice by `index` is done via ```julia ras[Ti(1)] ``` -``` +```julia 6×6 Raster{Float64,2} with dimensions: X Sampled{Int64} 25:1:30 ForwardOrdered Regular Points, Y Sampled{Int64} 25:1:30 ForwardOrdered Regular Points @@ -117,7 +123,7 @@ values: 25 26 27 28 29 30 ```julia ras[Ti=1] ``` -``` +```julia 6×6 Raster{Float64,2} with dimensions: X Sampled{Int64} 25:1:30 ForwardOrdered Regular Points, Y Sampled{Int64} 25:1:30 ForwardOrdered Regular Points @@ -139,7 +145,7 @@ or and interval of indices using the syntax `=a:b` or `(a:b)` ```julia ras[Ti(1:10)] ``` -``` +```julia 6×6×10 Raster{Float64,3} with dimensions: X Sampled{Int64} 25:1:30 ForwardOrdered Regular Points, Y Sampled{Int64} 25:1:30 ForwardOrdered Regular Points, @@ -162,7 +168,7 @@ values: [:, :, 1] ```julia ras[Ti=At(DateTime(2001))] ``` -``` +```julia 6×6 Raster{Float64,2} with dimensions: X Sampled{Int64} 25:1:30 ForwardOrdered Regular Points, Y Sampled{Int64} 25:1:30 ForwardOrdered Regular Points From eb7b5acb354332bc8b7a38b3d4e7a216b01ca854 Mon Sep 17 00:00:00 2001 From: Alex Gardner Date: Fri, 23 May 2025 08:49:14 -0700 Subject: [PATCH 25/43] fix Downloads badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a93c196ca..44aecb047 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ [![CI](https://github.com/rafaqz/Rasters.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/rafaqz/Rasters.jl/actions/workflows/ci.yml) [![Codecov](https://codecov.io/gh/rafaqz/Rasters.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/rafaqz/Rasters.jl) [![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) +[![Downloads](https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Fmonthly_downloads%2FDataFrames&query=total_requests&suffix=%2Fmonth&label=Downloads)](https://juliapkgstats.com/pkg/Rasters) [Rasters.jl](https://rafaqz.github.io/Rasters.jl/dev) is a powerful Julia package for working with spatial raster data. It provides a unified interface for reading, writing, and manipulating raster data. The package extends [DimensionalData.jl](https://rafaqz.github.io/DimensionalData.jl/dev/) to enable intuitive spatial indexing and manipulation of raster data. From 2cbda6d53ccb353fd73b7d1c4adfd4a9fa8ad6fd Mon Sep 17 00:00:00 2001 From: Alex Gardner Date: Fri, 23 May 2025 08:50:53 -0700 Subject: [PATCH 26/43] fix downloads badge request --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 44aecb047..f69bb8da9 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![CI](https://github.com/rafaqz/Rasters.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/rafaqz/Rasters.jl/actions/workflows/ci.yml) [![Codecov](https://codecov.io/gh/rafaqz/Rasters.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/rafaqz/Rasters.jl) [![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) -[![Downloads](https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Fmonthly_downloads%2FDataFrames&query=total_requests&suffix=%2Fmonth&label=Downloads)](https://juliapkgstats.com/pkg/Rasters) +[![Downloads](https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Fmonthly_downloads%2FRasters&query=total_requests&suffix=%2Fmonth&label=Downloads)](https://juliapkgstats.com/pkg/Rasters) [Rasters.jl](https://rafaqz.github.io/Rasters.jl/dev) is a powerful Julia package for working with spatial raster data. It provides a unified interface for reading, writing, and manipulating raster data. The package extends [DimensionalData.jl](https://rafaqz.github.io/DimensionalData.jl/dev/) to enable intuitive spatial indexing and manipulation of raster data. From 2ff1087dc03d793c95438161adabad547d7cc9d6 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Fri, 23 May 2025 12:19:19 -0400 Subject: [PATCH 27/43] Update docs/make.jl --- docs/make.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index 95a5c1f6a..8b5f2d7ab 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -46,7 +46,6 @@ makedocs( source = "src", build = "build", warnonly=true, - draft = true ) # Enable logging to console again From abc3d02651859d1fdd1ed03514dd0ef4150d87c5 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Fri, 23 May 2025 13:14:48 -0400 Subject: [PATCH 28/43] Update config.mts --- docs/src/.vitepress/config.mts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/src/.vitepress/config.mts b/docs/src/.vitepress/config.mts index 48508e86f..8acbdbc5d 100644 --- a/docs/src/.vitepress/config.mts +++ b/docs/src/.vitepress/config.mts @@ -99,6 +99,9 @@ export default defineConfig({ ], ignoreDeadLinks: false, vite: { + define: { + __DEPLOY_ABSPATH__: JSON.stringify('REPLACE_ME_DOCUMENTER_VITEPRESS_DEPLOY_ABSPATH'), + }, resolve: { alias: { '@': path.resolve(__dirname, '../components') From febbc763153d233058c13d78b2098cdc75cb7111 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 2 Jun 2025 11:36:34 +0200 Subject: [PATCH 29/43] fix lazy in create without filename (#979) * fix lazy in create without filename * fix Raster ambiguity --- src/array.jl | 2 +- src/create.jl | 4 +++- src/stack.jl | 5 +++-- test/create.jl | 8 ++++++++ test/runtests.jl | 2 +- 5 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/array.jl b/src/array.jl index 7dbfa923a..431f7b7ad 100644 --- a/src/array.jl +++ b/src/array.jl @@ -401,4 +401,4 @@ filekey(filename::String) = Symbol(splitext(basename(filename))[1]) # Add a `dimconstructor` method so `AbstractProjected` lookups create a Raster # TODO this should be unwrapped to `DD.lookupconstructor` to avoid future ambiguities -DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{Dimension}}) = Raster \ No newline at end of file +DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{Dimension}}) = Raster diff --git a/src/create.jl b/src/create.jl index 77ec2a7c6..031af64a9 100644 --- a/src/create.jl +++ b/src/create.jl @@ -247,9 +247,11 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; fill=nokw, layerdims=nokw, layermetadata=nokw, + lazy=false, f=identity, kw... ) + lazy && throw(ArgumentError("`lazy` cannot be `true` without passing a `filename` keyword")) layerdims = isnokw(layerdims) ? map(_ -> basedims(dims), types) : layerdims layermetadata = layermetadata isa NamedTuple ? layermetadata : map(_ -> layermetadata, types) layerfill = fill isa NamedTuple ? fill : map(_ -> fill, types) @@ -355,4 +357,4 @@ end _warn_keyword_not_used(label, obj) = @warn "`$label` of `$obj` found. But `chunks` are not used for in-memory rasters" _missingval_pair(missingval::Pair) = missingval -_missingval_pair(missingval) = missingval => missingval \ No newline at end of file +_missingval_pair(missingval) = missingval => missingval diff --git a/src/stack.jl b/src/stack.jl index d738a90c5..ba3843d68 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -293,6 +293,7 @@ function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{AbstractDimArray}}}; metadata=NoMetadata(), layermetadata::NamedTuple{K}=map(DD.metadata, _layers), layerdims::NamedTuple{K}=map(DD.basedims, _layers), + lazy=false, kw... ) where K data = map(parent, _layers) @@ -682,7 +683,7 @@ function RasterStack(dt::AbstractDimTree; keep=nothing) end end # TODO resolve the meaning of Raster(::RasterStack) -Raster(stack::AbstractDimStack) = cat(values(stack)...; dims=Band([keys(stack)...])) +Raster(stack::AbstractDimStack; kw...) = Raster(cat(values(stack)...; dims=Band([keys(stack)...])); kw...) # In DD it would be # Raster(st::AbstractDimStack) = # Raster([st[D] for D in DimIndices(st)]; dims=dims(st), metadata=metadata(st)) @@ -694,4 +695,4 @@ defaultmappedcrs(::Source, crs) = crs defaultmappedcrs(s::Source, ::NoKW) = defaultmappedcrs(s) defaultmappedcrs(::Source) = nothing -check_multilayer_dataset(ds) = throw(ArgumentError("$(typeof(ds)) is not a multilayer raster dataset")) \ No newline at end of file +check_multilayer_dataset(ds) = throw(ArgumentError("$(typeof(ds)) is not a multilayer raster dataset")) diff --git a/test/create.jl b/test/create.jl index 230dfdfd1..6120db504 100644 --- a/test/create.jl +++ b/test/create.jl @@ -128,6 +128,14 @@ end @test Rasters.name(st1) == (:c, :d) @test missingval(st1) === (c=0x00, d=Int16(1)) end + + @testset "lazy must have filename" begin + @test_throws ArgumentError Rasters.create((a=Int32, b=Float64, c=Bool), Extents.Extent(X=(0, 10), Y=(0, 5)); + size=(X=1024, Y=1024), + lazy=true, + ) do st + end + end end ext = ".nc" diff --git a/test/runtests.jl b/test/runtests.jl index 9b897b63f..7d0c85053 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -74,4 +74,4 @@ using ZarrDatasets Aqua.test_unbound_args(ext) Aqua.test_undefined_exports(ext) end -end \ No newline at end of file +end From 815bf5acfe570c336cc763c5254b5d73626ad780 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 2 Jun 2025 11:37:53 +0200 Subject: [PATCH 30/43] bump patch version to 0.14.5 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 210753fee..6232c0318 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Rasters" uuid = "a3a2b9e3-a471-40c9-b274-f788e487c689" authors = ["Rafael Schouten "] -version = "0.14.4" +version = "0.14.5" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From 06ae9f48eddf77e5f6d113270752abb0c4c81b5a Mon Sep 17 00:00:00 2001 From: Tiem van der Deure Date: Wed, 18 Jun 2025 14:45:34 +0200 Subject: [PATCH 31/43] remove unnecessary `replace_missing` (#982) --- docs/src/tutorials/spatial_mean.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/src/tutorials/spatial_mean.md b/docs/src/tutorials/spatial_mean.md index c3cc51606..351b35681 100644 --- a/docs/src/tutorials/spatial_mean.md +++ b/docs/src/tutorials/spatial_mean.md @@ -163,11 +163,10 @@ GCMs = Dim{:gcm}([GFDL_ESM4, IPSL_CM6A_LR]) # These are different general circul precip_future = (@d getfutureprec.(SSPs, GCMs)) |> RasterSeries |> Rasters.combine ```` -Since the format of WorldClim's datasets for future climate is slightly different from the dataset for the historical period, this actually returned a 5-dimensional raster, with a `Band` dimension that represents months. Here we'll just select the 6th month, matching the selection above (but note that the analysis would also work for all Bands simultaneously). We will also replace the `NaN` missing value by the more standard `missing` using [`replace_missing`](@ref). +Since the format of WorldClim's datasets for future climate is slightly different from the dataset for the historical period, this actually returned a 5-dimensional raster, with a `Band` dimension that represents months. Here we'll just select the 6th month, matching the selection above (but note that the analysis would also work for all Bands simultaneously). ````@example cellarea precip_future = precip_future[Band = 6] -precip_future = replace_missing(precip_future) ```` On our 4-dimensional raster, functions like `crop` and `mask`, as well as broadcasting, will still work. From c11690e7351c974144a7031e6d6f399c2901c9ae Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 23 Jun 2025 14:32:15 +0200 Subject: [PATCH 32/43] check if dimtree exists (#985) --- src/stack.jl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/stack.jl b/src/stack.jl index ba3843d68..178a5e8b0 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -673,15 +673,19 @@ end Base.convert(::Type{RasterStack}, src::AbstractDimStack) = RasterStack(src) -# For ambiguity. TODO: remove this method from DD ? -function RasterStack(dt::AbstractDimTree; keep=nothing) - if isnothing(keep) - pruned = DD.prune(dt; keep) - RasterStack(pruned[Tuple(keys(pruned))]) - else - RasterStack(dt[Tuple(keys(dt))]) +@static if :AbstractDimTree in names(DimensionalData) + # For ambiguity. TODO: remove this method from DD ? + function RasterStack(dt::AbstractDimTree; keep=nothing) + if isnothing(keep) + pruned = DD.prune(dt; keep) + RasterStack(pruned[Tuple(keys(pruned))]) + else + RasterStack(dt[Tuple(keys(dt))]) + end end end + + # TODO resolve the meaning of Raster(::RasterStack) Raster(stack::AbstractDimStack; kw...) = Raster(cat(values(stack)...; dims=Band([keys(stack)...])); kw...) # In DD it would be From 284022e427e27007f963c7fd49c6f5e907055438 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 23 Jun 2025 14:33:10 +0200 Subject: [PATCH 33/43] bump patch version to 0.14.6 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6232c0318..fbba93dee 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Rasters" uuid = "a3a2b9e3-a471-40c9-b274-f788e487c689" authors = ["Rafael Schouten "] -version = "0.14.5" +version = "0.14.6" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From 92b95c1e505de6e23bfbe4c58b750a7306a94155 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 5 Jul 2025 09:09:36 +1000 Subject: [PATCH 34/43] more passing cf --- src/Rasters.jl | 1 - src/create.jl | 1 - src/geometry_lookup/geometry_lookup.jl | 145 +++++++++++++- src/geometry_lookup/lookups.jl | 159 --------------- src/geometry_lookup/nc_io.jl | 158 --------------- src/sources/commondatamodel.jl | 49 +++-- ...areas_for_a_sqpherical_geodesic_grid.ncgen | 4 +- ...5_1_independent_coordinate_variables.ncgen | 9 +- ..._indexed_ragged_array_representation.ncgen | 4 +- ...two_dimensional_coordinate_variables.ncgen | 2 +- test/cf/5_3_reduced_horizontal_grid.ncgen | 8 +- ...ard_heat_transport_in_atlantic_ocean.ncgen | 7 +- test/cf/6_2_model_level_numbers.ncgen | 5 +- ...etric_coordinate_variable_has_bounds.ncgen | 2 +- ..._areas_for_a_spherical_geodesic_grid.ncgen | 4 +- ...7_6_surface_air_temperature_variance.ncgen | 2 +- test/cf/landsea.ncgen | 17 -- test/sources/cf.jl | 188 +++++++++++++++--- 18 files changed, 366 insertions(+), 399 deletions(-) delete mode 100644 src/geometry_lookup/lookups.jl delete mode 100644 src/geometry_lookup/nc_io.jl delete mode 100644 test/cf/landsea.ncgen diff --git a/src/Rasters.jl b/src/Rasters.jl index 39b1e2504..bc0378395 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -118,7 +118,6 @@ include("utils.jl") include("skipmissing.jl") include("geometry_lookup/geometry_lookup.jl") -include("geometry_lookup/lookups.jl") include("geometry_lookup/methods.jl") include("geometry_lookup/io.jl") diff --git a/src/create.jl b/src/create.jl index 62491d72e..3f83421d1 100644 --- a/src/create.jl +++ b/src/create.jl @@ -247,7 +247,6 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; fill=nokw, layerdims=nokw, layermetadata=nokw, - lazy=false, f=identity, lazy=false, kw... diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index 54a2bbb0c..3a2b9b09e 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -147,4 +147,147 @@ function DD.rebuild( new_manifold = isnokw(manifold) ? lookup.manifold : manifold return GeometryLookup(new_manifold, Missings.disallowmissing(data), new_tree, dims, new_crs) -end \ No newline at end of file +end + +# Bounds - get the bounds of the lookup +function Lookups.bounds(lookup::GeometryLookup) + if isempty(lookup.data) + Extents.Extent(NamedTuple{DD.name.(lookup.dims)}(ntuple(2) do i; (nothing, nothing); end)) + else + if isnothing(lookup.tree) + mapreduce(GI.extent, Extents.union, lookup.data) + else + GI.extent(lookup.tree) + end + end +end + +# Return an `Int` or Vector{Bool} +# Base case: got a standard index that can go into getindex on a base Array +Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel +# other cases: +# - decompose selectors +function Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) + selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) +end +function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K + dimsel = map(rebuild, map(name2dim, K), values(sel)) + selectindices(lookup, dimsel) +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) + if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) + i = findfirst(x -> all(map(Dimensions._matches, sel, x)), lookup) + isnothing(i) && _coord_not_found_error(sel) + return i + else + return [Dimensions._matches(sel, x) for x in lookup] + end +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + filter(potential_candidates) do candidate + GO.contains(lookup.data[candidate], val(sel)) + end +end +function Lookups.selectindices(lookup::GeometryLookup, sel::At) + if GI.trait(val(sel)) isa GI.PointTrait + Lookups.selectindices(lookup, (At(GI.x(val(sel))), At(GI.y(val(sel))))) + else # invoke the default method + Lookups.at(lookup, sel) + end +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Near) + geom = val(sel) + @assert GI.isgeometry(geom) + # TODO: temporary + @assert GI.trait(geom) isa GI.PointTrait "Only point geometries are supported for the near lookup at this point! We will add more geometry support in the future." + + # Get the nearest geometry + # TODO: this sucks! Use some branch and bound algorithm + # on the spatial tree instead. + # if pointtrait + return findmin(x -> GO.distance(geom, x), lookup.data)[2] + # else + # findmin(x -> GO.distance(GO.GEOS(), geom, x), lookup.data)[2] + # end + # this depends on LibGEOS being installed. + +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) + sel_ext = GI.extent(val(sel)) + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + return filter(potential_candidates) do candidate + GO.intersects(lookup.data[candidate], val(sel)) + end +end +function Lookups.selectindices( + lookup::GeometryLookup, + (xs, ys)::Tuple{Union{<:Touches}, Union{<:Touches}} +) + target_ext = Extents.Extent(X = (first(xs), last(xs)), Y = (first(ys), last(ys))) + potential_candidates = _maybe_get_candidates(lookup, target_ext) + return filter(potential_candidates) do candidate + GO.intersects(lookup.data[candidate], target_ext) + end +end +function Lookups.selectindices( + lookup::GeometryLookup, + (xs, ys)::Tuple{Union{<:DD.IntervalSets.ClosedInterval},Union{<:DD.IntervalSets.ClosedInterval}} +) + target_ext = Extents.Extent(X = extrema(xs), Y = extrema(ys)) + potential_candidates = _maybe_get_candidates(lookup, target_ext) + filter(potential_candidates) do candidate + GO.covers(target_ext, lookup.data[candidate]) + end +end +function Lookups.selectindices( + lookup::GeometryLookup, + (xs, ys)::Tuple{Union{<:At,<:Contains}, Union{<:At,<:Contains}} +) + xval, yval = val(xs), val(ys) + lookup_ext = Lookups.bounds(lookup) + + if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] + potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) + isempty(potential_candidates) && return Int[] + filter(potential_candidates) do candidate + GO.contains(lookup.data[candidate], (xval, yval)) + end + else + return Int[] + end +end + +@inline Lookups.reducelookup(l::GeometryLookup) = NoLookup(OneTo(1)) + +function Lookups.show_properties(io::IO, mime, lookup::GeometryLookup) + print(io, " ") + show(IOContext(io, :inset => "", :dimcolor => 244), mime, DD.basedims(lookup)) +end + +# Dimension methods + +@inline _reducedims(lookup::GeometryLookup, dim::DD.Dimension) = + rebuild(dim, [map(x -> zero(x), dim.val[1])]) + +function DD.format(dim::DD.Dimension{<:GeometryLookup}, axis::AbstractRange) + checkaxis(dim, axis) + return dim +end + +# Local functions +_val_or_nothing(::Nothing) = nothing +_val_or_nothing(d::DD.Dimension) = val(d) + +# Get the candidates for the selector extent. If the selector extent is disjoint from the tree rootnode extent, return an error. +function _maybe_get_candidates(lookup::GeometryLookup, selector_extent) + tree = lookup.tree + isnothing(tree) && return 1:length(lookup) + Extents.disjoint(GI.extent(tree), selector_extent) && return Int[] + potential_candidates = GO.SpatialTreeInterface.query( + tree, + Base.Fix1(Extents.intersects, selector_extent) + ) + isempty(potential_candidates) && return Int[] + return potential_candidates +end diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl deleted file mode 100644 index 885f49d8d..000000000 --- a/src/geometry_lookup/lookups.jl +++ /dev/null @@ -1,159 +0,0 @@ -#= -# Lookups methods - -Here we define the methods for the Lookups API. -The main entry point is `selectindices`, which is used to select the indices of the geometries that match the selector. - -We need to define methods that take selectors and convert them to extents, then GeometryOps needs -=# - -# Bounds - get the bounds of the lookup -function Lookups.bounds(lookup::GeometryLookup) - if isempty(lookup.data) - Extents.Extent(NamedTuple{DD.name.(lookup.dims)}(ntuple(2) do i; (nothing, nothing); end)) - else - if isnothing(lookup.tree) - mapreduce(GI.extent, Extents.union, lookup.data) - else - GI.extent(lookup.tree) - end - end -end - -# Return an `Int` or Vector{Bool} -# Base case: got a standard index that can go into getindex on a base Array -Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel -# other cases: -# - decompose selectors -function Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) - selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) -end -function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K - dimsel = map(rebuild, map(name2dim, K), values(sel)) - selectindices(lookup, dimsel) -end -function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) - if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) - i = findfirst(x -> all(map(Dimensions._matches, sel, x)), lookup) - isnothing(i) && _coord_not_found_error(sel) - return i - else - return [Dimensions._matches(sel, x) for x in lookup] - end -end -function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) - potential_candidates = _maybe_get_candidates(lookup, sel_ext) - filter(potential_candidates) do candidate - GO.contains(lookup.data[candidate], val(sel)) - end -end -function Lookups.selectindices(lookup::GeometryLookup, sel::At) - if GI.trait(val(sel)) isa GI.PointTrait - Lookups.selectindices(lookup, (At(GI.x(val(sel))), At(GI.y(val(sel))))) - else # invoke the default method - Lookups.at(lookup, sel) - end -end -function Lookups.selectindices(lookup::GeometryLookup, sel::Near) - geom = val(sel) - @assert GI.isgeometry(geom) - # TODO: temporary - @assert GI.trait(geom) isa GI.PointTrait "Only point geometries are supported for the near lookup at this point! We will add more geometry support in the future." - - # Get the nearest geometry - # TODO: this sucks! Use some branch and bound algorithm - # on the spatial tree instead. - # if pointtrait - return findmin(x -> GO.distance(geom, x), lookup.data)[2] - # else - # findmin(x -> GO.distance(GO.GEOS(), geom, x), lookup.data)[2] - # end - # this depends on LibGEOS being installed. - -end -function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) - sel_ext = GI.extent(val(sel)) - potential_candidates = _maybe_get_candidates(lookup, sel_ext) - return filter(potential_candidates) do candidate - GO.intersects(lookup.data[candidate], val(sel)) - end -end -function Lookups.selectindices( - lookup::GeometryLookup, - (xs, ys)::Tuple{Union{<:Touches}, Union{<:Touches}} -) - target_ext = Extents.Extent(X = (first(xs), last(xs)), Y = (first(ys), last(ys))) - potential_candidates = _maybe_get_candidates(lookup, target_ext) - return filter(potential_candidates) do candidate - GO.intersects(lookup.data[candidate], target_ext) - end -end -function Lookups.selectindices( - lookup::GeometryLookup, - (xs, ys)::Tuple{Union{<:DD.IntervalSets.ClosedInterval},Union{<:DD.IntervalSets.ClosedInterval}} -) - target_ext = Extents.Extent(X = extrema(xs), Y = extrema(ys)) - potential_candidates = _maybe_get_candidates(lookup, target_ext) - filter(potential_candidates) do candidate - GO.covers(target_ext, lookup.data[candidate]) - end -end -function Lookups.selectindices( - lookup::GeometryLookup, - (xs, ys)::Tuple{Union{<:At,<:Contains}, Union{<:At,<:Contains}} -) - xval, yval = val(xs), val(ys) - lookup_ext = Lookups.bounds(lookup) - - if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] - potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) - isempty(potential_candidates) && return Int[] - filter(potential_candidates) do candidate - GO.contains(lookup.data[candidate], (xval, yval)) - end - else - return Int[] - end -end - -@inline Lookups.reducelookup(l::GeometryLookup) = NoLookup(OneTo(1)) - -function Lookups.show_properties(io::IO, mime, lookup::GeometryLookup) - print(io, " ") - show(IOContext(io, :inset => "", :dimcolor => 244), mime, DD.basedims(lookup)) -end - -# Dimension methods - -@inline _reducedims(lookup::GeometryLookup, dim::DD.Dimension) = - rebuild(dim, [map(x -> zero(x), dim.val[1])]) - -function DD._format(dim::DD.Dimension{<:GeometryLookup}, axis::AbstractRange) - checkaxis(dim, axis) - return dim -end - -# Local functions -_val_or_nothing(::Nothing) = nothing -_val_or_nothing(d::DD.Dimension) = val(d) - - -# Selector implementations that use geometry (like Contains, Touches, etc.) -""" - _maybe_get_candidates(lookup, selector_extent) - -Get the candidates for the selector extent. If the selector extent is disjoint from the tree rootnode extent, return an error. -""" -function _maybe_get_candidates(lookup::GeometryLookup, selector_extent) - tree = lookup.tree - isnothing(tree) && return 1:length(lookup) - Extents.disjoint(GI.extent(tree), selector_extent) && return Int[] - potential_candidates = GO.SpatialTreeInterface.query( - tree, - Base.Fix1(Extents.intersects, selector_extent) - ) - isempty(potential_candidates) && return Int[] - return potential_candidates -end - - diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl deleted file mode 100644 index 8440294ba..000000000 --- a/src/geometry_lookup/nc_io.jl +++ /dev/null @@ -1,158 +0,0 @@ -using NCDatasets -import NCDatasets.CommonDataModel as CDM -import NCDatasets.NetCDF_jll - -using Test - -using Rasters -import GeoInterface as GI - -ds = NCDataset("C:\\Users\\rafael.schouten\\Downloads\\i.nc") -st = RasterStack("C:\\Users\\rafael.schouten\\Downloads\\i.nc") -write("geomtest3.nc", st; force=true) -ds = NCDataset("geomtest3.nc") -RasterStack("geomtest3.nc") -Raster("geomtest3.nc") -v = CDM.variable(ds, :lon) -CDM.dimnames(ds) -CDM.dimnames(v) -CDM.attribnames(var) -CDM.attrib(var, "coordinates") -map(CDM.keys(ds)) do v - var = CDM.variable(ds, v) - @show v - v => CDM.dimnames(var) - # all(CDM.dimnames(var)) do d - # @show d - # d in CDM.dimnames(ds) - # end ? v => :Alligned : v => :Unalligned -end - -geom = CDM.variable(ds, :geometry_container) -geom.attrib -coords = split(geom.attrib["coordinates"], ' ') -x = _read_geometry(ds, :geometry_container) |> pairs - -knowndims = Rasters._dims(var) - -unknowndims_idxs = findall(Rasters.isnolookup ∘ Rasters.lookup, knowndims) - -if length(unknowndims_idxs) > 1 - throw(ArgumentError("Only one unknown dimension is supported")) -elseif length(unknowndims_idxs) == 0 - return knowndims -end - -u_idx = only(unknowndims_idxs) - -u_dim_name = CDM.dimnames(var)[u_idx] - -has_geometry = haskey(CDM.attribs(var), "geometry") -if !has_geometry - throw(ArgumentError("Variable $u_dim_name does not have a geometry attribute")) -end -=# - -# generate dataset -output_path = tempname() * ".nc" -testfile = "multipolygons.ncgen" -run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) - -ds = NCDataset(output_path) -geometry_container_varname = CDM.attribs(var)["geometry"] -geometry_container_var = ds[geometry_container_varname] - -geometry_container_attribs = CDM.attribs(geometry_container_var) - -haskey(geometry_container_attribs, "geometry_type") && -geometry_container_attribs["geometry_type"] == "polygon" || -throw(ArgumentError("We only support polygon geometry types at this time, got $geometry_type")) - -geoms = Rasters._geometry_cf_decode(GI.PolygonTrait(), ds, geometry_container_attribs) - -encoded = Rasters._geometry_cf_encode(GI.PolygonTrait(), geoms) -node_count = collect(nggode_count_var) -node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) -part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) -interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) - -@test encoded.node_coordinates_x == ds["x"] -@test encoded.node_coordinates_y == ds["y"] -@test encoded.node_count == ds["node_count"] -@test encoded.part_node_count == ds["part_node_count"] -@test encoded.interior_ring == ds["interior_ring"] - -# lines now - -output_path = tempname() * ".nc" -testfile = "lines.ncgen" -run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) -ds = NCDataset(output_path) -var = ds["someData"] -geometry_container_varname = CDM.attribs(var)["geometry"] -geometry_container_var = ds[geometry_container_varname] -geometry_container_attribs = CDM.attribs(geometry_container_var) -geoms = Rasters._geometry_cf_decode(GI.LineStringTrait(), ds, geometry_container_attribs) - -encoded = Rasters._geometry_cf_encode(GI.LineStringTrait(), geoms) - -@test encoded.node_coordinates_x == ds["x"] -@test encoded.node_coordinates_y == ds["y"] -@test encoded.node_count == ds["node_count"] -@test !hasproperty(encoded, :part_node_count) - - -output_path = tempname() * ".nc" -testfile = "multilines.ncgen" -run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) -ds = NCDataset(output_path) -var = ds["someData"] -geometry_container_varname = CDM.attribs(var)["geometry"] -geometry_container_var = ds[geometry_container_varname] -geometry_container_attribs = CDM.attribs(geometry_container_var) -geoms = Rasters._geometry_cf_decode(GI.MultiLineStringTrait(), ds, geometry_container_attribs) - -encoded = Rasters._geometry_cf_encode(GI.MultiLineStringTrait(), geoms) - -@test encoded.node_coordinates_x == ds["x"] -@test encoded.node_coordinates_y == ds["y"] -@test encoded.part_node_count == ds["part_node_count"] -@test encoded.node_count == ds["node_count"] - - - - -output_path = tempname() * ".nc" -testfile = "multipoints.ncgen" -run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) -ds = NCDataset(output_path) - -var = ds["someData"] -geometry_container_varname = CDM.attribs(var)["geometry"] -geometry_container_var = ds[geometry_container_varname] -geometry_container_attribs = CDM.attribs(geometry_container_var) -geoms = Rasters._geometry_cf_decode(GI.MultiPointTrait(), ds, geometry_container_attribs) - -encoded = Rasters._geometry_cf_encode(GI.MultiPointTrait(), geoms) - -@test encoded.node_coordinates_x == ds["x"] -@test encoded.node_coordinates_y == ds["y"] -@test encoded.node_count == ds["node_count"] - - -output_path = tempname() * ".nc" -testfile = "points.ncgen" -run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) -ds = NCDataset(output_path) - -var = ds["someData"] -geometry_container_varname = CDM.attribs(var)["geometry"] -geometry_container_var = ds[geometry_container_varname] -geometry_container_attribs = CDM.attribs(geometry_container_var) -geoms = Rasters._geometry_cf_decode(GI.PointTrait(), ds, geometry_container_attribs) - -encoded = Rasters._geometry_cf_encode(GI.PointTrait(), geoms) - -@test encoded.node_coordinates_x == ds["x"] -@test encoded.node_coordinates_y == ds["y"] -@test !hasproperty(encoded, :node_count) \ No newline at end of file diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 481d44c03..ae22dfa97 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -196,8 +196,6 @@ function _layermetadata(ds::AbstractDataset; attrs) end -# Utils ######################################################################## - # Get the names of all data layers, removing # coordinate, bounds, and geometry variables etc function _layer_names(ds) @@ -243,9 +241,9 @@ function _layer_names(ds, var_names, vars_dict, attrs_dict) haskey(attr, "grid_mapping") && append!(grid_mapping_names, _grid_mapping_keys(attr["grid_mapping"])) # Units metadata variables haskey(attr, "units_metadata") && push!(units_metadata_names, attr["units_metadata"]) - # Common Standard names - very likely coordinates - haskey(attr, "standard_name") && attr["standard_name"] in keys(CDM_STANDARD_NAME_MAP) && - push!(standard_names, n) + # Common Standard names - very likely coordinates, but is this needed? + # haskey(attr, "standard_name") && attr["standard_name"] in keys(CDM_STANDARD_NAME_MAP) && + # push!(standard_names, n) end dim_names = CDM.dimnames(ds) return setdiff(var_names, @@ -415,20 +413,13 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na # span = Explicit(c) # Cyclic(data; cycle, order, sampling, span, bounds) # end - # if haskey(ds, dimname) && (isempty(coord_names) || dimname in coord_names) - # dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) - # # The dimension has a matching variable, and it's one of the coordinates - # D = _cdm_dimtype(dim_attrs, dimname) - # lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) - # return D(lookup) - # end # No matching variable was found, so we have something more complicated # Get all the coordinates for this dimension dimension_coord_names = filter(coord_names) do coordname coord_dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) dimname in coord_dimnames end - if length(dimension_coord_names) > 1 + if length(dimension_coord_names) > 0 linked_dimname = dimension_coord_names[1] dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, linked_dimname) dim_var = _get_var!(ds, vars_dict, linked_dimname) @@ -436,13 +427,13 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na chars = collect(dim_var) # Join char dimension as String, stripping null terminators strings = String.(strip.(join.(eachslice(chars; dims=2)), '\0')) - lookup = Categorical(strings; order=Unordered()) + metadata = _metadatadict(sourcetrait(ds), dim_attrs) + lookup = Categorical(strings; order=Unordered(), metadata) D = _cdm_dimtype(dim_attrs, linked_dimname) return D(lookup) end if length(dimension_coord_names) == 1 # If there is only one coordinate, just use it as the dimension - # This should rarely happen D = _cdm_dimtype(dim_attrs, linked_dimname) lookup = _cdm_lookup(ds, dim_attrs, linked_dimname, D, crs) return D(lookup) @@ -451,6 +442,7 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na _get_var!(ds, vars_dict, d) end is_multidimensional = any(v -> length(CDM.dimnames(v)) > 1, dim_vars) + @show is_multidimensional if is_multidimensional # Rotations # For now we don't use ArrayLookup @@ -472,12 +464,21 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na return D(MergedLookup(merged_data, Tuple(dims))) end end + + if haskey(ds, dimname) + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) + # The dimension has a matching variable, and it's one of the coordinates + D = _cdm_dimtype(dim_attrs, dimname) + lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) + return D(lookup) + end + # A matching variable doesn't exist. - # Its a "discrete axis" (4.5) so NoLookup + # It must be a "discrete axis" (CF 4.5) so NoLookup len = CDM.dim(ds, dimname) lookup = NoLookup(Base.OneTo(len)) D = _cdm_dimtype(NoMetadata(), dimname) - D(lookup) + return D(lookup) end # TODO don't load all keys here with _layers @@ -961,18 +962,22 @@ function _grid_mapping_keys(grid_mapping_key::String) end end +# Get a variable and store it in a dict _get_var!(ds, dict, key) = get!(() -> CDM.variable(ds, key), dict, key) -_get_attr!(ds, var_dict, attr_dict, key) = + +# Get atributes and store them in a dict +function _get_attr!(ds, var_dict, attr_dict, key) get!(attr_dict, key) do var = _get_var!(ds, var_dict, key) CDM.attribs(var) end +end -_eltype(::Source, var) = eltype(var) -_eltype(::CDMsource, var::AbstractArray) = eltype(var) +# We need a custom `eltype` method because Char is convered to String _eltype(::CDMsource, var::AbstractArray{Char}) = String +_eltype(::Source, var) = eltype(var) -# TODO just the core of this function here +# TODO keep just the core of this function here function _raster(ds::CDM.AbstractDataset; dims=nokw, refdims=nokw, @@ -1037,4 +1042,4 @@ function _raster(ds::CDM.AbstractDataset; raster = Raster(data_out, dims_out, refdims, name_out, metadata_out, missingval_out) # Maybe drop a single band dimension return _maybe_drop_single_band(raster, dropband, lazy) -end \ No newline at end of file +end diff --git a/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen b/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen index 56c2aedd9..ddd3cef23 100644 --- a/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen +++ b/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen @@ -1,4 +1,4 @@ -netcdf a_domain_containing_cell_areas_for_a_spherical_geodesic_grid { +netcdf 5_17_a_domain_containing_cell_areas_for_a_spherical_geodesic_grid { dimensions: cell = 2562 ; // number of grid cells time = 12 ; @@ -26,4 +26,4 @@ variables: cell_area:units = "m2" ; float lon_vertices(cell, nv) ; float lat_vertices(cell, nv) ; -} \ No newline at end of file +} diff --git a/test/cf/5_1_independent_coordinate_variables.ncgen b/test/cf/5_1_independent_coordinate_variables.ncgen index fe4f9ce53..6a125a0d9 100644 --- a/test/cf/5_1_independent_coordinate_variables.ncgen +++ b/test/cf/5_1_independent_coordinate_variables.ncgen @@ -20,4 +20,11 @@ variables: double time(time) ; time:long_name = "time" ; time:units = "days since 1990-1-1 0:0:0" ; -} \ No newline at end of file +data: + pres = 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15. ; + lat = 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18. ; + lon = 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36. ; + time = 0, 1, 2, 3 ; +} + + diff --git a/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen b/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen index c9aa4266c..5738277fd 100644 --- a/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen +++ b/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen @@ -1,4 +1,4 @@ -netcdf 5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation { +netcdf a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation { dimensions: station = 23 ; obs = UNLIMITED ; @@ -36,4 +36,4 @@ variables: time:units = "days since 1970-01-01 00:00:00" ; // global attributes: :featureType = "timeSeries" ; -} \ No newline at end of file +} diff --git a/test/cf/5_2_two_dimensional_coordinate_variables.ncgen b/test/cf/5_2_two_dimensional_coordinate_variables.ncgen index 4318f7097..098b5b5db 100644 --- a/test/cf/5_2_two_dimensional_coordinate_variables.ncgen +++ b/test/cf/5_2_two_dimensional_coordinate_variables.ncgen @@ -25,4 +25,4 @@ variables: float lat(yc,xc) ; lat:long_name = "latitude" ; lat:units = "degrees_north" ; -} \ No newline at end of file +} diff --git a/test/cf/5_3_reduced_horizontal_grid.ncgen b/test/cf/5_3_reduced_horizontal_grid.ncgen index 9a6d229bf..460f6191b 100644 --- a/test/cf/5_3_reduced_horizontal_grid.ncgen +++ b/test/cf/5_3_reduced_horizontal_grid.ncgen @@ -2,7 +2,7 @@ netcdf reduced_horizontal_grid { dimensions: londim = 128 ; latdim = 64 ; - rgrid = 6144 ; + rgrid = 4 ; variables: float PS(rgrid) ; PS:long_name = "surface pressure" ; @@ -16,4 +16,8 @@ variables: lat:units = "degrees_north" ; int rgrid(rgrid); rgrid:compress = "latdim londim"; -} \ No newline at end of file +data: + PS = 0.1, 0.2, 0.3, 0.4 ; + lon=1., 2., 3., 4. ; + lat=10., 20., 30., 40. ; +} diff --git a/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen b/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen index ff2dba51a..1e6f39188 100644 --- a/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen +++ b/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen @@ -1,6 +1,6 @@ netcdf northward_heat_transport_in_atlantic_ocean { dimensions: - time = 20 ; + time = 4 ; lat = 5 ; lbl = 1 ; variables: @@ -15,8 +15,9 @@ variables: lat:long_name = "latitude" ; lat:units = "degrees_north" ; string geo_region(lbl) ; - geo_region:standard_name="region" ; + geo_region:standard_name="region"; data: geo_region = "atlantic_ocean" ; lat = 10., 20., 30., 40., 50. ; -} \ No newline at end of file + time = 0, 1, 2, 3 ; +} diff --git a/test/cf/6_2_model_level_numbers.ncgen b/test/cf/6_2_model_level_numbers.ncgen index 5da7dd211..88862a666 100644 --- a/test/cf/6_2_model_level_numbers.ncgen +++ b/test/cf/6_2_model_level_numbers.ncgen @@ -11,4 +11,7 @@ variables: int model_level(sigma); // model level number at each height model_level:long_name="model level number"; model_level:positive="up"; -} \ No newline at end of file +data: + sigma = -1., -2., -3., -4., -5. ; + model_level = 10, 20, 30, 40, 50 ; +} diff --git a/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen b/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen index fa9f74166..06a2e172b 100644 --- a/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen +++ b/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen @@ -31,4 +31,4 @@ variables: temp:standard_name = "air_temperature" ; temp:units = "K"; temp:coordinates = "A B" ; // This attribute is included for the optional second method -} \ No newline at end of file +} diff --git a/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen b/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen index e16b59491..41846cbb4 100644 --- a/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen +++ b/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen @@ -25,4 +25,6 @@ variables: cell_area:units = "m2" ; float lon_vertices(cell,nv) ; float lat_vertices(cell,nv) ; -} \ No newline at end of file +data: + time = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ; +} diff --git a/test/cf/7_6_surface_air_temperature_variance.ncgen b/test/cf/7_6_surface_air_temperature_variance.ncgen index 9e7104744..94c6641db 100644 --- a/test/cf/7_6_surface_air_temperature_variance.ncgen +++ b/test/cf/7_6_surface_air_temperature_variance.ncgen @@ -16,4 +16,4 @@ variables: data: time=.5; time_bnds=0.,1.; -} \ No newline at end of file +} diff --git a/test/cf/landsea.ncgen b/test/cf/landsea.ncgen deleted file mode 100644 index 639f19516..000000000 --- a/test/cf/landsea.ncgen +++ /dev/null @@ -1,17 +0,0 @@ -netcdf landsea { -dimensions: - lat=73; - lon=96; - maxlen=20; - ls=2; -variables: - float surface_temperature(lat,lon); - surface_temperature:cell_methods="area: mean where land"; - float surface_upward_sensible_heat_flux(ls,lat,lon); - surface_upward_sensible_heat_flux:coordinates="land_sea"; - surface_upward_sensible_heat_flux:cell_methods="area: mean where land_sea"; - char land_sea(ls,maxlen); - land_sea:standard_name="area_type"; -data: - land_sea="land","sea"; -} \ No newline at end of file diff --git a/test/sources/cf.jl b/test/sources/cf.jl index 3ca715504..4d58785a5 100644 --- a/test/sources/cf.jl +++ b/test/sources/cf.jl @@ -4,44 +4,45 @@ using NearestNeighbors using OrderedCollections using Rasters.Lookups using Test +using Rasters: name +using Dates +using GeoInterface # Build all ncgen files -cfdir = joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "cf") +testdir = joinpath(dirname(dirname(Base.pathof(Rasters))), "test") +cfdir = joinpath(testdir, "cf") +cfdatadir = joinpath(testdir, "data", "cf") +mkpath(cfdatadir) example_paths = map(filter(endswith(".ncgen"), readdir(cfdir))) do input_name - input_path = joinpath(cfdir, input_name) - output_path = joinpath(cfdir, "..", "data", "cf", splitext(input_name)[1] * ".nc") + input_path = realpath(joinpath(cfdir, input_name)) + output_path = joinpath(cfdatadir, splitext(input_name)[1] * ".nc") NetCDF_jll.ncgen() do exe run(`$exe -k nc4 -b -o $output_path $input_path`) end output_path end + +example_ids = replace.(strip.(first.(split.(basename.(example_paths), r"[A-Za-z]")), '_'), ("_" => ".",)) +examples = OrderedDict(example_ids .=> example_paths) + +rasterstacks = map(enumerate(example_paths)) do (i, example_path) + name = splitext(basename(example_path))[1] + println(i, " => ", name) + name => RasterStack(example_path; lazy=true) +end |> OrderedDict + # rasters = map(example_paths) do example_path # name = splitext(basename(example_path))[1] # name == "5_1_independent_coordinate_variables" && return name => nothing # name => Raster(example_path; lazy=true) # end -# rasterstacks = map(enumerate(example_paths)) do (i, example_path) -# name = splitext(basename(example_path))[1] -# println(i, " => ", name) -# name == "5_1_independent_coordinate_variables" && return name => nothing -# name => RasterStack(example_path; lazy=true) -# end ; # st = last.(rasterstacks)[12] # st = last.(rasterstacks)[14] # refdims(st) # inds = findall(.!(isempty.(refdims.(last.(rasterstacks))))) # refdims(last.(rasterstacks[inds])[1]) -# for (k, r) in rasters -# println(k) -# display(r) -# readline() -# end - -example_ids = replace.(strip.(first.(split.(basename.(example_paths), r"[A-Za-z]")), '_'), ("_" => ".",)) -examples = OrderedDict(example_ids .=> example_paths) - -@testset "2.1" begin +@testset "2.1 string variable representation" begin rast = RasterStack(examples["2.1"]) @test rast.char_variable == rast.str_variable # Also works lazily @@ -52,7 +53,7 @@ examples = OrderedDict(example_ids .=> example_paths) @test_broken RasterStack(examples["2.1"]; lazy=true).char_variable == rast.str_variable end -@testset "3.1" begin +@testset "3.1 units metadata" begin rast = RasterStack(examples["3.1"]) md_os = metadata(rast[:Tonscale]) md_diff = metadata(rast[:Tdifference]) @@ -66,7 +67,7 @@ end @test md_diff["units_metadata"] == "temperature: difference" end -@testset "5.1" begin +@testset "5.1 independent coordinate variables" begin rast = RasterStack(examples["5.1"]) @test dims(rast) == (X(NoLookup(1:36)), Y(NoLookup(1:18)), Dim{:pres}(NoLookup(1:15)), Ti(NoLookup(1:4))) @test metadata(rast.xwind)["long_name"] == "zonal wind" @@ -74,8 +75,145 @@ end # TODO add real vars to the file so there is also dimension metadata end -rast = RasterStack(examples["5.2"]) -rast = RasterStack(examples["5.3"]) -rast = RasterStack(examples["5.6"]) -rast = RasterStack(examples["5.8"]) +@testset "5.2 two-dimensional coordinate variables" begin + rast = RasterStack(examples["5.2"]; lazy=true) + # TODO multi-dimensional mapped + @test_broken rast[Lat=1.0, Lon=2.0] isa Float32 +end + +@testset "5.3 reduced horizontal grid" begin + rast = RasterStack(examples["5.3"]; lazy=true) + @test rast[rgrid=2].PS === 0.2f0 + @test rast[rgrid=At(3.0, 30.0)].PS === 0.3f0 + @test_broken rast[X=At(4.0)].PS == [0.4] + @test_broken rast[Y=At(30.0)].PS == [0.3] + @test_broken rast[X=At(1.0), Y=At(10.0)] == 0.1 +end + +@testset "5.6 rotated pole grid" begin + rast = RasterStack(examples["5.6"]; lazy=true) + @test_broken rast[Lat=1.0, Lon=2.0] isa Float32 +end + +@testset "5.8-9 WGS 84 EPSG" begin + rast = RasterStack(examples["5.8"]; lazy=true) + @test crs(rast) isa EPSG + @test crs(rast) == EPSG(4326) + rast = RasterStack(examples["5.9"]; lazy=true) + @test crs(rast) isa EPSG + @test crs(rast) == EPSG(4326) +end + +@testset "5.10 British national grid" begin + rast = RasterStack(examples["5.10"]; lazy=true) + @test_broken !isnothing(crs(rast)) + @test keys(rast) == (:temp, :pres) +end + +@testset "5.11 WGS 84 WellKnownText2" begin + rast = RasterStack(examples["5.11"]; lazy=true) + @test crs(rast) isa WellKnownText2 +end + +@testset "5.14 refdims from scalar coordinates" begin + rast = RasterStack(examples["5.14"]; lazy=true) + @test name(refdims(rast)) == (:atime, :p500) +end + +@testset "domains" begin + RasterStack(examples["5.15"]; lazy=true) + RasterStack(examples["5.16"]; lazy=true) + @testset "5.17 domain defines dimension and coords" begin + RasterStack(examples["5.17"]; lazy=true) + end + RasterStack(examples["5.18"]; lazy=true) + RasterStack(examples["5.19"]; lazy=true) + RasterStack(examples["5.20"]; lazy=true) +end + +@testset "taxon name/id" begin + rast = Raster(examples["6.1.2"]; lazy=true) + @test name(rast) == :abundance + @test lookup(rast, :taxon_lsid) isa Categorical{String,Vector{String},Unordered} + @test lookup(rast, :taxon_lsid)[1] == "urn:lsid:marinespecies.org:taxname:104464" + @test metadata(lookup(rast, :taxon_lsid))["standard_name"] == "biological_taxon_lsid" + # Second lookup not yet implemented + @test_broken lookup(rast, :taxon_name)[1] == "Calanus finmarchicus" +end + +@testset "6.1 region" begin + rast = RasterStack(examples["6.1"]; lazy=true) + @test name(rast) == (:n_heat_transport,) + @test lookup(rast, :geo_region) isa Categorical{String,Vector{String},Unordered} + @test lookup(rast, :geo_region)[1] == "atlantic_ocean" + @test metadata(lookup(rast, :geo_region))["standard_name"] == "region" + @test lookup(rast, Y) == 10.0:10:50 + @test lookup(rast, Ti)[1] == DateTime(1990) +end + +@testset "6.2 seondary lookup coordinates" begin + rast = Raster(examples["6.2"]; lazy=true) + @test name(rast) == :xwind + @test lookup(rast, :sigma) == -1:-1:-5 + # Second lookup not yet implemented + @test_broken lookup(rast, :model_level) == 10:10:50 +end + +@testset "7.2 non-aligned horizontal grid" begin + RasterStack(examples["7.2"]; lazy=true) +end + +@testset "7.3-4 formula terms" begin + # These are not implemented + RasterStack(examples["7.3"]; lazy=true) + RasterStack(examples["7.4"]; lazy=true) +end + +@testset "7.5 methods applied to a timeseries" begin + rast = RasterStack(examples["7.5"]; lazy=true) +end + +@testset "7.6 spacing of data" begin + rast = RasterStack(examples["7.6"]; lazy=true) + @test lookup(rast, Ti) == [DateTime(1990, 1, 1, 12)] + @test Rasters.bounds(rast, Ti) == (DateTime(1990, 1, 1, 0), DateTime(1990, 1, 2, 0)) + @test metadata(rast.TS_var)["cell_methods"] == "time: variance (interval: 1 hr comment: sampled instantaneously)" +end + +@testset "7.7 labelled categorical axis" begin + rast = RasterStack(examples["7.7"]; lazy=true) + @test lookup(rast, :land_sea) == ["land", "sea"] +end + +@testset "climatology bounds" begin + # Not implemented + RasterStack(examples["7.9"]; lazy=true) + RasterStack(examples["7.10"]; lazy=true) + RasterStack(examples["7.11"]; lazy=true) + RasterStack(examples["7.12"]; lazy=true) + RasterStack(examples["7.13"]; lazy=true) + RasterStack(examples["7.14"]; lazy=true) +end +@testset "Geometry lookups" begin + @testset "7.15 linestring" begin + rast = RasterStack(examples["7.15"]) + @test size(rast) == (4, 2) + @test dims(rast, :Geometry)[1] isa GeoInterface.Wrappers.LineString + @test GeoInterface.getpoint(dims(rast, :Geometry)[1]) == [(30.0, 10.0), (10.0, 30.0), (40.0, 40.0)] + end + @testset "7.16 polygon" begin + rast = RasterStack(examples["7.16"]) + multipoly = dims(rast, :Geometry)[1] + @test multipoly isa GeoInterface.MultiPolygon + poly = GeoInterface.getgeom(dims(rast, :Geometry)[1], 1) + @test poly isa GeoInterface.Polygon + GeoInterface.nring(poly) == 2 + GeoInterface.nhole(poly) == 1 + @test GeoInterface.getgeom(dims(rast, :Geometry)[1], 1) isa GeoInterface.Polygon + @test collect(GeoInterface.getpoint(dims(rast, :Geometry)[1])) == + [(20.0, 0.0), (10.0, 15.0), (0.0, 0.0), (20.0, 0.0), + (5.0, 5.0), (10.0, 10.0), (15.0, 5.0), (5.0, 5.0), + (20.0, 20.0), (10.0, 35.0), (0.0, 20.0), (20.0, 20.0)] + end +end From 1fce5ade07153b4b72659322be40d339e3c7fb71 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 5 Jul 2025 10:09:41 +1000 Subject: [PATCH 35/43] add UGRID mesh --- ...nsional_UGRID_mesh_topology_variable.ncgen | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 test/cf/5_21_a_two_dimensional_UGRID_mesh_topology_variable.ncgen diff --git a/test/cf/5_21_a_two_dimensional_UGRID_mesh_topology_variable.ncgen b/test/cf/5_21_a_two_dimensional_UGRID_mesh_topology_variable.ncgen new file mode 100644 index 000000000..f179b0c05 --- /dev/null +++ b/test/cf/5_21_a_two_dimensional_UGRID_mesh_topology_variable.ncgen @@ -0,0 +1,54 @@ +netcdf a_two_dimensional_UGRID_mesh_topology_variable { +dimensions: + node = 5 ; // Number of mesh nodes + edge = 6 ; // Number of mesh edges + face = 2 ; // Number of mesh faces + two = 2 ; // Number of nodes per edge + four = 4 ; // Maximum number of nodes per face + time = 12 ; +variables: + // Mesh topology variable + integer mesh ; + mesh:cf_role = "mesh_topology" ; + mesh:long_name = "Topology of a 2-d unstructured mesh" ; + mesh:topology_dimension = 2 ; + mesh:node_coordinates = "mesh_node_x mesh_node_y" ; + mesh:edge_node_connectivity = "mesh_edge_nodes" ; + mesh:face_node_connectivity = "mesh_face_nodes" ; + // Mesh node coordinates + double mesh_node_x(node) ; + mesh_node_x:standard_name = "longitude" ; + mesh_node_x:units = "degrees_east" ; + double mesh_node_y(node) ; + mesh_node_y:standard_name = "latitude" ; + mesh_node_y:units = "degrees_north" ; + // Mesh connectivity variables + integer mesh_face_nodes(face, four) ; + mesh_face_nodes:long_name = "Maps each face to its 3 or 4 corner nodes" ; + integer mesh_edge_nodes(edge, two) ; + mesh_edge_nodes:long_name = "Maps each edge to the 2 nodes it connects" ; + // Coordinate variables + float time(time) ; + time:standard_name = "time" ; + time:units = "days since 2004-06-01" ; + // Data at mesh faces + double volume_at_faces(time, face) ; + volume_at_faces:standard_name = "air_density" ; + volume_at_faces:units = "kg m-3" ; + volume_at_faces:mesh = "mesh" ; + volume_at_faces:location = "face" ; + // Data at mesh edges + double flux_at_edges(time, edge) ; + flux_at_edges:standard_name = "northward_wind" ; + flux_at_edges:units = "m s-1" ; + flux_at_edges:mesh = "mesh" ; + flux_at_edges:location = "edge" ; + // Data at mesh nodes + double height_at_nodes(time, node) ; + height_at_nodes:standard_name = "sea_surface_height_above_geoid" ; + height_at_nodes:units = "m" ; + height_at_nodes:mesh = "mesh" ; + height_at_nodes:location = "node" ; +data: + time = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ; +} From e7d86b52bc6d1ea22549d5e65771c5e14b99c699 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 6 Jul 2025 23:55:59 +1000 Subject: [PATCH 36/43] refactor cf --- src/lookup.jl | 3 +- src/sources/commondatamodel.jl | 217 +++++++++++++++++++-------------- 2 files changed, 129 insertions(+), 91 deletions(-) diff --git a/src/lookup.jl b/src/lookup.jl index f3027d784..5a43ae16f 100644 --- a/src/lookup.jl +++ b/src/lookup.jl @@ -182,6 +182,7 @@ DD.hasmultipledimensions(lookup::Mapped) = true GeoInterface.crs(lookup::Mapped) = lookup.crs mappedcrs(lookup::Mapped) = lookup.mappedcrs dim(lookup::Mapped) = lookup.dim +DD.dim(lookup::Mapped) = lookup.dims """ convertlookup(dstlookup::Type{<:Lookup}, x) @@ -303,4 +304,4 @@ _projectedindex(lookup::Mapped, dim::Dimension) = _projectedindex(crs(lookup), l _projectedindex(crs::Nothing, lookup::Mapped, dim::Dimension) = error("No projection crs attached to $(name(dim)) dimension") _projectedindex(crs::GeoFormat, lookup::Mapped, dim::Dimension) = - reproject(mappedcrs(dim), crs, dim, index(dim)) \ No newline at end of file + reproject(mappedcrs(dim), crs, dim, index(dim)) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index ae22dfa97..d5d5f5642 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -369,111 +369,83 @@ end _organise_dataset(ds::AbstractDataset, names, group) = _organise_dataset(ds.group[group], names, nokw) + function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs) - if !isnothing(geometry_key) - geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) - # Check that the geometry is for this dimension - is_geometry_dimension = if haskey(geometry_attrs, "node_count") - # Multi-part geometries - dimname in CDM.dimnames(_get_var!(ds, vars_dict, geometry_attrs["node_count"])) - else - # Points - dimname in CDM.dimnames(_get_var!(ds, vars_dict, first(_cf_node_coordinate_names(geometry_attrs)))) - end - geoms = get!(geometry_dict, geometry_key) do - _cf_geometry_decode(ds, geometry_attrs) - end - if is_geometry_dimension - dimension_coord_names = filter(coord_names) do coordname - dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) - length(dimnames) == 1 && only(dimnames) == dimname - end - lookup = GeometryLookup(geoms, (X(), Y()); crs) - D = Geometry - return D(lookup) - end + # First check for geometry dimensions (remove most complicted first) + if is_geometry(ds, vars_dict, attrs_dict, geometry_key, dimname) + return _geometry_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, crs) end - # TODO Cycled climatology dims - # if haskey(dim_attrs, "climatology") - # climatology_bounds = _get_var(ds, vars_dict, dim_attrs["climatology"]) - # var = _get_var(ds, vars_dict, var_name) - # var_attib = _get_attr!(ds, vars_dict, attrs_dict, var_name) - # # We need to know if this is over years/months/days etc - # diff = map((second, minute, hour, day, month, year)) do f - # f(climatology_bounds[:, 2]) - f(climatology_bounds[:, 1]) - # end - # # Find the smallest step - # i = findfirst(>(0), diff) - # step = (Second, Minute, Hour, Day, Month, Year)[i](diff[i]) - # # Find the total time span - # bounds = extrema(climatology_bounds) - # data = collect(dim_var) - # order = LA.orderof(data) - # sampling = Regular(step) - # span = Explicit(c) - # Cyclic(data; cycle, order, sampling, span, bounds) - # end - # No matching variable was found, so we have something more complicated - # Get all the coordinates for this dimension + + # Then get all the coordinates for this dimension dimension_coord_names = filter(coord_names) do coordname coord_dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) dimname in coord_dimnames end - if length(dimension_coord_names) > 0 - linked_dimname = dimension_coord_names[1] - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, linked_dimname) - dim_var = _get_var!(ds, vars_dict, linked_dimname) - if ndims(dim_var) > 1 && eltype(dim_var) <: Char - chars = collect(dim_var) - # Join char dimension as String, stripping null terminators - strings = String.(strip.(join.(eachslice(chars; dims=2)), '\0')) - metadata = _metadatadict(sourcetrait(ds), dim_attrs) - lookup = Categorical(strings; order=Unordered(), metadata) - D = _cdm_dimtype(dim_attrs, linked_dimname) - return D(lookup) - end - if length(dimension_coord_names) == 1 - # If there is only one coordinate, just use it as the dimension - D = _cdm_dimtype(dim_attrs, linked_dimname) - lookup = _cdm_lookup(ds, dim_attrs, linked_dimname, D, crs) - return D(lookup) + # Now dimension/lookup will depend on if there are associated coordinates + if isempty(dimension_coord_names) + if haskey(ds, dimname) # The var name is the dim name + return _standard_dim(ds, vars_dict, attrs_dict, dimname, crs) + else # A matching variable doesn't exist. + return _discrete_axis_dim(ds, dimname) end - dim_vars = map(dimension_coord_names) do d - _get_var!(ds, vars_dict, d) + else # Use coord names + linked_coord_name = dimension_coord_names[1] + dim_var = _get_var!(ds, vars_dict, linked_coord_name) + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, linked_coord_name) + # Short circuit for char dims + if is_char_categorical(dim_var) + return _char_categorical_dim(ds, dim_var, dim_attrs, linked_coord_name) end - is_multidimensional = any(v -> length(CDM.dimnames(v)) > 1, dim_vars) - @show is_multidimensional - if is_multidimensional - # Rotations - # For now we don't use ArrayLookup - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) - D = _cdm_dimtype(dim_attrs, dimname) - lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) - return D(lookup) + # Otherwise dims/lookup depend on the number of associated coords + if length(dimension_coord_names) == 1 + # Only one coordinate, so just use it as the dimension/lookup + return _standard_dim(ds, vars_dict, attrs_dict, linked_coord_name, crs) else - # Generate a MergedLookup for multi-coordinate dimensions - dims = map(dimension_coord_names) do d - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, d) - D = _cdm_dimtype(dim_attrs, d) - lookup = _cdm_lookup(ds, dim_attrs, d, D, crs) - D(lookup) + if is_multidimensional(ds, vars_dict, dimension_coord_names) + return _rotated_dim(ds, vars_dict, attrs_dict, dimname, crs) + else + return _merged_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) end - D = _cdm_dimtype(NoMetadata(), dimname) - merged_data = collect(zip(DD.lookup.(dims)...)) - # Use a MergedLookup with all dimensions - return D(MergedLookup(merged_data, Tuple(dims))) end end + # TODO: allow Categorical, Sampled etc to have a nested dimension + # remaining_coord_names = setdiff(dimension_coord_names, [linked_coord_name]) + # subdim = _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, remaining_coord_names, dimname, crs) +end - if haskey(ds, dimname) - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) - # The dimension has a matching variable, and it's one of the coordinates +is_char_categorical(dim_var) = ndims(dim_var) > 1 && eltype(dim_var) <: Char +is_climatology(dim_attrs) = haskey(dim_attrs, "climatology") +function is_multidimensional(ds, vars_dict, dimension_coord_names) + dim_vars = map(dimension_coord_names) do d + _get_var!(ds, vars_dict, d) + end + return any(v -> length(CDM.dimnames(v)) > 1, dim_vars) +end +function is_geometry(ds, vars_dict, attrs_dict, geometry_key, dimname) + isnothing(geometry_key) && return false + geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) + # Check that the geometry is for this dimension + is_geometry_dimension = if haskey(geometry_attrs, "node_count") + # Multi-part geometries + dimname in CDM.dimnames(_get_var!(ds, vars_dict, geometry_attrs["node_count"])) + else + # Points + dimname in CDM.dimnames(_get_var!(ds, vars_dict, first(_cf_node_coordinate_names(geometry_attrs)))) + end +end + +function _standard_dim(ds, vars_dict, attrs_dict, dimname, crs) + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) + # if haskey(dim_attrs, "climatology") + # return _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, dimname) + # else D = _cdm_dimtype(dim_attrs, dimname) lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) return D(lookup) - end + # end +end - # A matching variable doesn't exist. +function _discrete_axis_dim(ds, dimname) # It must be a "discrete axis" (CF 4.5) so NoLookup len = CDM.dim(ds, dimname) lookup = NoLookup(Base.OneTo(len)) @@ -481,6 +453,71 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na return D(lookup) end +function _geometry_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, crs) + geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) + geoms = get!(geometry_dict, geometry_key) do + _cf_geometry_decode(ds, geometry_attrs) + end + lookup = GeometryLookup(geoms, (X(), Y()); crs) + D = Geometry + return D(lookup) +end + +function _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, var_name) + climatology_bounds = _get_var!(ds, vars_dict, dim_attrs["climatology"]) + var = _get_var!(ds, vars_dict, var_name) + var_attib = _get_attr!(ds, vars_dict, attrs_dict, var_name) + # We need to know if this is over years/months/days etc + diff = map((second, minute, hour, day, month, year)) do f + f(climatology_bounds[:, 2]) - f(climatology_bounds[:, 1]) + end + # Find the smallest step + i = findfirst(>(0), diff) + step = (Second, Minute, Hour, Day, Month, Year)[i](diff[i]) + # Find the total time span + bounds = extrema(climatology_bounds) + data = collect(dim_var) + order = LA.orderof(data) + sampling = Regular(step) + span = Explicit(c) + lookup = Cyclic(data; cycle, order, sampling, span, bounds) + D = _cdm_dimtype(dim_attrs, name) + return D(lookup) +end + +function _char_categorical_dim(ds, dim_var, dim_attrs, name) + chars = collect(dim_var) + # Join char dimension as String, stripping null terminators + strings = String.(strip.(join.(eachslice(chars; dims=2)), '\0')) + metadata = _metadatadict(sourcetrait(ds), dim_attrs) + + lookup = Categorical(strings; order=Unordered(), metadata) + D = _cdm_dimtype(dim_attrs, name) + return D(lookup) +end + +function _rotated_dim(ds, vars_dict, attrs_dict, dimname, crs) + # Rotations. For now we don't use ArrayLookup + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) + D = _cdm_dimtype(dim_attrs, dimname) + lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) + return D(lookup) +end + +function _merged_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) + # Generate a MergedLookup for multi-coordinate dimensions + dims = map(dimension_coord_names) do d + dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, d) + D = _cdm_dimtype(dim_attrs, d) + lookup = _cdm_lookup(ds, dim_attrs, d, D, crs) + D(lookup) + end + D = _cdm_dimtype(NoMetadata(), dimname) + merged_data = collect(zip(DD.lookup.(dims)...)) + # Use a MergedLookup with all dimensions + return D(MergedLookup(merged_data, Tuple(dims))) +end + # TODO don't load all keys here with _layers _name_or_firstname(ds::AbstractDataset, name) = Symbol(name) function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}) @@ -571,7 +608,7 @@ function _cdm_lookup( end # For X and Y use a Mapped <: AbstractSampled lookup function _cdm_lookup( - data, D::Type{<:Union{<:XDim,<:YDim}}, order::Order, span, sampling, metadata, crs + data, D::Type{<:Union{<:XDim,<:YDim}}, order::Order, span, sampling, metadata, crs; dims=nothing ) # If units are degrees north/east, use EPSG:4326 as the mapped crs units = get(metadata, "units", "") @@ -583,7 +620,7 @@ function _cdm_lookup( crs = mappedcrs end dim = DD.basetypeof(D)() - return Mapped(data; order, span, sampling, metadata, crs, mappedcrs, dim) + return Mapped(data; order, span, sampling, metadata, crs, mappedcrs, dim, dims) end # Band dims have a Categorical lookup, with order # This is not CF, just for consistency with GDAL From 9cddb026f6a86e38f2c5d0119b0ed2a325b24032 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 7 Jul 2025 17:56:23 +1000 Subject: [PATCH 37/43] mostly working climatology bounds --- src/sources/commondatamodel.jl | 42 +++++++++++++++--------- test/cf/7_9_climatological_seasons.ncgen | 19 +++++++---- test/sources/cf.jl | 32 +++++++++++++++--- 3 files changed, 66 insertions(+), 27 deletions(-) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index d5d5f5642..8e0bedf8d 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -436,13 +436,13 @@ end function _standard_dim(ds, vars_dict, attrs_dict, dimname, crs) dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) - # if haskey(dim_attrs, "climatology") - # return _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, dimname) - # else + if haskey(dim_attrs, "climatology") + return _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, dimname) + else D = _cdm_dimtype(dim_attrs, dimname) lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) return D(lookup) - # end + end end function _discrete_axis_dim(ds, dimname) @@ -463,24 +463,36 @@ function _geometry_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, c return D(lookup) end -function _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, var_name) - climatology_bounds = _get_var!(ds, vars_dict, dim_attrs["climatology"]) - var = _get_var!(ds, vars_dict, var_name) - var_attib = _get_attr!(ds, vars_dict, attrs_dict, var_name) +function _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, name) + periodtypes = (Year, Month, Day, Hour, Minute, Second) + oneperiods = map(p -> p(1), periodtypes) + dim_var = ds[name] + climatology_bounds = collect(CDM.cfvariable(ds, dim_attrs["climatology"])) # We need to know if this is over years/months/days etc - diff = map((second, minute, hour, day, month, year)) do f - f(climatology_bounds[:, 2]) - f(climatology_bounds[:, 1]) + diff = map(oneperiods) do p + length(climatology_bounds[1, 1]:p:climatology_bounds[2, 1]) - 1 end # Find the smallest step - i = findfirst(>(0), diff) - step = (Second, Minute, Hour, Day, Month, Year)[i](diff[i]) + i = findfirst(d -> first(d) > 0, diff) + # todo use if/else for type stability + period = periodtypes[i](diff[i][1]) + # TODO assuming 1 year/1 month is not correct + cycle = period / period.value + stepdiff = map(oneperiods) do p + length(climatology_bounds[1, 1]:p:(climatology_bounds[2, 1] - period)) - 1 + end + i = findfirst(d -> first(d) > 0, stepdiff) + step = periodtypes[i](stepdiff[i][1]) # Find the total time span bounds = extrema(climatology_bounds) data = collect(dim_var) order = LA.orderof(data) - sampling = Regular(step) - span = Explicit(c) - lookup = Cyclic(data; cycle, order, sampling, span, bounds) + intervalbounds = climatology_bounds + @show intervalbounds + intervalbounds[2, :] .-= period + sampling = Intervals(Center()) + span = Explicit(intervalbounds) + lookup = Cyclic(data; cycle, order, sampling, span)#, bounds) D = _cdm_dimtype(dim_attrs, name) return D(lookup) end diff --git a/test/cf/7_9_climatological_seasons.ncgen b/test/cf/7_9_climatological_seasons.ncgen index 105dfec61..d660b9f47 100644 --- a/test/cf/7_9_climatological_seasons.ncgen +++ b/test/cf/7_9_climatological_seasons.ncgen @@ -13,10 +13,15 @@ variables: time:climatology="climatology_bounds"; time:units="days since 1960-1-1"; double climatology_bounds(time,nv); -data: // time coordinates translated to datetime format - time="1960-4-16", "1960-7-16", "1960-10-16", "1961-1-16" ; - climatology_bounds="1960-3-1", "1990-6-1", - "1960-6-1", "1990-9-1", - "1960-9-1", "1990-12-1", - "1960-12-1", "1991-3-1" ; -} \ No newline at end of file +data: + // time="1960-4-16", "1960-7-16", "1960-10-16", "1961-1-16"; + time=106, 197, 289, 381; + // climatology_bounds="1960-3-1", "1990-6-1", + // "1960-6-1", "1990-9-1", + // "1960-9-1", "1990-12-1", + // "1960-12-1", "1991-3-1"; + climatology_bounds=60, 11109, + 152, 11201, + 244, 11292, + 335, 11382; +} diff --git a/test/sources/cf.jl b/test/sources/cf.jl index 4d58785a5..1703e77ed 100644 --- a/test/sources/cf.jl +++ b/test/sources/cf.jl @@ -1,13 +1,23 @@ -using Rasters, NCDatasets -using NCDatasets.NetCDF_jll +using Rasters +import NCDatasets +import NCDatasets.NetCDF_jll using NearestNeighbors using OrderedCollections using Rasters.Lookups using Test -using Rasters: name +using Rasters: name, bounds using Dates using GeoInterface +# using NCDatasets +# using NCDatasets.NetCDF_jll + +# NetCDF_jll.ncdump() do exe +# run(`$exe clim.nc`) +# end +# ds = collect(NCDataset("clim.nc")["time"]) +# ds = collect(NCDataset("clim.nc")["climatology_bounds"]) + # Build all ncgen files testdir = joinpath(dirname(dirname(Base.pathof(Rasters))), "test") cfdir = joinpath(testdir, "cf") @@ -160,7 +170,7 @@ end end @testset "7.2 non-aligned horizontal grid" begin - RasterStack(examples["7.2"]; lazy=true) + rast = RasterStack(examples["7.2"]; lazy=true) end @testset "7.3-4 formula terms" begin @@ -187,7 +197,19 @@ end @testset "climatology bounds" begin # Not implemented - RasterStack(examples["7.9"]; lazy=true) + rast = RasterStack(examples["7.9"]; lazy=true) + @test rast[Ti=Near(DateTime(1960, 2))] == rast[Ti=1] + @test rast[Ti=At(DateTime(1960, 4, 16))] == + rast[Ti=At(DateTime(1970, 4, 16))] == + rast[Ti=At(DateTime(1990, 4, 16))] == rast[Ti=1] + # We need to add an `cycle_bounds` keyword or something + # to Cyclic so the cycle itself is bounded over a valid period + @test_broken @test_throws BoundsError rast[Ti=At(DateTime(2000, 4, 16))] + @test_broken @test_throws BoundsError rast[Ti=At(DateTime(1950, 4, 16))] + # Contains is just broken for Cyclic, this should work + @test_broken rast[Ti=Contains(DateTime(1970, 6, 1))] == rast[Ti=2] + @test_broken rast[Ti=Contains(DateTime(1970, 5, 31))] == rast[Ti=1] + RasterStack(examples["7.10"]; lazy=true) RasterStack(examples["7.11"]; lazy=true) RasterStack(examples["7.12"]; lazy=true) From ec80d6c94c6c3859eea69b2871546b3ed7ab4594 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 13 Jul 2025 01:36:42 +1000 Subject: [PATCH 38/43] unaligned --- Project.toml | 4 +- src/dimensions.jl | 24 +- src/filearray.jl | 1 - src/sources/commondatamodel.jl | 224 ++++++++++++------ .../2_1_string_variable_representation.ncgen | 4 +- test/cf/3_1_units_metadata.ncgen | 4 +- test/cf/5_10_british_national_grid.ncgen | 4 +- test/cf/5_11_wgs84_crs_wkt.ncgen | 4 +- ...ple_forecasts_from_a_single_analysis.ncgen | 4 +- ...ith_independent_coordinate_variables.ncgen | 4 +- ..._pole_and_scalar_coordinate_variable.ncgen | 4 +- ...a_domain_with_no_specific_dimensions.ncgen | 4 +- ...ain_containing_a_timeseries_geometry.ncgen | 4 +- ...5_1_independent_coordinate_variables.ncgen | 2 +- ..._indexed_ragged_array_representation.ncgen | 2 +- ...nsional_UGRID_mesh_topology_variable.ncgen | 2 +- ...two_dimensional_coordinate_variables.ncgen | 2 +- test/cf/5_3_reduced_horizontal_grid.ncgen | 2 +- test/cf/5_6_rotated_pole_grid.ncgen | 4 +- test/cf/5_8_spherical_earth.ncgen | 4 +- test/cf/5_9_wgs84.ncgen | 4 +- .../6_1_2_taxon_names_and_identifyers.ncgen | 4 +- ...ard_heat_transport_in_atlantic_ocean.ncgen | 8 +- test/cf/6_2_model_level_numbers.ncgen | 2 +- .../7_10_decadal_averages_for_january.ncgen | 16 +- ...erature_for_each_hour_of_average_day.ncgen | 62 ++--- ..._extreme_statistics_and_spell_length.ncgen | 12 +- ...r_of_typical_climatological_day copy.ncgen | 56 ++--- ...y_maximum_daily_precipitation_totals.ncgen | 16 +- test/cf/7_15_timeseries_with_geometry.ncgen | 4 +- test/cf/7_16_polygons_with_holes.ncgen | 4 +- ...n-latitude-longitude_horizontal_grid.ncgen | 11 +- ...etric_coordinate_variable_has_bounds.ncgen | 2 +- ..._areas_for_a_spherical_geodesic_grid.ncgen | 2 +- .../7_5_methods_applied_to_a_timeseries.ncgen | 4 +- ...7_6_surface_air_temperature_variance.ncgen | 2 +- ...veraged_separately_over_land_and_sea.ncgen | 4 +- test/cf/7_9_climatological_seasons.ncgen | 2 +- test/sources/cf.jl | 61 +++-- 39 files changed, 368 insertions(+), 215 deletions(-) diff --git a/Project.toml b/Project.toml index f5a547c64..bc09e203c 100644 --- a/Project.toml +++ b/Project.toml @@ -74,7 +74,7 @@ GeoInterface = "1.0" GeometryBasics = "0.4" GeometryOps = "0.1" GeometryOpsCore = "0.1" -Makie = "0.20, 0.21, 0.22" +Makie = "0.20, 0.21, 0.22, 0.23, 0.24" Missings = "0.4, 1" NCDatasets = "0.13, 0.14" OffsetArrays = "1" @@ -122,4 +122,4 @@ test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", [sources] DimensionalData = {url = "https://github.com/rafaqz/DimensionalData.jl", rev = "as/individualindexing"} -GeometryOps = {url = "https://github.com/JuliaGeo/GeometryOps.jl", rev = "as/extentforwarding_for_predicates"} \ No newline at end of file +GeometryOps = {url = "https://github.com/JuliaGeo/GeometryOps.jl", rev = "as/extentforwarding_for_predicates"} diff --git a/src/dimensions.jl b/src/dimensions.jl index 4fec2e82c..d7f654caa 100644 --- a/src/dimensions.jl +++ b/src/dimensions.jl @@ -19,6 +19,28 @@ mean(A; dims=Band) """ @dim Band +""" + Lat <: Dimension + + Lat(val=:) + +Used for holding degrees north lookups. + +Will error on lookup construction if metadata of `units="degrees_north"` does not exist. +""" +@dim Lat + +""" + Lon <: Dimension + + Lon(val=:) + +Used for holding degrees east lookups. + +Will error on lookup construction if metadata of `units="degrees_east"` does not exist. +""" +@dim Lon + """ Geometry <: Dimension @@ -37,4 +59,4 @@ val = A[Geometry(Touches(other_geom))] # this is automatically accelerated by sp mean(A; dims=Geometry) ``` """ -@dim Geometry \ No newline at end of file +@dim Geometry diff --git a/src/filearray.jl b/src/filearray.jl index 3bfead455..8f02e2af7 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -52,7 +52,6 @@ end function FileArray{S}( var::AbstractArray{Char,N}, filename; mod, kw... ) where {S<:CDMsource,N} - @show "here" eachchunk = DA.eachchunk(var) haschunks = DA.haschunks(var) if Missings.nonmissingtype(eltype(mod)) isa AbstractString diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 8e0bedf8d..40b7b19a9 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -216,8 +216,8 @@ function _layer_names(ds, var_names, vars_dict, attrs_dict) # So we check standard names to know vars are lookups and not data standard_names = String[] for n in var_names - var = _get_var!(ds, vars_dict, n) - attr = _get_attr!(ds, vars_dict, attrs_dict, n) + var = _get_var!(vars_dict, ds, n) + attr = _get_attr!(attrs_dict, vars_dict, ds, n) # Coordinate variables haskey(attr, "coordinates") && append!(coordinate_names, collect(_cdm_coordinates(attr))) haskey(attr, "node_coordinates") && append!(node_coordinate_names, collect(_cdm_node_coordinates(attr))) @@ -231,7 +231,7 @@ function _layer_names(ds, var_names, vars_dict, attrs_dict) if haskey(attr, "geometry") geometry_container_name = attr["geometry"] push!(geometry_names, geometry_container_name) - geometry_attr = _get_attr!(ds, vars_dict, attrs_dict, geometry_container_name) + geometry_attr = _get_attr!(attrs_dict, vars_dict, ds, geometry_container_name) haskey(geometry_attr, "node_coordinates") && push!(geometry_names, _cf_node_coordinate_names(geometry_attr)...) haskey(geometry_attr, "node_count") && push!(geometry_names, geometry_attr["node_count"]) haskey(geometry_attr, "part_node_count") && push!(geometry_names, geometry_attr["part_node_count"]) @@ -322,9 +322,11 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) pop!(attr, "geometry") end # Check for a grid mapping variable - crs = if haskey(attr, "grid_mapping") + if haskey(attr, "grid_mapping") grid_mapping_key = pop!(attr, "grid_mapping") - get!(() -> _cf_crs(ds, grid_mapping_key), crs_dict, grid_mapping_key) + crs = get!(() -> _cf_crs(ds, grid_mapping_key), crs_dict, grid_mapping_key) + else + crs = grid_mapping_key = nothing end if eltype(var) <: Char && ndims(var) > 1 # Remove the char dimension @@ -332,19 +334,25 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) end # Loop over the dimensions of this layer, adding missing dims to dim_dict output_layerdims_vec[i] = map(dimnames) do dimname - dim = get!(dim_dict, dimname) do + get!(dim_dict, dimname) do _cdm_dim( - ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs + ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs, grid_mapping_key ) end - basedims(dim) + end + unformatted_dims = map(dimnames) do dimname + dim_dict[dimname] + end + formatted_dims = format(unformatted_dims, var) + map(dimnames, formatted_dims) do dimname, d + dim_dict[dimname] = d end # Find scalar coordinates to use as refdims (5.7) for coord_name in coord_names haskey(refdim_dict, coord_name) && continue - coord_var = _get_var!(ds, vars_dict, coord_name) + coord_var = _get_var!(vars_dict, ds, coord_name) if ndims(coord_var) == 0 - coord_attr = _get_attr!(ds, vars_dict, attrs_dict, coord_name) + coord_attr = _get_attr!(attrs_dict, vars_dict, ds, coord_name) data = reshape(collect(coord_var), 1) D = _cdm_dimtype(coord_attr, coord_name) metadata = _metadatadict(sourcetrait(ds), attr) @@ -357,6 +365,7 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) output_layers_vec[i] = var output_attrs_vec[i] = attr end + return (; names_vec=layer_names, layers_vec=output_layers_vec, @@ -370,7 +379,7 @@ _organise_dataset(ds::AbstractDataset, names, group) = _organise_dataset(ds.group[group], names, nokw) -function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs) +function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs, grid_mapping_key) # First check for geometry dimensions (remove most complicted first) if is_geometry(ds, vars_dict, attrs_dict, geometry_key, dimname) return _geometry_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, crs) @@ -378,7 +387,7 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na # Then get all the coordinates for this dimension dimension_coord_names = filter(coord_names) do coordname - coord_dimnames = CDM.dimnames(_get_var!(ds, vars_dict, coordname)) + coord_dimnames = CDM.dimnames(_get_var!(vars_dict, ds, coordname)) dimname in coord_dimnames end # Now dimension/lookup will depend on if there are associated coordinates @@ -389,20 +398,32 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na return _discrete_axis_dim(ds, dimname) end else # Use coord names - linked_coord_name = dimension_coord_names[1] - dim_var = _get_var!(ds, vars_dict, linked_coord_name) - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, linked_coord_name) - # Short circuit for char dims - if is_char_categorical(dim_var) - return _char_categorical_dim(ds, dim_var, dim_attrs, linked_coord_name) - end # Otherwise dims/lookup depend on the number of associated coords if length(dimension_coord_names) == 1 - # Only one coordinate, so just use it as the dimension/lookup - return _standard_dim(ds, vars_dict, attrs_dict, linked_coord_name, crs) + if haskey(ds, dimname) # The var name is the dim name + # Only one coordinate, so just use it as the dimension/lookup + return _standard_dim(ds, vars_dict, attrs_dict, dimname, crs) + else + return _standard_dim(ds, vars_dict, attrs_dict, dimension_coord_names[1], crs) + end else + first_coord_name = dimension_coord_names[1] + first_coord_var = _get_var!(vars_dict, ds, first_coord_name) + # Short circuit for char dims + if is_char_categorical(first_coord_var) + first_coord_attrs = _get_attr!(attrs_dict, vars_dict, ds, first_coord_name) + return _char_categorical_dim(ds, first_coord_var, first_coord_attrs, first_coord_name) + end if is_multidimensional(ds, vars_dict, dimension_coord_names) - return _rotated_dim(ds, vars_dict, attrs_dict, dimname, crs) + if is_rotated_longitude_latitude(ds, vars_dict, attrs_dict, dimname, grid_mapping_key) + r = _roated_longitude_latudtude_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) + return r + elseif is_unalligned(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, grid_mapping_key) + u = _unalligned_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) + return u + else + return _standard_dim(ds, vars_dict, attrs_dict, dimname, crs) + end else return _merged_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) end @@ -413,30 +434,50 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na # subdim = _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, remaining_coord_names, dimname, crs) end + +function is_rotated_longitude_latitude(ds, vars_dict, attrs_dict, dimname, grid_mapping_key) + if !isnothing(grid_mapping_key) + grid_mapping = _get_attr!(attrs_dict, vars_dict, ds, grid_mapping_key) + grid_mapping_name = grid_mapping["grid_mapping_name"] + if grid_mapping_name == "rotated_latitude_longitude" && get(dim_attrs, "standard_name", "") in ("grid_latitude", "grid_longitude") + return true + end + end + return false +end +function is_unalligned(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, grid_mapping_key) + units = map(dimension_coord_names) do coord_name + coord_attrs = _get_attr!(attrs_dict, vars_dict, ds, coord_name) + get(coord_attrs, "units", "") + end + return "degrees_north" in units && "degrees_east" in units +end +# Dimension categorisation is_char_categorical(dim_var) = ndims(dim_var) > 1 && eltype(dim_var) <: Char -is_climatology(dim_attrs) = haskey(dim_attrs, "climatology") +is_climatology(dim_attrs::AbstractDict) = haskey(dim_attrs, "climatology") function is_multidimensional(ds, vars_dict, dimension_coord_names) dim_vars = map(dimension_coord_names) do d - _get_var!(ds, vars_dict, d) + _get_var!(vars_dict, ds, d) end return any(v -> length(CDM.dimnames(v)) > 1, dim_vars) end function is_geometry(ds, vars_dict, attrs_dict, geometry_key, dimname) isnothing(geometry_key) && return false - geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) + geometry_attrs = _get_attr!(attrs_dict, vars_dict, ds, geometry_key) # Check that the geometry is for this dimension is_geometry_dimension = if haskey(geometry_attrs, "node_count") # Multi-part geometries - dimname in CDM.dimnames(_get_var!(ds, vars_dict, geometry_attrs["node_count"])) - else - # Points - dimname in CDM.dimnames(_get_var!(ds, vars_dict, first(_cf_node_coordinate_names(geometry_attrs)))) + dimname in CDM.dimnames(_get_var!(vars_dict, ds, geometry_attrs["node_count"])) + else + # Points + dimname in CDM.dimnames(_get_var!(vars_dict, ds, first(_cf_node_coordinate_names(geometry_attrs)))) end end +# Lookup/Dimension generation function _standard_dim(ds, vars_dict, attrs_dict, dimname, crs) - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) - if haskey(dim_attrs, "climatology") + dim_attrs = _get_attr!(attrs_dict, vars_dict, ds, dimname) + if is_climatology(dim_attrs) return _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, dimname) else D = _cdm_dimtype(dim_attrs, dimname) @@ -454,7 +495,7 @@ function _discrete_axis_dim(ds, dimname) end function _geometry_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, crs) - geometry_attrs = _get_attr!(ds, vars_dict, attrs_dict, geometry_key) + geometry_attrs = _get_attr!(attrs_dict, vars_dict, ds, geometry_key) geoms = get!(geometry_dict, geometry_key) do _cf_geometry_decode(ds, geometry_attrs) end @@ -464,35 +505,42 @@ function _geometry_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, c end function _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, name) - periodtypes = (Year, Month, Day, Hour, Minute, Second) - oneperiods = map(p -> p(1), periodtypes) + periodfuncs = (year, month, day, hour, minute, second) + + # Get needed variables dim_var = ds[name] climatology_bounds = collect(CDM.cfvariable(ds, dim_attrs["climatology"])) - # We need to know if this is over years/months/days etc - diff = map(oneperiods) do p - length(climatology_bounds[1, 1]:p:climatology_bounds[2, 1]) - 1 + + # Detect the cycling pattern from the difference between periods in a step + step_diff = map(periodfuncs) do f + start = f(climatology_bounds[1, 1]) + stop = f(climatology_bounds[2, 1]) + stop - start end - # Find the smallest step - i = findfirst(d -> first(d) > 0, diff) - # todo use if/else for type stability - period = periodtypes[i](diff[i][1]) - # TODO assuming 1 year/1 month is not correct - cycle = period / period.value - stepdiff = map(oneperiods) do p - length(climatology_bounds[1, 1]:p:(climatology_bounds[2, 1] - period)) - 1 + cycle, duration = if step_diff[1] != 0 && any(!=(0), step_diff[3:6]) # all change so Year and Day both cycle + (Year(1), Day(1) => Month(1)), Year(step_diff[1]) + elseif step_diff[1] != 0 # year changes but not day/hour/minute/second so Year is the cycle + Year(1), Year(step_diff[1]) + elseif any(!=(0), step_diff[3:6]) # day/hour/minute/second changes so Day is the cycle + Day(1), Day(step_diff[3]) + else + # There is no full cycle, but set it to 1 year for read/write round trip of the climatology bounds. + # It will have no effect on selectors. + Year(1), Year(step_diff[1]) end - i = findfirst(d -> first(d) > 0, stepdiff) - step = periodtypes[i](stepdiff[i][1]) - # Find the total time span - bounds = extrema(climatology_bounds) - data = collect(dim_var) - order = LA.orderof(data) - intervalbounds = climatology_bounds - @show intervalbounds - intervalbounds[2, :] .-= period - sampling = Intervals(Center()) - span = Explicit(intervalbounds) - lookup = Cyclic(data; cycle, order, sampling, span)#, bounds) + + # Define lookup traits and values + data = ndims(dim_var) == 0 ? [dim_var[]] : collect(dim_var) + order = ForwardOrdered() # CF default ? + sampling = Intervals(Center()) # CF default + bounds = climatology_bounds[begin], climatology_bounds[end] + # Reuse the vector but subtract duration from upper bounds + # This converts climatology bounds to normal bounds + climatology_bounds[2, :] .-= duration + span = Explicit(climatology_bounds) + lookup = Cyclic(data; cycle, order, sampling, span, bounds) + + # Detect dimention D = _cdm_dimtype(dim_attrs, name) return D(lookup) end @@ -508,18 +556,62 @@ function _char_categorical_dim(ds, dim_var, dim_attrs, name) return D(lookup) end -function _rotated_dim(ds, vars_dict, attrs_dict, dimname, crs) - # Rotations. For now we don't use ArrayLookup - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, dimname) +# Must have "standard_name" in dimension attributes +function _rotated_longitude_latudtude_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) + lookup = _unaligned_lookup(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) + standard_name = _get_attr!(attrs_dict, vars_dict, ds)["standard_name"] + D = if standard_name == "grid_latitude" + Y + elseif standard_name == "grid_longitude" + X + else + error("standard_name $standard_name not recognized") + end + return D(lookup) +end +# Must have "degrees_north" and "degrees_east" in coordinate units +function _unalligned_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) + dim_attrs = _get_attr!(attrs_dict, vars_dict, ds, dimname) D = _cdm_dimtype(dim_attrs, dimname) - lookup = _cdm_lookup(ds, dim_attrs, dimname, D, crs) + lookup = _unaligned_lookup(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, D, crs) return D(lookup) end +function _unaligned_lookup(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, D, crs) + # Rotations. For now we don't use ArrayLookup + lon_key = lat_key = "" + for coord_name in dimension_coord_names + coord_attrs = _get_attr!(attrs_dict, vars_dict, ds, coord_name) + units = get(coord_attrs, "units", nothing) + if units == "degrees_north" + lat_key = coord_name + elseif units == "degrees_east" + lon_key = coord_name + end + end + lat_metadata, lon_metadata = map((lat_key, lon_key)) do key + _metadatadict(sourcetrait(ds), _get_attr!(attrs_dict, vars_dict, ds, key)) + end + dims = Lat(AutoValues(); metadata=lat_metadata), + Lon(AutoValues(); metadata=lon_metadata) + data = if haskey(ds, dimname) + _get_attr!(attrs_dict, vars_dict, ds, dimname) + else + Base.OneTo(CDM.dim(ds, dimname)) + end + if D <: X + matrix = collect(_get_var!(vars_dict, ds, lon_key)) + return ArrayLookup(matrix; data, dims) + elseif D == Y + matrix = collect(_get_var!(vars_dict, ds, lat_key)) + return ArrayLookup(matrix; data, dims) + end +end + function _merged_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) # Generate a MergedLookup for multi-coordinate dimensions dims = map(dimension_coord_names) do d - dim_attrs = _get_attr!(ds, vars_dict, attrs_dict, d) + dim_attrs = _get_attr!(attrs_dict, vars_dict, ds, d) D = _cdm_dimtype(dim_attrs, d) lookup = _cdm_lookup(ds, dim_attrs, d, D, crs) D(lookup) @@ -1012,12 +1104,12 @@ function _grid_mapping_keys(grid_mapping_key::String) end # Get a variable and store it in a dict -_get_var!(ds, dict, key) = get!(() -> CDM.variable(ds, key), dict, key) +_get_var!(dict::AbstractDict, ds, key::String) = get!(() -> CDM.variable(ds, key), dict, key) # Get atributes and store them in a dict -function _get_attr!(ds, var_dict, attr_dict, key) +function _get_attr!(attr_dict::AbstractDict, var_dict::AbstractDict, ds, key::String) get!(attr_dict, key) do - var = _get_var!(ds, var_dict, key) + var = _get_var!(var_dict, ds, key) CDM.attribs(var) end end @@ -1082,7 +1174,7 @@ function _raster(ds::CDM.AbstractDataset; Array(modvar) end # Generate dims - dims_out = isnokw(dims) ? DD.dims(Tuple(values(dim_dict)), layerdims_vec[1]) : format(dims, data_out) + dims_out = isnokw(dims) ? DD.dims(Tuple(values(dim_dict)), layerdims_vec[1]) : dims # Return the data to the parent function mv_outer = _outer_missingval(mod) # Use name or an empty Symbol diff --git a/test/cf/2_1_string_variable_representation.ncgen b/test/cf/2_1_string_variable_representation.ncgen index 5866f536f..a04a5cac8 100644 --- a/test/cf/2_1_string_variable_representation.ncgen +++ b/test/cf/2_1_string_variable_representation.ncgen @@ -1,4 +1,4 @@ -netcdf string_variable_representation { +netcdf 2_1_string_variable_representation { dimensions: strings = 30 ; strlen = 10 ; @@ -7,4 +7,4 @@ variables: char_variable:long_name = "strings of type char" ; string str_variable(strings) ; str_variable:long_name = "strings of type string" ; -} \ No newline at end of file +} diff --git a/test/cf/3_1_units_metadata.ncgen b/test/cf/3_1_units_metadata.ncgen index 74e1bca5e..6ad330ad1 100644 --- a/test/cf/3_1_units_metadata.ncgen +++ b/test/cf/3_1_units_metadata.ncgen @@ -1,4 +1,4 @@ -netcdf units_metadata { +netcdf 3_1_units_metadata { variables: float Tonscale; Tonscale:long_name="global-mean surface temperature"; @@ -12,4 +12,4 @@ variables: Tdifference:units="degC"; Tdifference:units_metadata="temperature: difference"; Tdifference:cell_methods="area: mean"; -} \ No newline at end of file +} diff --git a/test/cf/5_10_british_national_grid.ncgen b/test/cf/5_10_british_national_grid.ncgen index 682ce7d0a..2dda1d59f 100644 --- a/test/cf/5_10_british_national_grid.ncgen +++ b/test/cf/5_10_british_national_grid.ncgen @@ -1,4 +1,4 @@ -netcdf british_national_grid { +netcdf 5_10_british_national_grid { dimensions: z = 100; y = 100000 ; @@ -48,4 +48,4 @@ dimensions: crsWGS84:longitude_of_prime_meridian = 0.0 ; crsWGS84:semi_major_axis = 6378137.0 ; crsWGS84:inverse_flattening = 298.257223563 ; -} \ No newline at end of file +} diff --git a/test/cf/5_11_wgs84_crs_wkt.ncgen b/test/cf/5_11_wgs84_crs_wkt.ncgen index 65650401c..29cd1e216 100644 --- a/test/cf/5_11_wgs84_crs_wkt.ncgen +++ b/test/cf/5_11_wgs84_crs_wkt.ncgen @@ -1,4 +1,4 @@ -netcdf wgs84_datum_crs_wkt { +netcdf 5_11_wgs84_datum_crs_wkt { dimensions: lat = 18 ; lon = 36 ; @@ -15,4 +15,4 @@ variables: crs:semi_major_axis = 6378137.0 ; crs:inverse_flattening = 298.257223563 ; crs:crs_wkt = "GEODCRS[\"WGS 84\", DATUM[\"World Geodetic System 1984\", ELLIPSOID[\"WGS 84\",6378137,298.257223563, LENGTHUNIT[\"metre\",1.0]]], PRIMEM[\"Greenwich\",0], CS[ellipsoidal,3], AXIS[\"(lat)\",north,ANGLEUNIT[\"degree\",0.0174532925199433]], AXIS[\"(lon)\",east,ANGLEUNIT[\"degree\",0.0174532925199433]], AXIS[\"ellipsoidal height (h)\",up,LENGTHUNIT[\"metre\",1.0]]]" ; -} \ No newline at end of file +} diff --git a/test/cf/5_14_multiple_forecasts_from_a_single_analysis.ncgen b/test/cf/5_14_multiple_forecasts_from_a_single_analysis.ncgen index cc421a4a0..98a437dc6 100644 --- a/test/cf/5_14_multiple_forecasts_from_a_single_analysis.ncgen +++ b/test/cf/5_14_multiple_forecasts_from_a_single_analysis.ncgen @@ -1,4 +1,4 @@ -netcdf multiple_forecasts_from_a_single_analysis { +netcdf 5_14_multiple_forecasts_from_a_single_analysis { dimensions: lat = 180 ; lon = 360 ; @@ -29,4 +29,4 @@ data: time = 6., 12., 18., 24. ; atime = 0. ; p500 = 500. ; -} \ No newline at end of file +} diff --git a/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen b/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen index b6992d004..8f299e62a 100644 --- a/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen +++ b/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen @@ -1,4 +1,4 @@ -netcdf domain_with_independent_coordinate_variables { +netcdf 5_15_domain_with_independent_coordinate_variables { dimensions: lat = 18 ; lon = 36 ; @@ -21,4 +21,4 @@ variables: double time(time) ; time:long_name = "time" ; time:units = "days since 1990-1-1 0:0:0" ; -} \ No newline at end of file +} diff --git a/test/cf/5_16_domain_with_rotaed_pole_and_scalar_coordinate_variable.ncgen b/test/cf/5_16_domain_with_rotaed_pole_and_scalar_coordinate_variable.ncgen index 749e5cb83..e278b6108 100644 --- a/test/cf/5_16_domain_with_rotaed_pole_and_scalar_coordinate_variable.ncgen +++ b/test/cf/5_16_domain_with_rotaed_pole_and_scalar_coordinate_variable.ncgen @@ -1,4 +1,4 @@ -netcdf domain_with_rotated_pole_and_scalar_coordinate_variable { +netcdf 5_16_domain_with_rotated_pole_and_scalar_coordinate_variable { dimensions: rlon = 128 ; rlat = 64 ; @@ -33,4 +33,4 @@ variables: float lat(rlat,rlon) ; lat:long_name = "latitude" ; lat:units = "degrees_north" ; -} \ No newline at end of file +} diff --git a/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen b/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen index e0af4c348..902f2abdf 100644 --- a/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen +++ b/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen @@ -1,4 +1,4 @@ -netcdf a_domain_with_no_specific_dimensions { +netcdf 5_18_a_domain_with_no_specific_dimensions { dimensions: variables: @@ -9,4 +9,4 @@ variables: double t ; t:standard_name = "time" ; t:units = "days since 2021-01-01" ; -} \ No newline at end of file +} diff --git a/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen b/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen index 3a6531fc5..7b4c1e0d3 100644 --- a/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen +++ b/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen @@ -1,4 +1,4 @@ -netcdf a_domain_containing_a_timeseries_geometry { +netcdf 5_19_a_domain_containing_a_timeseries_geometry { dimensions: instance = 2 ; node = 5 ; @@ -37,4 +37,4 @@ variables: y:units = "degrees_north" ; y:standard_name = "latitude" ; y:axis = "Y" ; -} \ No newline at end of file +} diff --git a/test/cf/5_1_independent_coordinate_variables.ncgen b/test/cf/5_1_independent_coordinate_variables.ncgen index 6a125a0d9..a03b974b8 100644 --- a/test/cf/5_1_independent_coordinate_variables.ncgen +++ b/test/cf/5_1_independent_coordinate_variables.ncgen @@ -1,4 +1,4 @@ -netcdf independent_coordinate_variables { +netcdf 5_1_independent_coordinate_variables { dimensions: lat = 18 ; lon = 36 ; diff --git a/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen b/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen index 5738277fd..ac641dd46 100644 --- a/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen +++ b/test/cf/5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation.ncgen @@ -1,4 +1,4 @@ -netcdf a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation { +netcdf 5_20_a_domain_containing_a_timeseries_of_station_data_in_the_indexed_ragged_array_representation { dimensions: station = 23 ; obs = UNLIMITED ; diff --git a/test/cf/5_21_a_two_dimensional_UGRID_mesh_topology_variable.ncgen b/test/cf/5_21_a_two_dimensional_UGRID_mesh_topology_variable.ncgen index f179b0c05..5f2d98b84 100644 --- a/test/cf/5_21_a_two_dimensional_UGRID_mesh_topology_variable.ncgen +++ b/test/cf/5_21_a_two_dimensional_UGRID_mesh_topology_variable.ncgen @@ -1,4 +1,4 @@ -netcdf a_two_dimensional_UGRID_mesh_topology_variable { +netcdf 5_21_a_two_dimensional_UGRID_mesh_topology_variable { dimensions: node = 5 ; // Number of mesh nodes edge = 6 ; // Number of mesh edges diff --git a/test/cf/5_2_two_dimensional_coordinate_variables.ncgen b/test/cf/5_2_two_dimensional_coordinate_variables.ncgen index 098b5b5db..d5ef7b937 100644 --- a/test/cf/5_2_two_dimensional_coordinate_variables.ncgen +++ b/test/cf/5_2_two_dimensional_coordinate_variables.ncgen @@ -1,4 +1,4 @@ -netcdf two_dimensional_coordinate_variables { +netcdf 5_2_two_dimensional_coordinate_variables { dimensions: xc = 128 ; yc = 64 ; diff --git a/test/cf/5_3_reduced_horizontal_grid.ncgen b/test/cf/5_3_reduced_horizontal_grid.ncgen index 460f6191b..f2e9ff712 100644 --- a/test/cf/5_3_reduced_horizontal_grid.ncgen +++ b/test/cf/5_3_reduced_horizontal_grid.ncgen @@ -1,4 +1,4 @@ -netcdf reduced_horizontal_grid { +netcdf 5_3_reduced_horizontal_grid { dimensions: londim = 128 ; latdim = 64 ; diff --git a/test/cf/5_6_rotated_pole_grid.ncgen b/test/cf/5_6_rotated_pole_grid.ncgen index 00386ce03..9ea510e02 100644 --- a/test/cf/5_6_rotated_pole_grid.ncgen +++ b/test/cf/5_6_rotated_pole_grid.ncgen @@ -1,4 +1,4 @@ -netcdf rotated_pole_grid { +netcdf 5_6_rotated_pole_grid { dimensions: rlon = 128 ; rlat = 64 ; @@ -30,4 +30,4 @@ variables: float lat(rlat,rlon) ; lat:long_name = "latitude" ; lat:units = "degrees_north" ; -} \ No newline at end of file +} diff --git a/test/cf/5_8_spherical_earth.ncgen b/test/cf/5_8_spherical_earth.ncgen index f708c891f..9674d9b4f 100644 --- a/test/cf/5_8_spherical_earth.ncgen +++ b/test/cf/5_8_spherical_earth.ncgen @@ -1,4 +1,4 @@ -netcdf spherical_earth { +netcdf 5_8_spherical_earth { dimensions: lat = 18 ; lon = 36 ; @@ -13,4 +13,4 @@ variables: crs:grid_mapping_name = "latitude_longitude" ; crs:semi_major_axis = 6371000.0 ; crs:inverse_flattening = 0 ; -} \ No newline at end of file +} diff --git a/test/cf/5_9_wgs84.ncgen b/test/cf/5_9_wgs84.ncgen index fad666aea..9c16b78a4 100644 --- a/test/cf/5_9_wgs84.ncgen +++ b/test/cf/5_9_wgs84.ncgen @@ -1,4 +1,4 @@ -netcdf wgs84_datum { +netcdf 5_9_wgs84_datum { dimensions: lat = 18 ; lon = 36 ; @@ -14,4 +14,4 @@ variables: crs:longitude_of_prime_meridian = 0.0 ; crs:semi_major_axis = 6378137.0 ; crs:inverse_flattening = 298.257223563 ; -} \ No newline at end of file +} diff --git a/test/cf/6_1_2_taxon_names_and_identifyers.ncgen b/test/cf/6_1_2_taxon_names_and_identifyers.ncgen index 2552569e6..11fdd4d7c 100644 --- a/test/cf/6_1_2_taxon_names_and_identifyers.ncgen +++ b/test/cf/6_1_2_taxon_names_and_identifyers.ncgen @@ -1,4 +1,4 @@ -netcdf taxon_names_and_identifiers { +netcdf 6_1_2_taxon_names_and_identifiers { dimensions: time = 100 ; string80 = 80 ; @@ -19,4 +19,4 @@ data: abundance = 0.12767112, 0.3077098, 0.98925537, 0.39649677, 0.7229494, 0.3014989, 0.322784, 0.29285616, 0.25662208, 0.117831886, 0.9196425, 0.11931813, 0.1264885, 0.5113293, 0.89028376, 0.72099334, 0.5193584, 0.92186993, 0.6823397, 0.007336974, 0.14244181, 0.6244344, 0.19768023, 0.44542485, 0.47169906, 0.3926025, 0.71931195, 0.30820572, 0.014529705, 0.30569047, 0.049314797, 0.31771177, 0.47943252, 0.37399697, 0.58730066, 0.9361459, 0.26961315, 0.19459844, 0.3907408, 0.37988317, 0.044382393, 0.42903543, 0.3377753, 0.37569588, 0.32771045, 0.8257987, 0.21826279, 0.79244465, 0.0014693141, 0.7483437, 0.9131109, 0.24773198, 0.8663353, 0.2083258, 0.6436682, 0.09799868, 0.5177543, 0.3281548, 0.6612102, 0.23962152, 0.28922492, 0.8494298, 0.67576736, 0.59139305, 0.5834294, 0.8025192, 0.5553859, 0.79422903, 0.30486482, 0.37530547, 0.43150902, 0.8500043, 0.47215062, 0.2889346, 0.7384416, 0.8205062, 0.9501508, 0.9709469, 0.81786615, 0.66687894, 0.13149667, 0.65388376, 0.04085481, 0.13509172, 0.6391254, 0.374403, 0.45044667, 0.5265882, 0.70337373, 0.8857227, 0.6504841, 0.70418215, 0.4939022, 0.46103948, 0.1943444, 0.21802491, 0.05313009, 0.64020365, 0.2583719, 0.5843945, 0.96909535, 0.8323412, 0.9642344, 0.06428528, 0.66111577, 0.5730727, 0.177504, 0.39409912, 0.13461357, 0.5491819, 0.2750708, 0.08223128, 0.81467575, 0.363693, 0.016909182, 0.85852563, 0.21432716, 0.32096034, 0.07471895, 0.9952607, 0.7406225, 0.6962306, 0.27097827, 0.026257396, 0.6612402, 0.6620196, 0.6634072, 0.59134126, 0.62234384, 0.6152409, 0.05558741, 0.46606535, 0.7694198, 0.13313901, 0.7848925, 0.67907256, 0.91177624, 0.20892179, 0.60586435, 0.4054733, 0.98118675, 0.38457257, 0.6779362, 0.9585549, 0.21006352, 0.43963993, 0.10247308, 0.6907456, 0.459664, 0.73598236, 0.57579696, 0.67871845, 0.5636268, 0.5426653, 0.015122354, 0.02252096, 0.799381, 0.68344504, 0.69040406, 0.7290883, 0.25275213, 0.28130805, 0.99878895, 0.25900894, 0.71899116, 0.441096, 0.9315588, 0.96927273, 0.07765621, 0.8100117, 0.45869052, 0.38682938, 0.9177527, 0.08623397, 0.18650591, 0.5276618, 0.41039044, 0.1895529, 0.75130725, 0.40061736, 0.2390588, 0.794543, 0.436378, 0.54052705, 0.6319656, 0.6019509, 0.74456435, 0.4374414, 0.8458282, 0.7159625, 0.4986195, 0.33087182, 0.5789556, 0.7105424, 0.01516813, 0.4926042, 0.6438956, 0.035016358, 0.86199844, 0.9942962 ; taxon_name = "Calanus finmarchicus", "Calanus helgolandicus" ; taxon_lsid = "urn:lsid:marinespecies.org:taxname:104464", "urn:lsid:marinespecies.org:taxname:104466" ; -} \ No newline at end of file +} diff --git a/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen b/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen index 1e6f39188..2ba108eac 100644 --- a/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen +++ b/test/cf/6_1_northward_heat_transport_in_atlantic_ocean.ncgen @@ -1,13 +1,13 @@ -netcdf northward_heat_transport_in_atlantic_ocean { +netcdf 6_1_northward_heat_transport_in_atlantic_ocean { dimensions: time = 4 ; lat = 5 ; lbl = 1 ; variables: float n_heat_transport(time,lat,lbl); - n_heat_transport:units="W"; - n_heat_transport:coordinates="geo_region"; - n_heat_transport:standard_name="northward_ocean_heat_transport"; + n_heat_transport:units = "W"; + n_heat_transport:coordinates = "geo_region"; + n_heat_transport:standard_name = "northward_ocean_heat_transport"; double time(time) ; time:long_name = "time" ; time:units = "days since 1990-1-1 0:0:0" ; diff --git a/test/cf/6_2_model_level_numbers.ncgen b/test/cf/6_2_model_level_numbers.ncgen index 88862a666..8a01acc0b 100644 --- a/test/cf/6_2_model_level_numbers.ncgen +++ b/test/cf/6_2_model_level_numbers.ncgen @@ -1,4 +1,4 @@ -netcdf model_level_numbers { +netcdf 6_2_model_level_numbers { dimensions: sigma=5; lat=30; diff --git a/test/cf/7_10_decadal_averages_for_january.ncgen b/test/cf/7_10_decadal_averages_for_january.ncgen index dd5264795..429d378ca 100644 --- a/test/cf/7_10_decadal_averages_for_january.ncgen +++ b/test/cf/7_10_decadal_averages_for_january.ncgen @@ -1,4 +1,4 @@ -netcdf decadal_averages_for_january { +netcdf 7_10_decadal_averages_for_january { dimensions: time=3; nv=2; @@ -14,8 +14,12 @@ variables: time:units="days since 1901-1-1"; double climatology_bounds(time,nv); data: // time coordinates translated to datetime format - time="1965-1-15", "1975-1-15", "1985-1-15" ; - climatology_bounds="1961-1-1", "1970-2-1", - "1971-1-1", "1980-2-1", - "1981-1-1", "1990-2-1" ; -} \ No newline at end of file + // time="1965-1-15", "1975-1-15", "1985-1-15" ; + time=23390, 27042, 30695 ; + // climatology_bounds="1961-1-1", "1970-2-1", + // "1971-1-1", "1980-2-1", + // "1981-1-1", "1990-2-1" ; + climatology_bounds=21915, 25233, + 25567, 28885, + 29220, 32538 ; +} diff --git a/test/cf/7_11_temperature_for_each_hour_of_average_day.ncgen b/test/cf/7_11_temperature_for_each_hour_of_average_day.ncgen index 1128af0a7..d7a7ebf98 100644 --- a/test/cf/7_11_temperature_for_each_hour_of_average_day.ncgen +++ b/test/cf/7_11_temperature_for_each_hour_of_average_day.ncgen @@ -1,6 +1,6 @@ -netcdf temperature_for_each_hour_of_average_day { +netcdf 7_11_temperature_for_each_hour_of_average_day { dimensions: - time=4; + time=24; nv=2; lat=10; // added lon=10; // added @@ -13,31 +13,35 @@ variables: time:climatology="climatology_bounds"; time:units="hours since 1997-4-1"; double climatology_bounds(time,nv); -data: // time coordinates translated to datetime format - time="1997-4-1 0:30", "1997-4-1 1:30", "1997-4-1 2:30", "1997-4-1 3:30", "1997-4-1 4:30", "1997-4-1 5:30", "1997-4-1 6:30", "1997-4-1 7:30", "1997-4-1 8:30", "1997-4-1 9:30", "1997-4-1 10:30", "1997-4-1 11:30", "1997-4-1 12:30", "1997-4-1 13:30", "1997-4-1 14:30", "1997-4-1 15:30", "1997-4-1 16:30", "1997-4-1 17:30", "1997-4-1 18:30", "1997-4-1 19:30", "1997-4-1 20:30", "1997-4-1 21:30", "1997-4-1 22:30", "1997-4-1 23:30"; - climatology_bounds="1997-4-1 0:00", "1997-4-30 1:00", - "1997-4-1 1:00", "1997-4-30 2:00", - "1997-4-1 2:00", "1997-4-30 3:00", - "1997-4-1 3:00", "1997-4-30 4:00", - "1997-4-1 4:00", "1997-4-30 5:00", - "1997-4-1 5:00", "1997-4-30 6:00", - "1997-4-1 6:00", "1997-4-30 7:00", - "1997-4-1 7:00", "1997-4-30 8:00", - "1997-4-1 8:00", "1997-4-30 9:00", - "1997-4-1 9:00", "1997-4-30 10:00", - "1997-4-1 10:00", "1997-4-30 11:00", - "1997-4-1 11:00", "1997-4-30 12:00", - "1997-4-1 12:00", "1997-4-30 13:00", - "1997-4-1 13:00", "1997-4-30 14:00", - "1997-4-1 14:00", "1997-4-30 15:00", - "1997-4-1 15:00", "1997-4-30 16:00", - "1997-4-1 16:00", "1997-4-30 17:00", - "1997-4-1 17:00", "1997-4-30 18:00", - "1997-4-1 18:00", "1997-4-30 19:00", - "1997-4-1 19:00", "1997-4-30 20:00", - "1997-4-1 20:00", "1997-4-30 21:00", - "1997-4-1 21:00", "1997-4-30 22:00", - "1997-4-1 22:00", "1997-4-30 23:00", - "1997-4-1 23:00", "1997-4-30 24:00" ; +data: + // time="1997-4-1 0:30" .. "1997-4-1 23:30"; + time=0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5 ; + // climatology_bounds="1997-4-1 0:00", "1997-4-30 1:00", + // ... + // "1997-4-1 23:00", "1997-5-1 24:00" ; + climatology_bounds=0.0, 721.0, + 1.0, 722.0, + 2.0, 723.0, + 3.0, 724.0, + 4.0, 725.0, + 5.0, 726.0, + 6.0, 727.0, + 7.0, 728.0, + 8.0, 729.0, + 9.0, 730.0, + 10.0, 731.0, + 11.0, 732.0, + 12.0, 733.0, + 13.0, 734.0, + 14.0, 735.0, + 15.0, 736.0, + 16.0, 737.0, + 17.0, 738.0, + 18.0, 739.0, + 19.0, 740.0, + 20.0, 741.0, + 21.0, 742.0, + 22.0, 743.0, + 23.0, 744.0 ; +} -} \ No newline at end of file diff --git a/test/cf/7_12_extreme_statistics_and_spell_length.ncgen b/test/cf/7_12_extreme_statistics_and_spell_length.ncgen index 5d9365bd2..f696297b9 100644 --- a/test/cf/7_12_extreme_statistics_and_spell_length.ncgen +++ b/test/cf/7_12_extreme_statistics_and_spell_length.ncgen @@ -1,4 +1,4 @@ -netcdf extreme_statistics_and_spell_length { +netcdf 7_12_extreme_statistics_and_spell_length { dimensions: time=1; // added length 1 time dimension lat=10; // added @@ -21,7 +21,9 @@ variables: time:units="days since 2000-6-1"; double climatology_bounds(time,nv); data: // time coordinates translated to datetime format - time="2008-1-16 6:00"; - climatology_bounds="2007-12-1 6:00", "2008-3-1 6:00"; - threshold=0.; -} \ No newline at end of file + // time="2008-1-16 6:00"; + time=2754.25; + // climatology_bounds="2007-12-1 6:00", "2008-3-1 6:00"; + climatology_bounds=2739.25, 2830.25; + threshold=0.0; +} diff --git a/test/cf/7_13_temperature_for_each_hour_of_typical_climatological_day copy.ncgen b/test/cf/7_13_temperature_for_each_hour_of_typical_climatological_day copy.ncgen index f2eaa750a..d22b95348 100644 --- a/test/cf/7_13_temperature_for_each_hour_of_typical_climatological_day copy.ncgen +++ b/test/cf/7_13_temperature_for_each_hour_of_typical_climatological_day copy.ncgen @@ -1,4 +1,4 @@ -netcdf temperature_for_each_hour_of_typical_climatological_day { +netcdf 7_13_temperature_for_each_hour_of_typical_climatological_day { dimensions: time=24; nv=2; @@ -15,29 +15,31 @@ variables: time:units="days since 1961-1-1"; double climatology_bounds(time,nv); data: // time coordinates translated to datetime format - time="1961-4-1 0:30", "1961-4-1 1:30", "1961-4-1 2:30", "1961-4-1 3:30", "1961-4-1 4:30", "1961-4-1 5:30", "1961-4-1 6:30", "1961-4-1 7:30", "1961-4-1 8:30", "1961-4-1 9:30", "1961-4-1 10:30", "1961-4-1 11:30", "1961-4-1 12:30", "1961-4-1 13:30", "1961-4-1 14:30", "1961-4-1 15:30", "1961-4-1 16:30", "1961-4-1 17:30", "1961-4-1 18:30", "1961-4-1 19:30", "1961-4-1 20:30", "1961-4-1 21:30", "1961-4-1 22:30", "1961-4-1 23:30" ; - climatology_bounds="1961-4-1 0:00", "1990-4-30 1:00", - "1961-4-1 1:00", "1990-4-30 2:00", - "1961-4-1 2:00", "1990-4-30 3:00", - "1961-4-1 3:00", "1990-4-30 4:00", - "1961-4-1 4:00", "1990-4-30 5:00", - "1961-4-1 5:00", "1990-4-30 6:00", - "1961-4-1 6:00", "1990-4-30 7:00", - "1961-4-1 7:00", "1990-4-30 8:00", - "1961-4-1 8:00", "1990-4-30 9:00", - "1961-4-1 9:00", "1990-4-30 10:00", - "1961-4-1 10:00", "1990-4-30 11:00", - "1961-4-1 11:00", "1990-4-30 12:00", - "1961-4-1 12:00", "1990-4-30 13:00", - "1961-4-1 13:00", "1990-4-30 14:00", - "1961-4-1 14:00", "1990-4-30 15:00", - "1961-4-1 15:00", "1990-4-30 16:00", - "1961-4-1 16:00", "1990-4-30 17:00", - "1961-4-1 17:00", "1990-4-30 18:00", - "1961-4-1 18:00", "1990-4-30 19:00", - "1961-4-1 19:00", "1990-4-30 20:00", - "1961-4-1 20:00", "1990-4-30 21:00", - "1961-4-1 21:00", "1990-4-30 22:00", - "1961-4-1 22:00", "1990-4-30 23:00", - "1961-4-1 23:00", "1990-5-1 0:00" ; -} \ No newline at end of file + // time="1961-4-1 0:30" ... "1961-4-1 23:30" ; + time=90.02083333333333, 90.0625, 90.10416666666667, 90.14583333333333, 90.1875, 90.22916666666667, 90.27083333333333, 90.3125, 90.35416666666667, 90.39583333333333, 90.4375, 90.47916666666667, 90.52083333333333, 90.5625, 90.60416666666667, 90.64583333333333, 90.6875, 90.72916666666667, 90.77083333333333, 90.8125, 90.85416666666667, 90.89583333333333, 90.9375, 90.97916666666667 ; + climatology_bounds=90.0 , 10711.041666666666, + 90.041666666666, 10711.083333333334, + 90.083333333334, 10711.125 , + 90.125 , 10711.166666666666, + 90.166666666666, 10711.208333333334, + 90.208333333334, 10711.25 , + 90.25 , 10711.291666666666, + 90.291666666666, 10711.333333333334, + 90.333333333334, 10711.375 , + 90.375 , 10711.416666666666, + 90.416666666666, 10711.458333333334, + 90.458333333334, 10711.5 , + 90.5 , 10711.541666666666, + 90.541666666666, 10711.583333333334, + 90.583333333334, 10711.625 , + 90.625 , 10711.666666666666, + 90.666666666666, 10711.708333333334, + 90.708333333334, 10711.75 , + 90.75 , 10711.791666666666, + 90.791666666666, 10711.833333333334, + 90.833333333334, 10711.875 , + 90.875 , 10711.916666666666, + 90.916666666666, 10711.958333333334, + 90.958333333334, 10712.0 ; +} + diff --git a/test/cf/7_14_monthly_maximum_daily_precipitation_totals.ncgen b/test/cf/7_14_monthly_maximum_daily_precipitation_totals.ncgen index 66aed4f73..9cb7486dd 100644 --- a/test/cf/7_14_monthly_maximum_daily_precipitation_totals.ncgen +++ b/test/cf/7_14_monthly_maximum_daily_precipitation_totals.ncgen @@ -1,4 +1,4 @@ -netcdf monthly_maximum_daily_precipitation_totals { +netcdf 7_14_monthly_maximum_daily_precipitation_totals { dimensions: time=3; nv=2; @@ -14,8 +14,12 @@ variables: time:units="days since 2000-6-1"; double climatology_bounds(time,nv); data: // time coordinates translated to datetime format - time="2000-6-16", "2000-7-16", "2000-8-16" ; - climatology_bounds="2000-6-1 6:00:00", "2000-7-1 6:00:00", - "2000-7-1 6:00:00", "2000-8-1 6:00:00", - "2000-8-1 6:00:00", "2000-9-1 6:00:00" ; -} \ No newline at end of file + // time="2000-6-16", "2000-7-16", "2000-8-16" ; + time=15, 45, 76 ; + // climatology_bounds="2000-6-1 6:00:00", "2000-7-1 6:00:00", + // "2000-7-1 6:00:00", "2000-8-1 6:00:00", + // "2000-8-1 6:00:00", "2000-9-1 6:00:00" ; + climatology_bounds=0, 30, + 30, 61, + 61, 92; +} diff --git a/test/cf/7_15_timeseries_with_geometry.ncgen b/test/cf/7_15_timeseries_with_geometry.ncgen index d080cb2c9..eda609bc5 100644 --- a/test/cf/7_15_timeseries_with_geometry.ncgen +++ b/test/cf/7_15_timeseries_with_geometry.ncgen @@ -1,4 +1,4 @@ -netcdf timeseries_with_geometry { +netcdf 7_15_timeseries_with_geometry { dimensions: instance = 2 ; node = 5 ; @@ -48,4 +48,4 @@ data: node_count = 3, 2 ; x = 30, 10, 40, 50, 50 ; y = 10, 30, 40, 60, 50 ; -} \ No newline at end of file +} diff --git a/test/cf/7_16_polygons_with_holes.ncgen b/test/cf/7_16_polygons_with_holes.ncgen index 25001beb3..3c4487849 100644 --- a/test/cf/7_16_polygons_with_holes.ncgen +++ b/test/cf/7_16_polygons_with_holes.ncgen @@ -1,4 +1,4 @@ -netcdf polygons_with_holes { +netcdf 7_16_polygons_with_holes { dimensions: node = 12 ; instance = 2 ; @@ -57,4 +57,4 @@ data: someData = 1, 2, 3, 4, 1, 2, 3, 4 ; -} \ No newline at end of file +} diff --git a/test/cf/7_2_cells_in_a_non-latitude-longitude_horizontal_grid.ncgen b/test/cf/7_2_cells_in_a_non-latitude-longitude_horizontal_grid.ncgen index cb24cbe3f..35a2b3131 100644 --- a/test/cf/7_2_cells_in_a_non-latitude-longitude_horizontal_grid.ncgen +++ b/test/cf/7_2_cells_in_a_non-latitude-longitude_horizontal_grid.ncgen @@ -1,7 +1,7 @@ -netcdf cells_in_a_non-latitude-longitude_horizontal_grid { +netcdf 7_2_cells_in_a_non-latitude-longitude_horizontal_grid { dimensions: - imax = 128; - jmax = 64; + imax = 2; + jmax = 4; nv = 4; variables: float lat(jmax,imax); @@ -12,6 +12,9 @@ variables: lon:long_name = "longitude"; lon:units = "degrees_east"; lon:bounds = "lon_bnds"; + float dat(jmax,imax); + dat:coordinates = "lon lat"; float lat_bnds(jmax,imax,nv); float lon_bnds(jmax,imax,nv); -} \ No newline at end of file +} + diff --git a/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen b/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen index 06a2e172b..c7e70749d 100644 --- a/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen +++ b/test/cf/7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds.ncgen @@ -1,4 +1,4 @@ -netcdf specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds { +netcdf 7_3_specifying_formula_terms_when_a_parametric_coordinate_variable_has_bounds { dimensions: eta=5; lat=10; diff --git a/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen b/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen index 41846cbb4..1d814b7c5 100644 --- a/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen +++ b/test/cf/7_4_cell_areas_for_a_spherical_geodesic_grid.ncgen @@ -1,4 +1,4 @@ -netcdf cell_areas_for_a_spherical_geodesic_grid { +netcdf 7_4_cell_areas_for_a_spherical_geodesic_grid { dimensions: cell = 2562 ; // number of grid cells time = 12 ; diff --git a/test/cf/7_5_methods_applied_to_a_timeseries.ncgen b/test/cf/7_5_methods_applied_to_a_timeseries.ncgen index 6de58d848..972129369 100644 --- a/test/cf/7_5_methods_applied_to_a_timeseries.ncgen +++ b/test/cf/7_5_methods_applied_to_a_timeseries.ncgen @@ -1,4 +1,4 @@ -netcdf methods_applied_to_a_timeseries { +netcdf 7_5_methods_applied_to_a_timeseries { dimensions: time = UNLIMITED; // (5 currently) station = 10; @@ -24,4 +24,4 @@ variables: data: time = 0., 12., 24., 36., 48.; time_bnds = -12.,0., 0.,12., 12.,24., 24.,36., 36.,48.; -} \ No newline at end of file +} diff --git a/test/cf/7_6_surface_air_temperature_variance.ncgen b/test/cf/7_6_surface_air_temperature_variance.ncgen index 94c6641db..0ee6e8c1e 100644 --- a/test/cf/7_6_surface_air_temperature_variance.ncgen +++ b/test/cf/7_6_surface_air_temperature_variance.ncgen @@ -1,4 +1,4 @@ -netcdf surface_air_temperature_variance { +netcdf 7_6_surface_air_temperature_variance { dimensions: lat=90; lon=180; diff --git a/test/cf/7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea.ncgen b/test/cf/7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea.ncgen index 3fb4364b3..f11ccccb8 100644 --- a/test/cf/7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea.ncgen +++ b/test/cf/7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea.ncgen @@ -1,4 +1,4 @@ -netcdf mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea { +netcdf 7_7_mean_surface_temperature_and_heat_flux_averaged_separately_over_land_and_sea { dimensions: lat=73; lon=96; @@ -14,4 +14,4 @@ variables: land_sea:standard_name="area_type"; data: land_sea="land","sea"; -} \ No newline at end of file +} diff --git a/test/cf/7_9_climatological_seasons.ncgen b/test/cf/7_9_climatological_seasons.ncgen index d660b9f47..19e3c0fe2 100644 --- a/test/cf/7_9_climatological_seasons.ncgen +++ b/test/cf/7_9_climatological_seasons.ncgen @@ -1,4 +1,4 @@ -netcdf climatological_seasons { +netcdf 7_9_climatological_seasons { dimensions: time=4; nv=2; diff --git a/test/sources/cf.jl b/test/sources/cf.jl index 1703e77ed..eec7ca237 100644 --- a/test/sources/cf.jl +++ b/test/sources/cf.jl @@ -32,7 +32,7 @@ example_paths = map(filter(endswith(".ncgen"), readdir(cfdir))) do input_name output_path end -example_ids = replace.(strip.(first.(split.(basename.(example_paths), r"[A-Za-z]")), '_'), ("_" => ".",)) +example_ids = replace.(Base.strip.(first.(split.(basename.(example_paths), r"[A-Za-z]")), '_'), ("_" => ".",)) examples = OrderedDict(example_ids .=> example_paths) rasterstacks = map(enumerate(example_paths)) do (i, example_path) @@ -45,7 +45,7 @@ end |> OrderedDict # name = splitext(basename(example_path))[1] # name == "5_1_independent_coordinate_variables" && return name => nothing # name => Raster(example_path; lazy=true) -# end +# end # st = last.(rasterstacks)[12] # st = last.(rasterstacks)[14] # refdims(st) @@ -56,8 +56,8 @@ end |> OrderedDict rast = RasterStack(examples["2.1"]) @test rast.char_variable == rast.str_variable # Also works lazily - @test read(RasterStack(examples["2.1"]; lazy=true).char_variable) == - read(RasterStack(examples["2.1"]; lazy=true)).char_variable == + @test read(RasterStack(examples["2.1"]; lazy=true).char_variable) == + read(RasterStack(examples["2.1"]; lazy=true)).char_variable == rast.str_variable # Fails due to DiskArrays.jl size checks during the comparison iteration @test_broken RasterStack(examples["2.1"]; lazy=true).char_variable == rast.str_variable @@ -196,25 +196,46 @@ end end @testset "climatology bounds" begin - # Not implemented rast = RasterStack(examples["7.9"]; lazy=true) - @test rast[Ti=Near(DateTime(1960, 2))] == rast[Ti=1] + @test Rasters.Lookups.cycle(lookup(rast, Ti)) == Year(1) + @test Rasters.bounds(rast, Ti) == (DateTime("1960-03-01T00:00:00"), DateTime("1991-03-01T00:00:00")) + @test span(rast, Ti) == Explicit([ + DateTime("1960-03-01T00:00:00") DateTime("1960-06-01T00:00:00") DateTime("1960-09-01T00:00:00") DateTime("1960-12-01T00:00:00") + DateTime("1960-06-01T00:00:00") DateTime("1960-09-01T00:00:00") DateTime("1960-12-01T00:00:00") DateTime("1961-03-01T00:00:00") + ]) + @test rast[Ti=Near(DateTime(1960, 2))] == rast[Ti=1] @test rast[Ti=At(DateTime(1960, 4, 16))] == rast[Ti=At(DateTime(1970, 4, 16))] == rast[Ti=At(DateTime(1990, 4, 16))] == rast[Ti=1] - # We need to add an `cycle_bounds` keyword or something - # to Cyclic so the cycle itself is bounded over a valid period - @test_broken @test_throws BoundsError rast[Ti=At(DateTime(2000, 4, 16))] - @test_broken @test_throws BoundsError rast[Ti=At(DateTime(1950, 4, 16))] + @test_throws Lookups.SelectorError rast[Ti=At(DateTime(2000, 4, 16))] + @test_throws Lookups.SelectorError rast[Ti=At(DateTime(1950, 4, 16))] # Contains is just broken for Cyclic, this should work - @test_broken rast[Ti=Contains(DateTime(1970, 6, 1))] == rast[Ti=2] - @test_broken rast[Ti=Contains(DateTime(1970, 5, 31))] == rast[Ti=1] + @test rast[Ti=Contains(DateTime(1970, 6, 1))] == rast[Ti=2] + @test rast[Ti=Contains(DateTime(1970, 5, 31))] == rast[Ti=1] - RasterStack(examples["7.10"]; lazy=true) - RasterStack(examples["7.11"]; lazy=true) - RasterStack(examples["7.12"]; lazy=true) - RasterStack(examples["7.13"]; lazy=true) - RasterStack(examples["7.14"]; lazy=true) + rast = RasterStack(examples["7.10"]; lazy=false) + @test Rasters.Lookups.cycle(lookup(rast, Ti)) == Year(1) + @test Rasters.bounds(rast, Ti) == (DateTime("1961-01-01T00:00:00"), DateTime("1990-02-01T00:00:00")) + @test span(rast, Ti) == Explicit([ + DateTime("1961-01-01T00:00:00") DateTime("1971-01-01T00:00:00") DateTime("1981-01-01T00:00:00") + DateTime("1961-02-01T00:00:00") DateTime("1971-02-01T00:00:00") DateTime("1981-02-01T00:00:00") + ]) + # Mix up the array values + @test Rasters.dims2indices(rast, (Ti(At(DateTime(1961, 1, 15))),)) == (:, :, 1) + @test_broken Rasters.dims2indices(rast, (Ti(At(DateTime(1971, 1, 15))),)) == (:, :, 2) + @test_broken Rasters.dims2indices(rast, (Ti(At(DateTime(1981, 1, 15))),)) == (:, :, 3) + rast = RasterStack(examples["7.11"]; lazy=true) + @test Rasters.Lookups.cycle(lookup(rast, Ti)) == Day(1) + @test Rasters.bounds(rast, Ti) == (DateTime("1997-04-01T00:00:00"), DateTime("1997-05-02T00:00:00")) + rast = RasterStack(examples["7.12"]; lazy=true) + @test Rasters.Lookups.cycle(lookup(rast, Ti)) == Year(1) + @test Rasters.bounds(rast, Ti) == (DateTime("2007-12-01T06:00:00"), DateTime("2008-3-01T06:00:00")) + rast = RasterStack(examples["7.13"]; lazy=true) + @test Rasters.Lookups.cycle(lookup(rast, Ti)) == (Year(1), Day(1) => Month(1)) + @test Rasters.bounds(rast, Ti) == (DateTime("1961-04-01T00:00:00"), DateTime("1990-05-01T00:00:00")) + rast = RasterStack(examples["7.14"]; lazy=true) + @test Rasters.bounds(rast, Ti) == (DateTime("2000-06-01T00:00:00"), DateTime("2000-09-01T00:00:00")) + @test Rasters.Lookups.cycle(lookup(rast, Ti)) == Year(1) end @testset "Geometry lookups" begin @@ -233,9 +254,9 @@ end GeoInterface.nring(poly) == 2 GeoInterface.nhole(poly) == 1 @test GeoInterface.getgeom(dims(rast, :Geometry)[1], 1) isa GeoInterface.Polygon - @test collect(GeoInterface.getpoint(dims(rast, :Geometry)[1])) == - [(20.0, 0.0), (10.0, 15.0), (0.0, 0.0), (20.0, 0.0), - (5.0, 5.0), (10.0, 10.0), (15.0, 5.0), (5.0, 5.0), + @test collect(GeoInterface.getpoint(dims(rast, :Geometry)[1])) == + [(20.0, 0.0), (10.0, 15.0), (0.0, 0.0), (20.0, 0.0), + (5.0, 5.0), (10.0, 10.0), (15.0, 5.0), (5.0, 5.0), (20.0, 20.0), (10.0, 35.0), (0.0, 20.0), (20.0, 20.0)] end end From abd10bfa41f02f3fd93b6cbe0ced8031cfcc841d Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 14 Jul 2025 17:40:01 +1000 Subject: [PATCH 39/43] better rotated lookups --- src/sources/commondatamodel.jl | 67 +++++++++------ test/cf/5_10_british_national_grid.ncgen | 103 ++++++++++++----------- test/cf/5_6_rotated_pole_grid.ncgen | 23 +++-- test/sources/cf.jl | 23 ++--- 4 files changed, 123 insertions(+), 93 deletions(-) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 40b7b19a9..b69f8ed25 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -333,20 +333,13 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) layer_dimnames_vec[i] = dimnames = dimnames[2:end] end # Loop over the dimensions of this layer, adding missing dims to dim_dict - output_layerdims_vec[i] = map(dimnames) do dimname + output_layerdims_vec[i] = layerdims = map(dimnames) do dimname get!(dim_dict, dimname) do _cdm_dim( ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs, grid_mapping_key ) end end - unformatted_dims = map(dimnames) do dimname - dim_dict[dimname] - end - formatted_dims = format(unformatted_dims, var) - map(dimnames, formatted_dims) do dimname, d - dim_dict[dimname] = d - end # Find scalar coordinates to use as refdims (5.7) for coord_name in coord_names haskey(refdim_dict, coord_name) && continue @@ -361,7 +354,20 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) refdim_dict[coord_name] = refdim end end - # Add this layer to the output + # Format dimensions + unformatted_dims = map(dimnames) do dimname + dim_dict[dimname] + end + formatvar = if eltype(var) <: Char && length(layerdims) == (ndims(var) - 1) + DiskCharToString(var) + else + var + end + formatted_dims = format(unformatted_dims, formatvar) + map(dimnames, formatted_dims) do dimname, d + dim_dict[dimname] = d + end + # Finalise output variables and attributes output_layers_vec[i] = var output_attrs_vec[i] = attr end @@ -416,7 +422,7 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na end if is_multidimensional(ds, vars_dict, dimension_coord_names) if is_rotated_longitude_latitude(ds, vars_dict, attrs_dict, dimname, grid_mapping_key) - r = _roated_longitude_latudtude_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) + r = _rotated_longitude_latudtude_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) return r elseif is_unalligned(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, grid_mapping_key) u = _unalligned_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) @@ -436,13 +442,16 @@ end function is_rotated_longitude_latitude(ds, vars_dict, attrs_dict, dimname, grid_mapping_key) - if !isnothing(grid_mapping_key) - grid_mapping = _get_attr!(attrs_dict, vars_dict, ds, grid_mapping_key) - grid_mapping_name = grid_mapping["grid_mapping_name"] - if grid_mapping_name == "rotated_latitude_longitude" && get(dim_attrs, "standard_name", "") in ("grid_latitude", "grid_longitude") - return true - end - end + # isnothing(grid_mapping_key) && return false + # grid_mapping = _get_attr!(attrs_dict, vars_dict, ds, grid_mapping_key) + # grid_mapping_name = get(grid_mapping, "grid_mapping_name", nothing) + # isnothing(grid_mapping_name) && return false + # if grid_mapping_name == "rotated_latitude_longitude" + # dim_attrs = _get_attr!(attrs_dict, vars_dict, ds, dimname) + # if get(dim_attrs, "standard_name", "") in ("grid_latitude", "grid_longitude") + # return true + # end + # end return false end function is_unalligned(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, grid_mapping_key) @@ -558,8 +567,7 @@ end # Must have "standard_name" in dimension attributes function _rotated_longitude_latudtude_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) - lookup = _unaligned_lookup(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) - standard_name = _get_attr!(attrs_dict, vars_dict, ds)["standard_name"] + standard_name = _get_attr!(attrs_dict, vars_dict, ds, dimname)["standard_name"] D = if standard_name == "grid_latitude" Y elseif standard_name == "grid_longitude" @@ -567,6 +575,7 @@ function _rotated_longitude_latudtude_dim(ds, vars_dict, attrs_dict, dimension_c else error("standard_name $standard_name not recognized") end + lookup = _unaligned_lookup(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, D, crs) return D(lookup) end # Must have "degrees_north" and "degrees_east" in coordinate units @@ -594,10 +603,12 @@ function _unaligned_lookup(ds, vars_dict, attrs_dict, dimension_coord_names, dim end dims = Lat(AutoValues(); metadata=lat_metadata), Lon(AutoValues(); metadata=lon_metadata) - data = if haskey(ds, dimname) - _get_attr!(attrs_dict, vars_dict, ds, dimname) + if haskey(ds, dimname) + dim = _cdm_dimtype(ds, dimname)() + data = collect(_get_var!(vars_dict, ds, dimname)) else - Base.OneTo(CDM.dim(ds, dimname)) + dim = AutoDim() + data = Base.OneTo(CDM.dim(ds, dimname)) end if D <: X matrix = collect(_get_var!(vars_dict, ds, lon_key)) @@ -1061,10 +1072,12 @@ function _def_lookup_var!(ds::AbstractDataset, dim::Dimension{<:Union{Sampled,Ab end _cf_crs(ds, grid_mapping_key::Nothing) = nothing -function _cf_crs(ds, grid_mapping_key::String) - # TODO handle multi-crs +function _cf_crs(ds, grid_mapping_key::AbstractString) if occursin(' ', grid_mapping_key) - _split_attribute(grid_mapping_key)[1][1] + crss = _split_cf_attribute(grid_mapping_key) + return map(crss) do (crs, coords) + _cf_crs(ds, crs) => coords + end end haskey(ds, grid_mapping_key) || return nothing grid_mapping_var = CDM.variable(ds, grid_mapping_key) @@ -1086,7 +1099,7 @@ function _cf_crs(ds, grid_mapping_key::String) end end -function _split_attribute(attribute::String) +function _split_cf_attribute(attribute::String) items = split(attribute, ' ') starts = findall(contains(':'), items) stops = append!(starts[2:end] .- 1, length(items)) @@ -1097,7 +1110,7 @@ end function _grid_mapping_keys(grid_mapping_key::String) if occursin(' ', grid_mapping_key) - first.(_split_attribute(grid_mapping_key)) + first.(_split_cf_attribute(grid_mapping_key)) else [grid_mapping_key] end diff --git a/test/cf/5_10_british_national_grid.ncgen b/test/cf/5_10_british_national_grid.ncgen index 2dda1d59f..2e79458f8 100644 --- a/test/cf/5_10_british_national_grid.ncgen +++ b/test/cf/5_10_british_national_grid.ncgen @@ -1,51 +1,58 @@ netcdf 5_10_british_national_grid { dimensions: - z = 100; - y = 100000 ; - x = 100000 ; - variables: - double x(x) ; - x:standard_name = "projection_x_coordinate" ; - x:long_name = "Easting" ; - x:units = "m" ; - double y(y) ; - y:standard_name = "projection_y_coordinate" ; - y:long_name = "Northing" ; - y:units = "m" ; - double z(z) ; - z:standard_name = "height_above_reference_ellipsoid" ; - z:long_name = "height_above_osgb_newlyn_datum_masl" ; - z:units = "m" ; - double lat(y, x) ; - lat:standard_name = "latitude" ; - lat:units = "degrees_north" ; - double lon(y, x) ; - lon:standard_name = "longitude" ; - lon:units = "degrees_east" ; - float temp(z, y, x) ; - temp:standard_name = "air_temperature" ; - temp:units = "K" ; - temp:coordinates = "lat lon" ; - temp:grid_mapping = "crsOSGB: x y crsWGS84: lat lon" ; - float pres(z, y, x) ; - pres:standard_name = "air_pressure" ; - pres:units = "Pa" ; - pres:coordinates = "lat lon" ; - pres:grid_mapping = "crsOSGB: x y crsWGS84: lat lon" ; - int crsOSGB ; - crsOSGB:grid_mapping_name = "transverse_mercator"; - crsOSGB:semi_major_axis = 6377563.396 ; - crsOSGB:inverse_flattening = 299.3249646 ; - crsOSGB:longitude_of_prime_meridian = 0.0 ; - crsOSGB:latitude_of_projection_origin = 49.0 ; - crsOSGB:longitude_of_central_meridian = -2.0 ; - crsOSGB:scale_factor_at_central_meridian = 0.9996012717 ; - crsOSGB:false_easting = 400000.0 ; - crsOSGB:false_northing = -100000.0 ; - crsOSGB:unit = "metre" ; - int crsWGS84 ; - crsWGS84:grid_mapping_name = "latitude_longitude"; - crsWGS84:longitude_of_prime_meridian = 0.0 ; - crsWGS84:semi_major_axis = 6378137.0 ; - crsWGS84:inverse_flattening = 298.257223563 ; + z = 2; + y = 3 ; + x = 2 ; +variables: + double x(x) ; + x:standard_name = "projection_x_coordinate" ; + x:long_name = "Easting" ; + x:units = "m" ; + double y(y) ; + y:standard_name = "projection_y_coordinate" ; + y:long_name = "Northing" ; + y:units = "m" ; + double z(z) ; + z:standard_name = "height_above_reference_ellipsoid" ; + z:long_name = "height_above_osgb_newlyn_datum_masl" ; + z:units = "m" ; + double lat(y, x) ; + lat:standard_name = "latitude" ; + lat:units = "degrees_north" ; + double lon(y, x) ; + lon:standard_name = "longitude" ; + lon:units = "degrees_east" ; + float temp(z, y, x) ; + temp:standard_name = "air_temperature" ; + temp:units = "K" ; + temp:coordinates = "lat lon" ; + temp:grid_mapping = "crsOSGB: x y crsWGS84: lat lon" ; + float pres(z, y, x) ; + pres:standard_name = "air_pressure" ; + pres:units = "Pa" ; + pres:coordinates = "lat lon" ; + pres:grid_mapping = "crsOSGB: x y crsWGS84: lat lon" ; + int crsOSGB ; + crsOSGB:grid_mapping_name = "transverse_mercator"; + crsOSGB:semi_major_axis = 6377563.396 ; + crsOSGB:inverse_flattening = 299.3249646 ; + crsOSGB:longitude_of_prime_meridian = 0.0 ; + crsOSGB:latitude_of_projection_origin = 49.0 ; + crsOSGB:longitude_of_central_meridian = -2.0 ; + crsOSGB:scale_factor_at_central_meridian = 0.9996012717 ; + crsOSGB:false_easting = 400000.0 ; + crsOSGB:false_northing = -100000.0 ; + crsOSGB:unit = "metre" ; + int crsWGS84 ; + crsWGS84:grid_mapping_name = "latitude_longitude"; + crsWGS84:longitude_of_prime_meridian = 0.0 ; + crsWGS84:semi_major_axis = 6378137.0 ; + crsWGS84:inverse_flattening = 298.257223563 ; +data: + z = 100.,200. ; + x = 1.,2. ; + y = 1.,2.,3. ; + lon = 0.09304411541825952,-0.44758423350737075,-0.9882125824330013,0.7267165797621494,0.18608823083651904,-0.35454011808911123 ; + lat = 1.351930628740054,2.177385813188897,3.00284099763774,1.8784060730312653,2.703861257480108,3.529316441928951 ; + temp = 1.,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12. ; } diff --git a/test/cf/5_6_rotated_pole_grid.ncgen b/test/cf/5_6_rotated_pole_grid.ncgen index 9ea510e02..5e84ac2a8 100644 --- a/test/cf/5_6_rotated_pole_grid.ncgen +++ b/test/cf/5_6_rotated_pole_grid.ncgen @@ -1,14 +1,14 @@ netcdf 5_6_rotated_pole_grid { dimensions: - rlon = 128 ; - rlat = 64 ; - lev = 18 ; + rlon = 2 ; + rlat = 3 ; + lev = 2 ; variables: - float T(lev,rlat,rlon) ; - T:long_name = "temperature" ; - T:units = "K" ; - T:coordinates = "lon lat" ; - T:grid_mapping = "rotated_pole" ; + float temp(lev,rlat,rlon) ; + temp:long_name = "temperature" ; + temp:units = "K" ; + temp:coordinates = "lon lat" ; + temp:grid_mapping = "rotated_pole" ; char rotated_pole ; rotated_pole:grid_mapping_name = "rotated_latitude_longitude" ; rotated_pole:grid_north_pole_latitude = 32.5 ; @@ -30,4 +30,11 @@ variables: float lat(rlat,rlon) ; lat:long_name = "latitude" ; lat:units = "degrees_north" ; +data: + lev = 100.,200. ; + rlon = 1.,2. ; + rlat = 1.,2.,3. ; + lon = 0.09304411541825952,-0.44758423350737075,-0.9882125824330013,0.7267165797621494,0.18608823083651904,-0.35454011808911123 ; + lat = 1.351930628740054,2.177385813188897,3.00284099763774,1.8784060730312653,2.703861257480108,3.529316441928951 ; + temp = 1.,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12. ; } diff --git a/test/sources/cf.jl b/test/sources/cf.jl index eec7ca237..208fab9ac 100644 --- a/test/sources/cf.jl +++ b/test/sources/cf.jl @@ -9,7 +9,8 @@ using Rasters: name, bounds using Dates using GeoInterface -# using NCDatasets +# +#using NCDatasets # using NCDatasets.NetCDF_jll # NetCDF_jll.ncdump() do exe @@ -78,7 +79,7 @@ end end @testset "5.1 independent coordinate variables" begin - rast = RasterStack(examples["5.1"]) + rast = RasterStack(examples["5.1"]); @test dims(rast) == (X(NoLookup(1:36)), Y(NoLookup(1:18)), Dim{:pres}(NoLookup(1:15)), Ti(NoLookup(1:4))) @test metadata(rast.xwind)["long_name"] == "zonal wind" @test metadata(rast.xwind)["units"] == "m/s" @@ -86,9 +87,10 @@ end end @testset "5.2 two-dimensional coordinate variables" begin - rast = RasterStack(examples["5.2"]; lazy=true) - # TODO multi-dimensional mapped - @test_broken rast[Lat=1.0, Lon=2.0] isa Float32 + rast = RasterStack(examples["5.2"]; lazy=true); + @test_broken + rast[X=Near(1.0), Y=Near(2.0), Z=1] + isa Float32 end @testset "5.3 reduced horizontal grid" begin @@ -101,23 +103,24 @@ end end @testset "5.6 rotated pole grid" begin - rast = RasterStack(examples["5.6"]; lazy=true) - @test_broken rast[Lat=1.0, Lon=2.0] isa Float32 + rast = RasterStack(examples["5.6"]; lazy=true); + @test rast.temp[X=Near(-0.9), Y=Near(3.0), Z=1] === 3.0f0 + @test rast.temp[X=At(-0.44758424f0), Y=At(2.1773858f0), Z=1] === 2.0f0 end @testset "5.8-9 WGS 84 EPSG" begin rast = RasterStack(examples["5.8"]; lazy=true) @test crs(rast) isa EPSG @test crs(rast) == EPSG(4326) - rast = RasterStack(examples["5.9"]; lazy=true) + rast = RasterStack(examples["5.9"]; lazy=true); @test crs(rast) isa EPSG @test crs(rast) == EPSG(4326) end @testset "5.10 British national grid" begin - rast = RasterStack(examples["5.10"]; lazy=true) - @test_broken !isnothing(crs(rast)) + rast = RasterStack(examples["5.10"]; lazy=true); @test keys(rast) == (:temp, :pres) + @test_broken !isnothing(crs(rast)) end @testset "5.11 WGS 84 WellKnownText2" begin From d207a43eb9d3dac990acbe8579f799043a9dbfd9 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 19 Jul 2025 06:49:13 -0400 Subject: [PATCH 40/43] Merge remote-tracking branch 'origin/main' into cdm_overhaul (#996) --- ext/RastersGRIBDatasetsExt/gribdatasets_source.jl | 5 ++--- src/openstack.jl | 4 ++-- src/stack.jl | 4 ++-- test/sources/gdal.jl | 2 -- test/sources/gribdatasets.jl | 6 ++++++ 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl index b513ae3db..c41f436c0 100644 --- a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl +++ b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl @@ -21,8 +21,7 @@ Base.close(os::RA.OpenStack{GRIBsource}) = nothing RA.missingval(var::GDS.Variable, ::RA.Metadata{<:RA.CDMsource}) = _missingval(var) RA.missingval(var::GDS.Variable, args...) = _missingval(var) -function _missingval(var::GDS.Variable{T}) where T +function _missingval(var::GDS.Variable) mv = GDS.missing_value(var) - T1 = promote_type(typeof(mv), T) - return T1(mv) + RA._fix_missingval(var, mv) end \ No newline at end of file diff --git a/src/openstack.jl b/src/openstack.jl index d8b894e7d..79bb5be32 100644 --- a/src/openstack.jl +++ b/src/openstack.jl @@ -18,8 +18,8 @@ struct OpenStack{X,K,T,DS,M} end OpenStack{X,K,T}(dataset::DS, mods::M) where {X,K,T,DS,M} = OpenStack{X,K,T,DS,M}(dataset, mods) -OpenStack(fs::FileStack{Source,K}) where {K,Source} = - OpenStack{Source,K}(sourceconstructor(Source())(filename(fs)), fs.mods) +OpenStack(fs::FileStack{Source,K,T}) where {Source,K,T} = + OpenStack{Source,K,T}(sourceconstructor(Source())(filename(fs)), fs.mods) dataset(os::OpenStack) = os.dataset diff --git a/src/stack.jl b/src/stack.jl index a7eee1f10..ebc87e3f9 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -526,8 +526,8 @@ function DD.modify(f, s::AbstractRasterStack{<:FileStack{<:Any,K}}) where K end # Open a single file stack -function Base.open(f::Function, st::AbstractRasterStack{K,T,<:Any,<:FileStack{X}}; kw...) where {X,K,T} - ost = OpenStack{X,K,T}(parent(st)) +function Base.open(f::Function, st::AbstractRasterStack{K}; kw...) where K + ost = OpenStack(parent(st)) # TODO is this needed? layers = map(K) do k ost[k] diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 922207491..1b86120a0 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -268,7 +268,6 @@ gdalpath = maybedownload(url) end @testset "aggregate" begin - DiskArrays.allowscalar(true) # TODO remove when this is fixed in DiskArrays ag = aggregate(mean, gdalarray, 4) @test ag == aggregate(mean, gdalarray, (X(4), Y(4))) @test ag == aggregate(mean, lazyarray, 4; filename=tempname() * ".tif") @@ -295,7 +294,6 @@ gdalpath = maybedownload(url) end @test size(Raster(tempfile)) == 2 .* size(gdalarray) end - DiskArrays.allowscalar(false) end @testset "mosaic" begin diff --git a/test/sources/gribdatasets.jl b/test/sources/gribdatasets.jl index 423a0a34c..529fa8d8c 100644 --- a/test/sources/gribdatasets.jl +++ b/test/sources/gribdatasets.jl @@ -164,6 +164,7 @@ end @testset "RasterStack" begin gribstack = RasterStack(era5; lazy=true) + gribstackraw = RasterStack(era5; lazy=true, raw=true) @testset "RasterStack" begin @test all(read(gribstack.z) .=== Raster(era5; name=:z)) end @@ -172,4 +173,9 @@ end @test dims(dsstack) == dims(gribstack) @test size(dsstack) == size(gribstack) end + @testset "open a stack" begin + Rasters.DiskArrays.allowscalar(true) + @test open(first, gribstack) == open(first, gribstackraw) == gribstack[1] + Rasters.DiskArrays.allowscalar(true) + end end \ No newline at end of file From 12596d942645305e329aaf43e9bebdea8ecc2f63 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 19 Jul 2025 06:53:21 -0400 Subject: [PATCH 41/43] fix sources for dev branches --- Project.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index bc09e203c..c10761c74 100644 --- a/Project.toml +++ b/Project.toml @@ -121,5 +121,4 @@ test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", [sources] -DimensionalData = {url = "https://github.com/rafaqz/DimensionalData.jl", rev = "as/individualindexing"} -GeometryOps = {url = "https://github.com/JuliaGeo/GeometryOps.jl", rev = "as/extentforwarding_for_predicates"} +DimensionalData = {url = "https://github.com/rafaqz/DimensionalData.jl", rev = "as/multidimtrait"} From bd89244ee130e10865763858fbe16f811f3b89e8 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 25 Aug 2025 02:24:43 +1000 Subject: [PATCH 42/43] domains working --- Project.toml | 2 +- src/lookup.jl | 6 +- src/sources/commondatamodel.jl | 101 +++++++++------- src/stack.jl | 19 ++- ...ith_independent_coordinate_variables.ncgen | 3 +- ...areas_for_a_sqpherical_geodesic_grid.ncgen | 2 + ...a_domain_with_no_specific_dimensions.ncgen | 2 + ...ain_containing_a_timeseries_geometry.ncgen | 7 ++ test/sources/cf.jl | 112 ++++++++++++++---- 9 files changed, 178 insertions(+), 76 deletions(-) diff --git a/Project.toml b/Project.toml index c10761c74..4b605517c 100644 --- a/Project.toml +++ b/Project.toml @@ -62,7 +62,7 @@ CommonDataModel = "0.2.3, 0.3" ConstructionBase = "1" CoordinateTransformations = "0.6.2" DataFrames = "1" -DimensionalData = "0.29.4" +DimensionalData = "0.30.0" DiskArrays = "0.4" Extents = "0.1" FillArrays = "0.12, 0.13, 1" diff --git a/src/lookup.jl b/src/lookup.jl index 5a43ae16f..9d3f41d17 100644 --- a/src/lookup.jl +++ b/src/lookup.jl @@ -178,11 +178,13 @@ function Mapped(l::Sampled; Mapped(parent(l), order, span, sampling, metadata, crs, mappedcrs, dim, dims, mapdata) end -DD.hasmultipledimensions(lookup::Mapped) = true GeoInterface.crs(lookup::Mapped) = lookup.crs mappedcrs(lookup::Mapped) = lookup.mappedcrs dim(lookup::Mapped) = lookup.dim -DD.dim(lookup::Mapped) = lookup.dims +DD.dims(lookup::Mapped) = lookup.dims + +DD.hasalternatedimensions(lookup::Mapped) = !isnothing(dims(lookup)) && length(dims(lookup)) == 1 +DD.hasmultipledimensions(lookup::Mapped) = !isnothing(dims(lookup)) && length(dims(lookup)) > 1 """ convertlookup(dstlookup::Type{<:Lookup}, x) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index b69f8ed25..f1a7dfc09 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -140,12 +140,12 @@ DiskArrays.haschunks(A::DiskCharToString) = DiskArrays.haschunks(parent(A)) function DiskArrays.readblock!(A::DiskCharToString, dest, I...) src = parent(A)[:, I...] for I in CartesianIndices(dest) - dest[I] = _chars_to_string(view(src, :, I)) + dest[I] = _cf_chars_to_string(view(src, :, I)) end return dest end -_chars_to_string(chars) = String(strip(join(chars), '\0')) +_cf_chars_to_string(chars) = String(strip(join(chars), '\0')) # This allows arbitrary group nesting _getgroup(ds, ::Union{Nothing,NoKW}) = ds @@ -260,20 +260,13 @@ function _layer_names(ds, var_names, vars_dict, attrs_dict) ) end -function _cdm_coordinates(attr) - if haskey(attr, "coordinates") - String.(split(attr["coordinates"], ' ')) - else - String[] - end -end -function _cdm_node_coordinates(attr) - if haskey(attr, "node_coordinates") - String.(split(attr["node_coordinates"], ' ')) - else - String[] - end -end +_cdm_coordinates(attr) = + haskey(attr, "coordinates") ? _split_attribute(attr["coordinates"]) : String[] +_cdm_node_coordinates(attr) = + haskey(attr, "node_coordinates") ? _split_attribute(attr["node_coordinates"]) : String[] + +_split_attribute(attrib) = + isempty(attrib) ? String[] : String.(split(attrib, ' ')) #= Here we convert the surface of the CF standard that we support into DimensionalData.jl objects @@ -291,7 +284,7 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) geometry_dict = Dict{String,Any}() crs_dict = Dict{String,Any}() # Define a dimensions dict so these are only generated once - dim_dict = Dict{String,Dimension}() + dim_dict = OrderedDict{String,Dimension}() # Refdims need a consistent order as it is not changed later refdim_dict = OrderedDict{String,Dimension}() # Get the layer names to target @@ -302,6 +295,7 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) output_layers_vec = Vector{AbstractArray}(undef, length(layer_names)) output_layerdims_vec = Vector{Tuple}(undef, length(layer_names)) output_attrs_vec = Vector{Any}(undef, length(layer_names)) + used_layers = trues(length(layer_names)) # Loop over the layers we want to load as rasters # As we go, we add dimensions to dim_dict to be used in other layers @@ -309,8 +303,17 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) # Get the variable and its attributes var = get(() -> CDM.variable(ds, var_name), vars_dict, var_name) attr = get(() -> CDM.attribs(var), attrs_dict, var_name) - # Get its dimensions - layer_dimnames_vec[i] = dimnames = CDM.dimnames(var) + isdomain = false + if eltype(var) <: Char && ndims(var) == 0 + if haskey(attr, "dimensions") + layer_dimnames_vec[i] = dimnames = Tuple(_split_attribute(attr["dimensions"])) + # Remove this entry from layer vecs + isdomain = true + end + else + # Get its dimensions + layer_dimnames_vec[i] = dimnames = CDM.dimnames(var) + end # Get its coordinates coord_names = _cdm_coordinates(attr) # Remove coordinates from metadata @@ -333,7 +336,7 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) layer_dimnames_vec[i] = dimnames = dimnames[2:end] end # Loop over the dimensions of this layer, adding missing dims to dim_dict - output_layerdims_vec[i] = layerdims = map(dimnames) do dimname + layerdims = map(dimnames) do dimname get!(dim_dict, dimname) do _cdm_dim( ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, coord_names, dimname, crs, grid_mapping_key @@ -358,25 +361,31 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) unformatted_dims = map(dimnames) do dimname dim_dict[dimname] end - formatvar = if eltype(var) <: Char && length(layerdims) == (ndims(var) - 1) - DiskCharToString(var) + if isdomain + formatted_dims = format(unformatted_dims) + used_layers[i] = false else - var + output_layerdims_vec[i] = layerdims + formatvar = if eltype(var) <: Char && length(layerdims) == (ndims(var) - 1) + DiskCharToString(var) + else + var + end + formatted_dims = format(unformatted_dims, formatvar) + # Finalise output variables and attributes + output_layers_vec[i] = var + output_attrs_vec[i] = attr end - formatted_dims = format(unformatted_dims, formatvar) map(dimnames, formatted_dims) do dimname, d dim_dict[dimname] = d end - # Finalise output variables and attributes - output_layers_vec[i] = var - output_attrs_vec[i] = attr end return (; - names_vec=layer_names, - layers_vec=output_layers_vec, - layerdims_vec=output_layerdims_vec, - layermetadata_vec=output_attrs_vec, + names_vec=layer_names[used_layers], + layers_vec=output_layers_vec[used_layers], + layerdims_vec=output_layerdims_vec[used_layers], + layermetadata_vec=output_attrs_vec[used_layers], dim_dict, refdim_dict, ) @@ -431,6 +440,12 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na return _standard_dim(ds, vars_dict, attrs_dict, dimname, crs) end else + if haskey(ds, dimname) + # This is something we're not handling properly yet, try to warn + dim_attrs = _get_attr!(attrs_dict, vars_dict, ds, dimname) + _check_formula_terms(dim_attrs) + end + # Only one coordinate, so just use it as the dimension/lookup return _merged_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) end end @@ -440,6 +455,12 @@ function _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_na # subdim = _cdm_dim(ds, vars_dict, attrs_dict, geometry_dict, geometry_key, var_name, remaining_coord_names, dimname, crs) end +function _check_formula_terms(dim_attrs) + haskey(dim_attrs, "formula_terms") && @warn """ + formula_terms are not yet recognised or used by Rasters.jl. + This may change at any time in future without a breaking version." + """ +end function is_rotated_longitude_latitude(ds, vars_dict, attrs_dict, dimname, grid_mapping_key) # isnothing(grid_mapping_key) && return false @@ -486,6 +507,7 @@ end # Lookup/Dimension generation function _standard_dim(ds, vars_dict, attrs_dict, dimname, crs) dim_attrs = _get_attr!(attrs_dict, vars_dict, ds, dimname) + _check_formula_terms(dim_attrs) if is_climatology(dim_attrs) return _climatology_dim(ds, vars_dict, attrs_dict, dim_attrs, dimname) else @@ -610,13 +632,8 @@ function _unaligned_lookup(ds, vars_dict, attrs_dict, dimension_coord_names, dim dim = AutoDim() data = Base.OneTo(CDM.dim(ds, dimname)) end - if D <: X - matrix = collect(_get_var!(vars_dict, ds, lon_key)) - return ArrayLookup(matrix; data, dims) - elseif D == Y - matrix = collect(_get_var!(vars_dict, ds, lat_key)) - return ArrayLookup(matrix; data, dims) - end + matrix = collect(_get_var!(vars_dict, ds, lat_key)) + return ArrayLookup(matrix; data, dims) end function _merged_dim(ds, vars_dict, attrs_dict, dimension_coord_names, dimname, crs) @@ -678,10 +695,12 @@ function _cdm_lookup(ds::AbstractDataset, attr, dimname, D::Type, crs) metadata = _metadatadict(sourcetrait(ds), attr) _cdm_lookup(data, ds, var, attr, dimname, D, metadata, crs) end -# For unknown types we just make a Categorical lookup -function _cdm_lookup(data::AbstractArray, ds::AbstractDataset, var, attr, dimname, D::Type, metadata, crs) +function _cdm_lookup(data::AbstractVector{<:Union{AbstractString,Char}}, ds::AbstractDataset, var, attr, dimname, D::Type, metadata, crs) Categorical(data; order=Unordered(), metadata=metadata) end +function _cdm_lookup(data::AbstractMatrix{Char}, ds::AbstractDataset, var, attr, dimname, D::Type, metadata, crs) + Categorical(_cf_chars_to_string.(eachslice(data; dims=2)); order=Unordered(), metadata=metadata) +end # For Number and AbstractTime we generate order/span/sampling # We need to include `Missing` in unions in case `_FillValue` is used # on coordinate variables in a file and propagates here. @@ -1100,7 +1119,7 @@ function _cf_crs(ds, grid_mapping_key::AbstractString) end function _split_cf_attribute(attribute::String) - items = split(attribute, ' ') + items = _split_attribute(attribute) starts = findall(contains(':'), items) stops = append!(starts[2:end] .- 1, length(items)) map(starts, stops) do s, p diff --git a/src/stack.jl b/src/stack.jl index ebc87e3f9..bd736d049 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -45,6 +45,7 @@ _singlemissingval(mvs::NamedTuple, name) = mvs[name] _singlemissingval(mv, name) = mv function _maybe_collapse_missingval(mvs::NamedTuple) + isempty(mvs) && return nothing mv1, mvs_rest = Iterators.peel(mvs) for mv in mvs_rest mv === mv1 || return mvs @@ -297,9 +298,15 @@ function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{AbstractDimArray}}}; kw... ) where K data = map(parent, _layers) - st = RasterStack(data; - dims, refdims, layerdims, metadata, layermetadata, missingval - ) + st = if isempty(data) + # Use the main constructor, there is nothing left to do. + RasterStack(data, dims, refdims, layerdims, metadata, layermetadata, missingval) + else + # Use the NamedTuple of data constructor + RasterStack(data; + dims, refdims, layerdims, metadata, layermetadata, missingval + ) + end return _postprocess_stack(st, dims; kw...) end # Stack from table and dims args @@ -615,10 +622,12 @@ function _sort_by_layerdims(dims, layerdims::Vector) currentorder = _merge_dimorder(ldims, currentorder) end end + dims_tuple = Tuple(dims) if isnothing(currentorder) - return () + return dims_tuple else - return DD.dims(Tuple(dims), currentorder) + used_dims = DD.dims(dims_tuple, currentorder) + return (used_dims..., otherdims(dims_tuple, used_dims)...) end end diff --git a/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen b/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen index 8f299e62a..fcecd0dc0 100644 --- a/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen +++ b/test/cf/5_15_domain_with_independent_coordinate_variables.ncgen @@ -4,7 +4,6 @@ dimensions: lon = 36 ; pres = 15 ; time = 4 ; - variables: char domain ; domain:dimensions = "time pres lat lon" ; @@ -21,4 +20,6 @@ variables: double time(time) ; time:long_name = "time" ; time:units = "days since 1990-1-1 0:0:0" ; +data: + time = 1, 2, 3, 4 ; } diff --git a/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen b/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen index ddd3cef23..0935f6327 100644 --- a/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen +++ b/test/cf/5_17_a_domain_containing_cell_areas_for_a_sqpherical_geodesic_grid.ncgen @@ -26,4 +26,6 @@ variables: cell_area:units = "m2" ; float lon_vertices(cell, nv) ; float lat_vertices(cell, nv) ; +data: + time = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ; } diff --git a/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen b/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen index 902f2abdf..40ba04bf2 100644 --- a/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen +++ b/test/cf/5_18_a_domain_with_no_specific_dimensions.ncgen @@ -9,4 +9,6 @@ variables: double t ; t:standard_name = "time" ; t:units = "days since 2021-01-01" ; +data: + t = 0.5 ; } diff --git a/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen b/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen index 7b4c1e0d3..3e867ff81 100644 --- a/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen +++ b/test/cf/5_19_a_domain_containing_a_timeseries_geometry.ncgen @@ -37,4 +37,11 @@ variables: y:units = "degrees_north" ; y:standard_name = "latitude" ; y:axis = "Y" ; +data: + time = 1, 2, 3, 4 ; + lat = 30, 50 ; + lon = 10, 60 ; + node_count = 3, 2 ; + x = 30, 10, 40, 50, 50 ; + y = 10, 30, 40, 60, 50 ; } diff --git a/test/sources/cf.jl b/test/sources/cf.jl index 208fab9ac..b5bc8cd39 100644 --- a/test/sources/cf.jl +++ b/test/sources/cf.jl @@ -56,10 +56,6 @@ end |> OrderedDict @testset "2.1 string variable representation" begin rast = RasterStack(examples["2.1"]) @test rast.char_variable == rast.str_variable - # Also works lazily - @test read(RasterStack(examples["2.1"]; lazy=true).char_variable) == - read(RasterStack(examples["2.1"]; lazy=true)).char_variable == - rast.str_variable # Fails due to DiskArrays.jl size checks during the comparison iteration @test_broken RasterStack(examples["2.1"]; lazy=true).char_variable == rast.str_variable end @@ -80,17 +76,27 @@ end @testset "5.1 independent coordinate variables" begin rast = RasterStack(examples["5.1"]); - @test dims(rast) == (X(NoLookup(1:36)), Y(NoLookup(1:18)), Dim{:pres}(NoLookup(1:15)), Ti(NoLookup(1:4))) + testdims = (X(Sampled(1.0f0:36.0f0)), + Y(Sampled(1.0f0:18.0f0)), + Dim{:pres}(Sampled(1.0f0:15.0f0)), + Ti(Sampled(collect(DateTime(1990):Day(1):DateTime(1990, 1, 4))))) + @test all(map(==, dims(rast), map(DimensionalData.format, testdims))) @test metadata(rast.xwind)["long_name"] == "zonal wind" @test metadata(rast.xwind)["units"] == "m/s" - # TODO add real vars to the file so there is also dimension metadata + @test metadata(dims(rast, :pres))["long_name"] == "pressure" + @test metadata(dims(rast, :pres))["units"] == "hPa" + @test metadata(dims(rast, X))["long_name"] == "longitude" + @test metadata(dims(rast, X))["units"] == "degrees_east" + @test metadata(dims(rast, Y))["long_name"] == "latitude" + @test metadata(dims(rast, Y))["units"] == "degrees_north" + @test metadata(dims(rast, Ti))["long_name"] == "time" + # This one is kind of redundant, we have converted to DateTime already + @test metadata(dims(rast, Ti))["units"] == "days since 1990-1-1 0:0:0" ; end @testset "5.2 two-dimensional coordinate variables" begin - rast = RasterStack(examples["5.2"]; lazy=true); - @test_broken - rast[X=Near(1.0), Y=Near(2.0), Z=1] - isa Float32 + rast = RasterStack(examples["5.2"]; lazy=true) + @test rast[X=Near(1.0), Y=Near(2.0), Z=1].T isa Float32 end @testset "5.3 reduced horizontal grid" begin @@ -103,16 +109,20 @@ end end @testset "5.6 rotated pole grid" begin - rast = RasterStack(examples["5.6"]; lazy=true); - @test rast.temp[X=Near(-0.9), Y=Near(3.0), Z=1] === 3.0f0 - @test rast.temp[X=At(-0.44758424f0), Y=At(2.1773858f0), Z=1] === 2.0f0 + rast = RasterStack(examples["5.6"]; lazy=true) + @test lookup(rast, :rlat) isa ArrayLookup + @test lookup(rast, :rlon) isa ArrayLookup + @test lookup(rast, Z) == DimensionalData.format(Sampled([100.0f0, 200.0f0]; span=Regular(100.0f0))) + @test_broken rast.temp[X=Near(-0.9), Y=Near(3.0), Z=1] === 3.0f0 + @test_broken rast.temp[X=At(-0.44758424f0), Y=At(2.1773858f0), Z=1] === 2.0f0 end @testset "5.8-9 WGS 84 EPSG" begin + # TODO this is wrong rast = RasterStack(examples["5.8"]; lazy=true) @test crs(rast) isa EPSG @test crs(rast) == EPSG(4326) - rast = RasterStack(examples["5.9"]; lazy=true); + rast = RasterStack(examples["5.9"]; lazy=true) @test crs(rast) isa EPSG @test crs(rast) == EPSG(4326) end @@ -120,28 +130,67 @@ end @testset "5.10 British national grid" begin rast = RasterStack(examples["5.10"]; lazy=true); @test keys(rast) == (:temp, :pres) + # Need CFCRS.jl for this @test_broken !isnothing(crs(rast)) end @testset "5.11 WGS 84 WellKnownText2" begin rast = RasterStack(examples["5.11"]; lazy=true) + # TODO extent is broken + # @test extent(rast) @test crs(rast) isa WellKnownText2 end @testset "5.14 refdims from scalar coordinates" begin - rast = RasterStack(examples["5.14"]; lazy=true) - @test name(refdims(rast)) == (:atime, :p500) + rast = RasterStack(examples["5.14"]; lazy=true); + testdims = (Dim{:atime}([0.0]; span=Regular(0.0)), Dim{:p500}([500.0]; span=Regular(0.0))) + @test all(map(==, refdims(rast), map(DimensionalData.format, testdims))) end @testset "domains" begin - RasterStack(examples["5.15"]; lazy=true) - RasterStack(examples["5.16"]; lazy=true) + # TODO: load the dimensions of the domain, + # even though there are no layers that use them? + # Its a kind of weird RasterStack to have dims + # with no layers? but maybe it works? + @testset "5.15 domain dimension" begin + rast = RasterStack(examples["5.15"]; lazy=true) + @test dims(rast) isa Tuple{<:Ti{<:Sampled{<:DateTime}},<:Dim{:pres},<:Y,<:X} + @test layers(rast) == (;) + @test refdims(rast) == () + end + @testset "5.16 domain dimension" begin + rast = RasterStack(examples["5.16"]; lazy=true) + @test dims(rast) isa Tuple{<:Z,<:Dim{:rlat},<:Dim{:rlon}} + @test layers(rast) == (;) + @test refdims(rast) isa Tuple{<:Ti} + end @testset "5.17 domain defines dimension and coords" begin - RasterStack(examples["5.17"]; lazy=true) + # TODO : where to put area data? It could be useful + # but currently has no formal location, so its just available + # to the user, rather than to the `Rasters.cellarea` function + # TODO and do we make polygons out of the vertices so this is a GeometryLookup? + # Needs actual data in the file to test this + rast = RasterStack(examples["5.17"]; lazy=true) + span(dims(dims(rast, :cell), X)) + end + @testset "5.17 domain with no layers or dimensions, but single coordinates, so refdims" begin + rast = RasterStack(examples["5.18"]; lazy=true) + @test dims(rast) == () + @test refdims(rast) == (DimensionalData.format(Ti(Sampled([0.5]; span=Regular(0.0)))),) + end + @testset "5.17 domain with timeseries geometries" begin + rast = RasterStack(examples["5.19"]; lazy=true) + @test lookup(rast) isa Tuple{<:GeometryLookup,Sampled} + @test dims(rast) isa Tuple{<:Geometry,Ti} + @test length(lookup(rast, Geometry)) == 2 + @test lookup(rast, Geometry)[1] == GeoInterface.LineString{false, false}([(30.0, 10.0), (10.0, 30.0), (40.0, 40.0)]) end - RasterStack(examples["5.18"]; lazy=true) - RasterStack(examples["5.19"]; lazy=true) - RasterStack(examples["5.20"]; lazy=true) +end + +@testset "5.20 indexed ragged array " begin + # TODO ragged array lookup + rast = RasterStack(examples["5.20"]; lazy=true) + dims(rast, :station) end @testset "taxon name/id" begin @@ -173,17 +222,28 @@ end end @testset "7.2 non-aligned horizontal grid" begin + # TODO: load bounds as squarish polygons rast = RasterStack(examples["7.2"]; lazy=true) end -@testset "7.3-4 formula terms" begin - # These are not implemented - RasterStack(examples["7.3"]; lazy=true) +@testset "7.3 formula terms" begin + # These are not implemented, but they load ok and warn + @test_warn "formula_terms" RasterStack(examples["7.3"]; lazy=true) +end + +@testset "7.3 cell areas" begin + # TODO load bounds as polygons + # Not sure if we should attach the area to the dimension or its fine as a variable? RasterStack(examples["7.4"]; lazy=true) end @testset "7.5 methods applied to a timeseries" begin rast = RasterStack(examples["7.5"]; lazy=true) + # TODO: unfortunately mixed points and intervals + # on the same axis is hard to represent in DD without a DimTree + @test metadata(rast.ppn)["cell_methods"] == "time: sum" + @test metadata(rast.pressure)["cell_methods"] == "time: point" + @test metadata(rast.maxtemp)["cell_methods"] == "time: maximum" end @testset "7.6 spacing of data" begin @@ -248,7 +308,7 @@ end @test dims(rast, :Geometry)[1] isa GeoInterface.Wrappers.LineString @test GeoInterface.getpoint(dims(rast, :Geometry)[1]) == [(30.0, 10.0), (10.0, 30.0), (40.0, 40.0)] end - @testset "7.16 polygon" begin + @testset "7.16 polygons with holes" begin rast = RasterStack(examples["7.16"]) multipoly = dims(rast, :Geometry)[1] @test multipoly isa GeoInterface.MultiPolygon From 59d3669066ce59a6bcec408baeccc1f532f89e77 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 1 Sep 2025 14:09:44 +1000 Subject: [PATCH 43/43] fix lazy Char string reading and other minor things --- src/filearray.jl | 13 +++++++++++-- src/geometry_lookup/geometry_lookup.jl | 17 +++++++++++------ src/sources/commondatamodel.jl | 7 ++++--- test/sources/cf.jl | 10 +++++----- 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/filearray.jl b/src/filearray.jl index 8f02e2af7..661864689 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -80,7 +80,12 @@ end function DA.readblock!(A::FileArray, dst, r::AbstractUnitRange...) open(A) do O if isdisk(O) - DA.readblock!(O, dst, r...) + # Handle CF 2d Char arrays that are really 1d strings + if eltype(O) <: Char && eltype(A) <: String + DA.readblock!(DiskCharToString(O), dst, r...) + else + DA.readblock!(O, dst, r...) + end else dest[r...] .= view(parent(O), r...) end @@ -89,7 +94,11 @@ end function DA.writeblock!(A::FileArray, src, r::AbstractUnitRange...) open(A; write=A.write) do O if isdisk(A) - DA.writeblock!(O, src, r...) + if eltype(O) <: Char && eltype(A) <: String + DA.writeblock!(DiskCharToString(O), src, r...) + else + DA.writeblock!(O, src, r...) + end else parent(O)[r...] .= src end diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index 3a2b9b09e..5e41bfd5c 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -190,10 +190,15 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) end end function Lookups.selectindices(lookup::GeometryLookup, sel::At) - if GI.trait(val(sel)) isa GI.PointTrait - Lookups.selectindices(lookup, (At(GI.x(val(sel))), At(GI.y(val(sel))))) - else # invoke the default method - Lookups.at(lookup, sel) + @assert GI.isgeometry(geom) + candidates = _maybe_get_candidates(lookup, GI.extent(val(sel))) + x = findfirst(candiates) do candidate + GO.equal(val(at), candidate) + end + if isnothing(x) + throw(ArgumentError("$sel not found in lookup")) + else + return x end end function Lookups.selectindices(lookup::GeometryLookup, sel::Near) @@ -242,9 +247,9 @@ function Lookups.selectindices( end function Lookups.selectindices( lookup::GeometryLookup, - (xs, ys)::Tuple{Union{<:At,<:Contains}, Union{<:At,<:Contains}} + (x, y)::Tuple{Union{<:At,<:Contains}, Union{<:At,<:Contains}} ) - xval, yval = val(xs), val(ys) + xval, yval = val(x), val(y) lookup_ext = Lookups.bounds(lookup) if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index f1a7dfc09..e32b32642 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -137,6 +137,7 @@ DiskCharToString(A::AbstractArray{Char,M}) where M = DiskCharToString{M-1,M}(A) Base.parent(A::DiskCharToString) = A.parent Base.size(A::DiskCharToString) = size(parent(A))[2:end] DiskArrays.haschunks(A::DiskCharToString) = DiskArrays.haschunks(parent(A)) +DiskArrays.eachchunk(A::DiskCharToString) = DiskArrays.GridChunks(eachchunk(parent(A)).chunks[2:end]) function DiskArrays.readblock!(A::DiskCharToString, dest, I...) src = parent(A)[:, I...] for I in CartesianIndices(dest) @@ -366,14 +367,14 @@ function _organise_dataset(ds::AbstractDataset, names=nokw, group::NoKW=nokw) used_layers[i] = false else output_layerdims_vec[i] = layerdims - formatvar = if eltype(var) <: Char && length(layerdims) == (ndims(var) - 1) + maybewrappedvar = if eltype(var) <: Char && length(layerdims) == (ndims(var) - 1) DiskCharToString(var) else var end - formatted_dims = format(unformatted_dims, formatvar) + formatted_dims = format(unformatted_dims, maybewrappedvar) # Finalise output variables and attributes - output_layers_vec[i] = var + output_layers_vec[i] = maybewrappedvar output_attrs_vec[i] = attr end map(dimnames, formatted_dims) do dimname, d diff --git a/test/sources/cf.jl b/test/sources/cf.jl index b5bc8cd39..5a1c77e5b 100644 --- a/test/sources/cf.jl +++ b/test/sources/cf.jl @@ -56,8 +56,7 @@ end |> OrderedDict @testset "2.1 string variable representation" begin rast = RasterStack(examples["2.1"]) @test rast.char_variable == rast.str_variable - # Fails due to DiskArrays.jl size checks during the comparison iteration - @test_broken RasterStack(examples["2.1"]; lazy=true).char_variable == rast.str_variable + @test RasterStack(examples["2.1"]; lazy=true).char_variable == rast.str_variable end @testset "3.1 units metadata" begin @@ -96,6 +95,7 @@ end @testset "5.2 two-dimensional coordinate variables" begin rast = RasterStack(examples["5.2"]; lazy=true) + # TODO probably should have real data for this @test rast[X=Near(1.0), Y=Near(2.0), Z=1].T isa Float32 end @@ -103,9 +103,9 @@ end rast = RasterStack(examples["5.3"]; lazy=true) @test rast[rgrid=2].PS === 0.2f0 @test rast[rgrid=At(3.0, 30.0)].PS === 0.3f0 - @test_broken rast[X=At(4.0)].PS == [0.4] - @test_broken rast[Y=At(30.0)].PS == [0.3] - @test_broken rast[X=At(1.0), Y=At(10.0)] == 0.1 + @test rast[X=At(4.0)].PS == Union{Missing,Float32}[0.4f0] + @test rast[Y=At(30.0)].PS == Union{Missing,Float32}[0.3f0] + @test rast[X=At(1.0), Y=At(10.0)] == (PS=0.1f0,) end @testset "5.6 rotated pole grid" begin