Merged

Changes from all commits
30 commits
68d5c73
Support NETCDF4_CLASSIC in the h5engine backend
huard Sep 2, 2025
120af07
convert bytes attributes to numpy.bytes_ in NETCDF4_CLASSIC format wi…
huard Sep 2, 2025
f8f44f0
added test to confirm string attributes are stored as numpy char arra…
huard Sep 2, 2025
522d37d
Added change to whats-new
huard Sep 3, 2025
783c407
run pre-commit
huard Sep 3, 2025
2f1c781
Added test comparing CDL representation of test data written with net…
huard Sep 3, 2025
b142d38
Added global attribute to test data. Apply CLASSIC conversion to vari…
huard Sep 3, 2025
b14c373
Merge branch 'main' into fix_10676
huard Sep 3, 2025
909a96c
Use h5dump to compare file content instead of CDL. Add _nc3_strict at…
huard Sep 3, 2025
14d22a1
Merge branch 'fix_10676' of github.com:Ouranosinc/xarray into fix_10676
huard Sep 3, 2025
35b50ce
raise error if writing groups to CLASSIC file.
huard Sep 3, 2025
a187c8d
remove h5dump test. Remove _nc3_strict attribute (should go into h5ne…
huard Sep 5, 2025
0c1bc08
fix h5netcdf version check.
huard Sep 8, 2025
10707ee
Merge branch 'main' into fix_10676
huard Sep 8, 2025
03ea2de
try to fix tests
huard Sep 8, 2025
592b98b
Set default format to NETCDF4 instead of None, because passing None t…
huard Sep 8, 2025
13b60d0
Apply suggestions from code review
huard Sep 9, 2025
cf8b4be
Suggestions from review.
huard Sep 9, 2025
d0b0948
Merge branch 'fix_10676' of github.com:Ouranosinc/xarray into fix_10676
huard Sep 9, 2025
6351e66
Merge branch 'main' into fix_10676
huard Sep 22, 2025
b2a21d2
Raise an error within `get_child_store` rather that `__init__` with f…
huard Sep 22, 2025
50dbd36
Merge branch 'main' into fix_10676
huard Sep 23, 2025
6d686d4
Merge branch 'main' into fix_10676
huard Oct 2, 2025
18e4bd0
Merge branch 'main' into fix_10676
kmuehlbauer Oct 14, 2025
d9c4879
Remove casting of bytes to np._bytes_. Require h5netcdf 1.7.0 to save…
huard Oct 14, 2025
99c90a5
Merge branch 'main' into fix_10676
kmuehlbauer Oct 15, 2025
28bd354
Apply suggestions from code review
huard Oct 15, 2025
f2a29ca
Update xarray/backends/h5netcdf_.py
huard Oct 15, 2025
f1c1819
Update xarray/backends/h5netcdf_.py
huard Oct 15, 2025
88d0841
Merge branch 'main' into fix_10676
kmuehlbauer Oct 15, 2025
2 changes: 2 additions & 0 deletions doc/whats-new.rst
@@ -285,6 +285,8 @@ New Features
- ``compute=False`` is now supported by :py:meth:`DataTree.to_netcdf` and
:py:meth:`DataTree.to_zarr`.
By `Stephan Hoyer <https://github.com/shoyer>`_.
- The ``h5netcdf`` engine has support for pseudo ``NETCDF4_CLASSIC`` files, meaning variables and attributes are cast to supported types. Note that the saved files won't be recognized as genuine ``NETCDF4_CLASSIC`` files until ``h5netcdf`` adds support. (:issue:`10676`, :pull:`10686`).
By `David Huard <https://github.com/huard>`_.
- ``open_dataset`` will now correctly infer a path ending in ``.zarr/`` as zarr
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.

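A minimal usage sketch of the ``NETCDF4_CLASSIC`` option described in the whats-new entry above (the dataset and file name are placeholders, not part of the patch; requires h5netcdf >= 1.7.0):

```python
import xarray as xr

# Any small dataset works; "example.nc" is a hypothetical path.
ds = xr.Dataset({"t": ("x", [1.0, 2.0, 3.0])}, attrs={"title": "demo"})

# The h5netcdf engine now accepts format="NETCDF4_CLASSIC" and casts
# variables and attributes to types supported by the classic data model.
ds.to_netcdf("example.nc", engine="h5netcdf", format="NETCDF4_CLASSIC")
```
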
49 changes: 39 additions & 10 deletions xarray/backends/h5netcdf_.py
@@ -7,6 +7,7 @@
from typing import TYPE_CHECKING, Any, Self

import numpy as np
from packaging.version import Version

from xarray.backends.common import (
BACKEND_ENTRYPOINTS,
@@ -27,6 +28,7 @@
PickleableFileManager,
)
from xarray.backends.locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock
from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable
from xarray.backends.netCDF4_ import (
BaseNetCDF4Array,
_build_and_get_enum,
@@ -124,6 +126,7 @@ def __init__(
manager: FileManager | h5netcdf.File | h5netcdf.Group,
group=None,
mode=None,
format="NETCDF4",
lock=HDF5_LOCK,
autoclose=False,
):
@@ -143,7 +146,7 @@
self._manager = manager
self._group = group
self._mode = mode
self.format = None
self.format = format or "NETCDF4"
# todo: utilizing find_root_and_group seems a bit clunky
# making filename available on h5netcdf.Group seems better
self._filename = find_root_and_group(self.ds)[0].filename
@@ -152,6 +155,9 @@
self.autoclose = autoclose

def get_child_store(self, group: str) -> Self:
if self.format == "NETCDF4_CLASSIC":
raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.")

if self._group is not None:
group = os.path.join(self._group, group)
return type(self)(
@@ -167,7 +173,7 @@ def open(
cls,
filename,
mode="r",
format=None,
format="NETCDF4",
group=None,
lock=None,
autoclose=False,
@@ -198,8 +204,8 @@
f"{magic_number!r} is not the signature of a valid netCDF4 file"
)

if format not in [None, "NETCDF4"]:
raise ValueError("invalid format for h5netcdf backend")
if format not in [None, "NETCDF4", "NETCDF4_CLASSIC"]:
raise ValueError(f"invalid format for h5netcdf backend: {format}")

kwargs = {
"invalid_netcdf": invalid_netcdf,
@@ -210,6 +216,12 @@
kwargs.update(driver_kwds)
if phony_dims is not None:
kwargs["phony_dims"] = phony_dims
if Version(h5netcdf.__version__) > Version("1.6.4"):
kwargs["format"] = format
elif format == "NETCDF4_CLASSIC":
raise ValueError(
"h5netcdf >= 1.7.0 is required to save output in NETCDF4_CLASSIC format."
)

if lock is None:
if mode == "r":
@@ -223,7 +235,15 @@
else PickleableFileManager
)
manager = manager_cls(h5netcdf.File, filename, mode=mode, kwargs=kwargs)
return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)

return cls(
manager,
group=group,
format=format,
mode=mode,
lock=lock,
autoclose=autoclose,
)

def _acquire(self, needs_lock=True):
with self._manager.acquire_context(needs_lock) as root:
@@ -320,10 +340,15 @@ def set_dimension(self, name, length, is_unlimited=False):
self.ds.dimensions[name] = length

def set_attribute(self, key, value):
if self.format == "NETCDF4_CLASSIC":
value = encode_nc3_attr_value(value)
self.ds.attrs[key] = value

def encode_variable(self, variable, name=None):
return _encode_nc4_variable(variable, name=name)
if self.format == "NETCDF4_CLASSIC":
return encode_nc3_variable(variable, name=name)
else:
return _encode_nc4_variable(variable, name=name)

def prepare_variable(
self, name, variable, check_encoding=False, unlimited_dims=None
@@ -332,7 +357,9 @@ def prepare_variable(

_ensure_no_forward_slash_in_name(name)
attrs = variable.attrs.copy()
dtype = _get_datatype(variable, raise_on_invalid_encoding=check_encoding)
dtype = _get_datatype(
variable, nc_format=self.format, raise_on_invalid_encoding=check_encoding
)

fillvalue = attrs.pop("_FillValue", None)

@@ -394,6 +421,8 @@ def prepare_variable(
nc4_var = self.ds[name]

for k, v in attrs.items():
if self.format == "NETCDF4_CLASSIC":
v = encode_nc3_attr_value(v)
nc4_var.attrs[k] = v

target = H5NetCDFArrayWrapper(name, self)
@@ -484,7 +513,7 @@ def open_dataset(
drop_variables: str | Iterable[str] | None = None,
use_cftime=None,
decode_timedelta=None,
format=None,
format="NETCDF4",
group=None,
lock=None,
invalid_netcdf=None,
@@ -544,7 +573,7 @@ def open_datatree(
drop_variables: str | Iterable[str] | None = None,
use_cftime=None,
decode_timedelta=None,
format=None,
format="NETCDF4",
group: str | None = None,
lock=None,
invalid_netcdf=None,
@@ -587,7 +616,7 @@ def open_groups_as_dict(
drop_variables: str | Iterable[str] | None = None,
use_cftime=None,
decode_timedelta=None,
format=None,
format="NETCDF4",
group: str | None = None,
lock=None,
invalid_netcdf=None,
4 changes: 4 additions & 0 deletions xarray/tests/__init__.py
@@ -233,6 +233,10 @@ def _importorskip_h5netcdf_ros3(has_h5netcdf: bool):
"h5netcdf", "1.4.0.dev"
)

has_h5netcdf_1_7_0_or_above, requires_h5netcdf_1_7_0_or_above = _importorskip(
"h5netcdf", "1.7.0.dev"
)

has_netCDF4_1_7_0_or_above, requires_netCDF4_1_7_0_or_above = _importorskip(
"netCDF4", "1.7.0"
)
50 changes: 50 additions & 0 deletions xarray/tests/test_backends.py
@@ -90,6 +90,7 @@
requires_fsspec,
requires_h5netcdf,
requires_h5netcdf_1_4_0_or_above,
requires_h5netcdf_1_7_0_or_above,
requires_h5netcdf_or_netCDF4,
requires_h5netcdf_ros3,
requires_iris,
@@ -461,6 +462,7 @@ def roundtrip(
save_kwargs = {}
if open_kwargs is None:
open_kwargs = {}

with create_tmp_file(allow_cleanup_failure=allow_cleanup_failure) as path:
self.save(data, path, **save_kwargs)
with self.open(path, **open_kwargs) as ds:
@@ -4736,6 +4738,54 @@ def create_store(self):
) as store:
yield store

@requires_h5netcdf
def test_string_attributes_stored_as_char(self, tmp_path):
import h5netcdf

original = Dataset(attrs={"foo": "bar"})
store_path = tmp_path / "tmp.nc"
original.to_netcdf(store_path, engine=self.engine, format=self.file_format)
with h5netcdf.File(store_path, "r") as ds:
# Check that the attribute is stored as a char array
assert ds._h5file.attrs["foo"].dtype == np.dtype("S3")
Comment on lines +4742 to +4750
Member

NumPy's S dtype actually corresponds to bytes, not str. I don't think we want to use it for storing attributes in general.

Contributor Author

Using fixed width chars replicates the behavior of the netCDF4 backend for the CLASSIC format. Again, this has to do with the NC_CHAR vs NC_STRING formats.

Sticking as close as possible to netCDF4 output increases my confidence that the h5netcdf outputs will be compatible with 3rd party software expecting the CLASSIC format.
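
For readers following the NC_CHAR vs NC_STRING point, here is a rough sketch of the conversion path the patch takes. It leans on the behaviour of xarray's internal `encode_nc3_attr_value` helper as wired up in the diff above, so treat it as illustrative rather than as part of the PR:

```python
import numpy as np
from xarray.backends.netcdf3 import encode_nc3_attr_value

# In NETCDF4_CLASSIC mode, set_attribute() routes values through the
# netCDF-3 encoder, which turns a Python str into UTF-8 bytes.
encoded = encode_nc3_attr_value("bar")  # expected: b"bar"

# h5netcdf/h5py then store those bytes as a fixed-width char array
# (dtype "S3"), i.e. the NC_CHAR representation that CLASSIC-format
# consumers expect and that test_string_attributes_stored_as_char asserts.
assert np.asarray(encoded).dtype == np.dtype("S3")
```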



@requires_h5netcdf_1_7_0_or_above
class TestNetCDF4ClassicViaH5NetCDFData(TestNetCDF4ClassicViaNetCDF4Data):
engine: T_NetcdfEngine = "h5netcdf"
file_format: T_NetcdfTypes = "NETCDF4_CLASSIC"

@contextlib.contextmanager
def create_store(self):
with create_tmp_file() as tmp_file:
with backends.H5NetCDFStore.open(
tmp_file, mode="w", format="NETCDF4_CLASSIC"
) as store:
yield store

@requires_netCDF4
def test_cross_engine_read_write_netcdf4(self) -> None:
# Drop dim3, because its labels include strings. These appear to be
# not properly read with python-netCDF4, which converts them into
# unicode instead of leaving them as bytes.
data = create_test_data().drop_vars("dim3")
data.attrs["foo"] = "bar"
valid_engines: list[T_NetcdfEngine] = ["netcdf4", "h5netcdf"]
for write_engine in valid_engines:
with create_tmp_file() as tmp_file:
data.to_netcdf(tmp_file, engine=write_engine, format=self.file_format)
for read_engine in valid_engines:
with open_dataset(tmp_file, engine=read_engine) as actual:
assert_identical(data, actual)

def test_group_fails(self):
# Check writing group data fails with CLASSIC format
original = create_test_data()
with pytest.raises(
ValueError, match=r"Cannot create sub-groups in `NETCDF4_CLASSIC` format."
):
original.to_netcdf(group="sub", format=self.file_format, engine=self.engine)


@requires_scipy_or_netCDF4
class TestGenericNetCDFData(NetCDF3Only, CFEncodedBase):