Draft
Changes from all commits
8 changes: 4 additions & 4 deletions lib/iris/fileformats/_nc_load_rules/helpers.py
@@ -708,13 +708,13 @@ def build_and_add_global_attributes(engine: Engine):
             ),
         )
         if problem is not None:
-            stack_notes = problem.stack_trace.__notes__
+            stack_notes = problem.stack_trace.__notes__  # type: ignore[attr-defined]
             if stack_notes is None:
                 stack_notes = []
             stack_notes.append(
                 f"Skipping disallowed global attribute '{attr_name}' (see above error)"
             )
-            problem.stack_trace.__notes__ = stack_notes
+            problem.stack_trace.__notes__ = stack_notes  # type: ignore[attr-defined]
 
 
 ################################################################################
@@ -1536,14 +1536,14 @@ def build_and_add_dimension_coordinate(
     )
     if problem is not None:
         coord_var_name = str(cf_coord_var.cf_name)
-        stack_notes = problem.stack_trace.__notes__
+        stack_notes = problem.stack_trace.__notes__  # type: ignore[attr-defined]
         if stack_notes is None:
             stack_notes = []
         stack_notes.append(
             f"Failed to create {coord_var_name} dimension coordinate:\n"
             f"Gracefully creating {coord_var_name!r} auxiliary coordinate instead."
         )
-        problem.stack_trace.__notes__ = stack_notes
+        problem.stack_trace.__notes__ = stack_notes  # type: ignore[attr-defined]
         problem.handled = True
 
     _ = _add_or_capture(
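For context on the two hunks above: problem.stack_trace is a traceback.TracebackException (the new test file below calls .format() on it), and __notes__ is the PEP 678 notes list, which .format() renders after the traceback text but which typeshed does not declare, hence the new type: ignore[attr-defined] comments. A minimal sketch of the mechanism, assuming Python 3.11+ (names and the attribute string are illustrative, not PR code):

import traceback

try:
    raise ValueError("disallowed attribute")  # stand-in for a captured load failure
except ValueError as exc:
    tb_exc = traceback.TracebackException.from_exception(exc)

# Append a note in the same guarded way as the hunks above:
notes = getattr(tb_exc, "__notes__", None)
if notes is None:
    notes = []
notes.append("Skipping disallowed global attribute 'conventions' (see above error)")
tb_exc.__notes__ = notes

print("".join(tb_exc.format()))  # the note appears after the traceback text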
55 changes: 49 additions & 6 deletions lib/iris/fileformats/cf.py
@@ -15,6 +15,7 @@
 """
 
 from abc import ABCMeta, abstractmethod
+import codecs
 from collections.abc import Iterable, MutableMapping
 import os
 import re
@@ -89,6 +90,11 @@ def __init__(self, name, data):
 
         self.cf_data = data
         """NetCDF4 Variable data instance."""
+        # Note: *always* disable char/string encoding+decoding translations,
+        # to avoid currently-known problems.
+        # See https://github.com/Unidata/netcdf4-python/issues/1440
+        data.set_auto_chartostring(False)
+        # ALSO NOTE: not stored, so NetCDFDataProxy must re-assert this when re-loading.
 
         """File source of the NetCDF content."""
         try:
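For context: with netCDF4-python's default auto-conversion, reading an "S1" char variable collapses the trailing string dimension and decodes via the variable's _Encoding attribute, which is where the linked issue apparently causes trouble. A rough sketch of reading the raw bytes instead (the file and variable names are hypothetical):

import netCDF4

ds = netCDF4.Dataset("chars.nc")  # hypothetical file containing a char variable "v"
var = ds.variables["v"]
var.set_auto_chartostring(False)  # keep the raw "S1" bytes, skip _Encoding handling
raw = var[:]
print(raw.dtype, raw.shape)  # e.g. |S1 (3, 64): one byte per element
ds.close()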
@@ -802,13 +808,49 @@ def cf_label_data(self, cf_data_var):
         label_data = self[:]
 
         if ma.isMaskedArray(label_data):
-            label_data = label_data.filled()
+            label_data = label_data.filled(b"\0")
 
+        default_encoding = "utf-8"
+        encoding = getattr(self, "_Encoding", None)
+        if encoding is None:
+            # utf-8 is a reasonable "safe" default, equivalent to 'ascii' for ascii data.
+            encoding = default_encoding
+        else:
+            try:
+                # Accept + normalise the naming of encodings.
+                encoding = codecs.lookup(encoding).name
+                # NOTE: if the encoding does not suit the data, errors can still occur:
+                # for example, _Encoding = "ascii" with non-ascii content.
+            except LookupError:
+                # Replace an invalid setting with the "safe"(ish) fallback.
+                encoding = default_encoding
+
+        def string_from_1d_bytearray(array, encoding):
+            r"""Join a 1-D "S1" byte array into a decoded, stripped string.
+
+            Needed because numpy bytes arrays behave very oddly: elements which
+            "should" contain a zero byte b'\0' instead appear to contain an
+            *empty* byte b'', so a plain b"".join() would *omit* any zero bytes.
+            """
+            assert array.dtype.kind == "S" and array.dtype.itemsize == 1
+            assert array.ndim == 1
+            bytelist = [b"\0" if byte == b"" else byte for byte in array]
+            joined = b"".join(bytelist)
+            assert len(joined) == array.shape[0]
+            return joined.decode(encoding=encoding).strip()
+
         # Determine whether we have a string-valued scalar label,
         # i.e. a character variable that only has one dimension (the length of the string).
         if self.ndim == 1:
-            label_string = b"".join(label_data).strip()
-            label_string = label_string.decode("utf8")
+            label_string = string_from_1d_bytearray(label_data, encoding)
             data = np.array([label_string])
         else:
             # Determine the index of the string dimension.
@@ -829,9 +871,10 @@ def cf_label_data(self, cf_data_var):
                 else:
                     label_index = index + (slice(None, None),)
 
-                label_string = b"".join(label_data[label_index]).strip()
-                label_string = label_string.decode("utf8")
-                data[index] = label_string
+                label_string = string_from_1d_bytearray(
+                    label_data[label_index], encoding
+                )
+                data[index] = label_string.strip()
 
         return data

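Two behaviours handled by the new code above, illustrated as standalone sketches (not PR code). codecs.lookup() normalises encoding aliases and raises LookupError for unknown names, which triggers the utf-8 fallback; and numpy "S1" arrays report a stored zero byte as an empty byte, which is why string_from_1d_bytearray re-pads before joining:

import codecs
import numpy as np

# Alias normalisation, and the LookupError fallback path:
print(codecs.lookup("UTF8").name)  # prints "utf-8"
try:
    codecs.lookup("not-a-codec")
except LookupError:
    print("unknown encoding: fall back to utf-8")

# The numpy oddity: a zero byte reads back as an empty byte, so a
# plain join silently drops it and shifts the following characters.
arr = np.array([b"a", b"\0", b"b"], dtype="S1")
print(arr[1])  # b'' : the zero byte is not preserved
print(b"".join(arr))  # b'ab' : two bytes where there should be three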
5 changes: 5 additions & 0 deletions lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -336,6 +336,11 @@ def __getitem__(self, keys):
         dataset = netCDF4.Dataset(self.path)
         try:
             variable = dataset.variables[self.variable_name]
+            # ALWAYS disable char/string encoding+decoding, to avoid
+            # currently-known problems.
+            # See https://github.com/Unidata/netcdf4-python/issues/1440
+            variable.set_auto_chartostring(False)
+
             # Get the NetCDF variable data and slice.
             var = variable[keys]
         finally:
6 changes: 3 additions & 3 deletions lib/iris/fileformats/netcdf/saver.py
@@ -990,12 +990,12 @@ def _add_aux_coords(
         ]
 
         # Include any relevant mesh location coordinates.
-        mesh: MeshXY | None = getattr(cube, "mesh")
-        mesh_location: str | None = getattr(cube, "location")
+        mesh: MeshXY | None = getattr(cube, "mesh")  # type: ignore[annotation-unchecked]
+        mesh_location: str | None = getattr(cube, "location")  # type: ignore[annotation-unchecked]
         if mesh and mesh_location:
             location_coords: MeshNodeCoords | MeshEdgeCoords | MeshFaceCoords = getattr(
                 mesh, f"{mesh_location}_coords"
-            )
+            )  # type: ignore[annotation-unchecked]
             coords_to_add.extend(list(location_coords))
 
         return self._add_inner_related_vars(
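For reference, mypy's annotation-unchecked code, which the new ignores above target: when a function signature carries no annotations, mypy does not type-check the body, and it emits a note for any annotated locals inside it. A hedged illustration (the function is hypothetical, not iris code):

def load_mesh(cube):  # unannotated signature, so the body is not type-checked
    # mypy note: "By default the bodies of untyped functions are not checked"
    # with error code [annotation-unchecked]
    mesh: object | None = getattr(cube, "mesh")
    return mesh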
113 changes: 113 additions & 0 deletions lib/iris/tests/integration/netcdf/test_chararrays.py
@@ -0,0 +1,113 @@
import netCDF4 as nc
import numpy as np
import pytest

import iris

NX, N_STRLEN = 3, 64
TEST_STRINGS = ["Münster", "London", "Amsterdam"]
TEST_COORD_VALS = ["bun", "éclair", "sandwich"]


def convert_chararray(string_array_1d, maxlen, encoding="utf-8"):
    """Encode each string and NUL-pad it, giving an (n, maxlen) "S1" char array."""
bbytes = [text.encode(encoding) for text in string_array_1d]
pad = b"\0" * maxlen
bbytes = [(x + pad)[:maxlen] for x in bbytes]
chararray = np.array([[bb[i : i + 1] for i in range(maxlen)] for bb in bbytes])
return chararray


# Toggle to exercise loading with/without the auxiliary string coordinate.
INCLUDE_COORD = True


def make_testfile(filepath, chararray, coordarray, encoding_str=None):
with nc.Dataset(filepath, "w") as ds:
ds.createDimension("x", NX)
ds.createDimension("nstr", N_STRLEN)
vx = ds.createVariable("x", int, dimensions=("x",))
vx[:] = np.arange(NX)
if INCLUDE_COORD:
ds.createDimension("nstr2", N_STRLEN)
v_co = ds.createVariable(
"v_co",
"S1",
dimensions=(
"x",
"nstr2",
),
)
v_co[:] = coordarray
if encoding_str is not None:
v_co._Encoding = encoding_str
v = ds.createVariable(
"v",
"S1",
dimensions=(
"x",
"nstr",
),
)
v[:] = chararray
if encoding_str is not None:
v._Encoding = encoding_str
if INCLUDE_COORD:
v.coordinates = "v_co"


def show_result(filepath):
    # NOTE: pp_utils is a local debugging helper, not part of iris.
    from pp_utils import ncdump

print(f"File {filepath}")
print("NCDUMP:")
ncdump(filepath, "")
print("\nAs iris cube..")
try:
cube = iris.load_cube(filepath)
print(cube)
if iris.loading.LOAD_PROBLEMS._problems:
print(iris.loading.LOAD_PROBLEMS)
print(
"\n".join(iris.loading.LOAD_PROBLEMS._problems[0].stack_trace.format())
)
print("-data-")
print(repr(cube.data))
if INCLUDE_COORD:
print("-coord data-")
try:
print(repr(cube.coord("v_co").points))
except Exception as err2:
print(repr(err2))
except UnicodeDecodeError as err:
print(repr(err))


ENCODINGS = ("utf-8", "ascii", "utf-8")


@pytest.mark.parametrize("encoding", ENCODINGS)
def test_encodings(encoding, tmp_path):
    print(f"\n=========\nTesting encoding: {encoding}")
    filepath = str(tmp_path / f"tmp_{encoding}.nc")
    # Content is encoded as utf-8 unless the target encoding is utf-32.
    do_as = encoding if encoding == "utf-32" else "utf-8"
    chararray = convert_chararray(TEST_STRINGS, N_STRLEN, encoding=do_as)
    coordarray = convert_chararray(TEST_COORD_VALS, N_STRLEN, encoding=do_as)
    make_testfile(filepath, chararray, coordarray, encoding_str=encoding)
    show_result(filepath)