Skip to content

Commit b3f5db3

Browse files
dcherian and claude authored
Use ._data in Variable._replace (#10969)
Co-authored-by: Claude <[email protected]>
1 parent 9bee765 commit b3f5db3

File tree

5 files changed

+33
-8
lines changed

5 files changed

+33
-8
lines changed

xarray/coding/times.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,9 +1379,9 @@ def __init__(
13791379
self.time_unit = time_unit
13801380

13811381
def encode(self, variable: Variable, name: T_Name = None) -> Variable:
1382-
if np.issubdtype(
1383-
variable.data.dtype, np.datetime64
1384-
) or contains_cftime_datetimes(variable):
1382+
if np.issubdtype(variable.dtype, np.datetime64) or contains_cftime_datetimes(
1383+
variable
1384+
):
13851385
dims, data, attrs, encoding = unpack_for_encoding(variable)
13861386

13871387
units = encoding.pop("units", None)
@@ -1499,7 +1499,7 @@ def __init__(
14991499
self._emit_decode_timedelta_future_warning = False
15001500

15011501
def encode(self, variable: Variable, name: T_Name = None) -> Variable:
1502-
if np.issubdtype(variable.data.dtype, np.timedelta64):
1502+
if np.issubdtype(variable.dtype, np.timedelta64):
15031503
dims, data, attrs, encoding = unpack_for_encoding(variable)
15041504
dtype = encoding.get("dtype", None)
15051505
units = encoding.pop("units", None)

xarray/core/variable.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -972,7 +972,7 @@ def _replace(
972972
if dims is _default:
973973
dims = copy.copy(self._dims)
974974
if data is _default:
975-
data = copy.copy(self.data)
975+
data = copy.copy(self._data)
976976
if attrs is _default:
977977
attrs = copy.copy(self._attrs)
978978
if encoding is _default:
@@ -1228,7 +1228,7 @@ def _pad_options_dim_to_index(
12281228
if fill_with_shape:
12291229
return [
12301230
pad_option.get(d, (n, n))
1231-
for d, n in zip(self.dims, self.data.shape, strict=True)
1231+
for d, n in zip(self.dims, self.shape, strict=True)
12321232
]
12331233
return [pad_option.get(d, (0, 0)) for d in self.dims]
12341234

@@ -1304,7 +1304,7 @@ def pad(
13041304

13051305
# workaround for bug in Dask's default value of stat_length https://github.com/dask/dask/issues/5303
13061306
if stat_length is None and mode in ["maximum", "mean", "median", "minimum"]:
1307-
stat_length = [(n, n) for n in self.data.shape] # type: ignore[assignment]
1307+
stat_length = [(n, n) for n in self.shape] # type: ignore[assignment]
13081308

13091309
pad_width_by_index = self._pad_options_dim_to_index(pad_width)
13101310

@@ -1469,14 +1469,15 @@ def set_dims(self, dim, shape=None):
14691469
if self.dims == expanded_dims:
14701470
# don't use broadcast_to unless necessary so the result remains
14711471
# writeable if possible
1472-
expanded_data = self.data
1472+
expanded_data = self._data
14731473
elif shape is None or all(
14741474
s == 1 for s, e in zip(shape, dim, strict=True) if e not in self_dims
14751475
):
14761476
# "Trivial" broadcasting, i.e. simply inserting a new dimension
14771477
# This is typically easier for duck arrays to implement
14781478
# than the full "broadcast_to" semantics
14791479
indexer = (None,) * (len(expanded_dims) - self.ndim) + (...,)
1480+
# TODO: switch this to ._data once we teach ExplicitlyIndexed arrays to handle indexers with None.
14801481
expanded_data = self.data[indexer]
14811482
else: # elif shape is not None:
14821483
dims_map = dict(zip(dim, shape, strict=True))

xarray/tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
DuckArrayWrapper,
3232
FirstElementAccessibleArray,
3333
InaccessibleArray,
34+
IndexableArray,
3435
UnexpectedDataAccess,
3536
)
3637

xarray/tests/arrays.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ def __getitem__(self, key):
4141
return self.array[tuple_idxr]
4242

4343

44+
class IndexableArray(InaccessibleArray):
45+
"""An InaccessibleArray subclass that supports indexing."""
46+
47+
def __getitem__(self, key):
48+
return type(self)(self.array[key])
49+
50+
def transpose(self, axes):
51+
return type(self)(self.array.transpose(axes))
52+
53+
4454
class DuckArrayWrapper(utils.NDArrayMixin):
4555
"""Array-like that prevents casting to array.
4656
Modeled after cupy."""

xarray/tests/test_variable.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from xarray.core.variable import as_compatible_data, as_variable
3333
from xarray.namedarray.pycompat import array_type
3434
from xarray.tests import (
35+
IndexableArray,
3536
assert_allclose,
3637
assert_array_equal,
3738
assert_equal,
@@ -3255,3 +3256,15 @@ def test_timedelta_conversion(values, unit) -> None:
32553256
dims = ["time"] if isinstance(values, np.ndarray | pd.Index) else []
32563257
var = Variable(dims, values)
32573258
assert var.dtype == np.dtype(f"timedelta64[{unit}]")
3259+
3260+
3261+
def test_explicitly_indexed_array_preserved() -> None:
3262+
"""Test that methods using ._data preserve ExplicitlyIndexed arrays.
3263+
3264+
Regression test for methods that should use ._data instead of .data
3265+
to avoid loading lazy arrays into memory.
3266+
"""
3267+
arr = IndexableArray(np.array([1, 2, 3]))
3268+
var = Variable(["x"], arr)
3269+
result = var.drop_encoding()
3270+
assert isinstance(result._data, indexing.ExplicitlyIndexed)

0 commit comments

Comments
 (0)