Skip to content

Commit 51664a7

Browse files
ilan-gold and LDeakin authored
chore: handle new zarr dtype API (#100)
* (chore): handle new zarr dtype API * (fix): dep in pyproject toml for zarr from main * (fix): use local store for v2 * fix: don't use `default_filters`/`default_compressor` * fix: clean up/slim down tests * Update pyproject.toml --------- Co-authored-by: Lachlan Deakin <[email protected]>
1 parent 14eb5d4 commit 51664a7

File tree

10 files changed

+187
-442
lines changed

10 files changed

+187
-442
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ classifiers = [
2020
dynamic = ["version"]
2121
dependencies = [
2222
"numpy>=1.24",
23-
"zarr>=3.0.3,<3.1",
23+
"zarr>=3.1",
2424
]
2525

2626
[dependency-groups]

python/zarrs/pipeline.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from zarr.core.chunk_grids import ChunkGrid
2222
from zarr.core.common import ChunkCoords
2323
from zarr.core.indexing import SelectorTuple
24+
from zarr.dtype import ZDType
2425

2526
from ._internal import CodecPipelineImpl, codec_metadata_v2_to_v3
2627
from .utils import (
@@ -134,7 +135,7 @@ def __iter__(self) -> Iterator[Codec]:
134135
yield from self.codecs
135136

136137
def validate(
137-
self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid
138+
self, *, shape: ChunkCoords, dtype: ZDType, chunk_grid: ChunkGrid
138139
) -> None:
139140
raise NotImplementedError("validate")
140141

@@ -236,7 +237,7 @@ def _raise_error_on_unsupported_batch_dtype(
236237
# https://github.com/LDeakin/zarrs/blob/0532fe983b7b42b59dbf84e50a2fe5e6f7bad4ce/zarrs_metadata/src/v2_to_v3.rs#L289-L293 for VSUMm
237238
# Further, our pipeline does not support variable-length objects due to limitations on decode_into, so object/np.dtypes.StringDType is also out
238239
if any(
239-
info.dtype.kind in {"V", "S", "U", "M", "m", "O", "T"}
240+
info.dtype.to_native_dtype().kind in {"V", "S", "U", "M", "m", "O", "T"}
240241
for (_, info, _, _, _) in batch_info
241242
):
242243
raise UnsupportedDataTypeError()

python/zarrs/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import numpy as np
99
from zarr.core.array_spec import ArraySpec
1010
from zarr.core.indexing import SelectorTuple, is_integer
11-
from zarr.core.metadata.v2 import _default_fill_value
1211

1312
from zarrs._internal import Basic, WithSubset
1413

@@ -17,6 +16,7 @@
1716
from types import EllipsisType
1817

1918
from zarr.abc.store import ByteGetter, ByteSetter
19+
from zarr.dtype import ZDType
2020

2121

2222
# adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
@@ -139,9 +139,9 @@ def get_shape_for_selector(
139139
return resulting_shape_from_index(shape, selector_tuple, drop_axes, pad=pad)
140140

141141

142-
def get_implicit_fill_value(dtype: np.dtype, fill_value: Any) -> Any:
142+
def get_implicit_fill_value(dtype: ZDType, fill_value: Any) -> Any:
143143
if fill_value is None:
144-
fill_value = _default_fill_value(dtype)
144+
fill_value = dtype.default_scalar()
145145
return fill_value
146146

147147

src/chunk_item.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ impl Basic {
6868
let chunk_shape = chunk_spec.getattr("shape")?.extract()?;
6969
let mut dtype: String = chunk_spec
7070
.getattr("dtype")?
71+
.call_method0("to_native_dtype")?
7172
.call_method0("__str__")?
7273
.extract()?;
7374
if dtype == "object" {

tests/test_blosc.py

Lines changed: 0 additions & 57 deletions
This file was deleted.

tests/test_codecs.py

Lines changed: 1 addition & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import json
43
from dataclasses import dataclass
54
from typing import TYPE_CHECKING
65

@@ -13,14 +12,14 @@
1312
TransposeCodec,
1413
)
1514
from zarr.core.buffer import default_buffer_prototype
16-
from zarr.core.indexing import Selection, morton_order_iter
1715
from zarr.storage import StorePath
1816

1917
if TYPE_CHECKING:
2018
from zarr.abc.codec import Codec
2119
from zarr.abc.store import Store
2220
from zarr.core.buffer.core import NDArrayLike
2321
from zarr.core.common import MemoryOrder
22+
from zarr.core.indexing import Selection
2423

2524

2625
@dataclass(frozen=True)
@@ -165,51 +164,6 @@ def test_order_implicit(
165164
assert read_data.flags["C_CONTIGUOUS"]
166165

167166

168-
def test_open(store: Store) -> None:
169-
spath = StorePath(store)
170-
a = Array.create(
171-
spath,
172-
shape=(16, 16),
173-
chunk_shape=(16, 16),
174-
dtype="int32",
175-
fill_value=0,
176-
)
177-
b = Array.open(spath)
178-
assert a.metadata == b.metadata
179-
180-
181-
def test_morton() -> None:
182-
assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)]
183-
assert list(morton_order_iter((2, 2, 2))) == [
184-
(0, 0, 0),
185-
(1, 0, 0),
186-
(0, 1, 0),
187-
(1, 1, 0),
188-
(0, 0, 1),
189-
(1, 0, 1),
190-
(0, 1, 1),
191-
(1, 1, 1),
192-
]
193-
assert list(morton_order_iter((2, 2, 2, 2))) == [
194-
(0, 0, 0, 0),
195-
(1, 0, 0, 0),
196-
(0, 1, 0, 0),
197-
(1, 1, 0, 0),
198-
(0, 0, 1, 0),
199-
(1, 0, 1, 0),
200-
(0, 1, 1, 0),
201-
(1, 1, 1, 0),
202-
(0, 0, 0, 1),
203-
(1, 0, 0, 1),
204-
(0, 1, 0, 1),
205-
(1, 1, 0, 1),
206-
(0, 0, 1, 1),
207-
(1, 0, 1, 1),
208-
(0, 1, 1, 1),
209-
(1, 1, 1, 1),
210-
]
211-
212-
213167
def test_write_partial_chunks(store: Store) -> None:
214168
data = np.arange(0, 256, dtype="uint16").reshape((16, 16))
215169
spath = StorePath(store)
@@ -241,41 +195,6 @@ async def test_delete_empty_chunks(store: Store) -> None:
241195
assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype()) is None
242196

243197

244-
async def test_dimension_names(store: Store) -> None:
245-
data = np.arange(0, 256, dtype="uint16").reshape((16, 16))
246-
path = "dimension_names"
247-
spath = StorePath(store, path)
248-
await AsyncArray.create(
249-
spath,
250-
shape=data.shape,
251-
chunk_shape=(16, 16),
252-
dtype=data.dtype,
253-
fill_value=0,
254-
dimension_names=("x", "y"),
255-
)
256-
257-
assert (await AsyncArray.open(spath)).metadata.dimension_names == (
258-
"x",
259-
"y",
260-
)
261-
path2 = "dimension_names2"
262-
spath2 = StorePath(store, path2)
263-
await AsyncArray.create(
264-
spath2,
265-
shape=data.shape,
266-
chunk_shape=(16, 16),
267-
dtype=data.dtype,
268-
fill_value=0,
269-
)
270-
271-
assert (await AsyncArray.open(spath2)).metadata.dimension_names is None
272-
zarr_json_buffer = await store.get(
273-
f"{path2}/zarr.json", prototype=default_buffer_prototype()
274-
)
275-
assert zarr_json_buffer is not None
276-
assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes())
277-
278-
279198
def test_invalid_metadata(store: Store) -> None:
280199
# LD: Disabled for `zarrs`. Including endianness for a single-byte data type is not invalid.
281200
# spath2 = StorePath(store, "invalid_endian")

tests/test_sharding.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import pickle
21
from typing import Any
32

43
import numpy as np
@@ -286,30 +285,6 @@ def test_nested_sharding(
286285
assert np.array_equal(data, read_data)
287286

288287

289-
def test_open_sharding(store: Store) -> None:
290-
path = "open_sharding"
291-
spath = StorePath(store, path)
292-
a = Array.create(
293-
spath,
294-
shape=(16, 16),
295-
chunk_shape=(16, 16),
296-
dtype="int32",
297-
fill_value=0,
298-
codecs=[
299-
ShardingCodec(
300-
chunk_shape=(8, 8),
301-
codecs=[
302-
TransposeCodec(order=order_from_dim("F", 2)),
303-
BytesCodec(),
304-
BloscCodec(),
305-
],
306-
)
307-
],
308-
)
309-
b = Array.open(spath)
310-
assert a.metadata == b.metadata
311-
312-
313288
def test_write_partial_sharded_chunks(store: Store) -> None:
314289
data = np.arange(0, 16 * 16, dtype="uint16").reshape((16, 16))
315290
spath = StorePath(store)
@@ -365,11 +340,6 @@ async def test_delete_empty_shards(store: Store) -> None:
365340
assert len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4
366341

367342

368-
def test_pickle() -> None:
369-
codec = ShardingCodec(chunk_shape=(8, 8))
370-
assert pickle.loads(pickle.dumps(codec)) == codec
371-
372-
373343
@pytest.mark.parametrize(
374344
"index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end]
375345
)

tests/test_transpose.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -84,21 +84,3 @@ def test_transpose_non_self_inverse(store: Store, order: list[int]) -> None:
8484
a[:, :] = data
8585
read_data = a[:, :]
8686
assert np.array_equal(data, read_data)
87-
88-
89-
def test_transpose_invalid(
90-
store: Store,
91-
) -> None:
92-
data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8))
93-
spath = StorePath(store, "transpose_invalid")
94-
for order in [(1, 0), (3, 2, 1), (3, 3, 1)]:
95-
with pytest.raises(ValueError, match=r".*order"):
96-
Array.create(
97-
spath,
98-
shape=data.shape,
99-
chunk_shape=(1, 32, 8),
100-
dtype=data.dtype,
101-
fill_value=0,
102-
chunk_key_encoding=("v2", "."),
103-
codecs=[TransposeCodec(order=order), BytesCodec()],
104-
)

0 commit comments

Comments
 (0)