From 5ab9154f0a83640a60e00000fe7f82e97af9ae02 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 8 May 2025 18:04:34 +0200 Subject: [PATCH 01/17] remove duplicate metadata parsing --- src/zarr/core/array.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index b0e8b03cd7..b6869ce077 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -137,7 +137,8 @@ def parse_array_metadata(data: Any) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data elif isinstance(data, dict): - if data["zarr_format"] == 3: + zarr_format = data.get("zarr_format") + if zarr_format == 3: meta_out = ArrayV3Metadata.from_dict(data) if len(meta_out.storage_transformers) > 0: msg = ( @@ -146,8 +147,10 @@ def parse_array_metadata(data: Any) -> ArrayMetadata: ) raise ValueError(msg) return meta_out - elif data["zarr_format"] == 2: + elif zarr_format == 2: return ArrayV2Metadata.from_dict(data) + else: + raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3") raise TypeError @@ -265,17 +268,6 @@ def __init__( store_path: StorePath, config: ArrayConfigLike | None = None, ) -> None: - if isinstance(metadata, dict): - zarr_format = metadata["zarr_format"] - # TODO: remove this when we extensively type the dict representation of metadata - _metadata = cast(dict[str, JSON], metadata) - if zarr_format == 2: - metadata = ArrayV2Metadata.from_dict(_metadata) - elif zarr_format == 3: - metadata = ArrayV3Metadata.from_dict(_metadata) - else: - raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3") - metadata_parsed = parse_array_metadata(metadata) config_parsed = parse_array_config(config) From 0a184454225c15aa310b7ebcdfc41d2635296160 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 8 May 2025 18:21:57 +0200 Subject: [PATCH 02/17] add test cases --- tests/test_api.py | 48 +++++++++++++++++++++++++++++---------------- tests/test_array.py | 11 +++++++---- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 9f03a1067a..f4abe86793 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -70,13 +70,19 @@ def test_create(memory_store: Store) -> None: # TODO: parametrize over everything this function takes @pytest.mark.parametrize("store", ["memory"], indirect=True) -def test_create_array(store: Store) -> None: +def test_create_array(store: Store, zarr_format: ZarrFormat) -> None: attrs: dict[str, JSON] = {"foo": 100} # explicit type annotation to avoid mypy error shape = (10, 10) path = "foo" data_val = 1 array_w = create_array( - store, name=path, shape=shape, attributes=attrs, chunks=shape, dtype="uint8" + store, + name=path, + shape=shape, + attributes=attrs, + chunks=shape, + dtype="uint8", + zarr_format=zarr_format, ) array_w[:] = data_val assert array_w.shape == shape @@ -85,18 +91,27 @@ def test_create_array(store: Store) -> None: @pytest.mark.parametrize("write_empty_chunks", [True, False]) -def test_write_empty_chunks_warns(write_empty_chunks: bool) -> None: +def test_write_empty_chunks_warns(write_empty_chunks: bool, zarr_format: ZarrFormat) -> None: """ Test that using the `write_empty_chunks` kwarg on array access will raise a warning. """ match = "The `write_empty_chunks` keyword argument .*" with pytest.warns(RuntimeWarning, match=match): _ = zarr.array( - data=np.arange(10), shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks + data=np.arange(10), + shape=(10,), + dtype="uint8", + write_empty_chunks=write_empty_chunks, + zarr_format=zarr_format, ) with pytest.warns(RuntimeWarning, match=match): - _ = zarr.create(shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks) + _ = zarr.create( + shape=(10,), + dtype="uint8", + write_empty_chunks=write_empty_chunks, + zarr_format=zarr_format, + ) @pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"]) @@ -113,18 +128,18 @@ def test_open_normalized_path( assert node.path == normalize_path(path) -async def test_open_array(memory_store: MemoryStore) -> None: +async def test_open_array(memory_store: MemoryStore, zarr_format: ZarrFormat) -> None: store = memory_store # open array, create if doesn't exist - z = open(store=store, shape=100) + z = open(store=store, shape=100, zarr_format=zarr_format) assert isinstance(z, Array) assert z.shape == (100,) # open array, overwrite # store._store_dict = {} store = MemoryStore() - z = open(store=store, shape=200) + z = open(store=store, shape=200, zarr_format=zarr_format) assert isinstance(z, Array) assert z.shape == (200,) @@ -138,7 +153,7 @@ async def test_open_array(memory_store: MemoryStore) -> None: # path not found with pytest.raises(FileNotFoundError): - open(store="doesnotexist", mode="r") + open(store="doesnotexist", mode="r", zarr_format=zarr_format) @pytest.mark.parametrize("store", ["memory"], indirect=True) @@ -161,9 +176,9 @@ async def test_open_group(memory_store: MemoryStore) -> None: assert "foo" in g # open group, overwrite - # g = open_group(store=store) - # assert isinstance(g, Group) - # assert "foo" not in g + g = open_group(store=store, mode="w") + assert isinstance(g, Group) + assert "foo" not in g # open group, read-only store_cls = type(store) @@ -306,7 +321,6 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: zarr.open(store=tmp_path, mode="w-") -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_array_order(zarr_format: ZarrFormat) -> None: arr = zarr.ones(shape=(2, 2), order=None, zarr_format=zarr_format) expected = zarr.config.get("array.order") @@ -322,7 +336,6 @@ def test_array_order(zarr_format: ZarrFormat) -> None: @pytest.mark.parametrize("order", ["C", "F"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_array_order_warns(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None: with pytest.warns(RuntimeWarning, match="The `order` keyword argument .*"): arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format) @@ -1135,13 +1148,14 @@ async def test_metadata_validation_error() -> None: ["local", "memory", "zip"], indirect=True, ) -def test_open_array_with_mode_r_plus(store: Store) -> None: +def test_open_array_with_mode_r_plus(store: Store, zarr_format: ZarrFormat) -> None: # 'r+' means read/write (must exist) with pytest.raises(FileNotFoundError): - zarr.open_array(store=store, mode="r+") - zarr.ones(store=store, shape=(3, 3)) + zarr.open_array(store=store, mode="r+", zarr_format=zarr_format) + zarr.ones(store=store, shape=(3, 3), zarr_format=zarr_format) z2 = zarr.open_array(store=store, mode="r+") assert isinstance(z2, Array) + assert z2.metadata.zarr_format == zarr_format result = z2[:] assert isinstance(result, NDArrayLike) assert (result == 1).all() diff --git a/tests/test_array.py b/tests/test_array.py index 4be9bbde43..fb2e64e1c8 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -227,10 +227,13 @@ def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str assert arr.fill_value.dtype == arr.dtype -def test_create_positional_args_deprecated() -> None: - store = MemoryStore() - with pytest.warns(FutureWarning, match="Pass"): - zarr.Array.create(store, (2, 2), dtype="f8") +async def test_create_deprecated() -> None: + with pytest.warns(DeprecationWarning): + with pytest.warns(FutureWarning, match="Pass"): + await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8") + with pytest.warns(DeprecationWarning): + with pytest.warns(FutureWarning, match="Pass"): + zarr.Array.create(MemoryStore(), (2, 2), dtype="f8") def test_selection_positional_args_deprecated() -> None: From 760fb422c1c06142244e60f62e8d2c620a3aaf95 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 8 May 2025 19:10:05 +0200 Subject: [PATCH 03/17] add test cases --- tests/test_api.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index f4abe86793..05aabbf28a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1106,13 +1106,16 @@ def test_open_falls_back_to_open_group() -> None: assert group.attrs == {"key": "value"} -async def test_open_falls_back_to_open_group_async() -> None: +async def test_open_falls_back_to_open_group_async(zarr_format: ZarrFormat) -> None: # https://github.com/zarr-developers/zarr-python/issues/2309 store = MemoryStore() - await zarr.api.asynchronous.open_group(store, attributes={"key": "value"}) + await zarr.api.asynchronous.open_group( + store, attributes={"key": "value"}, zarr_format=zarr_format + ) group = await zarr.api.asynchronous.open(store=store) assert isinstance(group, zarr.core.group.AsyncGroup) + assert group.metadata.zarr_format == zarr_format assert group.attrs == {"key": "value"} From 1989322140a3b467c184706cf283e59975aa221d Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 14:08:56 +0200 Subject: [PATCH 04/17] tests for different zarr_formats in test_storage_transformers --- tests/test_array.py | 50 ++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index fb2e64e1c8..be2d1fd3f0 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -324,24 +324,46 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> @pytest.mark.parametrize("store", ["memory"], indirect=True) -def test_storage_transformers(store: MemoryStore) -> None: +@pytest.mark.parametrize("zarr_format", [2, 3, "invalid"]) +def test_storage_transformers(store: MemoryStore, zarr_format) -> None: """ Test that providing an actual storage transformer produces a warning and otherwise passes through """ - metadata_dict: dict[str, JSON] = { - "zarr_format": 3, - "node_type": "array", - "shape": (10,), - "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": "uint8", - "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, - "codecs": (BytesCodec().to_dict(),), - "fill_value": 0, - "storage_transformers": ({"test": "should_raise"}), - } - match = "Arrays with storage transformers are not supported in zarr-python at this time." - with pytest.raises(ValueError, match=match): + if zarr_format == 3: + metadata_dict: dict[str, JSON] = { + "zarr_format": 3, + "node_type": "array", + "shape": (10,), + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, + "data_type": "uint8", + "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, + "codecs": (BytesCodec().to_dict(),), + "fill_value": 0, + "storage_transformers": ({"test": "should_raise"}), + } + else: + metadata_dict: dict[str, JSON] = { + "zarr_format": zarr_format, + "shape": (10,), + "chunks": (1,), + "dtype": "uint8", + "dimension_separator": ".", + "codecs": (BytesCodec().to_dict(),), + "fill_value": 0, + "order": "C", + "storage_transformers": ({"test": "should_raise"}), + } + if zarr_format == 3: + match = "Arrays with storage transformers are not supported in zarr-python at this time." + with pytest.raises(ValueError, match=match): + Array.from_dict(StorePath(store), data=metadata_dict) + elif zarr_format == 2: + # no warning Array.from_dict(StorePath(store), data=metadata_dict) + else: + match = f"Invalid zarr_format: {zarr_format}. Expected 2 or 3" + with pytest.raises(ValueError, match=match): + Array.from_dict(StorePath(store), data=metadata_dict) @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) From 01f540dc00df51f4fb6d6df1a014abf1b0756f55 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 14:15:47 +0200 Subject: [PATCH 05/17] tests for different zarr_formats in test_storage_transformers --- tests/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_array.py b/tests/test_array.py index be2d1fd3f0..abb3845aaa 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -230,7 +230,7 @@ def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str async def test_create_deprecated() -> None: with pytest.warns(DeprecationWarning): with pytest.warns(FutureWarning, match="Pass"): - await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8") + await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8", zarr_format=3) with pytest.warns(DeprecationWarning): with pytest.warns(FutureWarning, match="Pass"): zarr.Array.create(MemoryStore(), (2, 2), dtype="f8") From d6d0d35960b2a797ae67575b7d9949ef26ade25b Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 14:32:37 +0200 Subject: [PATCH 06/17] ignore mypy arg-type error for deprecation test --- tests/test_array.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index abb3845aaa..fdcfa99207 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -229,10 +229,10 @@ def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str async def test_create_deprecated() -> None: with pytest.warns(DeprecationWarning): - with pytest.warns(FutureWarning, match="Pass"): - await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8", zarr_format=3) + with pytest.warns(FutureWarning, match=re.escape("Pass shape=(2, 2) as keyword args")): + await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8") # type: ignore[arg-type] with pytest.warns(DeprecationWarning): - with pytest.warns(FutureWarning, match="Pass"): + with pytest.warns(FutureWarning, match=re.escape("Pass shape=(2, 2) as keyword args")): zarr.Array.create(MemoryStore(), (2, 2), dtype="f8") From 39cff00dcb206310950ce47aaffb6ab6c90934cf Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 14:37:29 +0200 Subject: [PATCH 07/17] fix typing in tests --- tests/test_array.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index fdcfa99207..f195d153e2 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -230,7 +230,7 @@ def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str async def test_create_deprecated() -> None: with pytest.warns(DeprecationWarning): with pytest.warns(FutureWarning, match=re.escape("Pass shape=(2, 2) as keyword args")): - await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8") # type: ignore[arg-type] + await zarr.AsyncArray.create(MemoryStore(), (2, 2), dtype="f8") # type: ignore[call-overload] with pytest.warns(DeprecationWarning): with pytest.warns(FutureWarning, match=re.escape("Pass shape=(2, 2) as keyword args")): zarr.Array.create(MemoryStore(), (2, 2), dtype="f8") @@ -325,12 +325,13 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("zarr_format", [2, 3, "invalid"]) -def test_storage_transformers(store: MemoryStore, zarr_format) -> None: +def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat) -> None: """ Test that providing an actual storage transformer produces a warning and otherwise passes through """ + metadata_dict: dict[str, JSON] if zarr_format == 3: - metadata_dict: dict[str, JSON] = { + metadata_dict = { "zarr_format": 3, "node_type": "array", "shape": (10,), @@ -342,7 +343,7 @@ def test_storage_transformers(store: MemoryStore, zarr_format) -> None: "storage_transformers": ({"test": "should_raise"}), } else: - metadata_dict: dict[str, JSON] = { + metadata_dict = { "zarr_format": zarr_format, "shape": (10,), "chunks": (1,), From c63a90ed9d1e791620bc8b4f3dd4a741450f6fbc Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 15:23:39 +0200 Subject: [PATCH 08/17] test_chunk_key_encoding --- tests/test_array.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index f195d153e2..9229a72049 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -51,7 +51,7 @@ if TYPE_CHECKING: from zarr.core.array_spec import ArrayConfigLike - from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.core.metadata.v2 import ArrayV2Metadata @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @@ -325,7 +325,7 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("zarr_format", [2, 3, "invalid"]) -def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat) -> None: +def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat | str) -> None: """ Test that providing an actual storage transformer produces a warning and otherwise passes through """ @@ -1131,6 +1131,40 @@ async def test_v3_chunk_encoding( assert arr.filters == filters_expected assert arr.compressors == compressors_expected + @staticmethod + @pytest.mark.parametrize("name", ["v2", "default", "invalid"]) + @pytest.mark.parametrize("separator", [".", "/"]) + async def test_chunk_key_encoding( + name: str, separator: str, zarr_format: ZarrFormat, store: MemoryStore + ) -> None: + chunk_key_encoding = {"name": name, "separator": separator} + error_msg = "" + if name == "invalid": + error_msg = "Unknown chunk key encoding." + if zarr_format == 2 and name == "default": + error_msg = "Invalid chunk key encoding. For Zarr format 2 arrays, the `name` field of the chunk key encoding must be 'v2'." + if error_msg: + with pytest.raises(ValueError, match=re.escape(error_msg)): + arr = await create_array( + store=store, + dtype="uint8", + shape=(10,), + chunks=(1,), + zarr_format=zarr_format, + chunk_key_encoding=chunk_key_encoding, + ) + else: + arr = await create_array( + store=store, + dtype="uint8", + shape=(10,), + chunks=(1,), + zarr_format=zarr_format, + chunk_key_encoding=chunk_key_encoding, + ) + if isinstance(arr.metadata, ArrayV2Metadata): + assert arr.metadata.dimension_separator == separator + @staticmethod @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) @pytest.mark.parametrize( From b3f313d24f3a64455552f81f0492833e045b9931 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 15:55:17 +0200 Subject: [PATCH 09/17] test_invalid_v2_arguments --- tests/test_array.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/test_array.py b/tests/test_array.py index 9229a72049..8e6a23dd5c 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -41,6 +41,7 @@ from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.buffer.cpu import NDBuffer from zarr.core.chunk_grids import _auto_partition +from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv @@ -1137,7 +1138,7 @@ async def test_v3_chunk_encoding( async def test_chunk_key_encoding( name: str, separator: str, zarr_format: ZarrFormat, store: MemoryStore ) -> None: - chunk_key_encoding = {"name": name, "separator": separator} + chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator) # type: ignore[arg-type] error_msg = "" if name == "invalid": error_msg = "Unknown chunk key encoding." @@ -1165,6 +1166,22 @@ async def test_chunk_key_encoding( if isinstance(arr.metadata, ArrayV2Metadata): assert arr.metadata.dimension_separator == separator + @staticmethod + @pytest.mark.parametrize( + ("kwargs", "error_msg"), + [ + ({"serializer": "bytes"}, "Zarr format 2 arrays do not support `serializer`."), + ({"dimension_names": ["test"]}, "Zarr format 2 arrays do not support dimension names."), + ], + ) + async def test_invalid_v2_arguments( + kwargs: dict[str, Any], error_msg: str, store: MemoryStore + ) -> None: + with pytest.raises(ValueError, match=re.escape(error_msg)): + await create_array( + store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs + ) + @staticmethod @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) @pytest.mark.parametrize( From 74e4d9d228c2ede5028f699bf188ca0c162b05bf Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 16:23:57 +0200 Subject: [PATCH 10/17] test_array_repr --- tests/test_array.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/test_array.py b/tests/test_array.py index 8e6a23dd5c..e18c1ec83a 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1138,7 +1138,7 @@ async def test_v3_chunk_encoding( async def test_chunk_key_encoding( name: str, separator: str, zarr_format: ZarrFormat, store: MemoryStore ) -> None: - chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator) # type: ignore[arg-type] + chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator) # type: ignore[typeddict-item"] error_msg = "" if name == "invalid": error_msg = "Unknown chunk key encoding." @@ -1661,3 +1661,14 @@ async def test_sharding_coordinate_selection() -> None: result = arr[1, [0, 1]] # type: ignore[index] assert isinstance(result, NDArrayLike) assert (result == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() + +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_array_repr(store: Store) -> None: + shape = (2, 3, 4) + dtype = "uint8" + arr = zarr.create_array( + store, + shape=shape, + dtype=dtype + ) + assert str(arr) == f"" From fc64cbffb008dea72d1067fe44def4c0e0d965e0 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 16:46:47 +0200 Subject: [PATCH 11/17] type annotation for parse_array_metadata --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index c8fafa38e8..d2bcbc7d3a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -134,7 +134,7 @@ logger = getLogger(__name__) -def parse_array_metadata(data: Any) -> ArrayMetadata: +def parse_array_metadata(data: ArrayMetadata | ArrayMetadataDict) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data elif isinstance(data, dict): From 956159ec582c055fbe6ed900b0984b7190439aec Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 9 May 2025 17:21:24 +0200 Subject: [PATCH 12/17] test_v2_and_v3_exist_at_same_path --- src/zarr/core/array.py | 7 +++---- tests/test_api.py | 10 ++++++++++ tests/test_array.py | 7 ++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index d2bcbc7d3a..9f81ebe3af 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -134,7 +134,7 @@ logger = getLogger(__name__) -def parse_array_metadata(data: ArrayMetadata | ArrayMetadataDict) -> ArrayMetadata: +def parse_array_metadata(data: ArrayMetadata | dict[str, JSON]) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data elif isinstance(data, dict): @@ -152,7 +152,7 @@ def parse_array_metadata(data: ArrayMetadata | ArrayMetadataDict) -> ArrayMetada return ArrayV2Metadata.from_dict(data) else: raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3") - raise TypeError + raise TypeError # pragma: no cover def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline: @@ -161,8 +161,7 @@ def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline: elif isinstance(metadata, ArrayV2Metadata): v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor) return get_pipeline_class().from_codecs([v2_codec]) - else: - raise TypeError + raise TypeError # pragma: no cover async def get_array_metadata( diff --git a/tests/test_api.py b/tests/test_api.py index 05aabbf28a..467ef9e6b6 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -156,6 +157,15 @@ async def test_open_array(memory_store: MemoryStore, zarr_format: ZarrFormat) -> open(store="doesnotexist", mode="r", zarr_format=zarr_format) +@pytest.mark.parametrize("store", ["memory", "local", "zip"], indirect=True) +def test_v2_and_v3_exist_at_same_path(store: Store) -> None: + zarr.create_array(store, shape=(10,), dtype="uint8", zarr_format=3) + zarr.create_array(store, shape=(10,), dtype="uint8", zarr_format=2) + msg = f"Both zarr.json (Zarr format 3) and .zarray (Zarr format 2) metadata objects exist at {store}. Zarr v3 will be used." + with pytest.warns(UserWarning, match=re.escape(msg)): + zarr.open(store=store, mode="r") + + @pytest.mark.parametrize("store", ["memory"], indirect=True) async def test_create_group(store: Store, zarr_format: ZarrFormat) -> None: attrs = {"foo": 100} diff --git a/tests/test_array.py b/tests/test_array.py index e18c1ec83a..108911cd3f 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1662,13 +1662,10 @@ async def test_sharding_coordinate_selection() -> None: assert isinstance(result, NDArrayLike) assert (result == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() + @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_array_repr(store: Store) -> None: shape = (2, 3, 4) dtype = "uint8" - arr = zarr.create_array( - store, - shape=shape, - dtype=dtype - ) + arr = zarr.create_array(store, shape=shape, dtype=dtype) assert str(arr) == f"" From 788f9cc5e985fe749bccddda6bfab0d57f7434ae Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 13 May 2025 12:58:15 +0200 Subject: [PATCH 13/17] remove duplicate check for dimension_separator in v3 --- src/zarr/api/asynchronous.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index ac143f6dea..f362ced80b 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -1012,11 +1012,6 @@ async def create( warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2) if read_only is not None: warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2) - if dimension_separator is not None and zarr_format == 3: - raise ValueError( - "dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead" - ) - if order is not None: _warn_order_kwarg() if write_empty_chunks is not None: From 699837513fdb22900b36ff846b928fb835ac9e61 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 13 May 2025 12:59:52 +0200 Subject: [PATCH 14/17] tests for invalid arguments in creation --- src/zarr/core/array.py | 4 +++- tests/test_array.py | 43 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index d5fcc4bb65..76f39b6bd3 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -135,7 +135,9 @@ logger = getLogger(__name__) -def parse_array_metadata(data: ArrayMetadata | dict[str, JSON]) -> ArrayMetadata: +def parse_array_metadata( + data: ArrayMetadata | ArrayMetadataDict | dict[str, JSON], +) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data elif isinstance(data, dict): diff --git a/tests/test_array.py b/tests/test_array.py index 8e1a3753c5..35260bce59 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1137,9 +1137,9 @@ async def test_v3_chunk_encoding( @pytest.mark.parametrize("name", ["v2", "default", "invalid"]) @pytest.mark.parametrize("separator", [".", "/"]) async def test_chunk_key_encoding( - name: str, separator: str, zarr_format: ZarrFormat, store: MemoryStore + name: str, separator: Literal[".", "/"], zarr_format: ZarrFormat, store: MemoryStore ) -> None: - chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator) # type: ignore[typeddict-item"] + chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator) # type: ignore[typeddict-item] error_msg = "" if name == "invalid": error_msg = "Unknown chunk key encoding." @@ -1175,13 +1175,48 @@ async def test_chunk_key_encoding( ({"dimension_names": ["test"]}, "Zarr format 2 arrays do not support dimension names."), ], ) - async def test_invalid_v2_arguments( + async def test_create_array_invalid_v2_arguments( kwargs: dict[str, Any], error_msg: str, store: MemoryStore ) -> None: with pytest.raises(ValueError, match=re.escape(error_msg)): - await create_array( + await zarr.api.asynchronous.create_array( store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs ) + @staticmethod + @pytest.mark.parametrize( + ("kwargs", "error_msg"), + [ + ({"dimension_names": ["test"]}, "dimension_names cannot be used for arrays with zarr_format 2."), + ({"chunk_key_encoding": {"name": "default", "separator": "/"}}, "chunk_key_encoding cannot be used for arrays with zarr_format 2. Use dimension_separator instead."), + ({"codecs": "bytes"}, "codecs cannot be used for arrays with zarr_format 2. Use filters and compressor instead."), + ], + ) + async def test_create_invalid_v2_arguments( + kwargs: dict[str, Any], error_msg: str, store: MemoryStore + ) -> None: + with pytest.raises(ValueError, match=re.escape(error_msg)): + await zarr.api.asynchronous.create( + store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs + ) + + @staticmethod + @pytest.mark.parametrize( + ("kwargs", "error_msg"), + [ + ({"chunk_shape": (1,), "chunks": (2,)}, "Only one of chunk_shape or chunks can be provided."), + ({"dimension_separator": "/"}, "dimension_separator cannot be used for arrays with zarr_format 3. Use chunk_key_encoding instead."), + ({"filters": []}, "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead"), + ({"compressor": "blosc"}, "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead"), + ], + ) + async def test_invalid_v3_arguments( + kwargs: dict[str, Any], error_msg: str, store: MemoryStore + ) -> None: + kwargs.setdefault("chunks", (1,)) + with pytest.raises(ValueError, match=re.escape(error_msg)): + zarr.create( + store=store, dtype="uint8", shape=(10,), zarr_format=3, **kwargs + ) @staticmethod @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) From e5529a37bdf3c6debb71a4469826df470ea0d0a4 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 13 May 2025 13:00:50 +0200 Subject: [PATCH 15/17] format --- tests/test_array.py | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index 35260bce59..eb19f0e7f3 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1182,13 +1182,23 @@ async def test_create_array_invalid_v2_arguments( await zarr.api.asynchronous.create_array( store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs ) + @staticmethod @pytest.mark.parametrize( ("kwargs", "error_msg"), [ - ({"dimension_names": ["test"]}, "dimension_names cannot be used for arrays with zarr_format 2."), - ({"chunk_key_encoding": {"name": "default", "separator": "/"}}, "chunk_key_encoding cannot be used for arrays with zarr_format 2. Use dimension_separator instead."), - ({"codecs": "bytes"}, "codecs cannot be used for arrays with zarr_format 2. Use filters and compressor instead."), + ( + {"dimension_names": ["test"]}, + "dimension_names cannot be used for arrays with zarr_format 2.", + ), + ( + {"chunk_key_encoding": {"name": "default", "separator": "/"}}, + "chunk_key_encoding cannot be used for arrays with zarr_format 2. Use dimension_separator instead.", + ), + ( + {"codecs": "bytes"}, + "codecs cannot be used for arrays with zarr_format 2. Use filters and compressor instead.", + ), ], ) async def test_create_invalid_v2_arguments( @@ -1203,10 +1213,22 @@ async def test_create_invalid_v2_arguments( @pytest.mark.parametrize( ("kwargs", "error_msg"), [ - ({"chunk_shape": (1,), "chunks": (2,)}, "Only one of chunk_shape or chunks can be provided."), - ({"dimension_separator": "/"}, "dimension_separator cannot be used for arrays with zarr_format 3. Use chunk_key_encoding instead."), - ({"filters": []}, "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead"), - ({"compressor": "blosc"}, "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead"), + ( + {"chunk_shape": (1,), "chunks": (2,)}, + "Only one of chunk_shape or chunks can be provided.", + ), + ( + {"dimension_separator": "/"}, + "dimension_separator cannot be used for arrays with zarr_format 3. Use chunk_key_encoding instead.", + ), + ( + {"filters": []}, + "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead", + ), + ( + {"compressor": "blosc"}, + "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead", + ), ], ) async def test_invalid_v3_arguments( @@ -1214,9 +1236,7 @@ async def test_invalid_v3_arguments( ) -> None: kwargs.setdefault("chunks", (1,)) with pytest.raises(ValueError, match=re.escape(error_msg)): - zarr.create( - store=store, dtype="uint8", shape=(10,), zarr_format=3, **kwargs - ) + zarr.create(store=store, dtype="uint8", shape=(10,), zarr_format=3, **kwargs) @staticmethod @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) From c78da0fe830be955db64798cb86020d0db36e537 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 13 May 2025 13:40:17 +0200 Subject: [PATCH 16/17] revert typing --- src/zarr/core/array.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 76f39b6bd3..7ad8128780 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -135,9 +135,7 @@ logger = getLogger(__name__) -def parse_array_metadata( - data: ArrayMetadata | ArrayMetadataDict | dict[str, JSON], -) -> ArrayMetadata: +def parse_array_metadata(data: Any) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data elif isinstance(data, dict): From a0b5f4f30497f039d7b8a2d0b8a0cd58914a1ef9 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 13 May 2025 14:05:45 +0200 Subject: [PATCH 17/17] document changes --- changes/3049.misc.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3049.misc.rst diff --git a/changes/3049.misc.rst b/changes/3049.misc.rst new file mode 100644 index 0000000000..79ecd6ed95 --- /dev/null +++ b/changes/3049.misc.rst @@ -0,0 +1 @@ +Added tests for ``AsyncArray``, ``Array`` and removed duplicate argument parsing. \ No newline at end of file