From 6d5446990028cf1ffb357ef5b0add53f8617ae49 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 2 Oct 2024 10:04:56 -0500 Subject: [PATCH 1/4] Ignore extra keys in v2 metadata Ignore unexpected keys in Zarr V2 metadata, to enable reading zarr files written by other systems, which might store additional data in the top level of the `.zgroup` and `.zarray` files. Closes https://github.com/zarr-developers/zarr-python/issues/2296 --- src/zarr/core/group.py | 11 ++++++++++- src/zarr/core/metadata/v2.py | 9 ++++++++- tests/v3/test_array.py | 1 + tests/v3/test_group.py | 12 ++++++++++++ tests/v3/test_metadata/test_v2.py | 26 ++++++++++++++++++++++++++ 5 files changed, 57 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 18caea3fd4..08ac7665bd 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -3,7 +3,7 @@ import asyncio import json import logging -from dataclasses import asdict, dataclass, field, replace +from dataclasses import asdict, dataclass, field, fields, replace from typing import TYPE_CHECKING, Literal, cast, overload import numpy as np @@ -116,6 +116,15 @@ def __init__( @classmethod def from_dict(cls, data: dict[str, Any]) -> GroupMetadata: assert data.pop("node_type", None) in ("group", None) + + zarr_format = data.get("zarr_format") + if zarr_format == 2 or zarr_format is None: + # zarr v2 allowed arbitrary keys here. + # We don't want the GroupMetadata constructor to fail just because someone put an + # extra key in the metadata.
+ expected = {x.name for x in fields(cls)} + data = {k: v for k, v in data.items() if k in expected} + return cls(**data) def to_dict(self) -> dict[str, Any]: diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index df7f2abaea..698132e3f4 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -13,7 +13,7 @@ from zarr.core.common import JSON, ChunkCoords import json -from dataclasses import dataclass, field, replace +from dataclasses import dataclass, field, fields, replace import numcodecs import numpy as np @@ -140,6 +140,13 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: _data = data.copy() # check that the zarr_format attribute is correct _ = parse_zarr_format(_data.pop("zarr_format")) + + # zarr v2 allowed arbitrary keys here. + # We don't want the ArrayV2Metadata constructor to fail just because someone put an + # extra key in the metadata. + expected = {x.name for x in fields(cls)} + _data = {k: v for k, v in _data.items() if k in expected} + return cls(**_data) def to_dict(self) -> dict[str, JSON]: diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 5778f7e8fa..291d284483 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -6,6 +6,7 @@ import pytest import zarr.api.asynchronous +import zarr.storage from zarr import Array, AsyncArray, Group from zarr.codecs.bytes import BytesCodec from zarr.core.array import chunks_initialized diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 15b9658e43..9d9f486051 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -963,3 +963,15 @@ async def test_open_mutable_mapping(): def test_open_mutable_mapping_sync(): group = zarr.open_group(store={}, mode="w") assert isinstance(group.store_path.store, MemoryStore) + + +class TestGroupMetadata: + def test_from_dict_extra_fields(self): + data = { + "attributes": {"key": "value"}, + "_nczarr_superblock": {"version": "2.0.0"}, + "zarr_format": 2, + } + result = 
GroupMetadata.from_dict(data) + expected = GroupMetadata(attributes={"key": "value"}, zarr_format=2) + assert result == expected diff --git a/tests/v3/test_metadata/test_v2.py b/tests/v3/test_metadata/test_v2.py index 3ea702eecd..070becfb05 100644 --- a/tests/v3/test_metadata/test_v2.py +++ b/tests/v3/test_metadata/test_v2.py @@ -72,3 +72,29 @@ def test_metadata_to_dict( observed.pop("dimension_separator") assert observed == expected + + +def test_from_dict_extra_fields() -> None: + data = { + "_nczarr_array": {"dimrefs": ["/dim1", "/dim2"], "storage": "chunked"}, + "attributes": {"key": "value"}, + "chunks": [8], + "compressor": None, + "dtype": " Date: Wed, 2 Oct 2024 11:07:41 -0500 Subject: [PATCH 2/4] handle renames --- src/zarr/core/metadata/v2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 698132e3f4..0dc138e5fd 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -145,6 +145,10 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: # We don't want the ArrayV2Metadata constructor to fail just because someone put an # extra key in the metadata. expected = {x.name for x in fields(cls)} + # https://github.com/zarr-developers/zarr-python/issues/2269 + # add some extra + expected |= {"dtype", "chunks"} + _data = {k: v for k, v in _data.items() if k in expected} return cls(**_data) From bf16ef7072bfb85d7d907a7bc9acee941cd36a87 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 2 Oct 2024 11:19:40 -0500 Subject: [PATCH 3/4] fixup --- src/zarr/core/metadata/v2.py | 2 +- tests/v3/test_metadata/test_v2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 0dc138e5fd..19b44ec450 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -146,7 +146,7 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: # extra key in the metadata. 
expected = {x.name for x in fields(cls)} # https://github.com/zarr-developers/zarr-python/issues/2269 - # add some extra + # handle the renames expected |= {"dtype", "chunks"} _data = {k: v for k, v in _data.items() if k in expected} diff --git a/tests/v3/test_metadata/test_v2.py b/tests/v3/test_metadata/test_v2.py index 070becfb05..7c080d9317 100644 --- a/tests/v3/test_metadata/test_v2.py +++ b/tests/v3/test_metadata/test_v2.py @@ -92,7 +92,7 @@ def test_from_dict_extra_fields() -> None: expected = ArrayV2Metadata( attributes={"key": "value"}, shape=(8,), - data_type="float64", + dtype="float64", chunk_grid=(8,), fill_value=0.0, order="C", From d0c466c84e0b7351cffe4d479b17e4150d223c31 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 2 Oct 2024 11:20:32 -0500 Subject: [PATCH 4/4] fixup --- tests/v3/test_metadata/test_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_metadata/test_v2.py b/tests/v3/test_metadata/test_v2.py index 7c080d9317..4cd222d812 100644 --- a/tests/v3/test_metadata/test_v2.py +++ b/tests/v3/test_metadata/test_v2.py @@ -93,7 +93,7 @@ def test_from_dict_extra_fields() -> None: attributes={"key": "value"}, shape=(8,), dtype="float64", - chunk_grid=(8,), + chunks=(8,), fill_value=0.0, order="C", )