diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4973e151736..22e38668f25 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -66,6 +66,10 @@ Deprecations

 Bug fixes
 ~~~~~~~~~
+
+- Fix ``open_datatree`` incompatibilities with Zarr-Python V3 and refactor
+  ``TestZarrDatatreeIO`` accordingly (:issue:`9960`, :pull:`10020`).
+  By `Alfonso Ladino-Rincon `_.
 - Default to resolution-dependent optimal integer encoding units when saving
   chunked non-nanosecond :py:class:`numpy.datetime64` or
   :py:class:`numpy.timedelta64` arrays to disk. Previously units of
@@ -97,6 +101,7 @@ Bug fixes
   datetimes and timedeltas (:issue:`8957`, :pull:`10050`).
   By `Kai Mühlbauer `_.

+
 Documentation
 ~~~~~~~~~~~~~
 - Better expose the :py:class:`Coordinates` class in API reference (:pull:`10000`)
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index d381897a29d..6c2719544d2 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -666,10 +666,21 @@ def open_store(
             use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
             zarr_format=zarr_format,
         )
+
+        from zarr import Group
+
+        group_members: dict[str, Group] = {}
         group_paths = list(_iter_zarr_groups(zarr_group, parent=group))
-        return {
+        for path in group_paths:
+            if path == group:
+                group_members[path] = zarr_group
+            else:
+                rel_path = path.removeprefix(f"{group}/")
+                group_members[path] = zarr_group[rel_path.removeprefix("/")]
+
+        out = {
             group: cls(
-                zarr_group.get(group),
+                group_store,
                 mode,
                 consolidate_on_close,
                 append_dim,
@@ -680,8 +691,9 @@ def open_store(
                 use_zarr_fill_value_as_mask,
                 cache_members=cache_members,
             )
-            for group in group_paths
+            for group, group_store in group_members.items()
         }
+        return out

     @classmethod
     def open_group(
@@ -1034,8 +1046,6 @@ def store(
         if self._consolidate_on_close:
             kwargs = {}
             if _zarr_v3():
-                # https://github.com/zarr-developers/zarr-python/pull/2113#issuecomment-2386718323
-                kwargs["path"] = self.zarr_group.name.lstrip("/")
                 kwargs["zarr_format"] = self.zarr_group.metadata.zarr_format
             zarr.consolidate_metadata(self.zarr_group.store, **kwargs)

@@ -1662,8 +1672,6 @@ def open_groups_as_dict(
         zarr_version=None,
         zarr_format=None,
     ) -> dict[str, Dataset]:
-        from xarray.core.treenode import NodePath
-
         filename_or_obj = _normalize_path(filename_or_obj)

         # Check for a group and make it a parent if it exists
@@ -1686,7 +1694,6 @@ def open_groups_as_dict(
         )

         groups_dict = {}
-
         for path_group, store in stores.items():
             store_entrypoint = StoreBackendEntrypoint()
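Note on the ``open_store`` hunk earlier in this file: ``zarr_group.get(group)`` was
being handed *absolute* group paths, which zarr-python 3 resolves differently than
v2 did, so the new loop maps every path yielded by ``_iter_zarr_groups`` to a
``zarr.Group`` obtained relative to the requested root. A minimal sketch of the
path arithmetic, with hypothetical values:

    group = "/a"                               # root the caller asked for
    path = "/a/b/c"                            # one path from _iter_zarr_groups
    rel_path = path.removeprefix(f"{group}/")  # -> "b/c", a key valid on zarr_group
    # when path == group, the already-open zarr_group is reused directly
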
@@ -1762,44 +1769,57 @@ def _get_open_params(
         consolidated = False

     if _zarr_v3():
-        missing_exc = ValueError
+        # TODO: replace AssertionError after https://github.com/zarr-developers/zarr-python/issues/2821 is resolved
+        missing_exc = AssertionError
     else:
         missing_exc = zarr.errors.GroupNotFoundError

-    if consolidated is None:
-        try:
-            zarr_group = zarr.open_consolidated(store, **open_kwargs)
-        except (ValueError, KeyError):
-            # ValueError in zarr-python 3.x, KeyError in 2.x.
+    if consolidated in [None, True]:
+        # open the root of the store, in case there is metadata consolidated there
+        group = open_kwargs.pop("path")
+
+        if consolidated:
+            # TODO: an option to pass the metadata_key keyword
+            zarr_root_group = zarr.open_consolidated(store, **open_kwargs)
+        elif consolidated is None:
+            # same but with more error handling in case no consolidated metadata found
            try:
-                zarr_group = zarr.open_group(store, **open_kwargs)
-                emit_user_level_warning(
-                    "Failed to open Zarr store with consolidated metadata, "
-                    "but successfully read with non-consolidated metadata. "
-                    "This is typically much slower for opening a dataset. "
-                    "To silence this warning, consider:\n"
-                    "1. Consolidating metadata in this existing store with "
-                    "zarr.consolidate_metadata().\n"
-                    "2. Explicitly setting consolidated=False, to avoid trying "
-                    "to read consolidate metadata, or\n"
-                    "3. Explicitly setting consolidated=True, to raise an "
-                    "error in this case instead of falling back to try "
-                    "reading non-consolidated metadata.",
-                    RuntimeWarning,
-                )
-            except missing_exc as err:
-                raise FileNotFoundError(
-                    f"No such file or directory: '{store}'"
-                ) from err
-    elif consolidated:
-        # TODO: an option to pass the metadata_key keyword
-        zarr_group = zarr.open_consolidated(store, **open_kwargs)
+                zarr_root_group = zarr.open_consolidated(store, **open_kwargs)
+            except (ValueError, KeyError):
+                # ValueError in zarr-python 3.x, KeyError in 2.x.
+                try:
+                    zarr_root_group = zarr.open_group(store, **open_kwargs)
+                    emit_user_level_warning(
+                        "Failed to open Zarr store with consolidated metadata, "
+                        "but successfully read with non-consolidated metadata. "
+                        "This is typically much slower for opening a dataset. "
+                        "To silence this warning, consider:\n"
+                        "1. Consolidating metadata in this existing store with "
+                        "zarr.consolidate_metadata().\n"
+                        "2. Explicitly setting consolidated=False, to avoid trying "
+                        "to read consolidate metadata, or\n"
+                        "3. Explicitly setting consolidated=True, to raise an "
+                        "error in this case instead of falling back to try "
+                        "reading non-consolidated metadata.",
+                        RuntimeWarning,
+                    )
+                except missing_exc as err:
+                    raise FileNotFoundError(
+                        f"No such file or directory: '{store}'"
+                    ) from err
+
+        # but the user should still receive a DataTree whose root is the group they asked for
+        if group and group != "/":
+            zarr_group = zarr_root_group[group.removeprefix("/")]
+        else:
+            zarr_group = zarr_root_group
     else:
         if _zarr_v3():
             # we have determined that we don't want to use consolidated metadata
             # so we set that to False to avoid trying to read it
             open_kwargs["use_consolidated"] = False

         zarr_group = zarr.open_group(store, **open_kwargs)
+
     close_store_on_close = zarr_group.store is not store

     # we use this to determine how to handle fill_value
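With the ``_get_open_params`` change above, any consolidated metadata is now looked
up at the root of the store (the ``path`` entry is popped from ``open_kwargs``
first), and the requested group is selected afterwards by indexing into the root.
A rough sketch of the resulting open path, assuming a store whose root carries
consolidated metadata:

    import zarr

    zarr_root_group = zarr.open_consolidated(store)  # metadata consolidated at "/"
    group = "/Group1/subgroup1"                      # group requested by the user
    zarr_group = (
        zarr_root_group[group.removeprefix("/")] if group != "/" else zarr_root_group
    )
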
diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py
index 4fe91bd9a12..7dacc6de6a6 100644
--- a/xarray/core/datatree.py
+++ b/xarray/core/datatree.py
@@ -16,7 +16,6 @@
     TYPE_CHECKING,
     Any,
     Concatenate,
-    Literal,
     NoReturn,
     ParamSpec,
     TypeVar,
@@ -1741,7 +1740,7 @@ def to_zarr(
         consolidated: bool = True,
         group: str | None = None,
         write_inherited_coords: bool = False,
-        compute: Literal[True] = True,
+        compute: bool = True,
         **kwargs,
     ):
         """
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index 6bf244c868a..31024d72e60 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -165,6 +165,21 @@ def _importorskip(

 has_array_api_strict, requires_array_api_strict = _importorskip("array_api_strict")

+parametrize_zarr_format = pytest.mark.parametrize(
+    "zarr_format",
+    [
+        pytest.param(2, id="zarr_format=2"),
+        pytest.param(
+            3,
+            marks=pytest.mark.skipif(
+                not has_zarr_v3,
+                reason="zarr-python v2 cannot understand the zarr v3 format",
+            ),
+            id="zarr_format=3",
+        ),
+    ],
+)
+

 def _importorskip_h5netcdf_ros3(has_h5netcdf: bool):
     if not has_h5netcdf:
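``parametrize_zarr_format`` runs a test once per on-disk format: always for
``zarr_format=2``, and for ``zarr_format=3`` only when zarr-python v3 is
installed. A sketch of how a test opts in, mirroring its use on
``TestZarrDatatreeIO`` further below:

    from xarray.tests import parametrize_zarr_format

    @parametrize_zarr_format
    def test_roundtrip(tmp_path, zarr_format):
        # runs with zarr_format=2, and with zarr_format=3 under zarr-python v3
        ...
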
""" - filepath = tmp_path_factory.mktemp("data") / "unaligned_simple_datatree.zarr" - root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) - set1_data = xr.Dataset({"a": 0, "b": 1}) - set2_data = xr.Dataset({"a": ("y", [2, 3]), "b": ("x", [0.1, 0.2])}) - root_data.to_zarr(filepath) - set1_data.to_zarr(filepath, group="/Group1", mode="a") - set2_data.to_zarr(filepath, group="/Group2", mode="a") - set1_data.to_zarr(filepath, group="/Group1/subgroup1", mode="a") - yield filepath + + def _unaligned_datatree_zarr(zarr_format: Literal[2, 3]) -> Path: + filepath = tmp_path_factory.mktemp("data") / "unaligned_simple_datatree.zarr" + root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])}) + set1_data = xr.Dataset({"a": 0, "b": 1}) + set2_data = xr.Dataset({"a": ("y", [2, 3]), "b": ("x", [0.1, 0.2])}) + + root_data.to_zarr( + filepath, + mode="w", + zarr_format=zarr_format, + ) + set1_data.to_zarr( + filepath, + group="/Group1", + mode="a", + zarr_format=zarr_format, + ) + set2_data.to_zarr( + filepath, + group="/Group2", + mode="a", + zarr_format=zarr_format, + ) + set1_data.to_zarr( + filepath, + group="/Group1/subgroup1", + mode="a", + zarr_format=zarr_format, + ) + + return filepath + + yield _unaligned_datatree_zarr class DatatreeIOBase: @@ -414,57 +446,67 @@ def test_phony_dims_warning(self, tmpdir) -> None: } -@pytest.mark.skipif( - have_zarr_v3, reason="datatree support for zarr 3 is not implemented yet" -) @requires_zarr +@parametrize_zarr_format class TestZarrDatatreeIO: engine = "zarr" - def test_to_zarr(self, tmpdir, simple_datatree): - filepath = tmpdir / "test.zarr" + def test_to_zarr(self, tmpdir, simple_datatree, zarr_format): + filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree - original_dt.to_zarr(filepath) + original_dt.to_zarr(filepath, zarr_format=zarr_format) with open_datatree(filepath, engine="zarr") as roundtrip_dt: assert_equal(original_dt, roundtrip_dt) - def test_zarr_encoding(self, tmpdir, simple_datatree): - from numcodecs.blosc import Blosc - - filepath = tmpdir / "test.zarr" + def test_zarr_encoding(self, tmpdir, simple_datatree, zarr_format): + filepath = str(tmpdir / "test.zarr") original_dt = simple_datatree - comp = {"compressor": Blosc(cname="zstd", clevel=3, shuffle=2)} + if zarr_format == 2: + from numcodecs.blosc import Blosc + + codec = Blosc(cname="zstd", clevel=3, shuffle=2) + comp = {"compressors": (codec,)} if has_zarr_v3 else {"compressor": codec} + elif zarr_format == 3: + # specifying codecs in zarr_format=3 requires importing from zarr 3 namespace + import numcodecs.zarr3 + + comp = {"compressors": (numcodecs.zarr3.Blosc(cname="zstd", clevel=3),)} + enc = {"/set2": {var: comp for var in original_dt["/set2"].dataset.data_vars}} - original_dt.to_zarr(filepath, encoding=enc) + original_dt.to_zarr(filepath, encoding=enc, zarr_format=zarr_format) with open_datatree(filepath, engine="zarr") as roundtrip_dt: - print(roundtrip_dt["/set2/a"].encoding) - assert roundtrip_dt["/set2/a"].encoding["compressor"] == comp["compressor"] + compressor_key = "compressors" if has_zarr_v3 else "compressor" + assert ( + roundtrip_dt["/set2/a"].encoding[compressor_key] == comp[compressor_key] + ) enc["/not/a/group"] = {"foo": "bar"} # type: ignore[dict-item] with pytest.raises(ValueError, match="unexpected encoding group.*"): - original_dt.to_zarr(filepath, encoding=enc, engine="zarr") + original_dt.to_zarr( + filepath, encoding=enc, engine="zarr", zarr_format=zarr_format + ) - def test_to_zarr_zip_store(self, tmpdir, 
@@ -414,57 +446,67 @@ def test_phony_dims_warning(self, tmpdir) -> None:
         }


-@pytest.mark.skipif(
-    have_zarr_v3, reason="datatree support for zarr 3 is not implemented yet"
-)
 @requires_zarr
+@parametrize_zarr_format
 class TestZarrDatatreeIO:
     engine = "zarr"

-    def test_to_zarr(self, tmpdir, simple_datatree):
-        filepath = tmpdir / "test.zarr"
+    def test_to_zarr(self, tmpdir, simple_datatree, zarr_format):
+        filepath = str(tmpdir / "test.zarr")
         original_dt = simple_datatree
-        original_dt.to_zarr(filepath)
+        original_dt.to_zarr(filepath, zarr_format=zarr_format)

         with open_datatree(filepath, engine="zarr") as roundtrip_dt:
             assert_equal(original_dt, roundtrip_dt)

-    def test_zarr_encoding(self, tmpdir, simple_datatree):
-        from numcodecs.blosc import Blosc
-
-        filepath = tmpdir / "test.zarr"
+    def test_zarr_encoding(self, tmpdir, simple_datatree, zarr_format):
+        filepath = str(tmpdir / "test.zarr")
         original_dt = simple_datatree

-        comp = {"compressor": Blosc(cname="zstd", clevel=3, shuffle=2)}
+        if zarr_format == 2:
+            from numcodecs.blosc import Blosc
+
+            codec = Blosc(cname="zstd", clevel=3, shuffle=2)
+            comp = {"compressors": (codec,)} if has_zarr_v3 else {"compressor": codec}
+        elif zarr_format == 3:
+            # specifying codecs in zarr_format=3 requires importing from zarr 3 namespace
+            import numcodecs.zarr3
+
+            comp = {"compressors": (numcodecs.zarr3.Blosc(cname="zstd", clevel=3),)}
+
         enc = {"/set2": {var: comp for var in original_dt["/set2"].dataset.data_vars}}
-        original_dt.to_zarr(filepath, encoding=enc)
+        original_dt.to_zarr(filepath, encoding=enc, zarr_format=zarr_format)

         with open_datatree(filepath, engine="zarr") as roundtrip_dt:
-            print(roundtrip_dt["/set2/a"].encoding)
-            assert roundtrip_dt["/set2/a"].encoding["compressor"] == comp["compressor"]
+            compressor_key = "compressors" if has_zarr_v3 else "compressor"
+            assert (
+                roundtrip_dt["/set2/a"].encoding[compressor_key] == comp[compressor_key]
+            )

         enc["/not/a/group"] = {"foo": "bar"}  # type: ignore[dict-item]
         with pytest.raises(ValueError, match="unexpected encoding group.*"):
-            original_dt.to_zarr(filepath, encoding=enc, engine="zarr")
+            original_dt.to_zarr(
+                filepath, encoding=enc, engine="zarr", zarr_format=zarr_format
+            )

-    def test_to_zarr_zip_store(self, tmpdir, simple_datatree):
+    def test_to_zarr_zip_store(self, tmpdir, simple_datatree, zarr_format):
         from zarr.storage import ZipStore

-        filepath = tmpdir / "test.zarr.zip"
+        filepath = str(tmpdir / "test.zarr.zip")
         original_dt = simple_datatree
-        store = ZipStore(filepath)
-        original_dt.to_zarr(store)
+        store = ZipStore(filepath, mode="w")
+        original_dt.to_zarr(store, zarr_format=zarr_format)

         with open_datatree(store, engine="zarr") as roundtrip_dt:  # type: ignore[arg-type, unused-ignore]
             assert_equal(original_dt, roundtrip_dt)

-    def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree):
+    def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree, zarr_format):
         filepath = tmpdir / "test.zarr"
         zmetadata = filepath / ".zmetadata"
         s1zmetadata = filepath / "set1" / ".zmetadata"
         filepath = str(filepath)  # casting to str avoids a pathlib bug in xarray
         original_dt = simple_datatree
-        original_dt.to_zarr(filepath, consolidated=False)
+        original_dt.to_zarr(filepath, consolidated=False, zarr_format=zarr_format)
         assert not zmetadata.exists()
         assert not s1zmetadata.exists()
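``test_zarr_encoding`` above separates two version axes that v2-only code could
conflate: the installed zarr-python major version decides the encoding key
(``compressor`` in v2 versus the tuple-valued ``compressors`` in v3), while
writing the v3 on-disk format additionally requires codecs from the
``numcodecs.zarr3`` namespace. A condensed sketch (``ds`` and ``path`` are
hypothetical):

    from numcodecs.blosc import Blosc

    codec = Blosc(cname="zstd", clevel=3, shuffle=2)
    comp = {"compressors": (codec,)} if has_zarr_v3 else {"compressor": codec}
    ds.to_zarr(path, encoding={"a": comp}, zarr_format=2)
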
@@ -472,54 +514,119 @@ def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree):
         with open_datatree(filepath, engine="zarr") as roundtrip_dt:
             assert_equal(original_dt, roundtrip_dt)

-    def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree):
+    def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree, zarr_format):
+        simple_datatree.to_zarr(str(tmpdir), zarr_format=zarr_format)
+
         import zarr

-        simple_datatree.to_zarr(tmpdir)
+        # expected exception type changed in zarr-python v2->v3, see https://github.com/zarr-developers/zarr-python/issues/2821
+        expected_exception_type = (
+            FileExistsError if has_zarr_v3 else zarr.errors.ContainsGroupError
+        )

         # with default settings, to_zarr should not overwrite an existing dir
-        with pytest.raises(zarr.errors.ContainsGroupError):
-            simple_datatree.to_zarr(tmpdir)
+        with pytest.raises(expected_exception_type):
+            simple_datatree.to_zarr(str(tmpdir))

     @requires_dask
-    def test_to_zarr_compute_false(self, tmpdir, simple_datatree):
+    def test_to_zarr_compute_false(
+        self, tmp_path: Path, simple_datatree: DataTree, zarr_format: Literal[2, 3]
+    ):
         import dask.array as da

-        filepath = tmpdir / "test.zarr"
+        storepath = tmp_path / "test.zarr"
         original_dt = simple_datatree.chunk()
-        original_dt.to_zarr(filepath, compute=False)
+        original_dt.to_zarr(str(storepath), compute=False, zarr_format=zarr_format)
+
+        def assert_expected_zarr_files_exist(
+            arr_dir: Path,
+            chunks_expected: bool,
+            is_scalar: bool,
+            zarr_format: Literal[2, 3],
+        ) -> None:
+            """For one zarr array, check that all expected metadata and chunk data files exist."""
+            # TODO: This function is now so complicated that it's practically checking compliance with the whole zarr spec...
+            # TODO: Perhaps it would be better to instead trust that zarr-python is spec-compliant and check `DataTree` against zarr-python?
+            # TODO: The way to do that would ideally be to use zarr-pythons ability to determine how many chunks have been initialized.
+
+            if zarr_format == 2:
+                zarray_file, zattrs_file = (arr_dir / ".zarray"), (arr_dir / ".zattrs")
+
+                assert zarray_file.exists() and zarray_file.is_file()
+                assert zattrs_file.exists() and zattrs_file.is_file()
+
+                chunk_file = arr_dir / "0"
+                if chunks_expected:
+                    # assumes empty chunks were written
+                    # (i.e. they did not contain only fill_value and write_empty_chunks was False)
+                    assert chunk_file.exists() and chunk_file.is_file()
+                else:
+                    # either dask array or array of all fill_values
+                    assert not chunk_file.exists()
+            elif zarr_format == 3:
+                metadata_file = arr_dir / "zarr.json"
+                assert metadata_file.exists() and metadata_file.is_file()
+
+                chunks_dir = arr_dir / "c"
+                chunk_file = chunks_dir / "0"
+                if chunks_expected:
+                    # assumes empty chunks were written
+                    # (i.e. they did not contain only fill_value and write_empty_chunks was False)
+                    if is_scalar:
+                        # this is the expected behaviour for storing scalars in zarr 3, see https://github.com/pydata/xarray/issues/10147
+                        assert chunks_dir.exists() and chunks_dir.is_file()
+                    else:
+                        assert chunks_dir.exists() and chunks_dir.is_dir()
+                        assert chunk_file.exists() and chunk_file.is_file()
+                else:
+                    assert not chunks_dir.exists()
+                    assert not chunk_file.exists()
+
+        DEFAULT_ZARR_FILL_VALUE = 0
+        # The default value of write_empty_chunks changed from True->False in zarr-python v2->v3
+        WRITE_EMPTY_CHUNKS_DEFAULT = not has_zarr_v3

         for node in original_dt.subtree:
-            for name, variable in node.dataset.variables.items():
-                var_dir = filepath / node.path / name
-                var_files = var_dir.listdir()
-                assert var_dir / ".zarray" in var_files
-                assert var_dir / ".zattrs" in var_files
-                if isinstance(variable.data, da.Array):
-                    assert var_dir / "0" not in var_files
-                else:
-                    assert var_dir / "0" in var_files
+            # inherited variables aren't meant to be written to zarr
+            local_node_variables = node.to_dataset(inherit=False).variables
+            for name, var in local_node_variables.items():
+                var_dir = storepath / node.path.removeprefix("/") / name
+
+                assert_expected_zarr_files_exist(
+                    arr_dir=var_dir,
+                    # don't expect dask.Arrays to be written to disk, as compute=False
+                    # also don't expect numpy arrays containing only zarr's fill_value to be written to disk
+                    chunks_expected=(
+                        not isinstance(var.data, da.Array)
+                        and (
+                            var.data != DEFAULT_ZARR_FILL_VALUE
+                            or WRITE_EMPTY_CHUNKS_DEFAULT
+                        )
+                    ),
+                    is_scalar=not bool(var.dims),
+                    zarr_format=zarr_format,
+                )

-    def test_to_zarr_inherited_coords(self, tmpdir):
+    def test_to_zarr_inherited_coords(self, tmpdir, zarr_format):
         original_dt = DataTree.from_dict(
             {
                 "/": xr.Dataset({"a": (("x",), [1, 2])}, coords={"x": [3, 4]}),
                 "/sub": xr.Dataset({"b": (("x",), [5, 6])}),
             }
         )
-        filepath = tmpdir / "test.zarr"
-        original_dt.to_zarr(filepath)
+        filepath = str(tmpdir / "test.zarr")
+        original_dt.to_zarr(filepath, zarr_format=zarr_format)

         with open_datatree(filepath, engine="zarr") as roundtrip_dt:
             assert_equal(original_dt, roundtrip_dt)
             subtree = cast(DataTree, roundtrip_dt["/sub"])
             assert "x" not in subtree.to_dataset(inherit=False).coords

-    def test_open_groups_round_trip(self, tmpdir, simple_datatree) -> None:
+    def test_open_groups_round_trip(self, tmpdir, simple_datatree, zarr_format) -> None:
         """Test `open_groups` opens a zarr store with the `simple_datatree` structure."""
-        filepath = tmpdir / "test.zarr"
+        filepath = str(tmpdir / "test.zarr")
         original_dt = simple_datatree
-        original_dt.to_zarr(filepath)
+        original_dt.to_zarr(filepath, zarr_format=zarr_format)

         roundtrip_dict = open_groups(filepath, engine="zarr")
         roundtrip_dt = DataTree.from_dict(roundtrip_dict)
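``assert_expected_zarr_files_exist`` above encodes the on-disk layouts the test
must now accept:

    array/            (zarr format 2)    array/            (zarr format 3)
    +-- .zarray                          +-- zarr.json
    +-- .zattrs                          +-- c/
    +-- 0                                    +-- 0

with the v3 quirk that a scalar's single chunk is written at ``c`` itself rather
than inside a ``c/`` directory (see the issue linked in the code).
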
@@ -530,19 +637,24 @@ def test_open_groups_round_trip(self, tmpdir, simple_datatree) -> None:
         for ds in roundtrip_dict.values():
             ds.close()

-    def test_open_datatree(self, unaligned_datatree_zarr) -> None:
+    @pytest.mark.filterwarnings(
+        "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning"
+    )
+    def test_open_datatree(self, unaligned_datatree_zarr_factory, zarr_format) -> None:
         """Test if `open_datatree` fails to open a zarr store with an unaligned group hierarchy."""
+        storepath = unaligned_datatree_zarr_factory(zarr_format=zarr_format)
+
         with pytest.raises(
             ValueError,
             match=(
                 re.escape("group '/Group2' is not aligned with its parents:") + ".*"
             ),
         ):
-            open_datatree(unaligned_datatree_zarr, engine="zarr")
+            open_datatree(storepath, engine="zarr")

     @requires_dask
-    def test_open_datatree_chunks(self, tmpdir, simple_datatree) -> None:
-        filepath = tmpdir / "test.zarr"
+    def test_open_datatree_chunks(self, tmpdir, zarr_format) -> None:
+        filepath = str(tmpdir / "test.zarr")

         chunks = {"x": 2, "y": 1}

@@ -556,7 +668,7 @@ def test_open_datatree_chunks(self, tmpdir, simple_datatree) -> None:
             "/group2": set2_data.chunk(chunks),
         }
     )
-        original_tree.to_zarr(filepath)
+        original_tree.to_zarr(filepath, zarr_format=zarr_format)

         with open_datatree(filepath, engine="zarr", chunks=chunks) as tree:
             xr.testing.assert_identical(tree, original_tree)
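The chunked round-trips rely on ``open_datatree(..., chunks=...)`` returning
dask-backed variables for every node, so nothing is loaded until ``.compute()``
is called. Sketch of the pattern used above:

    with open_datatree(filepath, engine="zarr", chunks={"x": 2, "y": 1}) as tree:
        # variables are lazy dask arrays here
        xr.testing.assert_identical(tree.compute(), original_tree)
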
@@ -567,42 +679,52 @@ def test_open_datatree_chunks(self, tmpdir, simple_datatree) -> None:
         # from each node.
         xr.testing.assert_identical(tree.compute(), original_tree)

-    def test_open_groups(self, unaligned_datatree_zarr) -> None:
+    @pytest.mark.filterwarnings(
+        "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning"
+    )
+    def test_open_groups(self, unaligned_datatree_zarr_factory, zarr_format) -> None:
         """Test `open_groups` with a zarr store of an unaligned group hierarchy."""
-        unaligned_dict_of_datasets = open_groups(unaligned_datatree_zarr, engine="zarr")
+        storepath = unaligned_datatree_zarr_factory(zarr_format=zarr_format)
+        unaligned_dict_of_datasets = open_groups(storepath, engine="zarr")

         assert "/" in unaligned_dict_of_datasets.keys()
         assert "/Group1" in unaligned_dict_of_datasets.keys()
         assert "/Group1/subgroup1" in unaligned_dict_of_datasets.keys()
         assert "/Group2" in unaligned_dict_of_datasets.keys()
         # Check that group name returns the correct datasets
-        with xr.open_dataset(
-            unaligned_datatree_zarr, group="/", engine="zarr"
-        ) as expected:
+        with xr.open_dataset(storepath, group="/", engine="zarr") as expected:
             assert_identical(unaligned_dict_of_datasets["/"], expected)
-        with xr.open_dataset(
-            unaligned_datatree_zarr, group="Group1", engine="zarr"
-        ) as expected:
+        with xr.open_dataset(storepath, group="Group1", engine="zarr") as expected:
             assert_identical(unaligned_dict_of_datasets["/Group1"], expected)
         with xr.open_dataset(
-            unaligned_datatree_zarr, group="/Group1/subgroup1", engine="zarr"
+            storepath, group="/Group1/subgroup1", engine="zarr"
         ) as expected:
             assert_identical(unaligned_dict_of_datasets["/Group1/subgroup1"], expected)
-        with xr.open_dataset(
-            unaligned_datatree_zarr, group="/Group2", engine="zarr"
-        ) as expected:
+        with xr.open_dataset(storepath, group="/Group2", engine="zarr") as expected:
             assert_identical(unaligned_dict_of_datasets["/Group2"], expected)

         for ds in unaligned_dict_of_datasets.values():
             ds.close()

-    def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None:
+    @pytest.mark.filterwarnings(
+        "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning"
+    )
+    @pytest.mark.parametrize("write_consolidated_metadata", [True, False, None])
+    def test_open_datatree_specific_group(
+        self,
+        tmpdir,
+        simple_datatree,
+        write_consolidated_metadata,
+        zarr_format,
+    ) -> None:
         """Test opening a specific group within a Zarr store using `open_datatree`."""
-        filepath = tmpdir / "test.zarr"
+        filepath = str(tmpdir / "test.zarr")
         group = "/set2"
         original_dt = simple_datatree
-        original_dt.to_zarr(filepath)
+        original_dt.to_zarr(
+            filepath, consolidated=write_consolidated_metadata, zarr_format=zarr_format
+        )
         expected_subtree = original_dt[group].copy()
         expected_subtree.orphan()
         with open_datatree(filepath, group=group, engine=self.engine) as subgroup_tree:
@@ -610,14 +732,11 @@ def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None:
             assert_equal(subgroup_tree, expected_subtree)

     @requires_dask
-    def test_open_groups_chunks(self, tmpdir) -> None:
+    def test_open_groups_chunks(self, tmpdir, zarr_format) -> None:
         """Test `open_groups` with chunks on a zarr store."""
         chunks = {"x": 2, "y": 1}
-        filepath = tmpdir / "test.zarr"
-
-        chunks = {"x": 2, "y": 1}
-
+        filepath = str(tmpdir / "test.zarr")
         root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})
         set1_data = xr.Dataset({"a": ("y", [-1, 0, 1]), "b": ("x", [-10, 6])})
         set2_data = xr.Dataset({"a": ("y", [1, 2, 3]), "b": ("x", [0.1, 0.2])})
@@ -628,7 +747,7 @@ def test_open_groups_chunks(self, tmpdir) -> None:
                 "/group2": set2_data.chunk(chunks),
             }
         )
-        original_tree.to_zarr(filepath, mode="w")
+        original_tree.to_zarr(filepath, mode="w", zarr_format=zarr_format)

         dict_of_datasets = open_groups(filepath, engine="zarr", chunks=chunks)

@@ -640,7 +759,7 @@ def test_open_groups_chunks(self, tmpdir) -> None:
         for ds in dict_of_datasets.values():
             ds.close()

-    def test_write_subgroup(self, tmpdir):
+    def test_write_subgroup(self, tmpdir, zarr_format):
         original_dt = DataTree.from_dict(
             {
                 "/": xr.Dataset(coords={"x": [1, 2, 3]}),
                 "/child": xr.Dataset({"foo": ("x", [4, 5, 6])}),
             }
         )

         expected_dt = original_dt.copy()
         expected_dt.name = None
-        filepath = tmpdir / "test.zarr"
-        original_dt.to_zarr(filepath)
+        filepath = str(tmpdir / "test.zarr")
+        original_dt.to_zarr(filepath, zarr_format=zarr_format)

         with open_datatree(filepath, engine="zarr") as roundtrip_dt:
             assert_equal(original_dt, roundtrip_dt)
             assert_identical(expected_dt, roundtrip_dt)

-    def test_write_inherited_coords_false(self, tmpdir):
+    @pytest.mark.filterwarnings(
+        "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning"
+    )
+    def test_write_inherited_coords_false(self, tmpdir, zarr_format):
         original_dt = DataTree.from_dict(
             {
                 "/": xr.Dataset(coords={"x": [1, 2, 3]}),
@@ -666,8 +788,10 @@ def test_write_inherited_coords_false(self, tmpdir):
             }
         )
-        filepath = tmpdir / "test.zarr"
-        original_dt.to_zarr(filepath, write_inherited_coords=False)
+        filepath = str(tmpdir / "test.zarr")
+        original_dt.to_zarr(
+            filepath, write_inherited_coords=False, zarr_format=zarr_format
+        )

         with open_datatree(filepath, engine="zarr") as roundtrip_dt:
             assert_identical(original_dt, roundtrip_dt)
@@ -677,7 +801,10 @@ def test_write_inherited_coords_false(self, tmpdir):
         with open_datatree(filepath, group="child", engine="zarr") as roundtrip_child:
             assert_identical(expected_child, roundtrip_child)

-    def test_write_inherited_coords_true(self, tmpdir):
+    @pytest.mark.filterwarnings(
+        "ignore:Failed to open Zarr store with consolidated metadata:RuntimeWarning"
+    )
+    def test_write_inherited_coords_true(self, tmpdir, zarr_format):
         original_dt = DataTree.from_dict(
             {
                 "/": xr.Dataset(coords={"x": [1, 2, 3]}),
@@ -685,8 +812,10 @@ def test_write_inherited_coords_true(self, tmpdir):
             }
         )
-        filepath = tmpdir / "test.zarr"
-        original_dt.to_zarr(filepath, write_inherited_coords=True)
+        filepath = str(tmpdir / "test.zarr")
+        original_dt.to_zarr(
+            filepath, write_inherited_coords=True, zarr_format=zarr_format
+        )

         with open_datatree(filepath, engine="zarr") as roundtrip_dt:
             assert_identical(original_dt, roundtrip_dt)
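Taken together, these changes make the full ``DataTree`` round-trip work against
both zarr formats. A minimal end-to-end sketch of what this patch fixes for the
v3 format:

    import xarray as xr
    from xarray.core.datatree import DataTree

    tree = DataTree.from_dict(
        {
            "/": xr.Dataset(coords={"x": [1, 2, 3]}),
            "/child": xr.Dataset({"foo": ("x", [4, 5, 6])}),
        }
    )
    tree.to_zarr("example.zarr", zarr_format=3)
    with xr.open_datatree("example.zarr", engine="zarr") as roundtrip:
        xr.testing.assert_identical(tree, roundtrip)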