Skip to content

Commit 9235548

Browse files
authored
Handle empty containers in zarr chunk checks (#5526)
1 parent 4692c59 commit 9235548

File tree

3 files changed

+20
-5
lines changed

3 files changed

+20
-5
lines changed

doc/whats-new.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Bug fixes
6363
By `Michael Delgado <https://github.com/delgadom>`_.
6464
- `dt.season <https://xarray.pydata.org/en/stable/generated/xarray.DataArray.dt.season.html>`_ can now handle NaN and NaT. (:pull:`5876`).
6565
By `Pierre Loicq <https://github.com/pierreloicq>`_.
66-
66+
- Determination of zarr chunks now handles empty lists for encoding chunks or variable chunks, which can occur in certain circumstances (:pull:`5526`). By `Chris Roat <https://github.com/chrisroat>`_.
6767

6868
Documentation
6969
~~~~~~~~~~~~~

xarray/backends/zarr.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ def __getitem__(self, key):
8484

8585
def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
8686
"""
87-
Given encoding chunks (possibly None) and variable chunks (possibly None)
87+
Given encoding chunks (possibly None or []) and variable chunks
88+
(possibly None or []).
8889
"""
8990

9091
# zarr chunk spec:
@@ -93,7 +94,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
9394

9495
# if there are no chunks in encoding and the variable data is a numpy
9596
# array, then we let zarr use its own heuristics to pick the chunks
96-
if var_chunks is None and enc_chunks is None:
97+
if not var_chunks and not enc_chunks:
9798
return None
9899

99100
# if there are no chunks in encoding but there are dask chunks, we try to
@@ -102,7 +103,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
102103
# http://zarr.readthedocs.io/en/latest/spec/v1.html#chunks
103104
# while dask chunks can be variable sized
104105
# http://dask.pydata.org/en/latest/array-design.html#chunks
105-
if var_chunks and enc_chunks is None:
106+
if var_chunks and not enc_chunks:
106107
if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
107108
raise ValueError(
108109
"Zarr requires uniform chunk sizes except for final chunk. "
@@ -145,7 +146,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
145146

146147
# if there are chunks in encoding and the variable data is a numpy array,
147148
# we use the specified chunks
148-
if var_chunks is None:
149+
if not var_chunks:
149150
return enc_chunks_tuple
150151

151152
# the hard case

xarray/tests/test_backends.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,6 +2383,20 @@ def test_open_zarr_use_cftime(self):
23832383
ds_b = xr.open_zarr(store_target, use_cftime=True)
23842384
assert xr.coding.times.contains_cftime_datetimes(ds_b.time)
23852385

2386+
def test_write_read_select_write(self):
2387+
# Test for https://github.com/pydata/xarray/issues/4084
2388+
ds = create_test_data()
2389+
2390+
# NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug.
2391+
with self.create_zarr_target() as initial_store:
2392+
ds.to_zarr(initial_store, mode="w")
2393+
ds1 = xr.open_zarr(initial_store)
2394+
2395+
# Combination of where+squeeze triggers error on write.
2396+
ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3")
2397+
with self.create_zarr_target() as final_store:
2398+
ds_sel.to_zarr(final_store, mode="w")
2399+
23862400

23872401
@requires_zarr
23882402
class TestZarrDictStore(ZarrBase):

0 commit comments

Comments
 (0)