
Commit bea202d

Fix "Chunksize cannot exceed dimension size" (#1707)
* Fix "Chunksize cannot exceed dimension size" Fixes GH1225 * Fix chunksizes not iterable * Only remove big chunks if dimension is not unlimited
1 parent f83ad9e commit bea202d

3 files changed: +36 -6 lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
@@ -56,6 +56,8 @@ Bug fixes
 
 - Fixed ``apply_ufunc`` with ``dask='parallelized'`` for scalar arguments
   (:issue:`1697`).
+- Fix "Chunksize cannot exceed dimension size" error when writing netCDF4 files
+  loaded from disk (:issue:`1225`).
   By `Stephan Hoyer <https://github.com/shoyer>`_.
 
 - Validate the shape of coordinates with names matching dimensions in the
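The changelog entry above describes the user-visible symptom. A minimal reproduction sketch, not part of the commit (file names and chunk values are made up for illustration): a netCDF file whose unlimited dimension was created with chunks larger than its current length round-trips through xarray, and writing it back used to fail because the stale chunksizes were re-applied to a now fixed-size dimension.

# Sketch of the GH1225 failure mode; 'source.nc' and 'copy.nc' are placeholder paths.
import netCDF4
import xarray as xr

# Build a source file with an unlimited, chunked dimension holding only 3 values.
with netCDF4.Dataset('source.nc', mode='w') as nc:
    nc.createDimension('time', None)  # unlimited dimension
    var = nc.createVariable('x', 'f8', ('time',), chunksizes=(1024,))
    var[0:3] = [1.0, 2.0, 3.0]

# Reading the file carries chunksizes=(1024,) in the variable's encoding.
# Before this fix, writing back to a file where 'time' is a fixed dimension of
# size 3 raised "Chunksize cannot exceed dimension size"; the fix drops the
# stale chunksizes instead.
ds = xr.open_dataset('source.nc')
ds.to_netcdf('copy.nc')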

xarray/backends/netCDF4_.py

Lines changed: 18 additions & 6 deletions
@@ -147,7 +147,11 @@ def _force_native_endianness(var):
 
 
 def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,
-                                   lsd_okay=True, backend='netCDF4'):
+                                   lsd_okay=True, backend='netCDF4',
+                                   unlimited_dims=None):
+    if unlimited_dims is None:
+        unlimited_dims = ()
+
     encoding = variable.encoding.copy()
 
     safe_to_drop = set(['source', 'original_shape'])
@@ -156,10 +160,17 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,
     if lsd_okay:
         valid_encodings.add('least_significant_digit')
 
-    if (encoding.get('chunksizes') is not None and
-            (encoding.get('original_shape', variable.shape) !=
-             variable.shape) and not raise_on_invalid):
-        del encoding['chunksizes']
+    if not raise_on_invalid and encoding.get('chunksizes') is not None:
+        # It's possible to get encoded chunksizes larger than a dimension size
+        # if the original file had an unlimited dimension. This is problematic
+        # if the new file no longer has an unlimited dimension.
+        chunksizes = encoding['chunksizes']
+        chunks_too_big = any(
+            c > d and dim not in unlimited_dims
+            for c, d, dim in zip(chunksizes, variable.shape, variable.dims))
+        changed_shape = encoding.get('original_shape') != variable.shape
+        if chunks_too_big or changed_shape:
+            del encoding['chunksizes']
 
     for k in safe_to_drop:
         if k in encoding:
@@ -346,7 +357,8 @@ def prepare_variable(self, name, variable, check_encoding=False,
                              'NC_CHAR type.' % name)
 
         encoding = _extract_nc4_variable_encoding(
-            variable, raise_on_invalid=check_encoding)
+            variable, raise_on_invalid=check_encoding,
+            unlimited_dims=unlimited_dims)
         nc4_var = self.ds.createVariable(
             varname=name,
             datatype=datatype,
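The heart of the patch is the new guard in _extract_nc4_variable_encoding: cached chunksizes are dropped when any chunk exceeds the length of a dimension that is not unlimited, or when the variable's shape no longer matches original_shape. A standalone paraphrase of that decision, assuming plain dicts and tuples rather than xarray's variable objects (the helper name drop_stale_chunksizes is made up for illustration, not xarray API):

# Hypothetical helper paraphrasing the guard added above.
def drop_stale_chunksizes(encoding, shape, dims, unlimited_dims=()):
    chunksizes = encoding.get('chunksizes')
    if chunksizes is None:
        return encoding
    # A chunk is only a problem when it exceeds a *fixed* dimension's length.
    chunks_too_big = any(
        c > d and dim not in unlimited_dims
        for c, d, dim in zip(chunksizes, shape, dims))
    # A changed shape also invalidates the cached chunk layout.
    changed_shape = encoding.get('original_shape') != shape
    if chunks_too_big or changed_shape:
        encoding = dict(encoding)
        del encoding['chunksizes']
    return encoding

# The GH1225 case from the regression test below: a 2**20 chunk on a fixed,
# length-3 dimension is dropped...
enc = {'chunksizes': (2 ** 20,), 'original_shape': (3,)}
assert 'chunksizes' not in drop_stale_chunksizes(enc, (3,), ('x',))
# ...but kept when 'x' is declared unlimited and the shape still matches.
assert 'chunksizes' in drop_stale_chunksizes(enc, (3,), ('x',), unlimited_dims=('x',))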

xarray/tests/test_backends.py

Lines changed: 16 additions & 0 deletions
@@ -924,6 +924,21 @@ def test_compression_encoding(self):
         with self.roundtrip(expected) as actual:
             self.assertDatasetEqual(expected, actual)
 
+    def test_encoding_chunksizes_unlimited(self):
+        # regression test for GH1225
+        ds = Dataset({'x': [1, 2, 3], 'y': ('x', [2, 3, 4])})
+        ds.variables['x'].encoding = {
+            'zlib': False,
+            'shuffle': False,
+            'complevel': 0,
+            'fletcher32': False,
+            'contiguous': False,
+            'chunksizes': (2 ** 20,),
+            'original_shape': (3,),
+        }
+        with self.roundtrip(ds) as actual:
+            self.assertDatasetEqual(ds, actual)
+
     def test_mask_and_scale(self):
         with create_tmp_file() as tmp_file:
             with nc4.Dataset(tmp_file, mode='w') as nc:
@@ -1245,6 +1260,7 @@ def test_encoding_unlimited_dims(self):
                             save_kwargs=dict(unlimited_dims=['y'])) as actual:
             self.assertEqual(actual.encoding['unlimited_dims'], set('y'))
             self.assertDatasetEqual(ds, actual)
+
         ds.encoding = {'unlimited_dims': ['y']}
         with self.roundtrip(ds) as actual:
             self.assertEqual(actual.encoding['unlimited_dims'], set('y'))
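The second hunk only adds a blank line to test_encoding_unlimited_dims, but that test shows the user-facing knob this fix interacts with: a dimension can be marked unlimited either per write call or via the dataset's encoding, and chunksizes along such a dimension are left alone even when they exceed its current length. A short usage sketch based on that test (output paths and the choice of dimension are placeholders):

import xarray as xr

ds = xr.Dataset({'y': ('x', [2, 3, 4])}, coords={'x': [1, 2, 3]})

# Mark 'x' unlimited for a single write...
ds.to_netcdf('out1.nc', unlimited_dims=['x'])

# ...or persist the choice on the dataset's encoding, as the test does.
ds.encoding = {'unlimited_dims': ['x']}
ds.to_netcdf('out2.nc')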
