@@ -147,7 +147,11 @@ def _force_native_endianness(var):
147
147
148
148
149
149
def _extract_nc4_variable_encoding (variable , raise_on_invalid = False ,
150
- lsd_okay = True , backend = 'netCDF4' ):
150
+ lsd_okay = True , backend = 'netCDF4' ,
151
+ unlimited_dims = None ):
152
+ if unlimited_dims is None :
153
+ unlimited_dims = ()
154
+
151
155
encoding = variable .encoding .copy ()
152
156
153
157
safe_to_drop = set (['source' , 'original_shape' ])
@@ -156,10 +160,17 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,
156
160
if lsd_okay :
157
161
valid_encodings .add ('least_significant_digit' )
158
162
159
- if (encoding .get ('chunksizes' ) is not None and
160
- (encoding .get ('original_shape' , variable .shape ) !=
161
- variable .shape ) and not raise_on_invalid ):
162
- del encoding ['chunksizes' ]
163
+ if not raise_on_invalid and encoding .get ('chunksizes' ) is not None :
164
+ # It's possible to get encoded chunksizes larger than a dimension size
165
+ # if the original file had an unlimited dimension. This is problematic
166
+ # if the new file no longer has an unlimited dimension.
167
+ chunksizes = encoding ['chunksizes' ]
168
+ chunks_too_big = any (
169
+ c > d and dim not in unlimited_dims
170
+ for c , d , dim in zip (chunksizes , variable .shape , variable .dims ))
171
+ changed_shape = encoding .get ('original_shape' ) != variable .shape
172
+ if chunks_too_big or changed_shape :
173
+ del encoding ['chunksizes' ]
163
174
164
175
for k in safe_to_drop :
165
176
if k in encoding :
@@ -346,7 +357,8 @@ def prepare_variable(self, name, variable, check_encoding=False,
346
357
'NC_CHAR type.' % name )
347
358
348
359
encoding = _extract_nc4_variable_encoding (
349
- variable , raise_on_invalid = check_encoding )
360
+ variable , raise_on_invalid = check_encoding ,
361
+ unlimited_dims = unlimited_dims )
350
362
nc4_var = self .ds .createVariable (
351
363
varname = name ,
352
364
datatype = datatype ,
0 commit comments