Skip to content

Commit 263ec98

Browse files
author
Joe Hamman
committed
Merge branch 'master' of github.com:pydata/xarray into feature/interpolate
2 parents 42d63ef + bea202d commit 263ec98

File tree

11 files changed

+119
-22
lines changed

11 files changed

+119
-22
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ install:
8989
- python xarray/util/print_versions.py
9090

9191
script:
92+
- python -OO -c "import xarray"
9293
- py.test xarray --cov=xarray --cov-config ci/.coveragerc --cov-report term-missing --verbose $EXTRA_FLAGS
9394
- git diff upstream/master **/*py | flake8 --diff --exit-zero || true
9495

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,7 @@ Exceptions
490490
:toctree: generated/
491491

492492
MergeError
493+
SerializationWarning
493494

494495
Advanced API
495496
============

doc/whats-new.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,29 @@ Bug fixes
4343
``NumpyIndexingAdapter``. (:issue:`1694`)
4444
By `Keisuke Fujii <https://github.com/fujiisoup>`_
4545

46+
- Fix importing xarray when running Python with ``-OO`` (:issue:`1706`).
47+
By `Stephan Hoyer <https://github.com/shoyer>`_.
48+
49+
- Saving a netCDF file with a coordinate that has a space in its name now raises
50+
an appropriate warning (:issue:`1689`).
51+
By `Stephan Hoyer <https://github.com/shoyer>`_.
52+
4653
- Fix two bugs that were preventing dask arrays from being specified as
4754
coordinates in the DataArray constructor (:issue:`1684`).
4855
By `Joe Hamman <https://github.com/jhamman>`_
4956

5057
- Fixed ``apply_ufunc`` with ``dask='parallelized'`` for scalar arguments
5158
(:issue:`1697`).
59+
- Fix "Chunksize cannot exceed dimension size" error when writing netCDF4 files
60+
loaded from disk (:issue:`1225`).
61+
By `Stephan Hoyer <https://github.com/shoyer>`_.
62+
63+
- Validate the shape of coordinates with names matching dimensions in the
64+
DataArray constructor (:issue:`1709`).
65+
By `Stephan Hoyer <https://github.com/shoyer>`_.
66+
67+
- Raise ``NotImplementedError`` when attempting to save a MultiIndex to a
68+
netCDF file (:issue:`1547`).
5269
By `Stephan Hoyer <https://github.com/shoyer>`_.
5370

5471
Testing

xarray/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
save_mfdataset)
2020
from .backends.rasterio_ import open_rasterio
2121

22-
from .conventions import decode_cf
22+
from .conventions import decode_cf, SerializationWarning
2323

2424
try:
2525
from .version import version as __version__

xarray/backends/netCDF4_.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,11 @@ def _force_native_endianness(var):
147147

148148

149149
def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,
150-
lsd_okay=True, backend='netCDF4'):
150+
lsd_okay=True, backend='netCDF4',
151+
unlimited_dims=None):
152+
if unlimited_dims is None:
153+
unlimited_dims = ()
154+
151155
encoding = variable.encoding.copy()
152156

153157
safe_to_drop = set(['source', 'original_shape'])
@@ -156,10 +160,17 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,
156160
if lsd_okay:
157161
valid_encodings.add('least_significant_digit')
158162

159-
if (encoding.get('chunksizes') is not None and
160-
(encoding.get('original_shape', variable.shape) !=
161-
variable.shape) and not raise_on_invalid):
162-
del encoding['chunksizes']
163+
if not raise_on_invalid and encoding.get('chunksizes') is not None:
164+
# It's possible to get encoded chunksizes larger than a dimension size
165+
# if the original file had an unlimited dimension. This is problematic
166+
# if the new file no longer has an unlimited dimension.
167+
chunksizes = encoding['chunksizes']
168+
chunks_too_big = any(
169+
c > d and dim not in unlimited_dims
170+
for c, d, dim in zip(chunksizes, variable.shape, variable.dims))
171+
changed_shape = encoding.get('original_shape') != variable.shape
172+
if chunks_too_big or changed_shape:
173+
del encoding['chunksizes']
163174

164175
for k in safe_to_drop:
165176
if k in encoding:
@@ -346,7 +357,8 @@ def prepare_variable(self, name, variable, check_encoding=False,
346357
'NC_CHAR type.' % name)
347358

348359
encoding = _extract_nc4_variable_encoding(
349-
variable, raise_on_invalid=check_encoding)
360+
variable, raise_on_invalid=check_encoding,
361+
unlimited_dims=unlimited_dims)
350362
nc4_var = self.ds.createVariable(
351363
varname=name,
352364
datatype=datatype,

xarray/conventions.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from .core import duck_array_ops, indexing, ops, utils
1919
from .core.formatting import format_timestamp, first_n_items, last_item
20-
from .core.variable import as_variable, Variable
20+
from .core.variable import as_variable, IndexVariable, Variable
2121
from .core.pycompat import iteritems, OrderedDict, PY3, basestring
2222

2323

@@ -32,6 +32,10 @@
3232
'D': 1e9 * 60 * 60 * 24}
3333

3434

35+
class SerializationWarning(RuntimeWarning):
36+
"""Warnings about encoding/decoding issues in serialization."""
37+
38+
3539
def mask_and_scale(array, fill_value=None, scale_factor=None, add_offset=None,
3640
dtype=float):
3741
"""Scale and mask array values according to CF conventions for packed and
@@ -113,15 +117,15 @@ def _decode_datetime_with_netcdf4(num_dates, units, calendar):
113117
warnings.warn('Unable to decode time axis into full '
114118
'numpy.datetime64 objects, continuing using dummy '
115119
'netCDF4.datetime objects instead, reason: dates out'
116-
' of range', RuntimeWarning, stacklevel=3)
120+
' of range', SerializationWarning, stacklevel=3)
117121
else:
118122
try:
119123
dates = nctime_to_nptime(dates)
120124
except ValueError as e:
121125
warnings.warn('Unable to decode time axis into full '
122126
'numpy.datetime64 objects, continuing using '
123127
'dummy netCDF4.datetime objects instead, reason:'
124-
'{0}'.format(e), RuntimeWarning, stacklevel=3)
128+
'{0}'.format(e), SerializationWarning, stacklevel=3)
125129
return dates
126130

127131

@@ -773,7 +777,7 @@ def maybe_encode_nonstring_dtype(var, name=None):
773777
warnings.warn('saving variable %s with floating '
774778
'point data as an integer dtype without '
775779
'any _FillValue to use for NaNs' % name,
776-
RuntimeWarning, stacklevel=3)
780+
SerializationWarning, stacklevel=3)
777781
data = duck_array_ops.around(data)[...]
778782
if encoding.get('_Unsigned', False):
779783
signed_dtype = np.dtype('i%s' % dtype.itemsize)
@@ -828,6 +832,15 @@ def _infer_dtype(array, name=None):
828832
def ensure_dtype_not_object(var, name=None):
829833
# TODO: move this from conventions to backends? (it's not CF related)
830834
if var.dtype.kind == 'O':
835+
if (isinstance(var, IndexVariable) and
836+
isinstance(var.to_index(), pd.MultiIndex)):
837+
raise NotImplementedError(
838+
'variable {!r} is a MultiIndex, which cannot yet be '
839+
'serialized to netCDF files '
840+
'(https://github.com/pydata/xarray/issues/1077). Use '
841+
'reset_index() to convert MultiIndex levels into coordinate '
842+
'variables instead.'.format(name))
843+
831844
dims, data, attrs, encoding = _var_as_tuple(var)
832845
missing = pd.isnull(data)
833846
if missing.any():
@@ -951,7 +964,7 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
951964
else:
952965
warnings.warn("variable %r has _Unsigned attribute but is not "
953966
"of integer type. Ignoring attribute." % name,
954-
RuntimeWarning, stacklevel=3)
967+
SerializationWarning, stacklevel=3)
955968

956969
if mask_and_scale:
957970
if 'missing_value' in attributes:
@@ -975,7 +988,7 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
975988
warnings.warn("variable {!r} has multiple fill values {}, "
976989
"decoding all values to NaN."
977990
.format(name, fill_value),
978-
RuntimeWarning, stacklevel=3)
991+
SerializationWarning, stacklevel=3)
979992

980993
scale_factor = pop_to(attributes, encoding, 'scale_factor')
981994
add_offset = pop_to(attributes, encoding, 'add_offset')
@@ -1185,6 +1198,16 @@ def cf_decoder(variables, attributes,
11851198
def _encode_coordinates(variables, attributes, non_dim_coord_names):
11861199
# calculate global and variable specific coordinates
11871200
non_dim_coord_names = set(non_dim_coord_names)
1201+
1202+
for name in list(non_dim_coord_names):
1203+
if isinstance(name, basestring) and ' ' in name:
1204+
warnings.warn(
1205+
'coordinate {!r} has a space in its name, which means it '
1206+
'cannot be marked as a coordinate on disk and will be '
1207+
'saved as a data variable instead'.format(name),
1208+
SerializationWarning, stacklevel=6)
1209+
non_dim_coord_names.discard(name)
1210+
11881211
global_coordinates = non_dim_coord_names.copy()
11891212
variable_coordinates = defaultdict(set)
11901213
for coord_name in non_dim_coord_names:

xarray/core/dataarray.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def _infer_coords_and_dims(shape, coords, dims):
3535
"""All the logic for creating a new DataArray"""
3636

3737
if (coords is not None and not utils.is_dict_like(coords) and
38-
len(coords) != len(shape)):
38+
len(coords) != len(shape)):
3939
raise ValueError('coords is not dict-like, but it has %s items, '
4040
'which does not match the %s dimensions of the '
4141
'data' % (len(coords), len(shape)))
@@ -50,8 +50,8 @@ def _infer_coords_and_dims(shape, coords, dims):
5050
if utils.is_dict_like(coords):
5151
# deprecated in GH993, removed in GH1539
5252
raise ValueError('inferring DataArray dimensions from '
53-
'dictionary like ``coords`` has been '
54-
'deprecated. Use an explicit list of '
53+
'dictionary like ``coords`` is no longer '
54+
'supported. Use an explicit list of '
5555
'``dims`` instead.')
5656
for n, (dim, coord) in enumerate(zip(dims, coords)):
5757
coord = as_variable(coord,
@@ -87,6 +87,12 @@ def _infer_coords_and_dims(shape, coords, dims):
8787
'length %s on the data but length %s on '
8888
'coordinate %r' % (d, sizes[d], s, k))
8989

90+
if k in sizes and v.shape != (sizes[k],):
91+
raise ValueError('coordinate %r is a DataArray dimension, but '
92+
'it has shape %r rather than expected shape %r '
93+
'matching the dimension size'
94+
% (k, v.shape, (sizes[k],)))
95+
9096
assert_unique_multiindex_level_names(new_coords)
9197

9298
return new_coords, dims

xarray/core/variable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def as_variable(obj, name=None):
9696
'{}'.format(obj))
9797
elif utils.is_scalar(obj):
9898
obj = Variable([], obj)
99-
elif (isinstance(obj, (pd.Index, IndexVariable)) and obj.name is not None):
99+
elif isinstance(obj, (pd.Index, IndexVariable)) and obj.name is not None:
100100
obj = Variable(obj.name, obj)
101101
elif name is not None:
102102
data = as_compatible_data(obj)

xarray/plot/plot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def _plot2d(plotfunc):
402402
"""
403403

404404
# Build on the original docstring
405-
plotfunc.__doc__ = '\n'.join((plotfunc.__doc__, commondoc))
405+
plotfunc.__doc__ = '%s\n%s' % (plotfunc.__doc__, commondoc)
406406

407407
@functools.wraps(plotfunc)
408408
def newplotfunc(darray, x=None, y=None, figsize=None, size=None,

xarray/tests/test_backends.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,9 +376,17 @@ def test_roundtrip_coordinates(self):
376376
with self.roundtrip(original) as actual:
377377
self.assertDatasetIdentical(original, actual)
378378

379-
expected = original.drop('foo')
380-
with self.roundtrip(expected) as actual:
381-
self.assertDatasetIdentical(expected, actual)
379+
def test_roundtrip_global_coordinates(self):
380+
original = Dataset({'x': [2, 3], 'y': ('a', [42]), 'z': ('x', [4, 5])})
381+
with self.roundtrip(original) as actual:
382+
self.assertDatasetIdentical(original, actual)
383+
384+
def test_roundtrip_coordinates_with_space(self):
385+
original = Dataset(coords={'x': 0, 'y z': 1})
386+
expected = Dataset({'y z': 1}, {'x': 0})
387+
with pytest.warns(xr.SerializationWarning):
388+
with self.roundtrip(original) as actual:
389+
self.assertDatasetIdentical(expected, actual)
382390

383391
def test_roundtrip_boolean_dtype(self):
384392
original = create_boolean_data()
@@ -711,6 +719,13 @@ def test_append_overwrite_values(self):
711719
def test_vectorized_indexing(self):
712720
self._test_vectorized_indexing(vindex_support=False)
713721

722+
def test_multiindex_not_implemented(self):
723+
ds = (Dataset(coords={'y': ('x', [1, 2]), 'z': ('x', ['a', 'b'])})
724+
.set_index(x=['y', 'z']))
725+
with raises_regex(NotImplementedError, 'MultiIndex'):
726+
with self.roundtrip(ds):
727+
pass
728+
714729

715730
_counter = itertools.count()
716731

@@ -909,6 +924,21 @@ def test_compression_encoding(self):
909924
with self.roundtrip(expected) as actual:
910925
self.assertDatasetEqual(expected, actual)
911926

927+
def test_encoding_chunksizes_unlimited(self):
928+
# regression test for GH1225
929+
ds = Dataset({'x': [1, 2, 3], 'y': ('x', [2, 3, 4])})
930+
ds.variables['x'].encoding = {
931+
'zlib': False,
932+
'shuffle': False,
933+
'complevel': 0,
934+
'fletcher32': False,
935+
'contiguous': False,
936+
'chunksizes': (2 ** 20,),
937+
'original_shape': (3,),
938+
}
939+
with self.roundtrip(ds) as actual:
940+
self.assertDatasetEqual(ds, actual)
941+
912942
def test_mask_and_scale(self):
913943
with create_tmp_file() as tmp_file:
914944
with nc4.Dataset(tmp_file, mode='w') as nc:
@@ -1230,6 +1260,7 @@ def test_encoding_unlimited_dims(self):
12301260
save_kwargs=dict(unlimited_dims=['y'])) as actual:
12311261
self.assertEqual(actual.encoding['unlimited_dims'], set('y'))
12321262
self.assertDatasetEqual(ds, actual)
1263+
12331264
ds.encoding = {'unlimited_dims': ['y']}
12341265
with self.roundtrip(ds) as actual:
12351266
self.assertEqual(actual.encoding['unlimited_dims'], set('y'))
@@ -1506,8 +1537,11 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={},
15061537
allow_cleanup_failure=False):
15071538
yield data.chunk()
15081539

1540+
# Override methods in DatasetIOTestCases - not applicable to dask
15091541
def test_roundtrip_string_encoded_characters(self):
1510-
# Override method in DatasetIOTestCases - not applicable to dask
1542+
pass
1543+
1544+
def test_roundtrip_coordinates_with_space(self):
15111545
pass
15121546

15131547
def test_roundtrip_datetime_data(self):

0 commit comments

Comments
 (0)