Skip to content

Commit 63a3bc7

Browse files
committed
Merge branch 'master' into yohai-ds_scatter
* master: pyupgrade one-off run (pydata#3190) mfdataset, concat now support the 'join' kwarg. (pydata#3102) reduce the size of example dataset in dask docs (pydata#3187) add climpred to related-projects (pydata#3188) bump rasterio to 1.0.24 in doc building environment (pydata#3186) More annotations (pydata#3177) Support for __array_function__ implementers (sparse arrays) [WIP] (pydata#3117) Internal clean-up of isnull() to avoid relying on pandas (pydata#3132) Call darray.compute() in plot() (pydata#3183) BUG: fix + test open_mfdataset fails on variable attributes with list… (pydata#3181)
2 parents a805c59 + 8a9c471 commit 63a3bc7

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+1488
-317
lines changed

ci/requirements/py37.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies:
2121
- pip
2222
- scipy
2323
- seaborn
24+
- sparse
2425
- toolz
2526
- rasterio
2627
- boto3

doc/dask.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ argument to :py:func:`~xarray.open_dataset` or using the
5858
np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3)
5959
6060
ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'),
61-
np.random.randn(365, 180, 360)),
62-
'time': pd.date_range('2015-01-01', periods=365),
63-
'longitude': np.arange(360),
61+
np.random.randn(30, 180, 180)),
62+
'time': pd.date_range('2015-01-01', periods=30),
63+
'longitude': np.arange(180),
6464
'latitude': np.arange(89.5, -90.5, -1)})
6565
ds.to_netcdf('example-data.nc')
6666

doc/environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dependencies:
1212
- ipython=7.2.0
1313
- netCDF4=1.4.2
1414
- cartopy=0.17.0
15-
- rasterio=1.0.13
15+
- rasterio=1.0.24
1616
- zarr=2.2.0
1717
- iris=2.2.0
1818
- flake8=3.6.0

doc/related-projects.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Geosciences
1111
~~~~~~~~~~~
1212

1313
- `aospy <https://aospy.readthedocs.io>`_: Automated analysis and management of gridded climate data.
14+
- `climpred <https://climpred.readthedocs.io>`_: Analysis of ensemble forecast models for climate prediction.
1415
- `infinite-diff <https://github.com/spencerahill/infinite-diff>`_: xarray-based finite-differencing, focused on gridded climate/meteorology data
1516
- `marc_analysis <https://github.com/darothen/marc_analysis>`_: Analysis package for CESM/MARC experiments and output.
1617
- `MetPy <https://unidata.github.io/MetPy/dev/index.html>`_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data.

doc/whats-new.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ New functions/methods
4444
Enhancements
4545
~~~~~~~~~~~~
4646

47+
- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg.
48+
It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian <https://github.com/dcherian>`_.
4749
- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if
4850
``append_dim`` is set, as it will automatically be set to ``'a'`` internally.
4951
By `David Brochart <https://github.com/davidbrochart>`_.
@@ -65,8 +67,12 @@ Bug fixes
6567
By `Tom Nicholas <http://github.com/TomNicholas>`_.
6668
- Fixed crash when applying ``distributed.Client.compute()`` to a DataArray
6769
(:issue:`3171`). By `Guido Imperiale <https://github.com/crusaderky>`_.
68-
69-
70+
- Better error message when using groupby on an empty DataArray (:issue:`3037`).
71+
By `Hasan Ahmad <https://github.com/HasanAhmadQ7>`_.
72+
- Fix error that arises when using open_mfdataset on a series of netcdf files
73+
having differing values for a variable attribute of type list. (:issue:`3034`)
74+
By `Hasan Ahmad <https://github.com/HasanAhmadQ7>`_.
75+
7076
.. _whats-new.0.12.3:
7177

7278
v0.12.3 (10 July 2019)
@@ -107,8 +113,6 @@ Bug fixes
107113
- Fix HDF5 error that could arise when reading multiple groups from a file at
108114
once (:issue:`2954`).
109115
By `Stephan Hoyer <https://github.com/shoyer>`_.
110-
- Better error message when using groupby on an empty DataArray (:issue:`3037`).
111-
By `Hasan Ahmad <https://github.com/HasanAhmadQ7>`_.
112116

113117
.. _whats-new.0.12.2:
114118

properties/test_encode_decode.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
These ones pass, just as you'd hope!
55
66
"""
7-
from __future__ import absolute_import, division, print_function
8-
97
import hypothesis.extra.numpy as npst
108
import hypothesis.strategies as st
119
from hypothesis import given, settings

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ ignore_missing_imports = True
8383
ignore_missing_imports = True
8484
[mypy-seaborn.*]
8585
ignore_missing_imports = True
86+
[mypy-sparse.*]
87+
ignore_missing_imports = True
8688
[mypy-toolz.*]
8789
ignore_missing_imports = True
8890
[mypy-zarr.*]

versioneer.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
398398
stderr=(subprocess.PIPE if hide_stderr
399399
else None))
400400
break
401-
except EnvironmentError:
401+
except OSError:
402402
e = sys.exc_info()[1]
403403
if e.errno == errno.ENOENT:
404404
continue
@@ -421,7 +421,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
421421
return stdout, p.returncode
422422

423423

424-
LONG_VERSION_PY['git'] = '''
424+
LONG_VERSION_PY['git'] = r'''
425425
# This file helps to compute a version number in source trees obtained from
426426
# git-archive tarball (such as those provided by githubs download-from-tag
427427
# feature). Distribution tarballs (built by setup.py sdist) and build
@@ -968,7 +968,7 @@ def git_get_keywords(versionfile_abs):
968968
if mo:
969969
keywords["date"] = mo.group(1)
970970
f.close()
971-
except EnvironmentError:
971+
except OSError:
972972
pass
973973
return keywords
974974

@@ -992,11 +992,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
992992
if verbose:
993993
print("keywords are unexpanded, not using")
994994
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
995-
refs = set([r.strip() for r in refnames.strip("()").split(",")])
995+
refs = {r.strip() for r in refnames.strip("()").split(",")}
996996
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
997997
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
998998
TAG = "tag: "
999-
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
999+
tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
10001000
if not tags:
10011001
# Either we're using git < 1.8.3, or there really are no tags. We use
10021002
# a heuristic: assume all version tags have a digit. The old git %d
@@ -1005,7 +1005,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
10051005
# between branches and tags. By ignoring refnames without digits, we
10061006
# filter out many common branch names like "release" and
10071007
# "stabilization", as well as "HEAD" and "master".
1008-
tags = set([r for r in refs if re.search(r'\d', r)])
1008+
tags = {r for r in refs if re.search(r'\d', r)}
10091009
if verbose:
10101010
print("discarding '%s', no digits" % ",".join(refs - tags))
10111011
if verbose:
@@ -1148,7 +1148,7 @@ def do_vcs_install(manifest_in, versionfile_source, ipy):
11481148
if "export-subst" in line.strip().split()[1:]:
11491149
present = True
11501150
f.close()
1151-
except EnvironmentError:
1151+
except OSError:
11521152
pass
11531153
if not present:
11541154
f = open(".gitattributes", "a+")
@@ -1206,7 +1206,7 @@ def versions_from_file(filename):
12061206
try:
12071207
with open(filename) as f:
12081208
contents = f.read()
1209-
except EnvironmentError:
1209+
except OSError:
12101210
raise NotThisMethod("unable to read _version.py")
12111211
mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON",
12121212
contents, re.M | re.S)
@@ -1702,8 +1702,7 @@ def do_setup():
17021702
root = get_root()
17031703
try:
17041704
cfg = get_config_from_root(root)
1705-
except (EnvironmentError, configparser.NoSectionError,
1706-
configparser.NoOptionError) as e:
1705+
except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e:
17071706
if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
17081707
print("Adding sample versioneer config to setup.cfg",
17091708
file=sys.stderr)
@@ -1728,7 +1727,7 @@ def do_setup():
17281727
try:
17291728
with open(ipy, "r") as f:
17301729
old = f.read()
1731-
except EnvironmentError:
1730+
except OSError:
17321731
old = ""
17331732
if INIT_PY_SNIPPET not in old:
17341733
print(" appending to %s" % ipy)
@@ -1752,7 +1751,7 @@ def do_setup():
17521751
if line.startswith("include "):
17531752
for include in line.split()[1:]:
17541753
simple_includes.add(include)
1755-
except EnvironmentError:
1754+
except OSError:
17561755
pass
17571756
# That doesn't cover everything MANIFEST.in can do
17581757
# (http://docs.python.org/2/distutils/sourcedist.html#commands), so

xarray/_version.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
8181
stderr=(subprocess.PIPE if hide_stderr
8282
else None))
8383
break
84-
except EnvironmentError:
84+
except OSError:
8585
e = sys.exc_info()[1]
8686
if e.errno == errno.ENOENT:
8787
continue
@@ -153,7 +153,7 @@ def git_get_keywords(versionfile_abs):
153153
if mo:
154154
keywords["date"] = mo.group(1)
155155
f.close()
156-
except EnvironmentError:
156+
except OSError:
157157
pass
158158
return keywords
159159

@@ -177,11 +177,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
177177
if verbose:
178178
print("keywords are unexpanded, not using")
179179
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
180-
refs = set([r.strip() for r in refnames.strip("()").split(",")])
180+
refs = {r.strip() for r in refnames.strip("()").split(",")}
181181
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
182182
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
183183
TAG = "tag: "
184-
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
184+
tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
185185
if not tags:
186186
# Either we're using git < 1.8.3, or there really are no tags. We use
187187
# a heuristic: assume all version tags have a digit. The old git %d
@@ -190,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
190190
# between branches and tags. By ignoring refnames without digits, we
191191
# filter out many common branch names like "release" and
192192
# "stabilization", as well as "HEAD" and "master".
193-
tags = set([r for r in refs if re.search(r'\d', r)])
193+
tags = {r for r in refs if re.search(r'\d', r)}
194194
if verbose:
195195
print("discarding '%s', no digits" % ",".join(refs - tags))
196196
if verbose:

xarray/backends/api.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
609609
compat='no_conflicts', preprocess=None, engine=None,
610610
lock=None, data_vars='all', coords='different',
611611
combine='_old_auto', autoclose=None, parallel=False,
612-
**kwargs):
612+
join='outer', **kwargs):
613613
"""Open multiple files as a single dataset.
614614
615615
If combine='by_coords' then the function ``combine_by_coords`` is used to
@@ -704,6 +704,16 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
704704
parallel : bool, optional
705705
If True, the open and preprocess steps of this function will be
706706
performed in parallel using ``dask.delayed``. Default is False.
707+
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
708+
String indicating how to combine differing indexes
709+
(excluding concat_dim) in objects
710+
711+
- 'outer': use the union of object indexes
712+
- 'inner': use the intersection of object indexes
713+
- 'left': use indexes from the first object with each dimension
714+
- 'right': use indexes from the last object with each dimension
715+
- 'exact': instead of aligning, raise `ValueError` when indexes to be
716+
aligned are not equal
707717
**kwargs : optional
708718
Additional arguments passed on to :py:func:`xarray.open_dataset`.
709719
@@ -742,7 +752,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
742752
paths = [str(p) if isinstance(p, Path) else p for p in paths]
743753

744754
if not paths:
745-
raise IOError('no files to open')
755+
raise OSError('no files to open')
746756

747757
# If combine='by_coords' then this is unnecessary, but quick.
748758
# If combine='nested' then this creates a flat list which is easier to
@@ -798,18 +808,20 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
798808

799809
combined = auto_combine(datasets, concat_dim=concat_dim,
800810
compat=compat, data_vars=data_vars,
801-
coords=coords, from_openmfds=True)
811+
coords=coords, join=join,
812+
from_openmfds=True)
802813
elif combine == 'nested':
803814
# Combined nested list by successive concat and merge operations
804815
# along each dimension, using structure given by "ids"
805816
combined = _nested_combine(datasets, concat_dims=concat_dim,
806817
compat=compat, data_vars=data_vars,
807-
coords=coords, ids=ids)
818+
coords=coords, ids=ids, join=join)
808819
elif combine == 'by_coords':
809820
# Redo ordering from coordinates, ignoring how they were ordered
810821
# previously
811822
combined = combine_by_coords(datasets, compat=compat,
812-
data_vars=data_vars, coords=coords)
823+
data_vars=data_vars, coords=coords,
824+
join=join)
813825
else:
814826
raise ValueError("{} is an invalid option for the keyword argument"
815827
" ``combine``".format(combine))
@@ -1039,7 +1051,7 @@ def save_mfdataset(datasets, paths, mode='w', format=None, groups=None,
10391051
if groups is None:
10401052
groups = [None] * len(datasets)
10411053

1042-
if len(set([len(datasets), len(paths), len(groups)])) > 1:
1054+
if len({len(datasets), len(paths), len(groups)}) > 1:
10431055
raise ValueError('must supply lists of the same length for the '
10441056
'datasets, paths and groups arguments to '
10451057
'save_mfdataset')

xarray/backends/netCDF4_.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def _netcdf4_create_group(dataset, name):
138138

139139

140140
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
141-
if group in set([None, '', '/']):
141+
if group in {None, '', '/'}:
142142
# use the root group
143143
return ds
144144
else:
@@ -155,7 +155,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
155155
ds = create_group(ds, key)
156156
else:
157157
# wrap error to provide slightly more helpful message
158-
raise IOError('group not found: %s' % key, e)
158+
raise OSError('group not found: %s' % key, e)
159159
return ds
160160

161161

@@ -195,9 +195,11 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False,
195195

196196
encoding = variable.encoding.copy()
197197

198-
safe_to_drop = set(['source', 'original_shape'])
199-
valid_encodings = set(['zlib', 'complevel', 'fletcher32', 'contiguous',
200-
'chunksizes', 'shuffle', '_FillValue', 'dtype'])
198+
safe_to_drop = {'source', 'original_shape'}
199+
valid_encodings = {
200+
'zlib', 'complevel', 'fletcher32', 'contiguous',
201+
'chunksizes', 'shuffle', '_FillValue', 'dtype'
202+
}
201203
if lsd_okay:
202204
valid_encodings.add('least_significant_digit')
203205
if h5py_okay:

xarray/backends/netcdf3.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111

1212
# The following are reserved names in CDL and may not be used as names of
1313
# variables, dimension, attributes
14-
_reserved_names = set(['byte', 'char', 'short', 'ushort', 'int', 'uint',
15-
'int64', 'uint64', 'float' 'real', 'double', 'bool',
16-
'string'])
14+
_reserved_names = {
15+
'byte', 'char', 'short', 'ushort', 'int', 'uint', 'int64', 'uint64',
16+
'float' 'real', 'double', 'bool', 'string'
17+
}
1718

1819
# These data-types aren't supported by netCDF3, so they are automatically
1920
# coerced instead as indicated by the "coerce_nc3_dtype" function
@@ -108,4 +109,4 @@ def is_valid_nc3_name(s):
108109
('/' not in s) and
109110
(s[-1] != ' ') and
110111
(_isalnumMUTF8(s[0]) or (s[0] == '_')) and
111-
all((_isalnumMUTF8(c) or c in _specialchars for c in s)))
112+
all(_isalnumMUTF8(c) or c in _specialchars for c in s))

xarray/backends/pseudonetcdf_.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,18 @@ def get_variables(self):
7575
for k, v in self.ds.variables.items())
7676

7777
def get_attrs(self):
78-
return Frozen(dict([(k, getattr(self.ds, k))
79-
for k in self.ds.ncattrs()]))
78+
return Frozen({k: getattr(self.ds, k) for k in self.ds.ncattrs()})
8079

8180
def get_dimensions(self):
8281
return Frozen(self.ds.dimensions)
8382

8483
def get_encoding(self):
85-
encoding = {}
86-
encoding['unlimited_dims'] = set(
87-
[k for k in self.ds.dimensions
88-
if self.ds.dimensions[k].isunlimited()])
89-
return encoding
84+
return {
85+
'unlimited_dims': {
86+
k for k in self.ds.dimensions
87+
if self.ds.dimensions[k].isunlimited()
88+
}
89+
}
9090

9191
def close(self):
9292
self._manager.close()

xarray/backends/pynio_.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,12 @@ def get_dimensions(self):
7575
return Frozen(self.ds.dimensions)
7676

7777
def get_encoding(self):
78-
encoding = {}
79-
encoding['unlimited_dims'] = set(
80-
[k for k in self.ds.dimensions if self.ds.unlimited(k)])
81-
return encoding
78+
return {
79+
'unlimited_dims': {
80+
k for k in self.ds.dimensions
81+
if self.ds.unlimited(k)
82+
}
83+
}
8284

8385
def close(self):
8486
self._manager.close()

0 commit comments

Comments
 (0)