diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 17ff3ab661c..728e40d4409 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,8 +59,13 @@ Bug fixes ``dask.threaded.get``. By `Matthew Rocklin `_. - Bug fixes in :py:meth:`DataArray.plot.imshow`: all-NaN arrays and arrays with size one in some dimension can now be plotted, which is good for - exploring satellite imagery. (:issue:`1780`) + exploring satellite imagery (:issue:`1780`). By `Zac Hatfield-Dodds `_. +- The ``variables``, ``attrs``, and ``dimensions`` properties have been + deprecated as part of a bug fix addressing an issue where backends were + unintentionally loading the datastore's data and attributes repeatedly during + writes (:issue:`1798`). + By `Joe Hamman `_. .. _whats-new.0.10.0: diff --git a/xarray/backends/common.py b/xarray/backends/common.py index fd408877f87..83753ced8f5 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -7,7 +7,7 @@ import traceback import contextlib from collections import Mapping -from distutils.version import LooseVersion +import warnings from ..conventions import cf_encoder from ..core import indexing @@ -133,24 +133,25 @@ def load(self): @property def variables(self): - # Because encoding/decoding might happen which may require both the - # attributes and the variables, and because a store may be updated - # we need to load both the attributes and variables - # anytime either one is requested. + warnings.warn('The ``variables`` property has been deprecated and ' + 'will be removed in xarray v0.11.', + FutureWarning, stacklevel=2) variables, _ = self.load() return variables @property def attrs(self): - # Because encoding/decoding might happen which may require both the - # attributes and the variables, and because a store may be updated - # we need to load both the attributes and variables - # anytime either one is requested. 
- _, attributes = self.load() - return attributes + warnings.warn('The ``attrs`` property has been deprecated and ' + 'will be removed in xarray v0.11.', + FutureWarning, stacklevel=2) + _, attrs = self.load() + return attrs @property def dimensions(self): + warnings.warn('The ``dimensions`` property has been deprecated and ' + 'will be removed in xarray v0.11.', + FutureWarning, stacklevel=2) return self.get_dimensions() def close(self): @@ -183,11 +184,7 @@ def add(self, source, target): def sync(self): if self.sources: import dask.array as da - import dask - if LooseVersion(dask.__version__) > LooseVersion('0.8.1'): - da.store(self.sources, self.targets, lock=self.lock) - else: - da.store(self.sources, self.targets) + da.store(self.sources, self.targets, lock=self.lock) self.sources = [] self.targets = [] @@ -232,19 +229,17 @@ def set_variables(self, variables, check_encoding_set, for vn, v in iteritems(variables): name = _encode_variable_name(vn) check = vn in check_encoding_set - if vn not in self.variables: - target, source = self.prepare_variable( - name, v, check, unlimited_dims=unlimited_dims) - else: - target, source = self.ds.variables[name], v.data + target, source = self.prepare_variable( + name, v, check, unlimited_dims=unlimited_dims) self.writer.add(source, target) def set_necessary_dimensions(self, variable, unlimited_dims=None): if unlimited_dims is None: unlimited_dims = set() + dims = self.get_dimensions() for d, l in zip(variable.dims, variable.shape): - if d not in self.dimensions: + if d not in dims: is_unlimited = d in unlimited_dims self.set_dimension(d, l, is_unlimited) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index b4d2dc7e689..82abaade06a 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -156,8 +156,11 @@ def prepare_variable(self, name, variable, check_encoding=False, 'chunksizes', 'fletcher32']: if key in encoding: kwargs[key] = encoding[key] - nc4_var = 
self.ds.createVariable(name, dtype, variable.dims, - fill_value=fill_value, **kwargs) + if name not in self.ds.variables: + nc4_var = self.ds.createVariable(name, dtype, variable.dims, + fill_value=fill_value, **kwargs) + else: + nc4_var = self.ds.variables[name] for k, v in iteritems(attrs): nc4_var.setncattr(k, v) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 59e195b1c9a..d8aa33f35dc 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -352,20 +352,24 @@ def prepare_variable(self, name, variable, check_encoding=False, encoding = _extract_nc4_variable_encoding( variable, raise_on_invalid=check_encoding, unlimited_dims=unlimited_dims) - nc4_var = self.ds.createVariable( - varname=name, - datatype=datatype, - dimensions=variable.dims, - zlib=encoding.get('zlib', False), - complevel=encoding.get('complevel', 4), - shuffle=encoding.get('shuffle', True), - fletcher32=encoding.get('fletcher32', False), - contiguous=encoding.get('contiguous', False), - chunksizes=encoding.get('chunksizes'), - endian='native', - least_significant_digit=encoding.get('least_significant_digit'), - fill_value=fill_value) - _disable_auto_decode_variable(nc4_var) + if name in self.ds.variables: + nc4_var = self.ds.variables[name] + else: + nc4_var = self.ds.createVariable( + varname=name, + datatype=datatype, + dimensions=variable.dims, + zlib=encoding.get('zlib', False), + complevel=encoding.get('complevel', 4), + shuffle=encoding.get('shuffle', True), + fletcher32=encoding.get('fletcher32', False), + contiguous=encoding.get('contiguous', False), + chunksizes=encoding.get('chunksizes'), + endian='native', + least_significant_digit=encoding.get( + 'least_significant_digit'), + fill_value=fill_value) + _disable_auto_decode_variable(nc4_var) for k, v in iteritems(attrs): # set attributes one-by-one since netCDF4<1.0.10 can't handle diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 7194e06186f..7aa054bc119 100644 --- 
a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -6,7 +6,6 @@ import numpy as np from .. import conventions, Variable -from ..core import duck_array_ops from ..core.pycompat import basestring, unicode_type, OrderedDict diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 240b8f2ebaa..0994d8510b8 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -165,7 +165,7 @@ def get_encoding(self): def set_dimension(self, name, length, is_unlimited=False): with self.ensure_open(autoclose=False): - if name in self.dimensions: + if name in self.ds.dimensions: raise ValueError('%s does not support modifying dimensions' % type(self).__name__) dim_length = length if not is_unlimited else None @@ -196,7 +196,8 @@ def prepare_variable(self, name, variable, check_encoding=False, # nb. this still creates a numpy array in all memory, even though we # don't write the data yet; scipy.io.netcdf does not not support # incremental writes. - self.ds.createVariable(name, data.dtype, variable.dims) + if name not in self.ds.variables: + self.ds.createVariable(name, data.dtype, variable.dims) scipy_var = self.ds.variables[name] for k, v in iteritems(variable.attrs): self._validate_attr_key(k) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 779d8d07886..30ea51811c4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -378,8 +378,11 @@ def prepare_variable(self, name, variable, check_encoding=False, # compressor='default', fill_value=0, order='C', store=None, # synchronizer=None, overwrite=False, path=None, chunk_store=None, # filters=None, cache_metadata=True, **kwargs) - zarr_array = self.ds.create(name, shape=shape, dtype=dtype, - fill_value=fill_value, **encoding) + if name in self.ds: + zarr_array = self.ds[name] + else: + zarr_array = self.ds.create(name, shape=shape, dtype=dtype, + fill_value=fill_value, **encoding) # decided not to explicity enumerate encoding options because we # risk overriding 
zarr's defaults (e.g. if we specificy # cache_metadata=None instead of True). Alternative is to have lots of diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 385ab2066cf..e3bead51a94 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,7 +5,6 @@ from collections import defaultdict import functools import itertools -from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -1392,9 +1391,6 @@ def quantile(self, q, dim=None, interpolation='linear'): raise TypeError("quantile does not work for arrays stored as dask " "arrays. Load the data via .compute() or .load() " "prior to calling this method.") - if LooseVersion(np.__version__) < LooseVersion('1.10.0'): - raise NotImplementedError( - 'quantile requres numpy version 1.10.0 or later') q = np.asarray(q, dtype=np.float64)