From d62ff4d331123c3096e6cf6a0bfdd2e5f8366484 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 2 Jul 2020 15:53:25 -0600 Subject: [PATCH 01/17] Propagate attrs with unary, binary functions Closes #3490 Closes #4065 Closes #3433 Closes #3595 --- xarray/core/arithmetic.py | 3 ++- xarray/core/computation.py | 29 ++++++++++++++++++++++++----- xarray/core/dataset.py | 7 +++++-- xarray/core/variable.py | 4 +++- xarray/tests/test_dataarray.py | 25 ++++++++++++++++++++++++- 5 files changed, 58 insertions(+), 10 deletions(-) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 571dfbe70ed..b93e9064115 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -3,7 +3,7 @@ import numpy as np -from .options import OPTIONS +from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .utils import not_implemented @@ -77,6 +77,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): dataset_fill_value=np.nan, kwargs=kwargs, dask="allowed", + keep_attrs=_get_keep_attrs(default=False), ) # this has no runtime function - these are listed so IDEs know these diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 94d4c6b1540..7277e891404 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -223,8 +223,13 @@ def apply_dataarray_vfunc( args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False ) - if keep_attrs and hasattr(args[0], "name"): - name = args[0].name + for arg in args: + first_obj = arg + if isinstance(arg, DataArray): + break + + if keep_attrs and hasattr(first_obj, "name"): + name = first_obj.name else: name = result_name(args) result_coords = build_output_coords(args, signature, exclude_dims) @@ -241,6 +246,12 @@ def apply_dataarray_vfunc( (coords,) = result_coords out = DataArray(result_var, coords, name=name, fastpath=True) + if keep_attrs and hasattr(first_obj, "attrs"): + if isinstance(out, tuple): + out = tuple(da._copy_attrs_from(first_obj) for da in out) + else: + out._copy_attrs_from(first_obj) + return out @@ -361,7 +372,10 @@ def apply_dataset_vfunc( """ from .dataset import Dataset - first_obj = args[0] # we'll copy attrs from this in case keep_attrs=True + for arg in args: + first_obj = args + if isinstance(first_obj, Dataset): + break if dataset_join not in _JOINS_WITHOUT_FILL_VALUES and fill_value is _NO_FILL_VALUE: raise TypeError( @@ -554,6 +568,11 @@ def apply_variable_ufunc( """ from .variable import Variable, as_compatible_data + for arg in args: + first_obj = arg + if isinstance(arg, Variable): + break + dim_sizes = unified_dim_sizes( (a for a in args if hasattr(a, "dims")), exclude_dims=exclude_dims ) @@ -639,8 +658,8 @@ def func(*arrays): ) ) - if keep_attrs and isinstance(args[0], Variable): - var.attrs.update(args[0].attrs) + if keep_attrs and isinstance(first_obj, Variable): + var.attrs.update(first_obj.attrs) output.append(var) if signature.num_outputs == 1: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1b0e01914f2..1a0258160c3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4368,12 +4368,15 @@ def map( foo (dim_0, dim_1) float64 0.3751 1.951 1.945 0.2948 0.711 0.3948 bar (x) float64 1.0 2.0 """ + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) variables = { k: maybe_wrap_array(v, func(v, *args, **kwargs)) for k, v in self.data_vars.items() } - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + if keep_attrs: + for k, v in variables.items(): + v._copy_attrs_from(self.data_vars[k]) attrs = self.attrs if keep_attrs else None return type(self)(variables, attrs=attrs) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c505c749557..d4bf4c259c9 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2024,7 +2024,9 @@ def imag(self): return type(self)(self.dims, self.data.imag, self._attrs) def __array_wrap__(self, obj, context=None): - return Variable(self.dims, obj) + keep_attrs = _get_keep_attrs(default=False) + attrs = self._attrs if keep_attrs else {} + return Variable(self.dims, obj, attrs) @staticmethod def _unary_op(f): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index e0da3f1527f..dee3404639a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -9,7 +9,15 @@ import pytest import xarray as xr -from xarray import DataArray, Dataset, IndexVariable, Variable, align, broadcast +from xarray import ( + DataArray, + Dataset, + IndexVariable, + Variable, + align, + broadcast, + set_options, +) from xarray.coding.times import CFDatetimeCoder from xarray.convert import from_cdms2 from xarray.core import dtypes @@ -2458,6 +2466,21 @@ def test_assign_attrs(self): assert_identical(new_actual, expected) assert actual.attrs == {"a": 1, "b": 2} + def test_propagate_attrs(self): + da = DataArray(self.va) + + # test defaults + assert da.clip(0, 1).attrs != da.attrs + assert (np.float64(1.0) * da).attrs != da.attrs + assert np.abs(da).attrs != da.attrs + assert abs(da).attrs != da.attrs + + with set_options(keep_attrs=True): + assert da.clip(0, 1).attrs == da.attrs + assert (np.float64(1.0) * da).attrs == da.attrs + assert np.abs(da).attrs == da.attrs + assert abs(da).attrs == da.attrs + def test_fillna(self): a = DataArray([np.nan, 1, np.nan, 3], coords={"x": range(4)}, dims="x") actual = a.fillna(-1) From a5d7c0958f3c8da0a7f46a36eb93482bb9458a68 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 6 Jul 2020 14:12:53 -0600 Subject: [PATCH 02/17] Un xfail test --- xarray/tests/test_weighted.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 1bf685cc95d..531e5ab5846 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -320,7 +320,6 @@ def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs): assert not result.attrs -@pytest.mark.xfail(reason="xr.Dataset.map does not copy attrs of DataArrays GH: 3595") @pytest.mark.parametrize("operation", ("sum", "mean")) def test_weighted_operations_keep_attr_da_in_ds(operation): # GH #3595 From 548b158b381b429d3850dce37e16477675f1eac1 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 25 Jul 2020 16:56:48 -0600 Subject: [PATCH 03/17] bugfix --- xarray/core/computation.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 7277e891404..b2a5dae13e6 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -40,6 +40,12 @@ _JOINS_WITHOUT_FILL_VALUES = frozenset({"inner", "exact"}) +def _first_of_type(args, kind): + for arg in args: + if isinstance(arg, kind): + return arg + + class _UFuncSignature: """Core dimensions signature for a given function. @@ -223,10 +229,7 @@ def apply_dataarray_vfunc( args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False ) - for arg in args: - first_obj = arg - if isinstance(arg, DataArray): - break + first_obj = _first_of_type(args, DataArray) if keep_attrs and hasattr(first_obj, "name"): name = first_obj.name @@ -372,10 +375,7 @@ def apply_dataset_vfunc( """ from .dataset import Dataset - for arg in args: - first_obj = args - if isinstance(first_obj, Dataset): - break + first_obj = _first_of_type(args, Dataset) if dataset_join not in _JOINS_WITHOUT_FILL_VALUES and fill_value is _NO_FILL_VALUE: raise TypeError( @@ -568,10 +568,7 @@ def apply_variable_ufunc( """ from .variable import Variable, as_compatible_data - for arg in args: - first_obj = arg - if isinstance(arg, Variable): - break + first_obj = _first_of_type(args, Variable) dim_sizes = unified_dim_sizes( (a for a in args if hasattr(a, "dims")), exclude_dims=exclude_dims From ad75394bc4813dc52af0aabf1bb690309d9602d7 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 25 Jul 2020 17:23:21 -0600 Subject: [PATCH 04/17] Some progress. Still need keep_attrs in DataArray._unary_op --- xarray/core/arithmetic.py | 2 +- xarray/core/dataset.py | 5 ++++- xarray/tests/test_dataarray.py | 15 ++++++++------- xarray/tests/test_dataset.py | 17 +++++++++++++++++ 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index b93e9064115..8eba0fe7919 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -77,7 +77,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): dataset_fill_value=np.nan, kwargs=kwargs, dask="allowed", - keep_attrs=_get_keep_attrs(default=False), + keep_attrs=_get_keep_attrs(default=True), ) # this has no runtime function - these are listed so IDEs know these diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1a0258160c3..216833ccce0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4853,7 +4853,10 @@ def from_dict(cls, d): return obj @staticmethod - def _unary_op(f, keep_attrs=False): + def _unary_op(f, keep_attrs=None): + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + @functools.wraps(f) def func(self, *args, **kwargs): variables = {} diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index dee3404639a..a5af4a921d9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2470,16 +2470,17 @@ def test_propagate_attrs(self): da = DataArray(self.va) # test defaults + assert (np.float64(1.0) * da).attrs == da.attrs + assert np.abs(da).attrs == da.attrs + # not sure about the next two assert da.clip(0, 1).attrs != da.attrs - assert (np.float64(1.0) * da).attrs != da.attrs - assert np.abs(da).attrs != da.attrs assert abs(da).attrs != da.attrs - with set_options(keep_attrs=True): - assert da.clip(0, 1).attrs == da.attrs - assert (np.float64(1.0) * da).attrs == da.attrs - assert np.abs(da).attrs == da.attrs - assert abs(da).attrs == da.attrs + with set_options(keep_attrs=False): + assert da.clip(0, 1).attrs != da.attrs + assert (np.float64(1.0) * da).attrs != da.attrs + assert np.abs(da).attrs != da.attrs + assert abs(da).attrs != da.attrs def test_fillna(self): a = DataArray([np.nan, 1, np.nan, 3], coords={"x": range(4)}, dims="x") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 9037013cc79..13118889eb8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4388,6 +4388,23 @@ def test_fillna(self): assert actual.a.name == "a" assert actual.a.attrs == ds.a.attrs + def test_propagate_attrs(self): + + da = DataArray(range(5), name="a", attrs={"attr": "da"}) + ds = Dataset({"a": da}, attrs={"attr": "ds"}) + + # test defaults + assert ds.clip(0, 1).attrs == ds.attrs + assert (np.float64(1.0) * ds).attrs == ds.attrs + assert np.abs(ds).attrs == ds.attrs + assert abs(ds).attrs == ds.attrs + + with set_options(keep_attrs=False): + assert ds.clip(0, 1).attrs != ds.attrs + assert (np.float64(1.0) * ds).attrs != ds.attrs + assert np.abs(ds).attrs != ds.attrs + assert abs(ds).attrs != ds.attrs + def test_where(self): ds = Dataset({"a": ("x", range(5))}) expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) From 9c578d6e69d5e770a950c19d5a13b4baf0d06838 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 15 Aug 2020 15:09:02 -0600 Subject: [PATCH 05/17] Fix dataset attrs --- xarray/core/dataset.py | 10 ++++++---- xarray/core/options.py | 2 +- xarray/core/variable.py | 12 ++++++++---- xarray/tests/test_dataset.py | 17 +++++++---------- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 66c2110f963..9bf824cfa9f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4899,18 +4899,20 @@ def from_dict(cls, d): return obj @staticmethod - def _unary_op(f, keep_attrs=None): - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=True) - + def _unary_op(f): @functools.wraps(f) def func(self, *args, **kwargs): variables = {} + keep_attrs = kwargs.pop("keep_attrs", None) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) for k, v in self._variables.items(): if k in self._coord_names: variables[k] = v else: variables[k] = f(v, *args, **kwargs) + if keep_attrs: + variables[k].attrs = v._attrs attrs = self._attrs if keep_attrs else None return self._replace_with_new_dims(variables, attrs=attrs) diff --git a/xarray/core/options.py b/xarray/core/options.py index bb1b1c47840..6f268d320d2 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -71,7 +71,7 @@ def _get_keep_attrs(default): return global_choice else: raise ValueError( - "The global option keep_attrs must be one of" " True, False or 'default'." + "The global option keep_attrs must be one of True, False or 'default'." ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c7ea5b94fb2..1f64bd74728 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2035,16 +2035,20 @@ def imag(self): return type(self)(self.dims, self.data.imag, self._attrs) def __array_wrap__(self, obj, context=None): - keep_attrs = _get_keep_attrs(default=False) - attrs = self._attrs if keep_attrs else {} - return Variable(self.dims, obj, attrs) + return Variable(self.dims, obj) @staticmethod def _unary_op(f): @functools.wraps(f) def func(self, *args, **kwargs): + keep_attrs = kwargs.pop("keep_attrs", None) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) with np.errstate(all="ignore"): - return self.__array_wrap__(f(self.data, *args, **kwargs)) + result = self.__array_wrap__(f(self.data, *args, **kwargs)) + if keep_attrs: + result.attrs = self._attrs + return result return func diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index edf528c391b..6983a89bd0f 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4415,22 +4415,19 @@ def test_fillna(self): assert actual.a.name == "a" assert actual.a.attrs == ds.a.attrs - def test_propagate_attrs(self): + @pytest.mark.parametrize( + "func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs] + ) + def test_propagate_attrs(self, func): da = DataArray(range(5), name="a", attrs={"attr": "da"}) ds = Dataset({"a": da}, attrs={"attr": "ds"}) # test defaults - assert ds.clip(0, 1).attrs == ds.attrs - assert (np.float64(1.0) * ds).attrs == ds.attrs - assert np.abs(ds).attrs == ds.attrs - assert abs(ds).attrs == ds.attrs - + assert func(ds).attrs == ds.attrs with set_options(keep_attrs=False): - assert ds.clip(0, 1).attrs != ds.attrs - assert (np.float64(1.0) * ds).attrs != ds.attrs - assert np.abs(ds).attrs != ds.attrs - assert abs(ds).attrs != ds.attrs + assert func(ds).attrs != ds.attrs + assert func(ds).a.attrs != ds.a.attrs def test_where(self): ds = Dataset({"a": ("x", range(5))}) From ab81c5e329a12dff0575db546bf38c8b6396a7d5 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 15 Aug 2020 15:24:26 -0600 Subject: [PATCH 06/17] whats-new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3521e8215dd..932b7d378a1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,6 +49,8 @@ New Features now accept a ``dim_order`` parameter allowing to specify the resulting dataframe's dimensions order (:issue:`4331`, :pull:`4333`). By `Thomas Zilio `_. +- More operations follow the ``keep_attrs`` flag (:issue:`3490`, :issue:`4065`, :issue:`3433`, :issue:`3595`, :pull:`4195`). + By `Deepak Cherian `_. Bug fixes From 61682ef47eb72e45a6b0c995b0fec2166d60be24 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 15 Aug 2020 15:29:24 -0600 Subject: [PATCH 07/17] small fix --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1f64bd74728..d29bb2e3f1d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2047,7 +2047,7 @@ def func(self, *args, **kwargs): with np.errstate(all="ignore"): result = self.__array_wrap__(f(self.data, *args, **kwargs)) if keep_attrs: - result.attrs = self._attrs + result.attrs = self.attrs return result return func From bf9d213f6fd65979c9956420712d69f6aed938b0 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 15 Aug 2020 15:53:59 -0600 Subject: [PATCH 08/17] Fix imag, real --- xarray/core/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9bf824cfa9f..8613df3846b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5649,11 +5649,11 @@ def _integrate_one(self, coord, datetime_unit=None): @property def real(self): - return self._unary_op(lambda x: x.real, keep_attrs=True)(self) + return self.map(lambda x: x.real, keep_attrs=True) @property def imag(self): - return self._unary_op(lambda x: x.imag, keep_attrs=True)(self) + return self.map(lambda x: x.imag, keep_attrs=True) plot = utils.UncachedAccessor(_Dataset_PlotMethods) From ebc1be5badb7a1fd8ce18b8d5e3fbb59f3eb2e7d Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 15 Aug 2020 16:06:56 -0600 Subject: [PATCH 09/17] fix variable tests --- xarray/tests/test_variable.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index d79d40d67c0..27d0b2c7b08 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -329,7 +329,8 @@ def test_1d_math(self): assert_array_equal(y - v, 1 - v) # verify attributes are dropped v2 = self.cls(["x"], x, {"units": "meters"}) - assert_identical(base_v, +v2) + with set_options(keep_attrs=False): + assert_identical(base_v, +v2) # binary ops with all variables assert_array_equal(v + v, 2 * v) w = self.cls(["x"], y, {"foo": "bar"}) From 564046b484e0f22c30f16e83188761fe03f1aaf6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 15 Aug 2020 16:25:35 -0600 Subject: [PATCH 10/17] fix multiple return variables. --- xarray/core/computation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 0c99c54ca3c..5a08eff46da 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -251,7 +251,9 @@ def apply_dataarray_vfunc( if keep_attrs and hasattr(first_obj, "attrs"): if isinstance(out, tuple): - out = tuple(da._copy_attrs_from(first_obj) for da in out) + for da in out: + # This is adding attrs in place + da._copy_attrs_from(first_obj) else: out._copy_attrs_from(first_obj) @@ -404,7 +406,9 @@ def apply_dataset_vfunc( if keep_attrs and isinstance(first_obj, Dataset): if isinstance(out, tuple): - out = tuple(ds._copy_attrs_from(first_obj) for ds in out) + for ds in out: + # This is adding attrs in place + ds._copy_attrs_from(first_obj) else: out._copy_attrs_from(first_obj) return out From 092edeac9e369e5860c84d733296026e6195e3e2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 20 Aug 2020 17:16:16 -0600 Subject: [PATCH 11/17] review comments --- doc/whats-new.rst | 2 +- xarray/core/computation.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 932b7d378a1..4c8bccc0cae 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,7 +49,7 @@ New Features now accept a ``dim_order`` parameter allowing to specify the resulting dataframe's dimensions order (:issue:`4331`, :pull:`4333`). By `Thomas Zilio `_. -- More operations follow the ``keep_attrs`` flag (:issue:`3490`, :issue:`4065`, :issue:`3433`, :issue:`3595`, :pull:`4195`). +- Unary & binary operations follow the ``keep_attrs`` flag (:issue:`3490`, :issue:`4065`, :issue:`3433`, :issue:`3595`, :pull:`4195`). By `Deepak Cherian `_. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 5a08eff46da..fcc2d30a306 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -41,9 +41,11 @@ def _first_of_type(args, kind): + """" Returns either first object of type 'kind' or None if not found. """ for arg in args: if isinstance(arg, kind): return arg + raise NotImplementedError("This should be unreachable.") class _UFuncSignature: @@ -231,7 +233,7 @@ def apply_dataarray_vfunc( first_obj = _first_of_type(args, DataArray) - if keep_attrs and hasattr(first_obj, "name"): + if keep_attrs: name = first_obj.name else: name = result_name(args) @@ -249,7 +251,7 @@ def apply_dataarray_vfunc( (coords,) = result_coords out = DataArray(result_var, coords, name=name, fastpath=True) - if keep_attrs and hasattr(first_obj, "attrs"): + if keep_attrs: if isinstance(out, tuple): for da in out: # This is adding attrs in place @@ -404,7 +406,7 @@ def apply_dataset_vfunc( (coord_vars,) = list_of_coords out = _fast_dataset(result_vars, coord_vars) - if keep_attrs and isinstance(first_obj, Dataset): + if keep_attrs: if isinstance(out, tuple): for ds in out: # This is adding attrs in place @@ -658,7 +660,7 @@ def func(*arrays): ) ) - if keep_attrs and isinstance(first_obj, Variable): + if keep_attrs: var.attrs.update(first_obj.attrs) output.append(var) From a122e87483b39c50790bf0673c14280fb89b9400 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 18 Sep 2020 19:13:39 +0000 Subject: [PATCH 12/17] Update doc/whats-new.rst --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 46173369f5d..b97f3b1ff84 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -68,7 +68,6 @@ New Features By `Samnan Rahee `_ and `Anderson Banihirwe `_. - Bug fixes ~~~~~~~~~ - Variables which are chunked using dask only along some dimensions can be chunked while storing with zarr along previously From 14b4f1b29f620c117a6d39a6aca0a1c125c3ac1c Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Sep 2020 13:47:53 -0600 Subject: [PATCH 13/17] Propagate attrs with DataArray unary ops --- xarray/core/dataarray.py | 10 ++++++++-- xarray/tests/test_dataarray.py | 19 +++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 86cb7ad988e..5aa1574320c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -55,7 +55,7 @@ from .indexes import Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords -from .options import OPTIONS +from .options import OPTIONS, _get_keep_attrs from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( IndexVariable, @@ -2730,13 +2730,19 @@ def __rmatmul__(self, other): def _unary_op(f: Callable[..., Any]) -> Callable[..., "DataArray"]: @functools.wraps(f) def func(self, *args, **kwargs): + keep_attrs = kwargs.pop("keep_attrs", None) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) with warnings.catch_warnings(): warnings.filterwarnings("ignore", r"All-NaN (slice|axis) encountered") warnings.filterwarnings( "ignore", r"Mean of empty slice", category=RuntimeWarning ) with np.errstate(all="ignore"): - return self.__array_wrap__(f(self.variable.data, *args, **kwargs)) + da = self.__array_wrap__(f(self.variable.data, *args, **kwargs)) + if keep_attrs: + da.attrs = self.attrs + return da return func diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 047b46949c0..8d7f0301d69 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2488,21 +2488,20 @@ def test_assign_attrs(self): assert_identical(new_actual, expected) assert actual.attrs == {"a": 1, "b": 2} - def test_propagate_attrs(self): + @pytest.mark.parametrize( + "func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs] + ) + def test_propagate_attrs(self, func): da = DataArray(self.va) # test defaults - assert (np.float64(1.0) * da).attrs == da.attrs - assert np.abs(da).attrs == da.attrs - # not sure about the next two - assert da.clip(0, 1).attrs != da.attrs - assert abs(da).attrs != da.attrs + assert func(da).attrs == da.attrs with set_options(keep_attrs=False): - assert da.clip(0, 1).attrs != da.attrs - assert (np.float64(1.0) * da).attrs != da.attrs - assert np.abs(da).attrs != da.attrs - assert abs(da).attrs != da.attrs + assert func(da).attrs == {} + + with set_options(keep_attrs=True): + assert func(da).attrs == da.attrs def test_fillna(self): a = DataArray([np.nan, 1, np.nan, 3], coords={"x": range(4)}, dims="x") From adf6ad09ccbef53ab0ff89596be43f989c180c02 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Sep 2020 13:48:31 -0600 Subject: [PATCH 14/17] More tests --- xarray/tests/test_dataset.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b9ec93f5f3d..7ec2e525001 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4487,6 +4487,10 @@ def test_propagate_attrs(self, func): assert func(ds).attrs != ds.attrs assert func(ds).a.attrs != ds.a.attrs + with set_options(keep_attrs=False): + assert func(ds).attrs != ds.attrs + assert func(ds).a.attrs != ds.a.attrs + def test_where(self): ds = Dataset({"a": ("x", range(5))}) expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) From e8e6fbc0669945a7d4ef2527b9a25c47d29a12d9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Sep 2020 13:49:59 -0600 Subject: [PATCH 15/17] Small cleanup --- xarray/core/computation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 7b724d57a35..4d2167429cd 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -260,9 +260,8 @@ def apply_dataarray_vfunc( args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False ) - first_obj = _first_of_type(args, DataArray) - if keep_attrs: + first_obj = _first_of_type(args, DataArray) name = first_obj.name else: name = result_name(args) @@ -408,8 +407,6 @@ def apply_dataset_vfunc( """ from .dataset import Dataset - first_obj = _first_of_type(args, Dataset) - if dataset_join not in _JOINS_WITHOUT_FILL_VALUES and fill_value is _NO_FILL_VALUE: raise TypeError( "to apply an operation to datasets with different " @@ -417,6 +414,9 @@ def apply_dataset_vfunc( "dataset_fill_value argument." ) + if keep_attrs: + first_obj = _first_of_type(args, Dataset) + if len(args) > 1: args = deep_align( args, join=join, copy=False, exclude=exclude_dims, raise_on_invalid=False From 12bfec7cd790e2bc1df8e7518c2c061a3bf6e727 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 9 Oct 2020 17:28:15 -0600 Subject: [PATCH 16/17] Review comments. --- xarray/core/computation.py | 4 ++-- xarray/tests/test_dataset.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 4d2167429cd..7b62c2c705f 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -43,11 +43,11 @@ def _first_of_type(args, kind): - """" Returns either first object of type 'kind' or None if not found. """ + """ Return either first object of type 'kind' or raise if not found. """ for arg in args: if isinstance(arg, kind): return arg - raise NotImplementedError("This should be unreachable.") + raise ValueError("This should be unreachable.") class _UFuncSignature: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 7ec2e525001..f985cbcf247 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4491,6 +4491,14 @@ def test_propagate_attrs(self, func): assert func(ds).attrs != ds.attrs assert func(ds).a.attrs != ds.a.attrs + with set_options(keep_attrs=True): + assert func(ds).attrs == ds.attrs + assert func(ds).a.attrs == ds.a.attrs + + with set_options(keep_attrs=True): + assert func(ds).attrs == ds.attrs + assert func(ds).a.attrs == ds.a.attrs + def test_where(self): ds = Dataset({"a": ("x", range(5))}) expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])}) From 111c345a97a31f9d0f4da79758d84ecb057c84f8 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 10 Oct 2020 20:27:46 -0600 Subject: [PATCH 17/17] Fix duplication --- xarray/tests/test_dataset.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f985cbcf247..f16ce21087d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4495,10 +4495,6 @@ def test_propagate_attrs(self, func): assert func(ds).attrs == ds.attrs assert func(ds).a.attrs == ds.a.attrs - with set_options(keep_attrs=True): - assert func(ds).attrs == ds.attrs - assert func(ds).a.attrs == ds.a.attrs - def test_where(self): ds = Dataset({"a": ("x", range(5))}) expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])})