diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c9ee52f3da0..b922e7f3949 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -48,6 +48,9 @@ New Features is faster and requires less memory. (:pull:`6548`) By `Michael Niklas <https://github.com/headtr1ck>`_. - Improved overall typing. +- :py:meth:`Dataset.to_dict` and :py:meth:`DataArray.to_dict` may now optionally include encoding + attributes. (:pull:`6635`) + By `Joe Hamman <https://github.com/jhamman>`_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 35c0aab3fb8..3365c581376 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3067,7 +3067,7 @@ def to_netcdf( invalid_netcdf=invalid_netcdf, ) - def to_dict(self, data: bool = True) -> dict: + def to_dict(self, data: bool = True, encoding: bool = False) -> dict: """ Convert this xarray.DataArray into a dictionary following xarray naming conventions. @@ -3081,15 +3081,20 @@ def to_dict(self, data: bool = True) -> dict: data : bool, optional Whether to include the actual data in the dictionary. When set to False, returns just the schema. + encoding : bool, optional + Whether to include the DataArray's encoding in the dictionary. 
See Also -------- DataArray.from_dict + Dataset.to_dict """ d = self.variable.to_dict(data=data) d.update({"coords": {}, "name": self.name}) for k in self.coords: d["coords"][k] = self.coords[k].variable.to_dict(data=data) + if encoding: + d["encoding"] = dict(self.encoding) return d @classmethod @@ -3155,6 +3160,9 @@ def from_dict(cls, d: dict) -> DataArray: raise ValueError("cannot convert dict without the key 'data''") else: obj = cls(data, coords, d.get("dims"), d.get("name"), d.get("attrs")) + + obj.encoding.update(d.get("encoding", {})) + return obj @classmethod diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8cf5138c259..e559a8551b6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5944,7 +5944,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): return df - def to_dict(self, data=True): + def to_dict(self, data: bool = True, encoding: bool = False) -> dict: """ Convert this dataset to a dictionary following xarray naming conventions. @@ -5958,21 +5958,34 @@ def to_dict(self, data=True): data : bool, optional Whether to include the actual data in the dictionary. When set to False, returns just the schema. + encoding : bool, optional + Whether to include the Dataset's encoding in the dictionary. 
+ + Returns + ------- + d : dict See Also -------- Dataset.from_dict + DataArray.to_dict """ - d = { + d: dict = { "coords": {}, "attrs": decode_numpy_dict_values(self.attrs), "dims": dict(self.dims), "data_vars": {}, } for k in self.coords: - d["coords"].update({k: self[k].variable.to_dict(data=data)}) + d["coords"].update( + {k: self[k].variable.to_dict(data=data, encoding=encoding)} + ) for k in self.data_vars: - d["data_vars"].update({k: self[k].variable.to_dict(data=data)}) + d["data_vars"].update( + {k: self[k].variable.to_dict(data=data, encoding=encoding)} + ) + if encoding: + d["encoding"] = dict(self.encoding) return d @classmethod @@ -6061,6 +6074,7 @@ def from_dict(cls, d): obj = obj.set_coords(coords) obj.attrs.update(d.get("attrs", {})) + obj.encoding.update(d.get("encoding", {})) return obj diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 20f6bae8ad5..c34041abb2a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -533,13 +533,17 @@ def to_index(self): """Convert this variable to a pandas.Index""" return self.to_index_variable().to_index() - def to_dict(self, data=True): + def to_dict(self, data: bool = True, encoding: bool = False) -> dict: """Dictionary representation of variable.""" item = {"dims": self.dims, "attrs": decode_numpy_dict_values(self.attrs)} if data: item["data"] = ensure_us_time_resolution(self.values).tolist() else: item.update({"dtype": str(self.dtype), "shape": self.shape}) + + if encoding: + item["encoding"] = dict(self.encoding) + return item @property diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 01d17837f61..970e2a8e710 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3140,10 +3140,12 @@ def test_series_categorical_index(self): arr = DataArray(s) assert "'a'" in repr(arr) # should not error - def test_to_and_from_dict(self): + @pytest.mark.parametrize("encoding", [True, False]) + def test_to_and_from_dict(self, 
encoding) -> None: array = DataArray( np.random.randn(2, 3), {"x": ["a", "b"]}, ["x", "y"], name="foo" ) + array.encoding = {"bar": "spam"} expected = { "name": "foo", "dims": ("x", "y"), @@ -3151,7 +3153,9 @@ def test_to_and_from_dict(self): "attrs": {}, "coords": {"x": {"dims": ("x",), "data": ["a", "b"], "attrs": {}}}, } - actual = array.to_dict() + if encoding: + expected["encoding"] = {"bar": "spam"} + actual = array.to_dict(encoding=encoding) # check that they are identical assert expected == actual @@ -3198,7 +3202,7 @@ def test_to_and_from_dict(self): endiantype = "U1" expected_no_data["coords"]["x"].update({"dtype": endiantype, "shape": (2,)}) expected_no_data.update({"dtype": "float64", "shape": (2, 3)}) - actual_no_data = array.to_dict(data=False) + actual_no_data = array.to_dict(data=False, encoding=encoding) assert expected_no_data == actual_no_data def test_to_and_from_dict_with_time_dim(self):