diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index d830076e37b..de2afa9060c 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -107,6 +107,8 @@ Xarray also provides the ``max_gap`` keyword argument to limit the interpolation to data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` for more. +.. _agg: + Aggregation =========== diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 83aaa10a20c..31365f39e65 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,43 +1,1975 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops -from .types import T_DataArray, T_Dataset +if TYPE_CHECKING: + from .dataarray import DataArray + from .dataset import Dataset + + +class DatasetReductions: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def count( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + <xarray.Dataset> + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + Data variables: + da (time) float64 1.0 2.0 3.0 1.0 2.0 nan + + >>> ds.count() + <xarray.Dataset> + Dimensions: () + Data variables: + da int64 5 + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + )
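Note: the import hunk above replaces the ``Protocol``-based typing helpers with a ``TYPE_CHECKING`` guard plus string return annotations. A minimal sketch of that pattern outside the patch (the ``Dataset`` import mirrors the hunk; the class and method names here are illustrative only):

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Evaluated only by static type checkers (mypy, pyright); skipped at
        # runtime, so a circular import between a mixin module and
        # dataset.py never materializes.
        from .dataset import Dataset


    class SomeReductions:
        def count(self) -> "Dataset":
            # "Dataset" is a forward reference: a plain string at runtime,
            # resolved to the real class only during type checking.
            raise NotImplementedError()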
``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.all() + + Dimensions: () + Data variables: + da bool False + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.any() + + Dimensions: () + Data variables: + da bool True + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``max``. 
For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.max() + + Dimensions: () + Data variables: + da float64 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.max(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
+ + def min( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + <xarray.Dataset> + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + Data variables: + da (time) float64 1.0 2.0 3.0 1.0 2.0 nan + + >>> ds.min() + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.min(skipna=False) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + <xarray.Dataset> + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + Data variables: + da (time) float64 1.0 2.0 3.0 1.0 2.0 nan + + >>> ds.mean() + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 1.8 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.mean(skipna=False) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + )
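Unlike ``count``/``all``/``any``/``max``/``min``, the ``mean`` wrapper above passes ``numeric_only=True`` to ``reduce``; that is what the "Non-numeric variables will be removed prior to reducing" note refers to. A sketch of the observable difference (my example, assuming ``xarray`` is importable):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        dict(
            num=("x", np.array([1.0, 2.0, np.nan])),
            txt=("x", np.array(["a", "b", "c"])),
        )
    )

    print(list(ds.count().data_vars))  # ['num', 'txt'] -- numeric_only=False
    print(list(ds.mean().data_vars))  # ['num'] -- the string variable is dropped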
+ + def prod( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + <xarray.Dataset> + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + Data variables: + da (time) float64 1.0 2.0 3.0 1.0 2.0 nan + + >>> ds.prod() + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 12.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.prod(skipna=False) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.prod(skipna=True, min_count=2) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 12.0 + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + )
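``min_count`` only takes effect once NaNs are actually being skipped: the reduction yields NA unless at least ``min_count`` valid values contributed. A small sketch (mine, not from the patch):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array([1.0, np.nan, np.nan]))

    print(da.prod().item())  # 1.0 -- a single valid value suffices by default
    print(da.prod(min_count=1).item())  # 1.0
    print(da.prod(min_count=2).item())  # nan -- fewer than 2 valid values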
+ + def sum( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + <xarray.Dataset> + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + Data variables: + da (time) float64 1.0 2.0 3.0 1.0 2.0 nan + + >>> ds.sum() + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 9.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.sum(skipna=False) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.sum(skipna=True, min_count=2) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 9.0 + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + <xarray.Dataset> + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + Data variables: + da (time) float64 1.0 2.0 3.0 1.0 2.0 nan + + >>> ds.std() + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 0.7483 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.std(skipna=False) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.std(skipna=True, ddof=1) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 0.8367 + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + )
+ + def var( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + <xarray.Dataset> + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + Data variables: + da (time) float64 1.0 2.0 3.0 1.0 2.0 nan + + >>> ds.var() + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 0.56 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.var(skipna=False) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.var(skipna=True, ddof=1) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 0.7 + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + <xarray.Dataset> + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + Data variables: + da (time) float64 1.0 2.0 3.0 1.0 2.0 nan + + >>> ds.median() + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.median(skipna=False) + <xarray.Dataset> + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + )
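``DatasetReductions`` above (and ``DataArrayReductions`` below) are mixins: every generated method is a thin wrapper that defers to an abstract ``reduce``, and the concrete class supplies ``reduce`` through inheritance. A stripped-down sketch of that wiring (all names here besides ``reduce`` are invented):

    class ReductionsMixin:
        __slots__ = ()

        def reduce(self, func, **kwargs):
            raise NotImplementedError()

        def total(self, **kwargs):
            # Generated methods only pick the function and forward arguments.
            return self.reduce(sum, **kwargs)


    class Numbers(ReductionsMixin):
        def __init__(self, values):
            self.values = values

        def reduce(self, func, **kwargs):
            return func(self.values)


    print(Numbers([1, 2, 3]).total())  # 6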
+ + +class DataArrayReductions: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + + def count( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.count() + <xarray.DataArray ()> + array(5) + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.all() + <xarray.DataArray ()> + array(False) + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.any() + <xarray.DataArray ()> + array(True) + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.max() + <xarray.DataArray ()> + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.max(skipna=False) + <xarray.DataArray ()> + array(nan) + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.min() + <xarray.DataArray ()> + array(1.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.min(skipna=False) + <xarray.DataArray ()> + array(nan) + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.mean() + <xarray.DataArray ()> + array(1.8) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.mean(skipna=False) + <xarray.DataArray ()> + array(nan) + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.prod() + <xarray.DataArray ()> + array(12.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.prod(skipna=False) + <xarray.DataArray ()> + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.prod(skipna=True, min_count=2) + <xarray.DataArray ()> + array(12.) + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.sum() + <xarray.DataArray ()> + array(9.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.sum(skipna=False) + <xarray.DataArray ()> + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.sum(skipna=True, min_count=2) + <xarray.DataArray ()> + array(9.) + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.std() + <xarray.DataArray ()> + array(0.74833148) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.std(skipna=False) + <xarray.DataArray ()> + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.std(skipna=True, ddof=1) + <xarray.DataArray ()> + array(0.83666003) + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.var() + <xarray.DataArray ()> + array(0.56) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.var(skipna=False) + <xarray.DataArray ()> + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.var(skipna=True, ddof=1) + <xarray.DataArray ()> + array(0.7) + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + <xarray.DataArray (time: 6)> + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) <U1 'a' 'b' 'c' 'c' 'b' 'a' + + >>> da.median() + <xarray.DataArray ()> + array(2.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.median(skipna=False) + <xarray.DataArray ()> + array(nan) + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DatasetGroupByReductions: + __slots__ = () -class DatasetReduce(Protocol): def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_Dataset: - ... - - -class DatasetGroupByReductions: - __slots__ = () + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one.
If False (default), the new object will be @@ -45,6 +1977,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -52,6 +1985,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -79,13 +2020,6 @@ def count( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) int64 1 2 2 - - See Also - -------- - numpy.count - Dataset.count - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.count, @@ -96,20 +2030,20 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -117,6 +2051,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -124,6 +2059,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -151,13 +2094,6 @@ def all( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) bool False True True - - See Also - -------- - numpy.all - Dataset.all - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -168,20 +2104,20 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -189,6 +2125,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -196,6 +2133,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -223,13 +2168,6 @@ def any( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) bool True True True - - See Also - -------- - numpy.any - Dataset.any - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_any, @@ -240,25 +2178,25 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -267,6 +2205,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -274,6 +2213,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -311,13 +2258,6 @@ def max( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 3.0 - - See Also - -------- - numpy.max - Dataset.max - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -329,25 +2269,25 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -356,6 +2296,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -363,6 +2304,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -400,13 +2349,6 @@ def min( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 1.0 - - See Also - -------- - numpy.min - Dataset.min - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -418,25 +2360,25 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -445,6 +2387,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -452,6 +2395,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -489,13 +2444,6 @@ def mean( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 2.0 - - See Also - -------- - numpy.mean - Dataset.mean - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.mean, @@ -507,26 +2455,26 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). 
Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -541,6 +2489,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -548,6 +2497,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -595,13 +2556,6 @@ def prod( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 4.0 3.0 - - See Also - -------- - numpy.prod - Dataset.prod - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.prod, @@ -614,26 +2568,26 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -648,6 +2602,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -655,6 +2610,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
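The ``min_count`` parameter documented for ``prod`` and ``sum`` interacts with ``skipna``: when fewer than ``min_count`` valid values remain in a group, the result is NA instead of the empty-sum (0) or empty-product (1) identity. Reusing the toy data from the surrounding examples, where label ``'a'`` holds one valid value and one NaN:

    >>> # with min_count=2, label "a" yields NaN rather than 1.0
    >>> ds.groupby("labels").sum(skipna=True, min_count=2)
    >>> # without min_count, the lone valid value is returned as-is
    >>> ds.groupby("labels").sum(skipna=True)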
+ Examples -------- >>> da = xr.DataArray( @@ -702,13 +2669,6 @@ def sum( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 4.0 4.0 - - See Also - -------- - numpy.sum - Dataset.sum - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.sum, @@ -721,26 +2681,30 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -748,6 +2712,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -755,6 +2720,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -793,43 +2770,51 @@ def std( Data variables: da (labels) float64 nan 0.0 1.0 - See Also - -------- - numpy.std - Dataset.std - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.groupby("labels").std(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.414 """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. 
If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -837,6 +2822,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -844,6 +2830,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -882,42 +2880,46 @@ def var( Data variables: da (labels) float64 nan 0.0 1.0 - See Also - -------- - numpy.var - Dataset.var - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.groupby("labels").var(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 2.0 """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -926,6 +2928,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -933,6 +2936,18 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
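The new ``ddof`` parameter sets the divisor to ``N - ddof``, so ``ddof=1`` gives the unbiased sample estimate, and ``std`` with a given ``ddof`` is the square root of ``var`` with the same ``ddof``. The diff's own figures check out for group ``'c'``, which holds the values 3 and 1:

    >>> # ddof=0: ((3-2)**2 + (1-2)**2) / 2     = 1.0
    >>> # ddof=1: ((3-2)**2 + (1-2)**2) / (2-1) = 2.0, and std = sqrt(2) ≈ 1.414
    >>> ds.groupby("labels").var(skipna=True, ddof=1)
    >>> ds.groupby("labels").std(skipna=True, ddof=1)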
+ Examples -------- >>> da = xr.DataArray( @@ -970,13 +2985,6 @@ def median( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 2.0 - - See Also - -------- - numpy.median - Dataset.median - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.median, @@ -991,21 +2999,33 @@ def median( class DatasetResampleReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1013,6 +3033,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1020,6 +3041,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1047,13 +3076,6 @@ def count( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) int64 1 3 1 - - See Also - -------- - numpy.count - Dataset.count - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.count, @@ -1064,20 +3086,20 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1085,6 +3107,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
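``DatasetResampleReductions`` follows the same shape as the groupby mixin: every generated method funnels into ``reduce``, which is left here as a ``NotImplementedError`` stub for the concrete resample class to override. Calls mirror the groupby examples, with time bins in place of labels:

    >>> # three-month bins; count() reports valid (non-NaN) values per bin
    >>> ds.resample(time="3M").count()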
Returns ------- @@ -1092,6 +3115,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1119,13 +3150,6 @@ def all( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) bool True True False - - See Also - -------- - numpy.all - Dataset.all - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_all, @@ -1136,20 +3160,20 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1157,6 +3181,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1164,6 +3189,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1185,19 +3218,12 @@ def any( da (time) bool True True True True True False >>> ds.resample(time="3M").any() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True True - - See Also - -------- - numpy.any - Dataset.any - :ref:`resampling` - User guide on resampling operations. + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -1208,25 +3234,25 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1235,6 +3261,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1242,6 +3269,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1279,13 +3314,6 @@ def max( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 3.0 nan - - See Also - -------- - numpy.max - Dataset.max - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.max, @@ -1297,25 +3325,25 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1324,6 +3352,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1331,6 +3360,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1368,13 +3405,6 @@ def min( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 1.0 nan - - See Also - -------- - numpy.min - Dataset.min - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.min, @@ -1386,25 +3416,25 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). 
Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1413,6 +3443,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1420,6 +3451,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1457,13 +3500,6 @@ def mean( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan - - See Also - -------- - numpy.mean - Dataset.mean - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.mean, @@ -1475,26 +3511,26 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -1509,6 +3545,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1516,6 +3553,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
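The recurring note that non-numeric variables are removed prior to reducing can be observed directly: attach a string variable and it is absent from the reduced result. The variable name ``tag`` is illustrative:

    >>> ds2 = ds.assign(tag=("time", np.array(["x"] * 6)))
    >>> reduced = ds2.resample(time="3M").mean()
    >>> "tag" in reduced  # dropped because mean() reduces with numeric_only=True
    False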
+ Examples -------- >>> da = xr.DataArray( @@ -1563,13 +3612,6 @@ def prod( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - - See Also - -------- - numpy.prod - Dataset.prod - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.prod, @@ -1582,26 +3624,26 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -1616,6 +3658,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1623,6 +3666,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1670,13 +3725,6 @@ def sum( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - - See Also - -------- - numpy.sum - Dataset.sum - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.sum, @@ -1689,26 +3737,30 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
+ ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1716,6 +3768,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1723,6 +3776,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1761,43 +3826,51 @@ def std( Data variables: da (time) float64 0.0 0.8165 nan - See Also - -------- - numpy.std - Dataset.std - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.resample(time="3M").std(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1805,6 +3878,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1812,6 +3886,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
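``keep_attrs`` behaves uniformly across these methods: attributes are stripped by default and copied to the result when ``keep_attrs=True``. A short sketch, with an illustrative ``units`` attribute:

    >>> da2 = da.copy()
    >>> da2.attrs["units"] = "kelvin"
    >>> ds3 = xr.Dataset(dict(da=da2))
    >>> ds3.resample(time="3M").mean(keep_attrs=True)["da"].attrs
    {'units': 'kelvin'}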
+ Examples -------- >>> da = xr.DataArray( @@ -1850,42 +3936,46 @@ def var( Data variables: da (time) float64 0.0 0.6667 nan - See Also - -------- - numpy.var - Dataset.var - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.resample(time="3M").var(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1894,6 +3984,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1901,6 +3992,18 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1938,13 +4041,6 @@ def median( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan - - See Also - -------- - numpy.median - Dataset.median - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.median, @@ -1956,37 +4052,36 @@ def median( ) -class DataArrayReduce(Protocol): +class DataArrayGroupByReductions: + __slots__ = () + def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_DataArray: - ... - - -class DataArrayGroupByReductions: - __slots__ = () + ) -> "DataArray": + raise NotImplementedError() def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). 
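This hunk shows the diff's central structural change: the ``DataArrayReduce`` ``typing.Protocol``, which existed only to type ``self``, is replaced by a plain mixin that declares ``reduce`` as a ``NotImplementedError`` stub, so the generated methods can call ``self.reduce(...)`` while concrete classes supply the real implementation. A minimal sketch of the pattern, with illustrative names:

    from typing import Any, Callable

    class Reductions:
        __slots__ = ()

        def reduce(self, func: Callable[..., Any], **kwargs: Any):
            # stub, mirroring the mixin: concrete classes must override this
            raise NotImplementedError()

        def total(self, **kwargs: Any):
            # generated methods only ever go through self.reduce
            return self.reduce(sum, **kwargs)

    class Concrete(Reductions):
        def __init__(self, data):
            self.data = data

        def reduce(self, func, **kwargs):
            return func(self.data)

    assert Concrete([1, 2, 3]).total() == 6

One upside over the Protocol is that the stub participates in normal inheritance, so subclasses pick up both the generated methods and the obligation to implement ``reduce``.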
Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1994,6 +4089,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2001,6 +4097,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2023,13 +4127,6 @@ def count( array([1, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.count - DataArray.count - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.count, @@ -2039,20 +4136,20 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2060,6 +4157,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2067,6 +4165,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2089,13 +4195,6 @@ def all( array([False, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.all - DataArray.all - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -2105,20 +4204,20 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. 
If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2126,6 +4225,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2133,6 +4233,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2155,13 +4263,6 @@ def any( array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.any - DataArray.any - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_any, @@ -2171,25 +4272,25 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2198,6 +4299,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2205,6 +4307,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2235,13 +4345,6 @@ def max( array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.max - DataArray.max - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -2252,25 +4355,25 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2279,6 +4382,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2286,6 +4390,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2316,13 +4428,6 @@ def min( array([nan, 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.min - DataArray.min - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -2333,25 +4438,25 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2360,6 +4465,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2367,6 +4473,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2397,13 +4515,6 @@ def mean( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.mean - DataArray.mean - :ref:`groupby` - User guide on groupby operations. 
""" return self.reduce( duck_array_ops.mean, @@ -2414,26 +4525,26 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -2448,6 +4559,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2455,6 +4567,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2493,13 +4617,6 @@ def prod( array([nan, 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.prod - DataArray.prod - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.prod, @@ -2511,26 +4628,26 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -2545,6 +4662,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2552,6 +4670,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2590,13 +4720,6 @@ def sum( array([nan, 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.sum - DataArray.sum - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.sum, @@ -2608,26 +4731,30 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2635,6 +4762,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2642,6 +4770,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2673,42 +4813,48 @@ def std( Coordinates: * labels (labels) object 'a' 'b' 'c' - See Also - -------- - numpy.std - DataArray.std - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").std(skipna=True, ddof=1) + + array([ nan, 0. , 1.41421356]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). 
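Since these DataArray reductions dispatch to nan-aware numpy functions for in-memory data, the ``ddof`` examples can be cross-checked against numpy directly; group ``'c'`` in the example above holds the values 3 and 1:

    >>> np.nanstd(np.array([3.0, 1.0]), ddof=1)  # matches the 1.41421356 above
    1.4142135623730951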
Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2716,6 +4862,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2723,6 +4870,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2754,41 +4913,43 @@ def var( Coordinates: * labels (labels) object 'a' 'b' 'c' - See Also - -------- - numpy.var - DataArray.var - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").var(skipna=True, ddof=1) + + array([nan, 0., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2797,6 +4958,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -2804,6 +4966,18 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2834,13 +5008,6 @@ def median( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.median - DataArray.median - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.median, @@ -2854,21 +5021,33 @@ def median( class DataArrayResampleReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2876,6 +5055,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2883,6 +5063,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -2905,13 +5093,6 @@ def count( array([1, 3, 1]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.count - DataArray.count - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.count, @@ -2921,20 +5102,20 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be @@ -2942,6 +5123,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2949,6 +5131,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -2971,13 +5161,6 @@ def all( array([ True, True, False]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.all - DataArray.all - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_all, @@ -2987,20 +5170,20 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3008,6 +5191,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3015,6 +5199,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3037,13 +5229,6 @@ def any( array([ True, True, True]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.any - DataArray.any - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_any, @@ -3053,25 +5238,25 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3080,6 +5265,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3087,6 +5273,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3117,13 +5311,6 @@ def max( array([ 1., 3., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.max - DataArray.max - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.max, @@ -3134,25 +5321,25 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3161,6 +5348,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3168,6 +5356,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3198,13 +5394,6 @@ def min( array([ 1., 1., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.min - DataArray.min - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.min, @@ -3215,25 +5404,25 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). 
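The ``skipna=None`` default above is resolved per dtype rather than being a fixed boolean. A rough sketch of that resolution rule, using a hypothetical helper that simplifies xarray's internal dispatch:

    import numpy as np

    def resolve_skipna(skipna, dtype):
        # skipna=None defers to the dtype: float, complex and object kinds
        # skip NaN by default; integer kinds have no NaN sentinel.
        return skipna if skipna is not None else dtype.kind in "cfO"

    print(resolve_skipna(None, np.dtype("float64")))  # True
    print(resolve_skipna(None, np.dtype("int64")))    # False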
Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3242,6 +5431,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3249,6 +5439,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3279,13 +5481,6 @@ def mean( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.mean - DataArray.mean - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.mean, @@ -3296,26 +5491,26 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3330,6 +5525,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3337,6 +5533,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
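The ``min_count`` behaviour documented above amounts to a per-slice validity check. A minimal sketch in plain NumPy, ignoring xarray's duck-array dispatch (``prod_with_min_count`` is a hypothetical name):

    import numpy as np

    def prod_with_min_count(values, min_count=None):
        # If fewer than min_count non-NaN values are present,
        # the result is NaN instead of a product.
        valid = values[~np.isnan(values)]
        if min_count is not None and valid.size < min_count:
            return np.nan
        return valid.prod()

    print(prod_with_min_count(np.array([2.0, np.nan]), min_count=2))    # nan
    print(prod_with_min_count(np.array([2.0, 3.0, 1.0]), min_count=2))  # 6.0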
+ Examples -------- >>> da = xr.DataArray( @@ -3375,13 +5583,6 @@ def prod( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.prod - DataArray.prod - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.prod, @@ -3393,26 +5594,26 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3427,6 +5628,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3434,6 +5636,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3472,13 +5686,6 @@ def sum( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.sum - DataArray.sum - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.sum, @@ -3490,26 +5697,30 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
+ ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3517,6 +5728,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3524,6 +5736,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3555,42 +5779,48 @@ def std( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - See Also - -------- - numpy.std - DataArray.std - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.resample(time="3M").std(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3598,6 +5828,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3605,6 +5836,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
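The new ``ddof`` parameter follows the usual ``N - ddof`` divisor, which can be checked directly against NumPy; a short worked example, not part of the diff:

    import numpy as np

    x = np.array([1.0, 2.0])  # one reduced slice with N = 2 valid values

    biased = ((x - x.mean()) ** 2).sum() / x.size          # divisor N, ddof=0
    unbiased = ((x - x.mean()) ** 2).sum() / (x.size - 1)  # divisor N - 1, ddof=1

    assert np.isclose(biased, np.var(x, ddof=0))    # 0.25
    assert np.isclose(unbiased, np.var(x, ddof=1))  # 0.5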
+ Examples -------- >>> da = xr.DataArray( @@ -3636,41 +5879,43 @@ def var( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - See Also - -------- - numpy.var - DataArray.var - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.resample(time="3M").var(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3679,6 +5924,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3686,6 +5932,18 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3716,13 +5974,6 @@ def median( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.median - DataArray.median - :ref:`resampling` - User guide on resampling operations. 
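Each rewritten signature also inserts a bare ``*`` after ``dim``, making every later argument keyword-only. A self-contained sketch of the calling convention this enforces, with a stand-in function shaped like the generated methods:

    def median(dim=None, *, skipna=None, keep_attrs=None, **kwargs):
        # Same parameter shape as the generated reduction methods.
        return dim, skipna

    median("time", skipna=True)  # fine: dim may still be passed positionally
    try:
        median("time", True)  # skipna can no longer be passed positionally
    except TypeError as err:
        print(err)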
""" return self.reduce( duck_array_ops.median, diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 814e9a59877..bf8d6ccaeb6 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -105,7 +105,6 @@ class VariableArithmetic( class DatasetArithmetic( ImplementsDatasetReduce, - IncludeReduceMethods, IncludeCumMethods, SupportsArithmetic, DatasetOpsMixin, @@ -116,7 +115,6 @@ class DatasetArithmetic( class DataArrayArithmetic( ImplementsArrayReduce, - IncludeReduceMethods, IncludeCumMethods, IncludeNumpySameMethods, SupportsArithmetic, diff --git a/xarray/core/common.py b/xarray/core/common.py index bee59c6cc7d..cb6da986892 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -55,12 +55,14 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce(func, dim, axis, skipna=skipna, **kwargs) + return self.reduce( + func=func, dim=dim, axis=axis, skipna=skipna, **kwargs + ) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, axis, **kwargs) + return self.reduce(func=func, dim=dim, axis=axis, **kwargs) return wrapped_func @@ -93,13 +95,19 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs + func=func, + dim=dim, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) + return self.reduce( + func=func, dim=dim, numeric_only=numeric_only, **kwargs + ) return wrapped_func diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e04e5cb9c51..d7c3fd9bab7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -32,6 +32,7 @@ utils, weighted, ) +from ._reductions import DataArrayReductions from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( @@ -213,7 +214,9 @@ def __setitem__(self, key, value) -> None: _THIS_ARRAY = ReprObject("") -class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): +class DataArray( + AbstractArray, DataWithCoords, DataArrayArithmetic, DataArrayReductions +): """N-dimensional array with labeled coordinates and dimensions. DataArray provides a wrapper around numpy ndarrays that uses @@ -2655,6 +2658,7 @@ def reduce( self, func: Callable[..., Any], dim: None | Hashable | Sequence[Hashable] = None, + *, axis: None | int | Sequence[int] = None, keep_attrs: bool = None, keepdims: bool = False, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b3112bdc7ab..dd7807c2e7c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -48,6 +48,7 @@ utils, weighted, ) +from ._reductions import DatasetReductions from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .arithmetic import DatasetArithmetic from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes @@ -573,7 +574,7 @@ def __setitem__(self, key, value) -> None: self.dataset[pos_indexers] = value -class Dataset(DataWithCoords, DatasetArithmetic, Mapping): +class Dataset(DataWithCoords, DatasetReductions, DatasetArithmetic, Mapping): """A multi-dimensional, in memory, array database. 
A dataset resembles an in-memory representation of a NetCDF file, @@ -5004,6 +5005,7 @@ def reduce( self, func: Callable, dim: Hashable | Iterable[Hashable] = None, + *, keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, @@ -5039,7 +5041,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if "axis" in kwargs: + if kwargs.get("axis", None) is not None: raise ValueError( "passing 'axis' to Dataset reduce methods is ambiguous." " Please use 'dim' instead." diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index d3ec824159c..dea8949b84f 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,5 +1,6 @@ import datetime import warnings +from typing import Any, Callable, Hashable, Sequence, Union import numpy as np import pandas as pd @@ -866,7 +867,15 @@ def _combine(self, applied, shortcut=False): return combined def reduce( - self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + shortcut: bool = True, + **kwargs: Any, ): """Reduce the items in this group by applying `func` along some dimension(s). @@ -899,11 +908,15 @@ def reduce( if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_array(ar): - return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) + return ar.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) @@ -981,7 +994,16 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along some dimension(s). 
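The guard in ``Dataset.reduce`` changes from ``"axis" in kwargs`` to ``kwargs.get("axis", None) is not None`` because the rewritten wrappers now forward ``axis=axis`` explicitly, with ``None`` as the default; only a concrete axis should trigger the ambiguity error. A minimal sketch of the difference:

    def old_guard(**kwargs):
        return "axis" in kwargs                      # True even for axis=None

    def new_guard(**kwargs):
        return kwargs.get("axis", None) is not None  # True only for a real axis

    print(old_guard(axis=None))  # True: raises although no axis was requested
    print(new_guard(axis=None))  # False: an explicit axis=None passes through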
@@ -1013,11 +1035,15 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_dataset(ds): - return ds.reduce(func, dim, keep_attrs, **kwargs) + return ds.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index e2f599e8b4e..ed665ad4048 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,4 +1,5 @@ import warnings +from typing import Any, Callable, Hashable, Sequence, Union from ._reductions import DataArrayResampleReductions, DatasetResampleReductions from .groupby import DataArrayGroupByBase, DatasetGroupByBase @@ -157,7 +158,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayResampleReductions, DataArrayGroupByBase, Resample): +class DataArrayResample(DataArrayGroupByBase, DataArrayResampleReductions, Resample): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -248,7 +249,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetResampleReductions, DatasetGroupByBase, Resample): +class DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): @@ -316,7 +317,16 @@ def apply(self, func, args=(), shortcut=None, **kwargs): ) return self.map(func=func, shortcut=shortcut, args=args, **kwargs) - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. @@ -341,4 +351,11 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. 
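Reordering the resample bases, for example to ``DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample)``, puts the class that actually implements ``reduce`` ahead of the generated mixin, whose ``reduce`` stub only raises ``NotImplementedError``. A toy model of the MRO effect:

    class Reductions:
        # Generated mixin: reduction methods call a reduce() stub.
        def mean(self):
            return self.reduce("mean")

        def reduce(self, func):
            raise NotImplementedError()

    class GroupByBase:
        # Concrete implementation that must come first in the MRO.
        def reduce(self, func):
            return f"reduced with {func}"

    class Resample(GroupByBase, Reductions):
        pass

    print(Resample().mean())  # reduced with mean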
""" - return super().reduce(func, dim, keep_attrs, **kwargs) + return super().reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 55c68b7ff6b..438bdf8bdc3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2404,7 +2404,7 @@ def test_cumops(self): expected = DataArray([[-1, 0, 0], [-3, 0, 0]], coords, dims=["x", "y"]) assert_identical(expected, actual) - def test_reduce(self): + def test_reduce(self) -> None: coords = { "x": [-1, -2], "y": ["ab", "cd", "ef"], @@ -2445,7 +2445,7 @@ def test_reduce(self): expected = DataArray(orig.values.astype(int), dims=["x", "y"]).mean("x") assert_equal(actual, expected) - def test_reduce_keepdims(self): + def test_reduce_keepdims(self) -> None: coords = { "x": [-1, -2], "y": ["ab", "cd", "ef"], diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 8d6c4f96857..41fdd9a373d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4469,7 +4469,7 @@ def test_where_drop_no_indexes(self): actual = ds.where(ds == 1, drop=True) assert_identical(expected, actual) - def test_reduce(self): + def test_reduce(self) -> None: data = create_test_data() assert len(data.mean().coords) == 0 @@ -4480,21 +4480,21 @@ def test_reduce(self): assert_equal(data.min(dim=["dim1"]), data.min(dim="dim1")) - for reduct, expected in [ + for reduct, expected_dims in [ ("dim2", ["dim3", "time", "dim1"]), (["dim2", "time"], ["dim3", "dim1"]), (("dim2", "time"), ["dim3", "dim1"]), ((), ["dim2", "dim3", "time", "dim1"]), ]: - actual = list(data.min(dim=reduct).dims) - assert actual == expected + actual_dims = list(data.min(dim=reduct).dims) + assert actual_dims == expected_dims assert_equal(data.mean(dim=[]), data) with pytest.raises(ValueError): data.mean(axis=0) - def test_reduce_coords(self): + def test_reduce_coords(self) -> None: # regression test for GH1470 data = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"b": 4}) expected = xr.Dataset({"a": 2}, coords={"b": 4}) @@ -4518,7 +4518,7 @@ def test_mean_uint_dtype(self): ) assert_identical(actual, expected) - def test_reduce_bad_dim(self): + def test_reduce_bad_dim(self) -> None: data = create_test_data() with pytest.raises(ValueError, match=r"Dataset does not contain"): data.mean(dim="bad_dim") @@ -4553,7 +4553,7 @@ def test_reduce_cumsum_test_dims(self, reduct, expected, func): actual = getattr(data, func)(dim=reduct).dims assert list(actual) == expected - def test_reduce_non_numeric(self): + def test_reduce_non_numeric(self) -> None: data1 = create_test_data(seed=44) data2 = create_test_data(seed=44) add_vars = {"var4": ["dim1", "dim2"], "var5": ["dim1"]} @@ -4570,7 +4570,7 @@ def test_reduce_non_numeric(self): @pytest.mark.filterwarnings( "ignore:Once the behaviour of DataArray:DeprecationWarning" ) - def test_reduce_strings(self): + def test_reduce_strings(self) -> None: expected = Dataset({"x": "a"}) ds = Dataset({"x": ("y", ["a", "b"])}) ds.coords["y"] = [-10, 10] @@ -4607,7 +4607,7 @@ def test_reduce_strings(self): actual = ds.min() assert_identical(expected, actual) - def test_reduce_dtypes(self): + def test_reduce_dtypes(self) -> None: # regression test for GH342 expected = Dataset({"x": 1}) actual = Dataset({"x": True}).sum() @@ -4622,7 +4622,7 @@ def test_reduce_dtypes(self): actual = Dataset({"x": ("y", [1, 1j])}).sum() assert_identical(expected, actual) - def test_reduce_keep_attrs(self): + def 
test_reduce_keep_attrs(self) -> None: data = create_test_data() _attrs = {"attr1": "value1", "attr2": 2929} @@ -4664,7 +4664,7 @@ def test_reduce_scalars(self): actual = ds.var("a") assert_identical(expected, actual) - def test_reduce_only_one_axis(self): + def test_reduce_only_one_axis(self) -> None: def mean_only_one_axis(x, axis): if not isinstance(axis, integer_types): raise TypeError("non-integer axis") @@ -4680,7 +4680,7 @@ def mean_only_one_axis(x, axis): ): ds.reduce(mean_only_one_axis) - def test_reduce_no_axis(self): + def test_reduce_no_axis(self) -> None: def total_sum(x): return np.sum(x.flatten()) @@ -4692,7 +4692,7 @@ def total_sum(x): with pytest.raises(TypeError, match=r"unexpected keyword argument 'axis'"): ds.reduce(total_sum, dim="x") - def test_reduce_keepdims(self): + def test_reduce_keepdims(self) -> None: ds = Dataset( {"a": (["x", "y"], [[0, 1, 2, 3, 4]])}, coords={ diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index e12798b70c9..c329bc50c56 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -1,6 +1,5 @@ import datetime as dt import warnings -from textwrap import dedent import numpy as np import pandas as pd @@ -676,87 +675,6 @@ def test_multiple_dims(dtype, dask, skipna, func): assert_allclose(actual, expected) -def test_docs(): - # with min_count - actual = DataArray.sum.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `sum` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `sum`. - axis : int or sequence of int, optional - Axis(es) over which to apply `sum`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `sum` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. New in version 0.10.8: Added with the default being - None. Changed in version 0.17.0: if specified on an integer array - and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `sum` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `sum` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - # without min_count - actual = DataArray.std.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `std` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `std`. - axis : int or sequence of int, optional - Axis(es) over which to apply `std`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `std` is calculated over axes. 
- skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `std` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `std` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - def test_datetime_to_numeric_datetime64(): times = pd.date_range("2000", periods=5, freq="7D").values result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 70c92d1a96f..e79c94e8907 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -3,157 +3,249 @@ For internal xarray development use only. Usage: - python xarray/util/generate_reductions.py > xarray/core/_reductions.py + python xarray/util/generate_reductions.py pytest --doctest-modules xarray/core/_reductions.py --accept || true - pytest --doctest-modules xarray/core/_reductions.py --accept + pytest --doctest-modules xarray/core/_reductions.py This requires [pytest-accept](https://github.com/max-sixty/pytest-accept). The second run of pytest is deliberate, since the first will return an error while replacing the doctests. """ - import collections import textwrap -from functools import partial -from typing import Callable, Optional +from dataclasses import dataclass MODULE_PREAMBLE = '''\ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops -from .types import T_DataArray, T_Dataset''' -OBJ_PREAMBLE = """ +if TYPE_CHECKING: + from .dataarray import DataArray + from .dataset import Dataset''' + + +CLASS_PREAMBLE = """ + +class {obj}{cls}Reductions: + __slots__ = () -class {obj}Reduce(Protocol): def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_{obj}: - ...""" + ) -> "{obj}": + raise NotImplementedError()""" +TEMPLATE_REDUCTION_SIGNATURE = ''' + def {method}( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *,{extra_kwargs} + keep_attrs: bool = None, + **kwargs, + ) -> "{obj}": + """ + Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). 
-CLASS_PREAMBLE = """ + Parameters + ----------''' -class {obj}{cls}Reductions: - __slots__ = ()""" +TEMPLATE_RETURNS = """ + Returns + ------- + reduced : {obj} + New {obj} with ``{method}`` applied to its data and the + indicated dimension(s) removed""" -_SKIPNA_DOCSTRING = """ -skipna : bool, optional +TEMPLATE_SEE_ALSO = """ + See Also + -------- + numpy.{method} + dask.array.{method} + {see_also_obj}.{method} + :ref:`{docref}` + User guide on {docref_description}.""" + +TEMPLATE_NOTES = """ + Notes + ----- + {notes}""" + +_DIM_DOCSTRING = """dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" + +_SKIPNA_DOCSTRING = """skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64).""" -_MINCOUNT_DOCSTRING = """ -min_count : int, default: None +_MINCOUNT_DOCSTRING = """min_count : int, default: None The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array.""" +_DDOF_DOCSTRING = """ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements.""" + +_KEEP_ATTRS_DOCSTRING = """keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes.""" + +_KWARGS_DOCSTRING = """**kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``{method}`` on this object's data. + These could include dask-specific kwargs like ``split_every``.""" -BOOL_REDUCE_METHODS = ["all", "any"] -NAN_REDUCE_METHODS = [ - "max", - "min", - "mean", - "prod", - "sum", - "std", - "var", - "median", -] NAN_CUM_METHODS = ["cumsum", "cumprod"] -MIN_COUNT_METHODS = ["prod", "sum"] + NUMERIC_ONLY_METHODS = [ - "mean", - "std", - "var", - "sum", - "prod", - "median", "cumsum", "cumprod", ] +_NUMERIC_ONLY_NOTES = "Non-numeric variables will be removed prior to reducing." + +ExtraKwarg = collections.namedtuple("ExtraKwarg", "docs kwarg call example") +skipna = ExtraKwarg( + docs=_SKIPNA_DOCSTRING, + kwarg="skipna: bool = None,", + call="skipna=skipna,", + example="""\n + Use ``skipna`` to control whether NaNs are ignored. -TEMPLATE_REDUCTION = ''' - def {method}( - self: {obj}Reduce, - dim: Union[None, Hashable, Sequence[Hashable]] = None,{skip_na.kwarg}{min_count.kwarg} - keep_attrs: bool = None, - **kwargs, - ) -> T_{obj}: - """ - Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). + >>> {calculation}(skipna=False)""", +) +min_count = ExtraKwarg( + docs=_MINCOUNT_DOCSTRING, + kwarg="min_count: Optional[int] = None,", + call="min_count=min_count,", + example="""\n + Specify ``min_count`` for finer control over when NaNs are ignored. - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``{method}``. 
For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. {extra_dim}{extra_args}{skip_na.docs}{min_count.docs} - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``{method}`` on this object's data. + >>> {calculation}(skipna=True, min_count=2)""", +) +ddof = ExtraKwarg( + docs=_DDOF_DOCSTRING, + kwarg="ddof: int = 0,", + call="ddof=ddof,", + example="""\n + Specify ``ddof=1`` for an unbiased estimate. - Returns - ------- - reduced : {obj} - New {obj} with ``{method}`` applied to its data and the - indicated dimension(s) removed + >>> {calculation}(skipna=True, ddof=1)""", +) - Examples - --------{example} - See Also - -------- - numpy.{method} - {obj}.{method} - :ref:`{docref}` - User guide on {docref} operations. - """ - return self.reduce( - duck_array_ops.{array_method}, - dim=dim,{skip_na.call}{min_count.call}{numeric_only_call} - keep_attrs=keep_attrs, - **kwargs, - )''' +class Method: + def __init__( + self, + name, + bool_reduce=False, + extra_kwargs=tuple(), + numeric_only=False, + ): + self.name = name + self.extra_kwargs = extra_kwargs + self.numeric_only = numeric_only + + if bool_reduce: + self.array_method = f"array_{name}" + self.np_example_array = """ + ... np.array([True, True, True, True, True, False], dtype=bool),""" + + else: + self.array_method = name + self.np_example_array = """ + ... np.array([1, 2, 3, 1, 2, np.nan]),""" + +class ReductionGenerator: + def __init__( + self, + cls, + datastructure, + methods, + docref, + docref_description, + example_call_preamble, + see_also_obj=None, + ): + self.datastructure = datastructure + self.cls = cls + self.methods = methods + self.docref = docref + self.docref_description = docref_description + self.example_call_preamble = example_call_preamble + self.preamble = CLASS_PREAMBLE.format(obj=datastructure.name, cls=cls) + if not see_also_obj: + self.see_also_obj = self.datastructure.name + else: + self.see_also_obj = see_also_obj -def generate_groupby_example(obj: str, cls: str, method: str): - """Generate examples for method.""" - dx = "ds" if obj == "Dataset" else "da" - if cls == "Resample": - calculation = f'{dx}.resample(time="3M").{method}' - elif cls == "GroupBy": - calculation = f'{dx}.groupby("labels").{method}' - else: - raise ValueError + def generate_methods(self): + yield [self.preamble] + for method in self.methods: + yield self.generate_method(method) - if method in BOOL_REDUCE_METHODS: - np_array = """ - ... np.array([True, True, True, True, True, False], dtype=bool),""" + def generate_method(self, method): + template_kwargs = dict(obj=self.datastructure.name, method=method.name) - else: - np_array = """ - ... 
np.array([1, 2, 3, 1, 2, np.nan]),""" + if method.extra_kwargs: + extra_kwargs = "\n " + "\n ".join( + [kwarg.kwarg for kwarg in method.extra_kwargs if kwarg.kwarg] + ) + else: + extra_kwargs = "" + + yield TEMPLATE_REDUCTION_SIGNATURE.format( + **template_kwargs, + extra_kwargs=extra_kwargs, + ) + + for text in [ + _DIM_DOCSTRING.format(method=method.name), + *(kwarg.docs for kwarg in method.extra_kwargs if kwarg.docs), + _KEEP_ATTRS_DOCSTRING, + _KWARGS_DOCSTRING.format(method=method.name), + ]: + if text: + yield textwrap.indent(text, 8 * " ") + + yield TEMPLATE_RETURNS.format(**template_kwargs) + + yield TEMPLATE_SEE_ALSO.format( + **template_kwargs, + docref=self.docref, + docref_description=self.docref_description, + see_also_obj=self.see_also_obj, + ) - create_da = f""" - >>> da = xr.DataArray({np_array} + if method.numeric_only: + yield TEMPLATE_NOTES.format(notes=_NUMERIC_ONLY_NOTES) + + yield textwrap.indent(self.generate_example(method=method), "") + + yield ' """' + + yield self.generate_code(method) + + def generate_example(self, method): + create_da = f""" + >>> da = xr.DataArray({method.np_example_array} ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -161,130 +253,149 @@ def generate_groupby_example(obj: str, cls: str, method: str): ... ), ... )""" - if obj == "Dataset": - maybe_dataset = """ - >>> ds = xr.Dataset(dict(da=da)) - >>> ds""" - else: - maybe_dataset = """ - >>> da""" - - if method in NAN_REDUCE_METHODS: - maybe_skipna = f""" - - Use ``skipna`` to control whether NaNs are ignored. - - >>> {calculation}(skipna=False)""" - else: - maybe_skipna = "" - - if method in MIN_COUNT_METHODS: - maybe_mincount = f""" - - Specify ``min_count`` for finer control over when NaNs are ignored. 
+ calculation = f"{self.datastructure.example_var_name}{self.example_call_preamble}.{method.name}" + if method.extra_kwargs: + extra_examples = "".join( + kwarg.example for kwarg in method.extra_kwargs if kwarg.example + ).format(calculation=calculation, method=method.name) + else: + extra_examples = "" - >>> {calculation}(skipna=True, min_count=2)""" - else: - maybe_mincount = "" + return f""" + Examples + --------{create_da}{self.datastructure.docstring_create} - return f"""{create_da}{maybe_dataset} + >>> {calculation}(){extra_examples}""" - >>> {calculation}(){maybe_skipna}{maybe_mincount}""" +class GenericReductionGenerator(ReductionGenerator): + def generate_code(self, method): + extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] -def generate_method( - obj: str, - docref: str, - method: str, - skipna: bool, - example_generator: Callable, - array_method: Optional[str] = None, -): - if not array_method: - array_method = method + if self.datastructure.numeric_only: + extra_kwargs.append(f"numeric_only={method.numeric_only},") - if obj == "Dataset": - if method in NUMERIC_ONLY_METHODS: - numeric_only_call = "\n numeric_only=True," + if extra_kwargs: + extra_kwargs = textwrap.indent("\n" + "\n".join(extra_kwargs), 12 * " ") else: - numeric_only_call = "\n numeric_only=False," - else: - numeric_only_call = "" - - kwarg = collections.namedtuple("kwarg", "docs kwarg call") - if skipna: - skip_na = kwarg( - docs=textwrap.indent(_SKIPNA_DOCSTRING, " "), - kwarg="\n skipna: bool = True,", - call="\n skipna=skipna,", - ) - else: - skip_na = kwarg(docs="", kwarg="", call="") - - if method in MIN_COUNT_METHODS: - min_count = kwarg( - docs=textwrap.indent(_MINCOUNT_DOCSTRING, " "), - kwarg="\n min_count: Optional[int] = None,", - call="\n min_count=min_count,", - ) - else: - min_count = kwarg(docs="", kwarg="", call="") - - return TEMPLATE_REDUCTION.format( - obj=obj, - docref=docref, - method=method, - array_method=array_method, - extra_dim="""If ``None``, will reduce over all dimensions - present in the grouped variable.""", - extra_args="", - skip_na=skip_na, - min_count=min_count, - numeric_only_call=numeric_only_call, - example=example_generator(obj=obj, method=method), - ) - - -def render(obj: str, cls: str, docref: str, example_generator: Callable): - yield CLASS_PREAMBLE.format(obj=obj, cls=cls) - yield generate_method( - obj, - method="count", - docref=docref, - skipna=False, - example_generator=example_generator, - ) - for method in BOOL_REDUCE_METHODS: - yield generate_method( - obj, - method=method, - docref=docref, - skipna=False, - array_method=f"array_{method}", - example_generator=example_generator, - ) - for method in NAN_REDUCE_METHODS: - yield generate_method( - obj, - method=method, - docref=docref, - skipna=True, - example_generator=example_generator, - ) + extra_kwargs = "" + return f"""\ + return self.reduce( + duck_array_ops.{method.array_method}, + dim=dim,{extra_kwargs} + keep_attrs=keep_attrs, + **kwargs, + )""" + + +REDUCTION_METHODS = ( + Method("count"), + Method("all", bool_reduce=True), + Method("any", bool_reduce=True), + Method("max", extra_kwargs=(skipna,)), + Method("min", extra_kwargs=(skipna,)), + Method("mean", extra_kwargs=(skipna,), numeric_only=True), + Method("prod", extra_kwargs=(skipna, min_count), numeric_only=True), + Method("sum", extra_kwargs=(skipna, min_count), numeric_only=True), + Method("std", extra_kwargs=(skipna, ddof), numeric_only=True), + Method("var", extra_kwargs=(skipna, ddof), numeric_only=True), + 
Method("median", extra_kwargs=(skipna,), numeric_only=True), +) + + +@dataclass +class DataStructure: + name: str + docstring_create: str + example_var_name: str + numeric_only: bool = False + + +DATASET_OBJECT = DataStructure( + name="Dataset", + docstring_create=""" + >>> ds = xr.Dataset(dict(da=da)) + >>> ds""", + example_var_name="ds", + numeric_only=True, +) +DATAARRAY_OBJECT = DataStructure( + name="DataArray", + docstring_create=""" + >>> da""", + example_var_name="da", + numeric_only=False, +) + +DATASET_GENERATOR = GenericReductionGenerator( + cls="", + datastructure=DATASET_OBJECT, + methods=REDUCTION_METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", + see_also_obj="DataArray", +) +DATAARRAY_GENERATOR = GenericReductionGenerator( + cls="", + datastructure=DATAARRAY_OBJECT, + methods=REDUCTION_METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", + see_also_obj="Dataset", +) + +DATAARRAY_GROUPBY_GENERATOR = GenericReductionGenerator( + cls="GroupBy", + datastructure=DATAARRAY_OBJECT, + methods=REDUCTION_METHODS, + docref="groupby", + docref_description="groupby operations", + example_call_preamble='.groupby("labels")', +) +DATAARRAY_RESAMPLE_GENERATOR = GenericReductionGenerator( + cls="Resample", + datastructure=DATAARRAY_OBJECT, + methods=REDUCTION_METHODS, + docref="resampling", + docref_description="resampling operations", + example_call_preamble='.resample(time="3M")', +) +DATASET_GROUPBY_GENERATOR = GenericReductionGenerator( + cls="GroupBy", + datastructure=DATASET_OBJECT, + methods=REDUCTION_METHODS, + docref="groupby", + docref_description="groupby operations", + example_call_preamble='.groupby("labels")', +) +DATASET_RESAMPLE_GENERATOR = GenericReductionGenerator( + cls="Resample", + datastructure=DATASET_OBJECT, + methods=REDUCTION_METHODS, + docref="resampling", + docref_description="resampling operations", + example_call_preamble='.resample(time="3M")', +) if __name__ == "__main__": - print(MODULE_PREAMBLE) - for obj in ["Dataset", "DataArray"]: - print(OBJ_PREAMBLE.format(obj=obj)) - for cls, docref in ( - ("GroupBy", "groupby"), - ("Resample", "resampling"), - ): - for line in render( - obj=obj, - cls=cls, - docref=docref, - example_generator=partial(generate_groupby_example, cls=cls), - ): - print(line) + import os + from pathlib import Path + + p = Path(os.getcwd()) + filepath = p.parent / "xarray" / "xarray" / "core" / "_reductions.py" + with open(filepath, mode="w", encoding="utf-8") as f: + f.write(MODULE_PREAMBLE + "\n") + for gen in [ + DATASET_GENERATOR, + DATAARRAY_GENERATOR, + DATASET_GROUPBY_GENERATOR, + DATASET_RESAMPLE_GENERATOR, + DATAARRAY_GROUPBY_GENERATOR, + DATAARRAY_RESAMPLE_GENERATOR, + ]: + for lines in gen.generate_methods(): + for line in lines: + f.write(line + "\n")
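With these definitions, regenerating a single block of methods reduces to instantiating one generator and streaming its fragments. A sketch, assuming the module is importable from a checked-out xarray:

    from xarray.util.generate_reductions import (
        DATAARRAY_OBJECT,
        GenericReductionGenerator,
        Method,
        ddof,
        skipna,
    )

    gen = GenericReductionGenerator(
        cls="Resample",
        datastructure=DATAARRAY_OBJECT,
        methods=(Method("std", extra_kwargs=(skipna, ddof), numeric_only=True),),
        docref="resampling",
        docref_description="resampling operations",
        example_call_preamble='.resample(time="3M")',
    )
    for lines in gen.generate_methods():
        for line in lines:
            print(line)

Note that the ``__main__`` block writes to ``Path(os.getcwd()).parent / "xarray" / "xarray" / "core" / "_reductions.py"``, so it expects the parent of the current working directory to contain the ``xarray`` checkout, e.g. when run from the repository root.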