Skip to content

Commit a01460b

Browse files
authored
quantile: use skipna=None (#6303)
* quantile: use skipna=None * better term * move whats new entry * remove duplicated entry
1 parent e2b40d7 commit a01460b

9 files changed

+71
-18
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ Deprecations
3434
Bug fixes
3535
~~~~~~~~~
3636

37+
- Set ``skipna=None`` for all ``quantile`` methods (e.g. :py:meth:`Dataset.quantile`) and
38+
ensure it skips missing values for float dtypes (consistent with other methods). This should
39+
not change the behavior (:pull:`6303`). By `Mathias Hauser <https://github.com/mathause>`_.
3740

3841
Documentation
3942
~~~~~~~~~~~~~
@@ -86,7 +89,6 @@ Deprecations
8689
Bug fixes
8790
~~~~~~~~~
8891

89-
9092
- Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size
9193
can now be stored using `to_zarr()` (:pull:`6258`). By `Tobias Kölling <https://github.com/d70-t>`_.
9294
- Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`, :pull:`6305`). By `Martin Bergemann <https://github.com/antarcticrainforest>`_ and `Stan West <https://github.com/stanwest>`_.

xarray/core/dataarray.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3440,7 +3440,7 @@ def quantile(
34403440
dim: str | Sequence[Hashable] | None = None,
34413441
method: QUANTILE_METHODS = "linear",
34423442
keep_attrs: bool = None,
3443-
skipna: bool = True,
3443+
skipna: bool = None,
34443444
interpolation: QUANTILE_METHODS = None,
34453445
) -> DataArray:
34463446
"""Compute the qth quantile of the data along the specified dimension.
@@ -3486,7 +3486,10 @@ def quantile(
34863486
the original object to the new one. If False (default), the new
34873487
object will be returned without attributes.
34883488
skipna : bool, optional
3489-
Whether to skip missing values when aggregating.
3489+
If True, skip missing values (as marked by NaN). By default, only
3490+
skips missing values for float dtypes; other dtypes either do not
3491+
have a sentinel missing value (int) or skipna=True has not been
3492+
implemented (object, datetime64 or timedelta64).
34903493
34913494
Returns
34923495
-------

xarray/core/dataset.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6160,7 +6160,7 @@ def quantile(
61606160
method: QUANTILE_METHODS = "linear",
61616161
numeric_only: bool = False,
61626162
keep_attrs: bool = None,
6163-
skipna: bool = True,
6163+
skipna: bool = None,
61646164
interpolation: QUANTILE_METHODS = None,
61656165
):
61666166
"""Compute the qth quantile of the data along the specified dimension.
@@ -6209,7 +6209,10 @@ def quantile(
62096209
numeric_only : bool, optional
62106210
If True, only apply ``func`` to variables with a numeric dtype.
62116211
skipna : bool, optional
6212-
Whether to skip missing values when aggregating.
6212+
If True, skip missing values (as marked by NaN). By default, only
6213+
skips missing values for float dtypes; other dtypes either do not
6214+
have a sentinel missing value (int) or skipna=True has not been
6215+
implemented (object, datetime64 or timedelta64).
62136216
62146217
Returns
62156218
-------

xarray/core/groupby.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ def quantile(
554554
dim=None,
555555
method="linear",
556556
keep_attrs=None,
557-
skipna=True,
557+
skipna=None,
558558
interpolation=None,
559559
):
560560
"""Compute the qth quantile over each array in the groups and
@@ -597,7 +597,10 @@ def quantile(
597597
version 1.22.0.
598598
599599
skipna : bool, optional
600-
Whether to skip missing values when aggregating.
600+
If True, skip missing values (as marked by NaN). By default, only
601+
skips missing values for float dtypes; other dtypes either do not
602+
have a sentinel missing value (int) or skipna=True has not been
603+
implemented (object, datetime64 or timedelta64).
601604
602605
Returns
603606
-------

xarray/core/variable.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,7 +1978,7 @@ def quantile(
19781978
dim: str | Sequence[Hashable] | None = None,
19791979
method: QUANTILE_METHODS = "linear",
19801980
keep_attrs: bool = None,
1981-
skipna: bool = True,
1981+
skipna: bool = None,
19821982
interpolation: QUANTILE_METHODS = None,
19831983
) -> Variable:
19841984
"""Compute the qth quantile of the data along the specified dimension.
@@ -2024,6 +2024,11 @@ def quantile(
20242024
If True, the variable's attributes (`attrs`) will be copied from
20252025
the original object to the new one. If False (default), the new
20262026
object will be returned without attributes.
2027+
skipna : bool, optional
2028+
If True, skip missing values (as marked by NaN). By default, only
2029+
skips missing values for float dtypes; other dtypes either do not
2030+
have a sentinel missing value (int) or skipna=True has not been
2031+
implemented (object, datetime64 or timedelta64).
20272032
20282033
Returns
20292034
-------
@@ -2059,7 +2064,10 @@ def quantile(
20592064

20602065
method = interpolation
20612066

2062-
_quantile_func = np.nanquantile if skipna else np.quantile
2067+
if skipna or (skipna is None and self.dtype.kind in "cfO"):
2068+
_quantile_func = np.nanquantile
2069+
else:
2070+
_quantile_func = np.quantile
20632071

20642072
if keep_attrs is None:
20652073
keep_attrs = _get_keep_attrs(default=False)

xarray/tests/test_dataarray.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2516,15 +2516,19 @@ def test_reduce_out(self):
25162516
with pytest.raises(TypeError):
25172517
orig.mean(out=np.ones(orig.shape))
25182518

2519-
@pytest.mark.parametrize("skipna", [True, False])
2519+
@pytest.mark.parametrize("skipna", [True, False, None])
25202520
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
25212521
@pytest.mark.parametrize(
25222522
"axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
25232523
)
25242524
def test_quantile(self, q, axis, dim, skipna) -> None:
2525-
actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna)
2526-
_percentile_func = np.nanpercentile if skipna else np.percentile
2527-
expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis)
2525+
2526+
va = self.va.copy(deep=True)
2527+
va[0, 0] = np.NaN
2528+
2529+
actual = DataArray(va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna)
2530+
_percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile
2531+
expected = _percentile_func(va.values, np.array(q) * 100, axis=axis)
25282532
np.testing.assert_allclose(actual.values, expected)
25292533
if is_scalar(q):
25302534
assert "quantile" not in actual.dims

xarray/tests/test_dataset.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4718,10 +4718,11 @@ def test_reduce_keepdims(self):
47184718
)
47194719
assert_identical(expected, actual)
47204720

4721-
@pytest.mark.parametrize("skipna", [True, False])
4721+
@pytest.mark.parametrize("skipna", [True, False, None])
47224722
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
47234723
def test_quantile(self, q, skipna) -> None:
47244724
ds = create_test_data(seed=123)
4725+
ds.var1.data[0, 0] = np.NaN
47254726

47264727
for dim in [None, "dim1", ["dim1"]]:
47274728
ds_quantile = ds.quantile(q, dim=dim, skipna=skipna)

xarray/tests/test_groupby.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,17 @@ def test_da_groupby_quantile() -> None:
203203
actual = array.groupby("x").quantile([0, 1])
204204
assert_identical(expected, actual)
205205

206+
array = xr.DataArray(
207+
data=[np.NaN, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x"
208+
)
209+
210+
for skipna in (True, False, None):
211+
e = [np.NaN, 5] if skipna is False else [2.5, 5]
212+
213+
expected = xr.DataArray(data=e, coords={"x": [1, 2], "quantile": 0.5}, dims="x")
214+
actual = array.groupby("x").quantile(0.5, skipna=skipna)
215+
assert_identical(expected, actual)
216+
206217
# Multiple dimensions
207218
array = xr.DataArray(
208219
data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]],
@@ -306,6 +317,20 @@ def test_ds_groupby_quantile() -> None:
306317
actual = ds.groupby("x").quantile([0, 1])
307318
assert_identical(expected, actual)
308319

320+
ds = xr.Dataset(
321+
data_vars={"a": ("x", [np.NaN, 2, 3, 4, 5, 6])},
322+
coords={"x": [1, 1, 1, 2, 2, 2]},
323+
)
324+
325+
for skipna in (True, False, None):
326+
e = [np.NaN, 5] if skipna is False else [2.5, 5]
327+
328+
expected = xr.Dataset(
329+
data_vars={"a": ("x", e)}, coords={"quantile": 0.5, "x": [1, 2]}
330+
)
331+
actual = ds.groupby("x").quantile(0.5, skipna=skipna)
332+
assert_identical(expected, actual)
333+
309334
# Multiple dimensions
310335
ds = xr.Dataset(
311336
data_vars={

xarray/tests/test_variable.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,16 +1700,20 @@ def raise_if_called(*args, **kwargs):
17001700
with set_options(use_bottleneck=False):
17011701
v.min()
17021702

1703-
@pytest.mark.parametrize("skipna", [True, False])
1703+
@pytest.mark.parametrize("skipna", [True, False, None])
17041704
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
17051705
@pytest.mark.parametrize(
17061706
"axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
17071707
)
17081708
def test_quantile(self, q, axis, dim, skipna):
1709-
v = Variable(["x", "y"], self.d)
1709+
1710+
d = self.d.copy()
1711+
d[0, 0] = np.NaN
1712+
1713+
v = Variable(["x", "y"], d)
17101714
actual = v.quantile(q, dim=dim, skipna=skipna)
1711-
_percentile_func = np.nanpercentile if skipna else np.percentile
1712-
expected = _percentile_func(self.d, np.array(q) * 100, axis=axis)
1715+
_percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile
1716+
expected = _percentile_func(d, np.array(q) * 100, axis=axis)
17131717
np.testing.assert_allclose(actual.values, expected)
17141718

17151719
@requires_dask

0 commit comments

Comments
 (0)