From 218e5cda89d66ad0d3367c55a886255d4904d2b2 Mon Sep 17 00:00:00 2001 From: Kyungtae Kim Date: Tue, 8 Oct 2024 00:29:40 +0900 Subject: [PATCH 1/4] BUG: fix #59965 skipna=True operations don't skip NaN in FloatingArrays - Issue: The skipna was not properly handled for BaseMaskedArray - Fix: Added mask for NA values - Test: Added test to series/test_reductions since the test uses Series objects --- pandas/core/array_algos/masked_reductions.py | 3 +++ pandas/tests/series/test_reductions.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index f2a32fbe2b0e5..4a5a8f0c4e61d 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -12,6 +12,7 @@ from pandas._libs import missing as libmissing +from pandas.core.missing import isna from pandas.core.nanops import check_below_min_count if TYPE_CHECKING: @@ -57,6 +58,8 @@ def _reductions( else: return func(values, axis=axis, **kwargs) else: + mask |= isna(values) + if check_below_min_count(values.shape, mask, min_count) and ( axis is None or values.ndim == 1 ): diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 7bbb902e14a36..15ed3097db8ac 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -223,3 +223,11 @@ def test_median_with_convertible_string_raises(): df = ser.to_frame() with pytest.raises(TypeError, match=msg): df.median() + + +def test_mean_with_skipna(): + # GH#59965 skipna=True operations don't skip NaN in FloatingArrays + series1 = Series({"a": 0.0, "b": 1, "c": 1}) + series2 = Series({"a": 0.0, "b": 2, "c": 2}) + result = series1.convert_dtypes() / series2.convert_dtypes() + assert pd.notna(result.mean(skipna=True)) From cc63891b8641e57b3f41a026297896cf93222a3f Mon Sep 17 00:00:00 2001 From: Kyungtae Kim Date: Mon, 14 Oct 2024 20:44:45 +0900 Subject: [PATCH 2/4] Apply mask for 
only the type that can become null after calculation --- pandas/core/array_algos/masked_reductions.py | 3 ++- pandas/tests/series/test_reductions.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 4a5a8f0c4e61d..83b12cf4723c9 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -58,7 +58,8 @@ def _reductions( else: return func(values, axis=axis, **kwargs) else: - mask |= isna(values) + if values.dtype == np.float64: + mask |= isna(values) if check_below_min_count(values.shape, mask, min_count) and ( axis is None or values.ndim == 1 diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 15ed3097db8ac..a038b6d5bbc5b 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -6,7 +6,10 @@ from pandas.compat import HAS_PYARROW import pandas as pd -from pandas import Series +from pandas import ( + Series, + notna, +) import pandas._testing as tm @@ -230,4 +233,4 @@ def test_mean_with_skipna(): series1 = Series({"a": 0.0, "b": 1, "c": 1}) series2 = Series({"a": 0.0, "b": 2, "c": 2}) result = series1.convert_dtypes() / series2.convert_dtypes() - assert pd.notna(result.mean(skipna=True)) + assert notna(result.mean(skipna=True)) From edc977ea982f10d7036871a6b18ba5906bb701b4 Mon Sep 17 00:00:00 2001 From: Kyungtae Kim Date: Wed, 23 Oct 2024 23:27:46 +0900 Subject: [PATCH 3/4] Use is_float_dtype --- pandas/core/array_algos/masked_reductions.py | 4 +++- pandas/tests/series/test_reductions.py | 13 +++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 83b12cf4723c9..6d095699d9995 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -12,6 
+12,8 @@ from pandas._libs import missing as libmissing +from pandas.core.dtypes.common import is_float_dtype + from pandas.core.missing import isna from pandas.core.nanops import check_below_min_count @@ -58,7 +60,7 @@ def _reductions( else: return func(values, axis=axis, **kwargs) else: - if values.dtype == np.float64: + if is_float_dtype(values): mask |= isna(values) if check_below_min_count(values.shape, mask, min_count) and ( diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index a038b6d5bbc5b..27bfb854abc03 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -6,10 +6,7 @@ from pandas.compat import HAS_PYARROW import pandas as pd -from pandas import ( - Series, - notna, -) +from pandas import Series import pandas._testing as tm @@ -230,7 +227,7 @@ def test_median_with_convertible_string_raises(): def test_mean_with_skipna(): # GH#59965 skipna=True operations don't skip NaN in FloatingArrays - series1 = Series({"a": 0.0, "b": 1, "c": 1}) - series2 = Series({"a": 0.0, "b": 2, "c": 2}) - result = series1.convert_dtypes() / series2.convert_dtypes() - assert notna(result.mean(skipna=True)) + series1 = Series({"a": 0.0, "b": 1, "c": 1}, dtype="Float64") + series2 = Series({"a": 0.0, "b": 2, "c": 2}, dtype="Float64") + result = series1 / series2 + assert pd.notna(result.mean(skipna=True)) From 7c02c77074f0523d8cdc2c88c6b1bec3afd1074b Mon Sep 17 00:00:00 2001 From: Kyungtae Kim Date: Mon, 28 Oct 2024 23:47:51 +0900 Subject: [PATCH 4/4] Use np.isclose to make test stronger --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/tests/series/test_reductions.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e5376177d3381..77dbaaa4630a0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -62,6 +62,7 @@ Other enhancements - Support passing a :class:`Iterable[Hashable]` 
input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) +- Support ``skipna=True`` in reductions on ``Float64`` arrays containing ``NaN`` values (:issue:`59965`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 27bfb854abc03..6c995849d6c7d 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -230,4 +230,4 @@ def test_mean_with_skipna(): series1 = Series({"a": 0.0, "b": 1, "c": 1}, dtype="Float64") series2 = Series({"a": 0.0, "b": 2, "c": 2}, dtype="Float64") result = series1 / series2 - assert pd.notna(result.mean(skipna=True)) + assert np.isclose(result.mean(skipna=True), 0.5)