From 78c8d8363f209a3b1cbef208339fef9bb9ebc15e Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 21 Jul 2021 23:43:02 +0530 Subject: [PATCH 1/8] BUG: EWM silently failed float32 --- pandas/core/frame.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 48b18a33f9c9f..6e6299ca8f810 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4280,6 +4280,8 @@ def check_int_infer_dtype(dtypes): # error: Argument 1 to "append" of "list" has incompatible type # "Type[signedinteger[Any]]"; expected "Type[signedinteger[Any]]" converted_dtypes.append(np.int64) # type: ignore[arg-type] + elif dtype == "float": + converted_dtypes.extend([np.float64, np.float32]) else: # error: Argument 1 to "append" of "list" has incompatible type # "Union[dtype[Any], ExtensionDtype]"; expected From 3917ab892cb557d4412bd49f2b208c93bf9c8fb7 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Thu, 22 Jul 2021 13:02:24 +0530 Subject: [PATCH 2/8] added tests --- pandas/core/frame.py | 5 ++++- pandas/tests/window/test_ewm.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6e6299ca8f810..13e8d466f41c3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4281,7 +4281,10 @@ def check_int_infer_dtype(dtypes): # "Type[signedinteger[Any]]"; expected "Type[signedinteger[Any]]" converted_dtypes.append(np.int64) # type: ignore[arg-type] elif dtype == "float": - converted_dtypes.extend([np.float64, np.float32]) + # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20 + converted_dtypes.extend( + [np.float64, np.float32, np.float16] + ) # type: ignore[list-item] else: # error: Argument 1 to "append" of "list" has incompatible type # "Union[dtype[Any], ExtensionDtype]"; expected diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 8da902ea830d1..04d0916424df7 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -181,3 +181,14 @@ def test_ewma_times_adjust_false_raises(): Series(range(1)).ewm( 0.1, adjust=False, times=date_range("2000", freq="D", periods=1) ) + + +@pytest.mark.parametrize("func", ["mean", "std", "var"]) +@pytest.mark.parametrize("dtype", [np.float32, np.float16, np.float64, float, "float"]) +def test_float_dtype_ewma(dtype, func): + # GH#42452 + df = DataFrame(np.random.rand(20, 3), dtype=dtype) + e = df.ewm(alpha=0.5, axis=1) + result = getattr(e, func)().shape + expected = (20, 3) + assert result == expected, f"Shape of ewm {func} must match dataframe" From 346e03f804ddd71d23b93d651eff05664342c98b Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Thu, 22 Jul 2021 13:17:58 +0530 Subject: [PATCH 3/8] resolved mypy error --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 13e8d466f41c3..7a8162b345df3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4283,8 +4283,8 @@ def check_int_infer_dtype(dtypes): elif dtype == "float": # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20 converted_dtypes.extend( - [np.float64, np.float32, np.float16] - ) # type: ignore[list-item] + [np.float64, np.float32, np.float16] # type: ignore[list-item] + ) else: # error: Argument 1 to "append" of "list" has incompatible type # "Union[dtype[Any], ExtensionDtype]"; expected From 2677e83670d4e324e690482ba085e84ac339e0d1 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 24 Jul 2021 16:01:55 +0530 Subject: [PATCH 4/8] added constant data in test --- pandas/tests/window/test_ewm.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 04d0916424df7..e6337805fa056 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -187,8 +187,28 @@ def test_ewma_times_adjust_false_raises(): @pytest.mark.parametrize("dtype", [np.float32, np.float16, np.float64, float, "float"]) def test_float_dtype_ewma(dtype, func): # GH#42452 - df = DataFrame(np.random.rand(20, 3), dtype=dtype) + expected_mean = DataFrame( + { + 0: range(5), + 1: range(4, 9), + 2: [7.428571, 9, 10.571429, 12.142857, 13.714286], + }, + dtype=float, + ) + expected_std = DataFrame( + { + 0: [np.nan] * 5, + 1: [4.242641] * 5, + 2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788], + } + ) + expected_var = expected_std ** 2 + df = DataFrame({0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=dtype) e = df.ewm(alpha=0.5, axis=1) - result = getattr(e, func)().shape - expected = (20, 3) - assert result == expected, f"Shape of ewm {func} must match dataframe" + result = getattr(e, func)() + expected = { + "expected_var": expected_var, + "expected_mean": expected_mean, + "expected_std": expected_std, + } + tm.assert_frame_equal(result, expected[f"expected_{func}"]) From a0a531e32f0a4179ed85f58f048e0e1f6e1af23a Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 24 Jul 2021 20:39:22 +0530 Subject: [PATCH 5/8] added pytest.fixture & whatsnew --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/tests/window/test_ewm.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index ad6a9d994bf7b..6763c3043b102 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -270,6 +270,7 @@ Groupby/resample/rolling - Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`) - Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`) - Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`) +- Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`) Reshaping ^^^^^^^^^ diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index e6337805fa056..31e3eaf76d13b 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -184,8 +184,7 @@ def test_ewma_times_adjust_false_raises(): @pytest.mark.parametrize("func", ["mean", "std", "var"]) -@pytest.mark.parametrize("dtype", [np.float32, np.float16, np.float64, float, "float"]) -def test_float_dtype_ewma(dtype, func): +def test_float_dtype_ewma(func, float_dtype): # GH#42452 expected_mean = DataFrame( { @@ -203,7 +202,9 @@ def test_float_dtype_ewma(dtype, func): } ) expected_var = expected_std ** 2 - df = DataFrame({0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=dtype) + df = DataFrame( + {0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_dtype + ) e = df.ewm(alpha=0.5, axis=1) result = getattr(e, func)() expected = { From e2e4fb6094bb40c160861c62de869db3e219d4fe Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 25 Jul 2021 16:23:44 +0530 Subject: [PATCH 6/8] parametrized expected df; removed float16 --- pandas/core/frame.py | 2 +- pandas/tests/window/test_ewm.py | 64 ++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7a8162b345df3..1a6f13f6ad2a3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4283,7 +4283,7 @@ def check_int_infer_dtype(dtypes): elif dtype == "float": # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20 converted_dtypes.extend( - [np.float64, np.float32, np.float16] # type: ignore[list-item] + [np.float64, np.float32] # type: ignore[list-item] ) else: # error: Argument 1 to "append" of "list" has incompatible type diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 31e3eaf76d13b..977ce281c4b33 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -183,33 +183,49 @@ def test_ewma_times_adjust_false_raises(): ) -@pytest.mark.parametrize("func", ["mean", "std", "var"]) -def test_float_dtype_ewma(func, float_dtype): +@pytest.mark.parametrize( + "func, expected", + [ + [ + "mean", + DataFrame( + { + 0: range(5), + 1: range(4, 9), + 2: [7.428571, 9, 10.571429, 12.142857, 13.714286], + }, + dtype=float, + ), + ], + [ + "std", + DataFrame( + { + 0: [np.nan] * 5, + 1: [4.242641] * 5, + 2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788], + } + ), + ], + [ + "var", + DataFrame( + { + 0: [np.nan] * 5, + 1: [18.0] * 5, + 2: [21.428571, 27, 33.428571, 40.714286, 48.857143], + } + ), + ], + ], +) +def test_float_dtype_ewma(func, expected, float_dtype): # GH#42452 - expected_mean = DataFrame( - { - 0: range(5), - 1: range(4, 9), - 2: [7.428571, 9, 10.571429, 12.142857, 13.714286], - }, - dtype=float, - ) - expected_std = DataFrame( - { - 0: [np.nan] * 5, - 1: [4.242641] * 5, - 2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788], - } - ) - expected_var = expected_std ** 2 + df = DataFrame( {0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_dtype ) e = df.ewm(alpha=0.5, axis=1) result = getattr(e, func)() - expected = { - "expected_var": expected_var, - "expected_mean": expected_mean, - "expected_std": expected_std, - } - tm.assert_frame_equal(result, expected[f"expected_{func}"]) + + tm.assert_frame_equal(result, expected) From aafef93b00999dcaa7b19b042ae4a207582760f0 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 31 Jul 2021 13:03:50 +0530 Subject: [PATCH 7/8] added test for float32 --- pandas/tests/window/test_rolling.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 77ca482936298..f446eb494d5fa 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1424,3 +1424,11 @@ def test_rolling_zero_window(): result = s.rolling(0).min() expected = Series([np.nan]) tm.assert_series_equal(result, expected) + + +def test_rolling_float_dtype(float_dtype): + # GH#42452 + df = DataFrame({"A": range(5), "B": range(10, 15)}, dtype=float_dtype) + expected = DataFrame({"A": [np.nan] * 5, "B": range(10, 20, 2)}, dtype=float_dtype) + result = df.rolling(2, axis=1).sum() + tm.assert_frame_equal(result, expected, check_dtype=False) From c866bed3c93356d24d2c899c79b61cf0589b42f8 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 1 Aug 2021 14:52:51 +0530 Subject: [PATCH 8/8] added tests on select_dtypes --- pandas/core/frame.py | 2 +- .../tests/frame/methods/test_select_dtypes.py | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1a6f13f6ad2a3..823de2133f0b3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4280,7 +4280,7 @@ def check_int_infer_dtype(dtypes): # error: Argument 1 to "append" of "list" has incompatible type # "Type[signedinteger[Any]]"; expected "Type[signedinteger[Any]]" converted_dtypes.append(np.int64) # type: ignore[arg-type] - elif dtype == "float": + elif dtype == "float" or dtype is float: # GH#42452 : np.dtype("float") coerces to np.float64 from Numpy 1.20 converted_dtypes.extend( [np.float64, np.float32] # type: ignore[list-item] diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 3ff1ceba7996b..4cfd9975652e3 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -407,3 +407,37 @@ def test_select_dtypes_numeric_nullable_string(self, nullable_string_dtype): df = DataFrame(arr) is_selected = df.select_dtypes(np.number).shape == df.shape assert not is_selected + + @pytest.mark.parametrize( + "expected, float_dtypes", + [ + [ + DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)} + ).astype(dtype={"A": float, "B": np.float64, "C": np.float32}), + float, + ], + [ + DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)} + ).astype(dtype={"A": float, "B": np.float64, "C": np.float32}), + "float", + ], + [DataFrame({"C": range(10, 7, -1)}, dtype=np.float32), np.float32], + [ + DataFrame({"A": range(3), "B": range(5, 8)}).astype( + dtype={"A": float, "B": np.float64} + ), + np.float64, + ], + ], + ) + def test_select_dtypes_float_dtype(self, expected, float_dtypes): + # GH#42452 + dtype_dict = {"A": float, "B": np.float64, "C": np.float32} + df = DataFrame( + {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)}, + ) + df = df.astype(dtype_dict) + result = df.select_dtypes(include=float_dtypes) + tm.assert_frame_equal(result, expected)