From a9c8d8514a31497d78e0abae8b2c8a4a8366e79b Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 15:42:45 -0500 Subject: [PATCH 01/14] Initial test case --- pandas/tests/frame/test_arithmetic.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index bc69ec388bf0c..eb79a17a75f5a 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2199,3 +2199,19 @@ def test_mixed_col_index_dtype(using_infer_string): dtype = "string" expected.columns = expected.columns.astype(dtype) tm.assert_frame_equal(result, expected) + + +def test_df_mul_series_fill_value(): + # GH 61581 + data = np.arange(50).reshape(10, 5) + columns = list("ABCDE") + df = DataFrame(data, columns=columns) + for i in range(5): + df.iat[i, i] = np.nan + df.iat[i + 1, i] = np.nan + df.iat[i + 4, i] = np.nan + + df_result = df[["A", "B", "C", "D"]].mul(df["E"], axis=0, fill_value=5) + df_expected = df[["A", "B", "C", "D"]].mul(df["E"].fillna(5), axis=0) + + tm.assert_frame_equal(df_result, df_expected) From f303a04cf9dc4636869b5919ce4d460fda22d40a Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 17:05:27 -0500 Subject: [PATCH 02/14] Updated test case to account for results of mul being NaN if both inputs are NaN --- pandas/tests/frame/test_arithmetic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index eb79a17a75f5a..37b0f764d7eed 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2211,7 +2211,11 @@ def test_df_mul_series_fill_value(): df.iat[i + 1, i] = np.nan df.iat[i + 4, i] = np.nan - df_result = df[["A", "B", "C", "D"]].mul(df["E"], axis=0, fill_value=5) - df_expected = df[["A", "B", "C", "D"]].mul(df["E"].fillna(5), axis=0) + df_a = df.iloc[:, :-1] + df_b = df.iloc[:, -1] + nan_mask = 
df_a.isna().astype(int).mul(df_b.isna().astype(int), axis=0).astype(bool) + + df_result = df_a.mul(df_b, axis=0, fill_value=5) + df_expected = (df_a.fillna(5).mul(df_b.fillna(5), axis=0)).mask(nan_mask, np.nan) tm.assert_frame_equal(df_result, df_expected) From 5ac26a4a4ebad933da937bcc10906f2514cebc77 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 17:21:49 -0500 Subject: [PATCH 03/14] Removed test cases which expect an error from fill_value --- pandas/tests/frame/test_arithmetic.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 37b0f764d7eed..74e85d9c41922 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -628,12 +628,6 @@ def test_arith_flex_frame_corner(self, float_frame): expected = float_frame.sort_index() * np.nan tm.assert_frame_equal(result, expected) - with pytest.raises(NotImplementedError, match="fill_value"): - float_frame.add(float_frame.iloc[0], fill_value=3) - - with pytest.raises(NotImplementedError, match="fill_value"): - float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) - @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"]) def test_arith_flex_series_ops(self, simple_frame, op): # after arithmetic refactor, add truediv here @@ -667,19 +661,6 @@ def test_arith_flex_series_broadcasting(self, any_real_numpy_dtype): result = df.div(df[0], axis="index") tm.assert_frame_equal(result, expected) - def test_arith_flex_zero_len_raises(self): - # GH 19522 passing fill_value to frame flex arith methods should - # raise even in the zero-length special cases - ser_len0 = Series([], dtype=object) - df_len0 = DataFrame(columns=["A", "B"]) - df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - - with pytest.raises(NotImplementedError, match="fill_value"): - df.add(ser_len0, fill_value="E") - - with pytest.raises(NotImplementedError, match="fill_value"): - 
df_len0.sub(df["A"], axis=None, fill_value=3) - def test_flex_add_scalar_fill_value(self): # GH#12723 dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float") From a60fbb03a01766c5474d3ca68baf6ee5cfeb17e8 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 17:46:37 -0500 Subject: [PATCH 04/14] Updated test case to include other operators which included fill_value --- pandas/tests/frame/test_arithmetic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 74e85d9c41922..5852a8fde42bb 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2182,7 +2182,8 @@ def test_mixed_col_index_dtype(using_infer_string): tm.assert_frame_equal(result, expected) -def test_df_mul_series_fill_value(): +@pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "mod", "truediv", "pow"]) +def test_df_series_fill_value(op): # GH 61581 data = np.arange(50).reshape(10, 5) columns = list("ABCDE") @@ -2196,7 +2197,9 @@ def test_df_mul_series_fill_value(): df_b = df.iloc[:, -1] nan_mask = df_a.isna().astype(int).mul(df_b.isna().astype(int), axis=0).astype(bool) - df_result = df_a.mul(df_b, axis=0, fill_value=5) - df_expected = (df_a.fillna(5).mul(df_b.fillna(5), axis=0)).mask(nan_mask, np.nan) + df_result = getattr(df_a, op)(df_b, axis=0, fill_value=5) + df_expected = getattr(df_a.fillna(5), op)(df_b.fillna(5), axis=0).mask( + nan_mask, np.nan + ) tm.assert_frame_equal(df_result, df_expected) From 87ecfc45def523df28266af0ad678d3876630a15 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 17:48:36 -0500 Subject: [PATCH 05/14] Removed restriction on using fill_value with series Updated docs --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4e0e497379fa2..909453b4ee95e 100644 
--- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -778,6 +778,7 @@ MultiIndex - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`) +- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` raising ``NotImplementedError`` when ``fill_value`` is passed (:issue:`61581`) - Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`) I/O diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 632ab12edd7e4..a7b955520eabc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8382,11 +8382,6 @@ def _flex_arith_method( if self._should_reindex_frame_op(other, op, axis, fill_value, level): return self._arith_method_with_reindex(other, op) - if isinstance(other, Series) and fill_value is not None: - # TODO: We could allow this in cases where we end up going - # through the DataFrame path - raise NotImplementedError(f"fill_value {fill_value} not supported.") - other = ops.maybe_prepare_scalar_for_op(other, self.shape) self, other = self._align_for_op(other, axis, flex=True, level=level) From bc805fd094abe1f3d0be160360a1ef057e2a22a6 Mon Sep 17 00:00:00 2001 From: eicchen Date: Tue, 15 Jul 2025 14:13:20 -0500 Subject: [PATCH 06/14] Included PR suggestions, added separate dtype test (WIP) --- pandas/tests/frame/test_arithmetic.py | 51 +++++++++++++++++++++------ 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 5852a8fde42bb..9faea7415749e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2183,23 +2183,54 @@
def test_mixed_col_index_dtype(using_infer_string): @pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "mod", "truediv", "pow"]) -def test_df_series_fill_value(op): +def test_df_fill_value_operations(op): # GH 61581 - data = np.arange(50).reshape(10, 5) + input_data = np.arange(50).reshape(10, 5) + fill_val = 5 columns = list("ABCDE") - df = DataFrame(data, columns=columns) + df = DataFrame(input_data, columns=columns) for i in range(5): df.iat[i, i] = np.nan df.iat[i + 1, i] = np.nan df.iat[i + 4, i] = np.nan - df_a = df.iloc[:, :-1] - df_b = df.iloc[:, -1] - nan_mask = df_a.isna().astype(int).mul(df_b.isna().astype(int), axis=0).astype(bool) + df_base = df.iloc[:, :-1] + df_mult = df.iloc[:, -1] + mask = df.isna().values + mask = mask[:, :-1] & mask[:, [-1]] - df_result = getattr(df_a, op)(df_b, axis=0, fill_value=5) - df_expected = getattr(df_a.fillna(5), op)(df_b.fillna(5), axis=0).mask( - nan_mask, np.nan - ) + df_result = getattr(df_base, op)(df_mult, axis=0, fill_value=fill_val) + df_expected = getattr(df_base.fillna(fill_val), op)( + df_mult.fillna(fill_val), axis=0 + ).mask(mask, np.nan) tm.assert_frame_equal(df_result, df_expected) + + +# ! 
Currently implementing +# @pytest.mark.parametrize("input_data, fill_val", +# [ +# (np.arange(50).reshape(10, 5), 5), #Numpy +# (pd.array(np.random.choice([True, False], size=(10, 5)), +# dtype="boolean"), True), +# ] +# ) +# def test_df_fill_value_dtype(input_data, fill_val): +# # GH 61581 +# columns = list("ABCDE") +# df = DataFrame(input_data, columns=columns) +# for i in range(5): +# df.iat[i, i] = np.nan +# df.iat[i + 1, i] = np.nan +# df.iat[i + 4, i] = np.nan + +# df_base = df.iloc[:, :-1] +# df_mult = df.iloc[:, -1] +# mask = df.isna().values +# mask = mask[:, :-1] & mask[:, [-1]] + +# df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) +# df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), +# axis=0)).mask(mask, np.nan) + +# tm.assert_frame_equal(df_result, df_expected) From be0961632ad9307e24ff975857f0057d25cd0d3c Mon Sep 17 00:00:00 2001 From: eicchen Date: Tue, 15 Jul 2025 21:50:27 -0500 Subject: [PATCH 07/14] temp files --- test.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ test2.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 test.py create mode 100644 test2.py diff --git a/test.py b/test.py new file mode 100644 index 0000000000000..be4c57afe3aa7 --- /dev/null +++ b/test.py @@ -0,0 +1,60 @@ +# mypy: ignore-errors +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def print_side_by_side(df1, df2): + # Convert to string and split into lines + df1_str = df1.to_string(index=False).split("\n") + df2_str = df2.to_string(index=False).split("\n") + + # Pad lines to the same length for alignment + max_len_1 = max(len(line) for line in df1_str) + padded_df1 = [line.ljust(max_len_1) for line in df1_str] + + # Print side-by-side + print("Result".ljust(max_len_1) + " | Expected") + for line1, line2 in zip(padded_df1, df2_str): + print(f"{line1} | {line2}") + + +# data = np.arange(50).reshape(10, 5) +# fill_val = 5 + +# data = 
pd.array(np.random.choice([True, False], size=(10, 5)), dtype="boolean") +# fill_val = True + +data = np.arange(50).reshape(10, 5) +# data_mult = pd.array([i for i in range(10)], dtype=tm.SIGNED_INT_NUMPY_DTYPES[0]) +data_mult = pd.array(list(range(10)), dtype=tm.SIGNED_INT_EA_DTYPES[0]) +fill_val = 5 + +# print(tm.ALL_INT_DTYPES) +# print(tm.SIGNED_INT_EA_DTYPES) +# tm.SIGNED_INT_NUMPY_DTYPES[0] +print(type(data_mult)) + +# TODO masking not working with EA with dim > 1 +# NOTE currently trying to get EA testing set up + +columns = list("ABCDE") +df_base = pd.DataFrame(data, columns=columns) +for i in range(5): + df_base.iat[i, i] = np.nan + df_base.iat[i + 1, i] = np.nan + df_base.iat[i + 4, i] = np.nan + +mask = df_base.isna().values + +data_mult_re = data_mult.reshape(10, 1) +mask = mask[:, :-1] & data_mult_re + +df_result = df_base.mul(data_mult, axis=0, fill_value=fill_val) +print(df_result) +# df_expected = (df_base.fillna(fill_val).mul(data_mult.fillna(fill_val), +# axis=0)).mask(mask, np.nan) + +# print_side_by_side(df_result, df_expected) +# # tm.assert_frame_equal(df_result, df_expected) diff --git a/test2.py b/test2.py new file mode 100644 index 0000000000000..a060a8229f1b4 --- /dev/null +++ b/test2.py @@ -0,0 +1,52 @@ +# mypy: ignore-errors +import numpy as np + +import pandas as pd + + +def print_side_by_side(df1, df2): + # Convert to string and split into lines + df1_str = df1.to_string(index=False).split("\n") + df2_str = df2.to_string(index=False).split("\n") + + # Pad lines to the same length for alignment + max_len_1 = max(len(line) for line in df1_str) + padded_df1 = [line.ljust(max_len_1) for line in df1_str] + + # Print side-by-side + print("Result".ljust(max_len_1) + " | Expected") + for line1, line2 in zip(padded_df1, df2_str): + print(f"{line1} | {line2}") + + +data = np.arange(50).reshape(10, 5) +fill_val = 5 + +# data = pd.array(np.random.choice([True, False], size=(10, 5)), dtype="boolean") +# fill_val = True + +# data = pd.array([i for i 
in range(50)], dtype="int") +# fill_val = 5 + +print(type(data)) + +columns = list("ABCDE") +df = pd.DataFrame(data, columns=columns) +for i in range(5): + df.iat[i, i] = np.nan + df.iat[i + 1, i] = np.nan + df.iat[i + 4, i] = np.nan + +df_base = df.iloc[:, :-1] +df_mult = df.iloc[:, [-1]] + +mask = df.isna().values +mask = mask[:, :-1] & mask[:, [-1]] + +df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) +df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), axis=0)).mask( + mask, np.nan +) + +print_side_by_side(df_result, df_expected) +# tm.assert_frame_equal(df_result, df_expected) From 1ebcf6e2e494b956f182a24b0323a7238dfbd5ee Mon Sep 17 00:00:00 2001 From: eicchen Date: Mon, 18 Aug 2025 16:46:07 -0500 Subject: [PATCH 08/14] Added test case to test EA and NUMPY dtypes --- pandas/tests/frame/test_arithmetic.py | 60 +++++++++++++++------------ 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 9faea7415749e..0c282b3cf0de3 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2207,30 +2207,36 @@ def test_df_fill_value_operations(op): tm.assert_frame_equal(df_result, df_expected) -# ! 
Currently implementing -# @pytest.mark.parametrize("input_data, fill_val", -# [ -# (np.arange(50).reshape(10, 5), 5), #Numpy -# (pd.array(np.random.choice([True, False], size=(10, 5)), -# dtype="boolean"), True), -# ] -# ) -# def test_df_fill_value_dtype(input_data, fill_val): -# # GH 61581 -# columns = list("ABCDE") -# df = DataFrame(input_data, columns=columns) -# for i in range(5): -# df.iat[i, i] = np.nan -# df.iat[i + 1, i] = np.nan -# df.iat[i + 4, i] = np.nan - -# df_base = df.iloc[:, :-1] -# df_mult = df.iloc[:, -1] -# mask = df.isna().values -# mask = mask[:, :-1] & mask[:, [-1]] - -# df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) -# df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), -# axis=0)).mask(mask, np.nan) - -# tm.assert_frame_equal(df_result, df_expected) +dt_params = [ + (tm.ALL_INT_NUMPY_DTYPES, 5), + (tm.ALL_INT_EA_DTYPES, 5), + (tm.FLOAT_NUMPY_DTYPES, 4.9), + (tm.FLOAT_EA_DTYPES, 4.9), +] + +dt_param_flat = [(dt, val) for lst, val in dt_params for dt in lst] + + +@pytest.mark.parametrize("data_type, fill_val", dt_param_flat) +def test_df_fill_value_dtype(data_type, fill_val): + # GH 61581 + base_data = np.arange(50).reshape(10, 5) + df_data = pd.array(base_data, dtype=data_type) + columns = list("ABCDE") + df = DataFrame(df_data, columns=columns) + for i in range(5): + df.iat[i, i] = np.nan + df.iat[i + 1, i] = pd.NA + df.iat[i + 4, i] = pd.NA + + df_base = df.iloc[:, :-1] + df_mult = df.iloc[:, -1] + mask = df.isna().values + mask = mask[:, :-1] & mask[:, [-1]] + + df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) + df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), axis=0)).mask( + mask, np.nan + ) + + tm.assert_frame_equal(df_result, df_expected) From a5940d5ca440479deff64a9bd7ce1169cad1a166 Mon Sep 17 00:00:00 2001 From: eicchen Date: Wed, 20 Aug 2025 20:20:37 -0500 Subject: [PATCH 09/14] addressed changes brought up in PR, converted test cases to not use non-1D EAs --- 
pandas/core/frame.py | 7 +- pandas/tests/frame/test_arithmetic.py | 96 +++++++++++++-------------- test.py | 60 ----------------- test2.py | 52 --------------- 4 files changed, 48 insertions(+), 167 deletions(-) delete mode 100644 test.py delete mode 100644 test2.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 29d3a67e74e02..8d72105eea999 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8444,12 +8444,7 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): """ rvalues = series._values if not isinstance(rvalues, np.ndarray): - # TODO(EA2D): no need to special-case with 2D EAs - if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): - # We can losslessly+cheaply cast to ndarray - rvalues = np.asarray(rvalues) - else: - return series + rvalues = np.asarray(rvalues) if axis == 0: rvalues = rvalues.reshape(-1, 1) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 353d3fe9324ab..228d62878fc38 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2175,61 +2175,59 @@ def test_mixed_col_index_dtype(string_dtype_no_object): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "mod", "truediv", "pow"]) -def test_df_fill_value_operations(op): - # GH 61581 - input_data = np.arange(50).reshape(10, 5) - fill_val = 5 - columns = list("ABCDE") - df = DataFrame(input_data, columns=columns) - for i in range(5): - df.iat[i, i] = np.nan - df.iat[i + 1, i] = np.nan - df.iat[i + 4, i] = np.nan - - df_base = df.iloc[:, :-1] - df_mult = df.iloc[:, -1] - mask = df.isna().values - mask = mask[:, :-1] & mask[:, [-1]] - - df_result = getattr(df_base, op)(df_mult, axis=0, fill_value=fill_val) - df_expected = getattr(df_base.fillna(fill_val), op)( - df_mult.fillna(fill_val), axis=0 - ).mask(mask, np.nan) - - tm.assert_frame_equal(df_result, df_expected) - - dt_params = [ - (tm.ALL_INT_NUMPY_DTYPES, 5), 
- (tm.ALL_INT_EA_DTYPES, 5), - (tm.FLOAT_NUMPY_DTYPES, 4.9), - (tm.FLOAT_EA_DTYPES, 4.9), + (tm.ALL_INT_NUMPY_DTYPES[0], 5), + (tm.ALL_INT_EA_DTYPES[0], 5), + (tm.FLOAT_NUMPY_DTYPES[0], 4.9), + (tm.FLOAT_EA_DTYPES[0], 4.9), ] -dt_param_flat = [(dt, val) for lst, val in dt_params for dt in lst] +axes = [0, 1] -@pytest.mark.parametrize("data_type, fill_val", dt_param_flat) -def test_df_fill_value_dtype(data_type, fill_val): +@pytest.mark.parametrize( + "data_type,fill_val, axis", + [(dt, val, axis) for axis in axes for dt, val in dt_params], +) +def test_df_fill_value_dtype(data_type, fill_val, axis): # GH 61581 - base_data = np.arange(50).reshape(10, 5) - df_data = pd.array(base_data, dtype=data_type) + base_data = np.arange(25).reshape(5, 5) + mult_list = [1, np.nan, 5, np.nan, 3] + np_int_flag = 0 + + try: + mult_data = pd.array(mult_list, dtype=data_type) + except ValueError as e: + # Numpy int type cannot represent NaN, it will end up here + if "cannot convert float NaN to integer" in str(e): + mult_data = np.asarray(mult_list) + np_int_flag = 1 + columns = list("ABCDE") - df = DataFrame(df_data, columns=columns) - for i in range(5): - df.iat[i, i] = np.nan - df.iat[i + 1, i] = pd.NA - df.iat[i + 4, i] = pd.NA - - df_base = df.iloc[:, :-1] - df_mult = df.iloc[:, -1] - mask = df.isna().values - mask = mask[:, :-1] & mask[:, [-1]] - - df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) - df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), axis=0)).mask( - mask, np.nan - ) + df = DataFrame(base_data, columns=columns) + + for i in range(df.shape[0]): + try: + df.iat[i, i] = np.nan + df.iat[i + 1, i] = pd.NA + df.iat[i + 3, i] = pd.NA + except IndexError: + pass + + mult_mat = np.broadcast_to(mult_data, df.shape) + if axis == 0: + mask = np.isnan(mult_mat).T + else: + mask = np.isnan(mult_mat) + mask = df.isna().values & mask + + df_result = df.mul(mult_data, axis=axis, fill_value=fill_val) + if np_int_flag == 1: + mult_np = 
np.nan_to_num(mult_data, nan=fill_val) + df_expected = (df.fillna(fill_val).mul(mult_np, axis=axis)).mask(mask, np.nan) + else: + df_expected = ( + df.fillna(fill_val).mul(mult_data.fillna(fill_val), axis=axis) + ).mask(mask, np.nan) tm.assert_frame_equal(df_result, df_expected) diff --git a/test.py b/test.py deleted file mode 100644 index be4c57afe3aa7..0000000000000 --- a/test.py +++ /dev/null @@ -1,60 +0,0 @@ -# mypy: ignore-errors -import numpy as np - -import pandas as pd -import pandas._testing as tm - - -def print_side_by_side(df1, df2): - # Convert to string and split into lines - df1_str = df1.to_string(index=False).split("\n") - df2_str = df2.to_string(index=False).split("\n") - - # Pad lines to the same length for alignment - max_len_1 = max(len(line) for line in df1_str) - padded_df1 = [line.ljust(max_len_1) for line in df1_str] - - # Print side-by-side - print("Result".ljust(max_len_1) + " | Expected") - for line1, line2 in zip(padded_df1, df2_str): - print(f"{line1} | {line2}") - - -# data = np.arange(50).reshape(10, 5) -# fill_val = 5 - -# data = pd.array(np.random.choice([True, False], size=(10, 5)), dtype="boolean") -# fill_val = True - -data = np.arange(50).reshape(10, 5) -# data_mult = pd.array([i for i in range(10)], dtype=tm.SIGNED_INT_NUMPY_DTYPES[0]) -data_mult = pd.array(list(range(10)), dtype=tm.SIGNED_INT_EA_DTYPES[0]) -fill_val = 5 - -# print(tm.ALL_INT_DTYPES) -# print(tm.SIGNED_INT_EA_DTYPES) -# tm.SIGNED_INT_NUMPY_DTYPES[0] -print(type(data_mult)) - -# TODO masking not working with EA with dim > 1 -# NOTE currently trying to get EA testing set up - -columns = list("ABCDE") -df_base = pd.DataFrame(data, columns=columns) -for i in range(5): - df_base.iat[i, i] = np.nan - df_base.iat[i + 1, i] = np.nan - df_base.iat[i + 4, i] = np.nan - -mask = df_base.isna().values - -data_mult_re = data_mult.reshape(10, 1) -mask = mask[:, :-1] & data_mult_re - -df_result = df_base.mul(data_mult, axis=0, fill_value=fill_val) -print(df_result) -# 
df_expected = (df_base.fillna(fill_val).mul(data_mult.fillna(fill_val), -# axis=0)).mask(mask, np.nan) - -# print_side_by_side(df_result, df_expected) -# # tm.assert_frame_equal(df_result, df_expected) diff --git a/test2.py b/test2.py deleted file mode 100644 index a060a8229f1b4..0000000000000 --- a/test2.py +++ /dev/null @@ -1,52 +0,0 @@ -# mypy: ignore-errors -import numpy as np - -import pandas as pd - - -def print_side_by_side(df1, df2): - # Convert to string and split into lines - df1_str = df1.to_string(index=False).split("\n") - df2_str = df2.to_string(index=False).split("\n") - - # Pad lines to the same length for alignment - max_len_1 = max(len(line) for line in df1_str) - padded_df1 = [line.ljust(max_len_1) for line in df1_str] - - # Print side-by-side - print("Result".ljust(max_len_1) + " | Expected") - for line1, line2 in zip(padded_df1, df2_str): - print(f"{line1} | {line2}") - - -data = np.arange(50).reshape(10, 5) -fill_val = 5 - -# data = pd.array(np.random.choice([True, False], size=(10, 5)), dtype="boolean") -# fill_val = True - -# data = pd.array([i for i in range(50)], dtype="int") -# fill_val = 5 - -print(type(data)) - -columns = list("ABCDE") -df = pd.DataFrame(data, columns=columns) -for i in range(5): - df.iat[i, i] = np.nan - df.iat[i + 1, i] = np.nan - df.iat[i + 4, i] = np.nan - -df_base = df.iloc[:, :-1] -df_mult = df.iloc[:, [-1]] - -mask = df.isna().values -mask = mask[:, :-1] & mask[:, [-1]] - -df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) -df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), axis=0)).mask( - mask, np.nan -) - -print_side_by_side(df_result, df_expected) -# tm.assert_frame_equal(df_result, df_expected) From dcf3391ed32202d8a110f8fe620ed3ab5ffe616d Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 21 Aug 2025 01:58:01 -0500 Subject: [PATCH 10/14] Limit np conversion to IntegerArray and FloatArray --- pandas/core/frame.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8d72105eea999..69a5f3886609d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -123,6 +123,10 @@ notna, ) +from pandas.arrays import ( + FloatingArray, + IntegerArray, +) from pandas.core import ( algorithms, common as com, @@ -8444,7 +8448,12 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): """ rvalues = series._values if not isinstance(rvalues, np.ndarray): - rvalues = np.asarray(rvalues) + if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]") or isinstance( + rvalues, (IntegerArray, FloatingArray) + ): + rvalues = np.asarray(rvalues) + else: + return series if axis == 0: rvalues = rvalues.reshape(-1, 1) From 1179098e35f3109fde511997a0c709a170e83514 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 21 Aug 2025 13:16:12 -0500 Subject: [PATCH 11/14] Updated EA catch method in _maybe_align_series_as_frame --- pandas/core/frame.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 69a5f3886609d..2338735b45605 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -123,10 +123,6 @@ notna, ) -from pandas.arrays import ( - FloatingArray, - IntegerArray, -) from pandas.core import ( algorithms, common as com, @@ -8447,13 +8443,23 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): blockwise.
""" rvalues = series._values - if not isinstance(rvalues, np.ndarray): - if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]") or isinstance( - rvalues, (IntegerArray, FloatingArray) - ): - rvalues = np.asarray(rvalues) + if isinstance(rvalues, PeriodArray): + return series + if not isinstance(rvalues, (np.ndarray,)) and rvalues.dtype not in ( + "datetime64[ns]", + "timedelta64[ns]", + ): + if axis == 0: + df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues)) else: - return series + nrows = self.shape[0] + df = DataFrame( + {i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])}, + dtype=rvalues.dtype, + ) + df.index = self.index + df.columns = self.columns + return df if axis == 0: rvalues = rvalues.reshape(-1, 1) From 2dfb4bf60a5373d0e63efced2e5c6db157d4706a Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 21 Aug 2025 15:39:10 -0500 Subject: [PATCH 12/14] Addressed errors from changes in some tests --- pandas/core/frame.py | 2 +- pandas/tests/arithmetic/test_period.py | 8 ++------ pandas/tests/arrays/string_/test_string.py | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2338735b45605..43ed32b838388 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8445,7 +8445,7 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): rvalues = series._values if isinstance(rvalues, PeriodArray): return series - if not isinstance(rvalues, (np.ndarray,)) and rvalues.dtype not in ( + if not isinstance(rvalues, np.ndarray) and rvalues.dtype not in ( "datetime64[ns]", "timedelta64[ns]", ): diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 67762e0b89c73..8c6825eb6b567 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1361,12 +1361,8 @@ def test_period_add_timestamp_raises(self, box_with_array): arr + ts with pytest.raises(TypeError, match=msg): ts + arr - if
box_with_array is pd.DataFrame: - # TODO: before implementing resolution-inference we got the same - # message with DataFrame and non-DataFrame. Why did that change? - msg = "cannot add PeriodArray and Timestamp" - else: - msg = "cannot add PeriodArray and DatetimeArray" + msg = "cannot add PeriodArray and DatetimeArray" + print(box_with_array) with pytest.raises(TypeError, match=msg): arr + Series([ts]) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 96e1cc05e284c..c2a69db1e5bf0 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -254,7 +254,7 @@ def test_mul(dtype): tm.assert_extension_array_equal(result, expected) -@pytest.mark.xfail(reason="GH-28527") +# @pytest.mark.xfail(reason="GH-28527") def test_add_strings(dtype): arr = pd.array(["a", "b", "c", "d"], dtype=dtype) df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object) From ce0b2efbcc7d8e0c824e1512ee0a7ef1b4f18e0f Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 21 Aug 2025 16:34:59 -0500 Subject: [PATCH 13/14] removed comment and errant print statement --- pandas/tests/arithmetic/test_period.py | 1 - pandas/tests/arrays/string_/test_string.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 8c6825eb6b567..48bbcc81f8dfd 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1362,7 +1362,6 @@ def test_period_add_timestamp_raises(self, box_with_array): with pytest.raises(TypeError, match=msg): ts + arr msg = "cannot add PeriodArray and DatetimeArray" - print(box_with_array) with pytest.raises(TypeError, match=msg): arr + Series([ts]) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index c2a69db1e5bf0..bfaca6076bed2 100644 --- 
a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -254,7 +254,6 @@ def test_mul(dtype): tm.assert_extension_array_equal(result, expected) -# @pytest.mark.xfail(reason="GH-28527") def test_add_strings(dtype): arr = pd.array(["a", "b", "c", "d"], dtype=dtype) df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object) From eaac6558aa81b65c261bc49e2b461d38b2b65a3f Mon Sep 17 00:00:00 2001 From: eicchen Date: Sat, 23 Aug 2025 11:35:56 -0500 Subject: [PATCH 14/14] Commented out test_add_frame's xfail to test CI --- pandas/tests/arrays/string_/test_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index bfaca6076bed2..752c7fa439f04 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -268,7 +268,7 @@ def test_add_strings(dtype): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(reason="GH-28527") +# @pytest.mark.xfail(reason="GH-28527") def test_add_frame(dtype): arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype) df = pd.DataFrame([["x", np.nan, "y", np.nan]])