Skip to content

BUG: DataFrame arithmetic operators don't work with Series using fill_value #61828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,7 @@ MultiIndex
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` raising ``NotImplementedError`` when the ``fill_value`` parameter is passed (:issue:`61581`)
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN`` (:issue:`60923`)

Expand Down
27 changes: 16 additions & 11 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8443,13 +8443,23 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
blockwise.
"""
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
if isinstance(rvalues, PeriodArray):
return series
if not isinstance(rvalues, np.ndarray) and rvalues.dtype not in (
"datetime64[ns]",
"timedelta64[ns]",
):
if axis == 0:
df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues))
else:
return series
nrows = self.shape[0]
df = DataFrame(
{i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])},
dtype=rvalues.dtype,
)
df.index = self.index
df.columns = self.columns
return df

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
Expand All @@ -8473,11 +8483,6 @@ def _flex_arith_method(
if self._should_reindex_frame_op(other, op, axis, fill_value, level):
return self._arith_method_with_reindex(other, op)

if isinstance(other, Series) and fill_value is not None:
# TODO: We could allow this in cases where we end up going
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")

other = ops.maybe_prepare_scalar_for_op(other, self.shape)
self, other = self._align_for_op(other, axis, flex=True, level=level)

Expand Down
7 changes: 1 addition & 6 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,12 +1361,7 @@ def test_period_add_timestamp_raises(self, box_with_array):
arr + ts
with pytest.raises(TypeError, match=msg):
ts + arr
if box_with_array is pd.DataFrame:
# TODO: before implementing resolution-inference we got the same
# message with DataFrame and non-DataFrame. Why did that change?
msg = "cannot add PeriodArray and Timestamp"
else:
msg = "cannot add PeriodArray and DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + Series([ts])
with pytest.raises(TypeError, match=msg):
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,6 @@ def test_mul(dtype):
tm.assert_extension_array_equal(result, expected)


@pytest.mark.xfail(reason="GH-28527")
def test_add_strings(dtype):
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
Expand All @@ -269,7 +268,7 @@ def test_add_strings(dtype):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(reason="GH-28527")
# @pytest.mark.xfail(reason="GH-28527")
def test_add_frame(dtype):
arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
df = pd.DataFrame([["x", np.nan, "y", np.nan]])
Expand Down
77 changes: 58 additions & 19 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,12 +626,6 @@ def test_arith_flex_frame_corner(self, float_frame):
expected = float_frame.sort_index() * np.nan
tm.assert_frame_equal(result, expected)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], fill_value=3)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], axis="index", fill_value=3)

@pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"])
def test_arith_flex_series_ops(self, simple_frame, op):
# after arithmetic refactor, add truediv here
Expand Down Expand Up @@ -665,19 +659,6 @@ def test_arith_flex_series_broadcasting(self, any_real_numpy_dtype):
result = df.div(df[0], axis="index")
tm.assert_frame_equal(result, expected)

def test_arith_flex_zero_len_raises(self):
# GH 19522 passing fill_value to frame flex arith methods should
# raise even in the zero-length special cases
ser_len0 = Series([], dtype=object)
df_len0 = DataFrame(columns=["A", "B"])
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

with pytest.raises(NotImplementedError, match="fill_value"):
df.add(ser_len0, fill_value="E")

with pytest.raises(NotImplementedError, match="fill_value"):
df_len0.sub(df["A"], axis=None, fill_value=3)

def test_flex_add_scalar_fill_value(self):
# GH#12723
dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float")
Expand Down Expand Up @@ -2192,3 +2173,61 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
expected.columns = expected.columns.astype(string_dtype_no_object)

tm.assert_frame_equal(result, expected)


# (dtype, fill_value) pairs used to parametrize ``test_df_fill_value_dtype``.
# Each dtype is the first entry of one of the integer/float dtype lists exposed
# by ``tm``; integer dtypes are paired with an int fill value (5) and float
# dtypes with a float fill value (4.9).
# NOTE(review): "EA" presumably means extension-array (nullable) dtypes -- confirm.
dt_params = [
(tm.ALL_INT_NUMPY_DTYPES[0], 5),
(tm.ALL_INT_EA_DTYPES[0], 5),
(tm.FLOAT_NUMPY_DTYPES[0], 4.9),
(tm.FLOAT_EA_DTYPES[0], 4.9),
]

# Axis values the parametrized test is run against (crossed with ``dt_params``).
axes = [0, 1]


@pytest.mark.parametrize(
    "data_type,fill_val, axis",
    [(dt, val, axis) for axis in axes for dt, val in dt_params],
)
def test_df_fill_value_dtype(data_type, fill_val, axis):
    """Check ``DataFrame.mul`` with an array-like operand and ``fill_value``.

    A 5x5 frame with missing entries is multiplied along ``axis`` by a
    length-5 array that itself contains NaNs. The expected frame is built by
    filling both operands with ``fill_val`` before multiplying, then putting
    NaN back wherever *both* operands were missing.

    Parameters
    ----------
    data_type
        Dtype requested for the multiplier array.
    fill_val
        Value passed as ``fill_value`` to ``DataFrame.mul``.
    axis
        Axis (0 or 1) along which the multiplier is applied.
    """
    # GH 61581
    base_data = np.arange(25).reshape(5, 5)
    mult_list = [1, np.nan, 5, np.nan, 3]

    try:
        mult_data = pd.array(mult_list, dtype=data_type)
    except ValueError as err:
        # NumPy integer dtypes cannot represent NaN; fall back to a plain
        # ndarray in that case. Any other ValueError is a genuine failure and
        # must not be swallowed (it would otherwise surface later as a
        # confusing NameError on ``mult_data``).
        if "cannot convert float NaN to integer" not in str(err):
            raise
        mult_data = np.asarray(mult_list)
        is_numpy_fallback = True
    else:
        is_numpy_fallback = False

    columns = list("ABCDE")
    df = DataFrame(base_data, columns=columns)

    # Punch holes into the frame: NaN on the diagonal, pd.NA one and three
    # rows below it. Offsets that would fall outside the frame are skipped
    # explicitly instead of relying on a caught IndexError.
    nrows = df.shape[0]
    for i in range(nrows):
        df.iat[i, i] = np.nan
        for offset in (1, 3):
            if i + offset < nrows:
                df.iat[i + offset, i] = pd.NA

    # Positions where the multiplier is NaN, laid out to match ``axis``.
    mult_mat = np.broadcast_to(mult_data, df.shape)
    if axis == 0:
        mask = np.isnan(mult_mat).T
    else:
        mask = np.isnan(mult_mat)
    # Only cells missing on *both* sides stay NaN in the expected result.
    mask = df.isna().values & mask

    df_result = df.mul(mult_data, axis=axis, fill_value=fill_val)
    if is_numpy_fallback:
        mult_filled = np.nan_to_num(mult_data, nan=fill_val)
    else:
        mult_filled = mult_data.fillna(fill_val)
    df_expected = df.fillna(fill_val).mul(mult_filled, axis=axis).mask(mask, np.nan)

    tm.assert_frame_equal(df_result, df_expected)
Loading