-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: flex op with DataFrame, Series and ea vs ndarray #34277
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
baf4e38
ea3aaba
4ef99a7
612fab6
c59650c
b857721
ddb9fa3
5c85d1a
86eaed9
a99028e
95442be
1c36218
19d8c7d
e9ca4c2
ee17990
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,7 +57,7 @@ | |
) | ||
|
||
if TYPE_CHECKING: | ||
from pandas import DataFrame # noqa:F401 | ||
from pandas import DataFrame, Series # noqa:F401 | ||
|
||
# ----------------------------------------------------------------------------- | ||
# constants | ||
|
@@ -511,19 +511,7 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str): | |
# We assume that self.align(other, ...) has already been called | ||
|
||
rvalues = right._values | ||
if isinstance(rvalues, np.ndarray): | ||
# TODO(EA2D): no need to special-case with 2D EAs | ||
# We can operate block-wise | ||
if axis == 0: | ||
rvalues = rvalues.reshape(-1, 1) | ||
else: | ||
rvalues = rvalues.reshape(1, -1) | ||
|
||
rvalues = np.broadcast_to(rvalues, left.shape) | ||
|
||
array_op = get_array_op(func, str_rep=str_rep) | ||
bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"]) | ||
return type(left)(bm) | ||
assert not isinstance(rvalues, np.ndarray) # handled by align_series_as_frame | ||
|
||
if axis == 0: | ||
new_data = dispatch_to_series(left, right, func) | ||
|
@@ -619,6 +607,7 @@ def to_series(right): | |
left, right = left.align( | ||
right, join="outer", axis=axis, level=level, copy=False | ||
) | ||
right = _align_series_as_frame(left, right, axis) | ||
|
||
return left, right | ||
|
||
|
@@ -679,6 +668,25 @@ def _frame_arith_method_with_reindex( | |
return result.reindex(join_columns, axis=1) | ||
|
||
|
||
def _align_series_as_frame(frame: "DataFrame", series: "Series", axis: int): | ||
""" | ||
If the Series operand is not EA-dtype, we can broadcast to 2D and operate | ||
blockwise. | ||
""" | ||
rvalues = series._values | ||
if not isinstance(rvalues, np.ndarray): | ||
# TODO(EA2D): no need to special-case with 2D EAs | ||
return series | ||
|
||
if axis == 0: | ||
rvalues = rvalues.reshape(-1, 1) | ||
else: | ||
rvalues = rvalues.reshape(1, -1) | ||
|
||
rvalues = np.broadcast_to(rvalues, frame.shape) | ||
return type(frame)(rvalues, index=frame.index, columns=frame.columns) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to turn this into a DataFrame? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The alternative is to re-implement the ensure-shape-shape-values code in ops.blockwise (see the first commit, which did it that way) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does it need to be the same shape? The block op or array op should be able to handle this broadcasting automatically themselves? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ndarray handles broadcasting, but DTA/TDA dont. I checked with seberg a few months ago who said there isnt a perf penalty to operating on the broadcasted ndarray. ive got plans to make it so we dont need to wrap the ndarray in a DataFrame (which we actually do in _align_method_FRAME too) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
You mean the "hidden" 2D version of DTA/TDA? (EAs in general handle broadcasting) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
im not sure thats accurate in general (in particular the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ah, yes, indeed, that's a broadcasting that will not be generally supported (we probably should though, worth to open an issue about that). Now, I was thinking about the |
||
|
||
|
||
def _arith_method_FRAME(cls, op, special): | ||
str_rep = _get_opstr(op) | ||
op_name = _get_op_name(op, special) | ||
|
@@ -701,6 +709,10 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): | |
): | ||
return _frame_arith_method_with_reindex(self, other, op) | ||
|
||
if isinstance(other, ABCSeries) and fill_value is not None: | ||
# TODO: We could allow this in cases where we end up going | ||
# through the DataFrame path | ||
raise NotImplementedError(f"fill_value {fill_value} not supported.") | ||
self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) | ||
|
||
if isinstance(other, ABCDataFrame): | ||
|
@@ -716,9 +728,6 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): | |
pass_op = op if axis in [0, "columns", None] else na_op | ||
pass_op = pass_op if not is_logical else op | ||
|
||
if fill_value is not None: | ||
raise NotImplementedError(f"fill_value {fill_value} not supported.") | ||
|
||
axis = self._get_axis_number(axis) if axis is not None else 1 | ||
new_data = _combine_series_frame( | ||
self, other, pass_op, axis=axis, str_rep=str_rep | ||
|
Uh oh!
There was an error while loading. Please reload this page.