-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
use numexpr for Series comparisons #32047
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
14b455e
b3072e5
6273e7d
3a55dc1
e37ba75
94fddbb
00e2069
23976f7
c1d314d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -125,7 +125,7 @@ def na_op(x, y): | |
return na_op | ||
|
||
|
||
def na_arithmetic_op(left, right, op, str_rep: str): | ||
def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = False): | ||
""" | ||
Return the result of evaluating op on the passed in values. | ||
|
||
|
@@ -136,6 +136,8 @@ def na_arithmetic_op(left, right, op, str_rep: str): | |
left : np.ndarray | ||
right : np.ndarray or scalar | ||
str_rep : str or None | ||
is_cmp : bool, default False | ||
If this is a comparison operation. | |
|
||
Returns | ||
------- | ||
|
@@ -150,6 +152,8 @@ def na_arithmetic_op(left, right, op, str_rep: str): | |
try: | ||
result = expressions.evaluate(op, str_rep, left, right) | ||
except TypeError: | ||
if is_cmp: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What hits this branch when is_cmp is also True? Can you add a comment? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So we have 7 cases that get here. 4 of these are something like
|
||
raise | ||
result = masked_arith_op(left, right, op) | ||
|
||
return missing.dispatch_fill_zeros(op, left, right, result) | ||
|
@@ -201,7 +205,10 @@ def arithmetic_op( | |
|
||
|
||
def comparison_op( | ||
left: Union[np.ndarray, ABCExtensionArray], right: Any, op | ||
left: Union[np.ndarray, ABCExtensionArray], | ||
right: Any, | ||
op, | ||
str_rep: Optional[str] = None, | ||
) -> Union[np.ndarray, ABCExtensionArray]: | ||
""" | ||
Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`. | |
|
@@ -250,7 +257,10 @@ def comparison_op( | |
op_name = f"__{op.__name__}__" | ||
method = getattr(lvalues, op_name) | ||
with np.errstate(all="ignore"): | ||
res_values = method(rvalues) | ||
res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep, is_cmp=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this not handled in na_arithmetic_op? It seems odd to handle it here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. sure |
||
if is_scalar(res_values): | ||
# numexpr choked | ||
res_values = method(rvalues) | ||
|
||
if res_values is NotImplemented: | ||
res_values = invalid_comparison(lvalues, rvalues, op) | ||
|
@@ -385,7 +395,7 @@ def get_array_op(op, str_rep: Optional[str] = None): | |
""" | ||
op_name = op.__name__.strip("_") | ||
if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: | ||
return partial(comparison_op, op=op) | ||
return partial(comparison_op, op=op, str_rep=str_rep) | ||
elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: | ||
return partial(logical_op, op=op) | ||
else: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,7 +66,12 @@ def test_df_numeric_cmp_dt64_raises(self): | |
ts = pd.Timestamp.now() | ||
df = pd.DataFrame({"x": range(5)}) | ||
|
||
msg = "Invalid comparison between dtype=int64 and Timestamp" | ||
msg = "|".join( | ||
[ | ||
"Invalid comparison between dtype=int64 and Timestamp", | ||
"'[<>]' not supported between instances of 'Timestamp' and 'int'", | ||
] | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we test explicitly with one message and then the other instead? Or, even better, can we make the error messages consistent? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, turns out we only need the new one; updated. |
||
|
||
with pytest.raises(TypeError, match=msg): | ||
df > ts | ||
|
Uh oh!
There was an error while loading. Please reload this page.