diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index f53b5045abff3..0c1e1e90c003b 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -9,14 +9,11 @@ import numpy as np -from pandas._libs import Timedelta, Timestamp, lib, ops as libops +from pandas._libs import Timedelta, Timestamp, lib from pandas.errors import NullFrequencyError from pandas.util._decorators import Appender -from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( - ensure_object, - is_bool_dtype, is_datetime64_dtype, is_extension_array_dtype, is_integer_dtype, @@ -27,30 +24,28 @@ ) from pandas.core.dtypes.generic import ( ABCDataFrame, - ABCDatetimeArray, - ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass, ABCSeries, - ABCTimedeltaArray, - ABCTimedeltaIndex, ) from pandas.core.dtypes.missing import isna, notna from pandas._typing import ArrayLike from pandas.core.construction import array, extract_array from pandas.core.ops.array_ops import ( - comp_method_OBJECT_ARRAY, + arithmetic_op, + comparison_op, define_na_arithmetic_op, - na_arithmetic_op, + logical_op, ) +from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401 from pandas.core.ops.docstrings import ( _arith_doc_FRAME, _flex_comp_doc_FRAME, _make_flex_doc, _op_descriptions, ) -from pandas.core.ops.invalid import invalid_comparison +from pandas.core.ops.invalid import invalid_comparison # noqa:F401 from pandas.core.ops.methods import ( # noqa:F401 add_flex_arithmetic_methods, add_special_arithmetic_methods, @@ -643,30 +638,8 @@ def wrapper(left, right): left, right = _align_method_SERIES(left, right) res_name = get_op_result_name(left, right) - keep_null_freq = isinstance( - right, - ( - ABCDatetimeIndex, - ABCDatetimeArray, - ABCTimedeltaIndex, - ABCTimedeltaArray, - Timestamp, - ), - ) - lvalues = extract_array(left, extract_numpy=True) - rvalues = extract_array(right, extract_numpy=True) - - rvalues = 
maybe_upcast_for_op(rvalues, lvalues.shape) - - if should_extension_dispatch(left, rvalues) or isinstance( - rvalues, (ABCTimedeltaArray, ABCDatetimeArray, Timestamp) - ): - result = dispatch_to_extension_op(op, lvalues, rvalues, keep_null_freq) - - else: - with np.errstate(all="ignore"): - result = na_arithmetic_op(lvalues, rvalues, op, str_rep, eval_kwargs) + result = arithmetic_op(lvalues, right, op, str_rep, eval_kwargs) # We do not pass dtype to ensure that the Series constructor # does inference in the case where `result` has object-dtype. @@ -702,46 +675,10 @@ def wrapper(self, other): if isinstance(other, ABCSeries) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") - other = lib.item_from_zerodim(other) - if isinstance(other, list): - # TODO: same for tuples? - other = np.asarray(other) - - if isinstance(other, (np.ndarray, ABCExtensionArray, ABCIndexClass)): - # TODO: make this treatment consistent across ops and classes. - # We are not catching all listlikes here (e.g. frozenset, tuple) - # The ambiguous case is object-dtype. 
See GH#27803 - if len(self) != len(other): - raise ValueError("Lengths must match to compare") - lvalues = extract_array(self, extract_numpy=True) rvalues = extract_array(other, extract_numpy=True) - if should_extension_dispatch(lvalues, rvalues): - res_values = dispatch_to_extension_op(op, lvalues, rvalues) - - elif is_scalar(rvalues) and isna(rvalues): - # numpy does not like comparisons vs None - if op is operator.ne: - res_values = np.ones(len(lvalues), dtype=bool) - else: - res_values = np.zeros(len(lvalues), dtype=bool) - - elif is_object_dtype(lvalues.dtype): - res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) - - else: - op_name = "__{op}__".format(op=op.__name__) - method = getattr(lvalues, op_name) - with np.errstate(all="ignore"): - res_values = method(rvalues) - - if res_values is NotImplemented: - res_values = invalid_comparison(lvalues, rvalues, op) - if is_scalar(res_values): - raise TypeError( - "Could not compare {typ} type with Series".format(typ=type(rvalues)) - ) + res_values = comparison_op(lvalues, rvalues, op) result = self._constructor(res_values, index=self.index) result = finalizer(result) @@ -762,58 +699,7 @@ def _bool_method_SERIES(cls, op, special): """ op_name = _get_op_name(op, special) - def na_op(x, y): - try: - result = op(x, y) - except TypeError: - assert not isinstance(y, (list, ABCSeries, ABCIndexClass)) - if isinstance(y, np.ndarray): - # bool-bool dtype operations should be OK, should not get here - assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)) - x = ensure_object(x) - y = ensure_object(y) - result = libops.vec_binop(x, y, op) - else: - # let null fall thru - assert lib.is_scalar(y) - if not isna(y): - y = bool(y) - try: - result = libops.scalar_binop(x, y, op) - except ( - TypeError, - ValueError, - AttributeError, - OverflowError, - NotImplementedError, - ): - raise TypeError( - "cannot compare a dtyped [{dtype}] array " - "with a scalar of type [{typ}]".format( - dtype=x.dtype, 
typ=type(y).__name__ - ) - ) - - return result - - fill_int = lambda x: x - - def fill_bool(x, left=None): - # if `left` is specifically not-boolean, we do not cast to bool - if x.dtype.kind in ["c", "f", "O"]: - # dtypes that can hold NA - mask = isna(x) - if mask.any(): - x = x.astype(object) - x[mask] = False - - if left is None or is_bool_dtype(left.dtype): - x = x.astype(bool) - return x - def wrapper(self, other): - is_self_int_dtype = is_integer_dtype(self.dtype) - self, other = _align_method_SERIES(self, other, align_asobject=True) res_name = get_op_result_name(self, other) @@ -829,33 +715,10 @@ def wrapper(self, other): # Defer to DataFrame implementation; fail early return NotImplemented - other = lib.item_from_zerodim(other) - if is_list_like(other) and not hasattr(other, "dtype"): - # e.g. list, tuple - other = construct_1d_object_array_from_listlike(other) - lvalues = extract_array(self, extract_numpy=True) rvalues = extract_array(other, extract_numpy=True) - if should_extension_dispatch(self, rvalues): - res_values = dispatch_to_extension_op(op, lvalues, rvalues) - - else: - if isinstance(rvalues, (ABCSeries, ABCIndexClass, np.ndarray)): - is_other_int_dtype = is_integer_dtype(rvalues.dtype) - rvalues = rvalues if is_other_int_dtype else fill_bool(rvalues, lvalues) - - else: - # i.e. scalar - is_other_int_dtype = lib.is_integer(rvalues) - - # For int vs int `^`, `|`, `&` are bitwise operators and return - # integer dtypes. 
Otherwise these are boolean ops - filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool - - res_values = na_op(lvalues, rvalues) - res_values = filler(res_values) - + res_values = logical_op(lvalues, rvalues, op) result = self._constructor(res_values, index=self.index, name=res_name) return finalizer(result) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index d8b00bc629d44..b72ef69ede199 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -2,20 +2,41 @@ Functions for arithmetic and comparison operations on NumPy arrays and ExtensionArrays. """ +import operator +from typing import Any, Dict, Union + import numpy as np -from pandas._libs import ops as libops +from pandas._libs import Timestamp, lib, ops as libops from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, find_common_type, maybe_upcast_putmask, ) -from pandas.core.dtypes.common import is_object_dtype, is_scalar -from pandas.core.dtypes.generic import ABCIndex, ABCSeries -from pandas.core.dtypes.missing import notna +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCDatetimeArray, + ABCDatetimeIndex, + ABCExtensionArray, + ABCIndex, + ABCIndexClass, + ABCSeries, + ABCTimedeltaArray, + ABCTimedeltaIndex, +) +from pandas.core.dtypes.missing import isna, notna +from pandas.core.construction import extract_array from pandas.core.ops import missing +from pandas.core.ops.invalid import invalid_comparison from pandas.core.ops.roperator import rpow @@ -132,3 +153,233 @@ def na_arithmetic_op(left, right, op, str_rep, eval_kwargs): result = masked_arith_op(left, right, op) return missing.dispatch_fill_zeros(op, left, right, result) + + +def arithmetic_op( + left: Union[np.ndarray, ABCExtensionArray], + right: Any, + op, + str_rep: str, + eval_kwargs: Dict[str, str], +): + """ 
+ Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ... + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame or Index. Series is *not* excluded. + op : {operator.add, operator.sub, ...} + Or one of the reversed variants from roperator. + + Returns + ------- + ndarray or ExtensionArray + Or a 2-tuple of these in the case of divmod or rdivmod. + """ + + from pandas.core.ops import ( + maybe_upcast_for_op, + should_extension_dispatch, + dispatch_to_extension_op, + ) + + keep_null_freq = isinstance( + right, + ( + ABCDatetimeIndex, + ABCDatetimeArray, + ABCTimedeltaIndex, + ABCTimedeltaArray, + Timestamp, + ), + ) + + # NB: We assume that extract_array has already been called on `left`, but + # cannot make the same assumption about `right`. This is because we need + # to define `keep_null_freq` before calling extract_array on it. + lvalues = left + rvalues = extract_array(right, extract_numpy=True) + + rvalues = maybe_upcast_for_op(rvalues, lvalues.shape) + + if should_extension_dispatch(left, rvalues) or isinstance( + rvalues, (ABCTimedeltaArray, ABCDatetimeArray, Timestamp) + ): + # TimedeltaArray, DatetimeArray, and Timestamp are included here + # because they have `freq` attribute which is handled correctly + # by dispatch_to_extension_op. + res_values = dispatch_to_extension_op(op, lvalues, rvalues, keep_null_freq) + + else: + with np.errstate(all="ignore"): + res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep, eval_kwargs) + + return res_values + + +def comparison_op( + left: Union[np.ndarray, ABCExtensionArray], right: Any, op +) -> Union[np.ndarray, ABCExtensionArray]: + """ + Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame, Series, or Index. 
+ op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le} + + Returns + ------- + ndarray or ExtensionArray + """ + from pandas.core.ops import should_extension_dispatch, dispatch_to_extension_op + + # NB: We assume extract_array has already been called on left and right + lvalues = left + rvalues = right + + rvalues = lib.item_from_zerodim(rvalues) + if isinstance(rvalues, list): + # TODO: same for tuples? + rvalues = np.asarray(rvalues) + + if isinstance(rvalues, (np.ndarray, ABCExtensionArray, ABCIndexClass)): + # TODO: make this treatment consistent across ops and classes. + # We are not catching all listlikes here (e.g. frozenset, tuple) + # The ambiguous case is object-dtype. See GH#27803 + if len(lvalues) != len(rvalues): + raise ValueError("Lengths must match to compare") + + if should_extension_dispatch(lvalues, rvalues): + res_values = dispatch_to_extension_op(op, lvalues, rvalues) + + elif is_scalar(rvalues) and isna(rvalues): + # numpy does not like comparisons vs None + if op is operator.ne: + res_values = np.ones(len(lvalues), dtype=bool) + else: + res_values = np.zeros(len(lvalues), dtype=bool) + + elif is_object_dtype(lvalues.dtype): + res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) + + else: + op_name = "__{op}__".format(op=op.__name__) + method = getattr(lvalues, op_name) + with np.errstate(all="ignore"): + res_values = method(rvalues) + + if res_values is NotImplemented: + res_values = invalid_comparison(lvalues, rvalues, op) + if is_scalar(res_values): + raise TypeError( + "Could not compare {typ} type with Series".format(typ=type(rvalues)) + ) + + return res_values + + +def na_logical_op(x, y, op): + try: + result = op(x, y) + except TypeError: + if isinstance(y, np.ndarray): + # bool-bool dtype operations should be OK, should not get here + assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)) + x = ensure_object(x) + y = ensure_object(y) + result = libops.vec_binop(x, y, op) + else: + # let 
null fall thru + assert lib.is_scalar(y) + if not isna(y): + y = bool(y) + try: + result = libops.scalar_binop(x, y, op) + except ( + TypeError, + ValueError, + AttributeError, + OverflowError, + NotImplementedError, + ): + raise TypeError( + "cannot compare a dtyped [{dtype}] array " + "with a scalar of type [{typ}]".format( + dtype=x.dtype, typ=type(y).__name__ + ) + ) + + return result + + +def logical_op( + left: Union[np.ndarray, ABCExtensionArray], right: Any, op +) -> Union[np.ndarray, ABCExtensionArray]: + """ + Evaluate a logical operation `|`, `&`, or `^`. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame, Series, or Index. + op : {operator.and_, operator.or_, operator.xor} + Or one of the reversed variants from roperator. + + Returns + ------- + ndarray or ExtensionArray + """ + from pandas.core.ops import should_extension_dispatch, dispatch_to_extension_op + + fill_int = lambda x: x + + def fill_bool(x, left=None): + # if `left` is specifically not-boolean, we do not cast to bool + if x.dtype.kind in ["c", "f", "O"]: + # dtypes that can hold NA + mask = isna(x) + if mask.any(): + x = x.astype(object) + x[mask] = False + + if left is None or is_bool_dtype(left.dtype): + x = x.astype(bool) + return x + + is_self_int_dtype = is_integer_dtype(left.dtype) + + right = lib.item_from_zerodim(right) + if is_list_like(right) and not hasattr(right, "dtype"): + # e.g. list, tuple + right = construct_1d_object_array_from_listlike(right) + + # NB: We assume extract_array has already been called on left and right + lvalues = left + rvalues = right + + if should_extension_dispatch(lvalues, rvalues): + res_values = dispatch_to_extension_op(op, lvalues, rvalues) + + else: + if isinstance(rvalues, np.ndarray): + is_other_int_dtype = is_integer_dtype(rvalues.dtype) + rvalues = rvalues if is_other_int_dtype else fill_bool(rvalues, lvalues) + + else: + # i.e. 
scalar + is_other_int_dtype = lib.is_integer(rvalues) + + # For int vs int `^`, `|`, `&` are bitwise operators and return + # integer dtypes. Otherwise these are boolean ops + filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool + + res_values = na_logical_op(lvalues, rvalues, op) + res_values = filler(res_values) # type: ignore + + return res_values