diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index bda5f8f4326f1..dc90bcf2239ac 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -328,6 +328,32 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray)
     _generate_range
     """
 
+    @property
+    def ndim(self):
+        return self._data.ndim
+
+    @property
+    def shape(self):
+        return self._data.shape
+
+    def __len__(self):
+        return len(self._data)
+
+    @property
+    def T(self):
+        # Note: we drop any freq
+        return type(self)(self._data.T, dtype=self.dtype)
+
+    def reshape(self, *args, **kwargs):
+        # Note: we drop any freq
+        data = self._data.reshape(*args, **kwargs)
+        return type(self)(data, dtype=self.dtype)
+
+    def ravel(self, *args, **kwargs):
+        # Note: we drop any freq
+        data = self._data.ravel(*args, **kwargs)
+        return type(self)(data, dtype=self.dtype)
+
     @property
     def _box_func(self):
         """
@@ -396,9 +422,6 @@ def size(self) -> int:
         """The number of elements in this array."""
         return np.prod(self.shape)
 
-    def __len__(self):
-        return len(self._data)
-
     def __getitem__(self, key):
         """
         This getitem defers to the underlying array, which by-definition can
@@ -1032,7 +1055,7 @@ def _add_nat(self):
 
         # GH#19124 pd.NaT is treated like a timedelta for both timedelta
        # and datetime dtypes
-        result = np.zeros(len(self), dtype=np.int64)
+        result = np.zeros(self.shape, dtype=np.int64)
         result.fill(iNaT)
         return type(self)(result, dtype=self.dtype, freq=None)
 
@@ -1046,7 +1069,7 @@ def _sub_nat(self):
         # For datetime64 dtypes by convention we treat NaT as a datetime, so
         # this subtraction returns a timedelta64 dtype.
         # For period dtype, timedelta64 is a close-enough return dtype.
-        result = np.zeros(len(self), dtype=np.int64)
+        result = np.zeros(self.shape, dtype=np.int64)
         result.fill(iNaT)
         return result.view("timedelta64[ns]")
 
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 0335058a69c63..c0283915f7d92 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -76,6 +76,17 @@
 """
 
 
+def compat_2d(meth):
+    def new_meth(self, *args, **kwargs):
+        if self.ndim > 1:
+            result = meth(self.ravel(), *args, **kwargs)
+            return result.reshape(self.shape)
+        return meth(self, *args, **kwargs)
+
+    new_meth.__name__ = meth.__name__
+    return new_meth
+
+
 def tz_to_dtype(tz):
     """
     Return a datetime64[ns] dtype appropriate for the given timezone.
@@ -361,7 +372,7 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
                 "ndarray, or Series or Index containing one of those."
             )
             raise ValueError(msg.format(type(values).__name__))
-        if values.ndim != 1:
+        if values.ndim not in [1, 2]:
             raise ValueError("Only 1-dimensional input arrays are supported.")
 
         if values.dtype == "i8":
@@ -818,6 +829,7 @@ def _sub_datetime_arraylike(self, other):
             new_values[arr_mask] = iNaT
         return new_values.view("timedelta64[ns]")
 
+    @compat_2d
     def _add_offset(self, offset):
         assert not isinstance(offset, Tick)
         try:
@@ -825,6 +837,7 @@ def _add_offset(self, offset):
                 values = self.tz_localize(None)
             else:
                 values = self
+
             result = offset.apply_index(values)
             if self.tz is not None:
                 result = result.tz_localize(self.tz)
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 6c9462ff4fa4d..81588b0dee96d 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -222,7 +222,7 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False):
                 "ndarray, or Series or Index containing one of those."
             )
             raise ValueError(msg.format(type(values).__name__))
-        if values.ndim != 1:
+        if values.ndim not in [1, 2]:
             raise ValueError("Only 1-dimensional input arrays are supported.")
 
         if values.dtype == "i8":
@@ -1076,7 +1076,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
         )
         data = np.array(data, copy=copy)
 
-    if data.ndim != 1:
+    if data.ndim not in [1, 2]:
         raise ValueError("Only 1-dimensional input arrays are supported.")
 
     assert data.dtype == "m8[ns]", data
diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index 398fa9b0c1fc0..0e90a395d7f7e 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -12,7 +12,11 @@
 from pandas._libs import Timedelta, Timestamp, lib
 from pandas.util._decorators import Appender
 
-from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype
+from pandas.core.dtypes.common import (
+    is_extension_array_dtype,
+    is_list_like,
+    is_timedelta64_dtype,
+)
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCExtensionArray,
@@ -24,6 +28,7 @@
 from pandas.core.construction import extract_array
 from pandas.core.ops.array_ops import (
     arithmetic_op,
+    array_op,
     comparison_op,
     define_na_arithmetic_op,
     logical_op,
@@ -350,7 +355,7 @@ def fill_binop(left, right, fill_value):
 # Dispatch logic
 
 
-def dispatch_to_series(left, right, func, str_rep=None, axis=None):
+def dispatch_to_series(left, right, func, str_rep=None, axis=None, eval_kwargs=None):
     """
     Evaluate the frame operation func(left, right) by evaluating
     column-by-column, dispatching to the Series implementation.
@@ -369,11 +374,57 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None):
     """
     # Note: we use iloc to access columns for compat with cases
     # with non-unique columns.
+    eval_kwargs = eval_kwargs or {}
+
     import pandas.core.computation.expressions as expressions
 
     right = lib.item_from_zerodim(right)
+
     if lib.is_scalar(right) or np.ndim(right) == 0:
+        new_blocks = []
+        mgr = left._data
+        for blk in mgr.blocks:
+            # Reshape for EA Block
+            blk_vals = blk.values
+            if hasattr(blk_vals, "reshape"):
+                # ndarray, DTA/TDA/PA
+                blk_vals = blk_vals.reshape(blk.shape)
+                blk_vals = blk_vals.T
+
+            new_vals = array_op(blk_vals, right, func, str_rep, eval_kwargs)
+
+            # Reshape for EA Block
+            if is_extension_array_dtype(new_vals.dtype):
+                from pandas.core.internals.blocks import make_block
+
+                if hasattr(new_vals, "reshape"):
+                    # ndarray, DTA/TDA/PA
+                    new_vals = new_vals.reshape(blk.shape[::-1])
+                    assert new_vals.shape[-1] == len(blk.mgr_locs)
+                    for i in range(new_vals.shape[-1]):
+                        nb = make_block(new_vals[..., i], placement=[blk.mgr_locs[i]])
+                        new_blocks.append(nb)
+                else:
+                    # Categorical, IntegerArray
+                    assert len(blk.mgr_locs) == 1
+                    assert new_vals.shape == (blk.shape[-1],)
+                    nb = make_block(new_vals, placement=blk.mgr_locs, ndim=2)
+                    new_blocks.append(nb)
+            elif blk.values.ndim == 1:
+                # need to bump up to 2D
+                new_vals = new_vals.reshape(-1, 1)
+                assert new_vals.T.shape == blk.shape
+                nb = blk.make_block(new_vals.T)
+                new_blocks.append(nb)
+            else:
+                assert new_vals.T.shape == blk.shape
+                nb = blk.make_block(new_vals.T)
+                new_blocks.append(nb)
+
+        bm = type(mgr)(new_blocks, mgr.axes)
+        return type(left)(bm)
+
         def column_op(a, b):
             return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}
@@ -526,7 +577,7 @@ def wrapper(self, other):
         lvalues = extract_array(self, extract_numpy=True)
         rvalues = extract_array(other, extract_numpy=True)
 
-        res_values = comparison_op(lvalues, rvalues, op)
+        res_values = comparison_op(lvalues, rvalues, op, None, {})
 
         return _construct_result(self, res_values, index=self.index, name=res_name)
 
@@ -552,7 +603,7 @@ def wrapper(self, other):
         lvalues = extract_array(self, extract_numpy=True)
         rvalues = extract_array(other, extract_numpy=True)
 
-        res_values = logical_op(lvalues, rvalues, op)
+        res_values = logical_op(lvalues, rvalues, op, None, {})
 
         return _construct_result(self, res_values, index=self.index, name=res_name)
 
     wrapper.__name__ = op_name
@@ -723,7 +774,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
             if fill_value is not None:
                 self = self.fillna(fill_value)
 
-            new_data = dispatch_to_series(self, other, op)
+            new_data = dispatch_to_series(
+                self, other, op, str_rep=str_rep, eval_kwargs=eval_kwargs
+            )
             return self._construct_result(new_data)
 
     f.__name__ = op_name
@@ -749,7 +802,9 @@ def f(self, other, axis=default_axis, level=None):
             # Another DataFrame
             if not self._indexed_same(other):
                 self, other = self.align(other, "outer", level=level, copy=False)
-            new_data = dispatch_to_series(self, other, op, str_rep)
+            new_data = dispatch_to_series(
+                self, other, op, str_rep=str_rep, eval_kwargs={}
+            )
             return self._construct_result(new_data)
 
         elif isinstance(other, ABCSeries):
@@ -781,7 +836,9 @@ def f(self, other):
                 raise ValueError(
                     "Can only compare identically-labeled DataFrame objects"
                 )
-            new_data = dispatch_to_series(self, other, op, str_rep)
+            new_data = dispatch_to_series(
+                self, other, op, str_rep=str_rep, eval_kwargs={}
+            )
             return self._construct_result(new_data)
 
         elif isinstance(other, ABCSeries):
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index a225eec93b27e..c4cb2178c5035 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -56,7 +56,7 @@ def comp_method_OBJECT_ARRAY(op, x, y):
         result = libops.vec_compare(x, y, op)
     else:
-        result = libops.scalar_compare(x, y, op)
+        result = libops.scalar_compare(x.ravel(), y, op).reshape(x.shape)
     return result
@@ -156,12 +156,22 @@ def na_arithmetic_op(left, right, op, str_rep: str, eval_kwargs):
     return missing.dispatch_fill_zeros(op, left, right, result)
 
 
+def array_op(left, right, op, str_rep, eval_kwargs):
+    op_name = op.__name__.strip("_")
+    if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}:
+        return comparison_op(left, right, op, str_rep, eval_kwargs)
+    elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}:
+        return logical_op(left, right, op, str_rep, eval_kwargs)
+    else:
+        return arithmetic_op(left, right, op, str_rep, eval_kwargs)
+
+
 def arithmetic_op(
     left: Union[np.ndarray, ABCExtensionArray],
     right: Any,
     op,
     str_rep: str,
-    eval_kwargs: Dict[str, str],
+    eval_kwargs: Dict[str, bool],
 ):
     """
     Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...
@@ -218,7 +228,7 @@ def arithmetic_op(
 
 
 def comparison_op(
-    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
+    left: Union[np.ndarray, ABCExtensionArray], right: Any, op, str_rep, eval_kwargs
 ) -> Union[np.ndarray, ABCExtensionArray]:
     """
     Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`.
@@ -256,10 +266,11 @@ def comparison_op(
 
     elif is_scalar(rvalues) and isna(rvalues):
         # numpy does not like comparisons vs None
+        # TODO: Should we be using invalid_comparison here?
         if op is operator.ne:
-            res_values = np.ones(len(lvalues), dtype=bool)
+            res_values = np.ones(lvalues.shape, dtype=bool)
         else:
-            res_values = np.zeros(len(lvalues), dtype=bool)
+            res_values = np.zeros(lvalues.shape, dtype=bool)
 
     elif is_object_dtype(lvalues.dtype):
         res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
@@ -323,7 +334,7 @@ def na_logical_op(x: np.ndarray, y, op):
 
 
 def logical_op(
-    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
+    left: Union[np.ndarray, ABCExtensionArray], right: Any, op, str_rep, eval_kwargs
 ) -> Union[np.ndarray, ABCExtensionArray]:
     """
     Evaluate a logical operation `|`, `&`, or `^`.
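For reference, an illustrative sketch (not part of the patch itself) of how the new array_op helper defined above is expected to route plain ndarray operands once this diff is applied to a development checkout. The str_rep=None and empty eval_kwargs arguments mirror the Series-level call sites changed in pandas/core/ops/__init__.py; the integer inputs are purely hypothetical.

# Illustrative sketch only -- assumes this patch is applied, so that
# pandas.core.ops.array_ops.array_op exists.
import operator

import numpy as np

from pandas.core.ops.array_ops import array_op

left = np.arange(5)

# operator.gt.__name__ is "gt", which is in the comparison set,
# so this call routes to comparison_op and returns a boolean mask.
mask = array_op(left, 2, operator.gt, None, {})
assert mask.dtype == bool

# operator.add.__name__ is "add", which is neither a comparison nor a
# logical op, so it falls through to arithmetic_op; with str_rep=None the
# numexpr fast path should not be taken and plain numpy evaluation is used.
summed = array_op(left, 2, operator.add, None, {})
assert (summed == left + 2).all()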
diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py
index ed693d873efb8..d4fdeffa2c2db 100644
--- a/pandas/tests/arithmetic/test_period.py
+++ b/pandas/tests/arithmetic/test_period.py
@@ -755,18 +755,18 @@ def test_pi_sub_isub_offset(self):
         rng -= pd.offsets.MonthEnd(5)
         tm.assert_index_equal(rng, expected)
 
-    def test_pi_add_offset_n_gt1(self, box_transpose_fail):
+    def test_pi_add_offset_n_gt1(self, box_with_array):
         # GH#23215
         # add offset to PeriodIndex with freq.n > 1
-        box, transpose = box_transpose_fail
+        box = box_with_array
 
         per = pd.Period("2016-01", freq="2M")
         pi = pd.PeriodIndex([per])
         expected = pd.PeriodIndex(["2016-03"], freq="2M")
 
-        pi = tm.box_expected(pi, box, transpose=transpose)
-        expected = tm.box_expected(expected, box, transpose=transpose)
+        pi = tm.box_expected(pi, box)
+        expected = tm.box_expected(expected, box)
 
         result = pi + per.freq
         tm.assert_equal(result, expected)
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index c3cda22497ecb..acb419c09210f 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -23,9 +23,8 @@ def test_from_sequence_invalid_type(self):
     def test_only_1dim_accepted(self):
         arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]")
 
-        with pytest.raises(ValueError, match="Only 1-dimensional"):
-            # 2-dim
-            DatetimeArray(arr.reshape(2, 2))
+        # 2-dim allowed for ops compat
+        DatetimeArray(arr.reshape(2, 2))
 
         with pytest.raises(ValueError, match="Only 1-dimensional"):
             # 0-dim
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
index 42e7bee97e671..ddfd2339e6c49 100644
--- a/pandas/tests/arrays/test_timedeltas.py
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -11,9 +11,8 @@ def test_only_1dim_accepted(self):
         # GH#25282
         arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]")
 
-        with pytest.raises(ValueError, match="Only 1-dimensional"):
-            # 2-dim
-            TimedeltaArray(arr.reshape(2, 2))
+        # 2-dim allowed for ops compat
+        TimedeltaArray(arr.reshape(2, 2))
 
         with pytest.raises(ValueError, match="Only 1-dimensional"):
             # 0-dim
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 82c197ac054f0..f5f6c9ad6b3da 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -122,7 +122,8 @@ def test_ops(self):
             result = getattr(df, rop)(m)
             assert_frame_equal(result, expected)
 
-        # GH7192
+        # GH7192: Note we need a large number of rows to ensure this
+        # goes through the numexpr path
         df = DataFrame(dict(A=np.random.randn(25000)))
         df.iloc[0:5] = np.nan
         expected = 1 - np.isnan(df.iloc[0:25])
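Appendix (illustration, not part of the diff): a minimal sketch of the 2-D round-trip that the new ndim/shape/T/reshape/ravel helpers on DatetimeLikeArrayMixin are meant to support, assuming the patch above is applied to a development checkout. As the in-line comments in the diff note, any freq is dropped when reshaping.

# Illustrative sketch only -- assumes this patch is applied, so that the
# DatetimeArray constructor accepts 2-D values for ops compat.
import numpy as np

from pandas.core.arrays import DatetimeArray

values = np.arange(6, dtype="i8").view("M8[ns]")
arr = DatetimeArray(values)    # 1-D construction, unchanged behaviour
arr2d = arr.reshape(2, 3)      # 2-D now accepted; any freq is dropped

assert arr2d.ndim == 2 and arr2d.shape == (2, 3)
assert arr2d.T.shape == (3, 2)
# ravel restores the original 1-D ordering of the underlying data
assert list(arr2d.ravel()) == list(arr)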