diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 0af7b6d80ce0e..3aaca1eea486e 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -20,7 +20,6 @@ from pandas.compat import bind_method import pandas.core.missing as missing import pandas.algos as _algos -import pandas.core.algorithms as algos from pandas.core.common import (is_list_like, notnull, isnull, _values_from_object, _maybe_match_name, needs_i8_conversion, is_datetimelike_v_numeric, @@ -258,30 +257,87 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, exclude=exclude) -class _TimeOp(object): +class _Op(object): + """ - Wrapper around Series datetime/time/timedelta arithmetic operations. - Generally, you should use classmethod ``maybe_convert_for_time_op`` as an - entry point. + Wrapper around Series arithmetic operations. + Generally, you should use classmethod ``_Op.get_op`` as an entry point. + + This validates and coerces lhs and rhs depending on its dtype and + based on op. See _TimeOp also. + + Parameters + ---------- + left : Series + lhs of op + right : object + rhs of op + name : str + name of op + na_op : callable + a function which wraps op """ - fill_value = iNaT + + fill_value = np.nan wrap_results = staticmethod(lambda x: x) dtype = None def __init__(self, left, right, name, na_op): + self.left = left + self.right = right + + self.name = name + self.na_op = na_op + + self.lvalues = left + self.rvalues = right + + @classmethod + def get_op(cls, left, right, name, na_op): + """ + Get op dispatcher, returns _Op or _TimeOp. + + If ``left`` and ``right`` are appropriate for datetime arithmetic with + operation ``name``, processes them and returns a ``_TimeOp`` object + that stores all the required values. Otherwise, it will generate + either a ``_Op``, indicating that the operation is performed via + normal numpy path. + """ + is_timedelta_lhs = is_timedelta64_dtype(left) + is_datetime_lhs = (is_datetime64_dtype(left) or + is_datetime64tz_dtype(left)) - # need to make sure that we are aligning the data if isinstance(left, ABCSeries) and isinstance(right, ABCSeries): - left, right = left.align(right, copy=False) + # avoid repated alignment + if not left.index.equals(right.index): + left, right = left.align(right, copy=False) + + index, lidx, ridx = left.index.join(right.index, how='outer', + return_indexers=True) + # if DatetimeIndex have different tz, convert to UTC + left.index = index + right.index = index + + if not (is_datetime_lhs or is_timedelta_lhs): + return _Op(left, right, name, na_op) + else: + return _TimeOp(left, right, name, na_op) + + +class _TimeOp(_Op): + """ + Wrapper around Series datetime/time/timedelta arithmetic operations. + Generally, you should use classmethod ``_Op.get_op`` as an entry point. + """ + fill_value = iNaT + + def __init__(self, left, right, name, na_op): + super(_TimeOp, self).__init__(left, right, name, na_op) lvalues = self._convert_to_array(left, name=name) rvalues = self._convert_to_array(right, name=name, other=lvalues) - self.name = name - self.na_op = na_op - # left - self.left = left self.is_offset_lhs = self._is_offset(left) self.is_timedelta_lhs = is_timedelta64_dtype(lvalues) self.is_datetime64_lhs = is_datetime64_dtype(lvalues) @@ -292,7 +348,6 @@ def __init__(self, left, right, name, na_op): self.is_floating_lhs = left.dtype.kind == 'f' # right - self.right = right self.is_offset_rhs = self._is_offset(right) self.is_datetime64_rhs = is_datetime64_dtype(rvalues) self.is_datetime64tz_rhs = is_datetime64tz_dtype(rvalues) @@ -543,26 +598,6 @@ def _is_offset(self, arr_or_obj): else: return False - @classmethod - def maybe_convert_for_time_op(cls, left, right, name, na_op): - """ - if ``left`` and ``right`` are appropriate for datetime arithmetic with - operation ``name``, processes them and returns a ``_TimeOp`` object - that stores all the required values. Otherwise, it will generate - either a ``NotImplementedError`` or ``None``, indicating that the - operation is unsupported for datetimes (e.g., an unsupported r_op) or - that the data is not the right type for time ops. - """ - # decide if we can do it - is_timedelta_lhs = is_timedelta64_dtype(left) - is_datetime_lhs = (is_datetime64_dtype(left) or - is_datetime64tz_dtype(left)) - - if not (is_datetime_lhs or is_timedelta_lhs): - return None - - return cls(left, right, name, na_op) - def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None, **eval_kwargs): @@ -615,53 +650,28 @@ def wrapper(left, right, name=name, na_op=na_op): if isinstance(right, pd.DataFrame): return NotImplemented - time_converted = _TimeOp.maybe_convert_for_time_op(left, right, name, - na_op) + converted = _Op.get_op(left, right, name, na_op) - if time_converted is None: - lvalues, rvalues = left, right - dtype = None - wrap_results = lambda x: x - elif time_converted is NotImplemented: - return NotImplemented - else: - left, right = time_converted.left, time_converted.right - lvalues, rvalues = time_converted.lvalues, time_converted.rvalues - dtype = time_converted.dtype - wrap_results = time_converted.wrap_results - na_op = time_converted.na_op + left, right = converted.left, converted.right + lvalues, rvalues = converted.lvalues, converted.rvalues + dtype = converted.dtype + wrap_results = converted.wrap_results + na_op = converted.na_op if isinstance(rvalues, ABCSeries): - rindex = getattr(rvalues, 'index', rvalues) name = _maybe_match_name(left, rvalues) lvalues = getattr(lvalues, 'values', lvalues) rvalues = getattr(rvalues, 'values', rvalues) - if left.index.equals(rindex): - index = left.index - else: - index, lidx, ridx = left.index.join(rindex, how='outer', - return_indexers=True) - - if lidx is not None: - lvalues = algos.take_1d(lvalues, lidx) - - if ridx is not None: - rvalues = algos.take_1d(rvalues, ridx) - - result = wrap_results(safe_na_op(lvalues, rvalues)) - return left._constructor(result, index=index, - name=name, dtype=dtype) + # _Op aligns left and right else: - # scalars + name = left.name if (hasattr(lvalues, 'values') and not isinstance(lvalues, pd.DatetimeIndex)): lvalues = lvalues.values - result = wrap_results(safe_na_op(lvalues, rvalues)) - return left._constructor(result, - index=left.index, name=left.name, - dtype=dtype) - + result = wrap_results(safe_na_op(lvalues, rvalues)) + return left._constructor(result, index=left.index, + name=name, dtype=dtype) return wrapper @@ -895,6 +905,32 @@ def wrapper(self, other): _op_descriptions[reverse_op]['reverse'] = k +_flex_doc_SERIES = """ +%s of series and other, element-wise (binary operator `%s`). + +Equivalent to ``%s``, but with support to substitute a fill_value for +missing data in one of the inputs. + +Parameters +---------- +other: Series or scalar value +fill_value : None or float value, default None (NaN) + Fill missing (NaN) values with this value. If both Series are + missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Returns +------- +result : Series + +See also +-------- +Series.%s +""" + + def _flex_method_SERIES(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs): op_name = name.replace('__', '') @@ -904,30 +940,8 @@ def _flex_method_SERIES(op, name, str_rep, default_axis=None, fill_zeros=None, else: equiv = 'series ' + op_desc['op'] + ' other' - doc = """ - %s of series and other, element-wise (binary operator `%s`). - - Equivalent to ``%s``, but with support to substitute a fill_value for - missing data in one of the inputs. - - Parameters - ---------- - other: Series or scalar value - fill_value : None or float value, default None (NaN) - Fill missing (NaN) values with this value. If both Series are - missing, the result will be missing - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - - Returns - ------- - result : Series - - See also - -------- - Series.%s - """ % (op_desc['desc'], op_name, equiv, op_desc['reverse']) + doc = _flex_doc_SERIES % (op_desc['desc'], op_name, equiv, + op_desc['reverse']) @Appender(doc) def flex_wrapper(self, other, level=None, fill_value=None, axis=0): @@ -983,6 +997,75 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): result : DataFrame """ +_flex_doc_FRAME = """ +%s of dataframe and other, element-wise (binary operator `%s`). + +Equivalent to ``%s``, but with support to substitute a fill_value for +missing data in one of the inputs. + +Parameters +---------- +other : Series, DataFrame, or constant +axis : {0, 1, 'index', 'columns'} + For Series input, axis to match Series index on +fill_value : None or float value, default None + Fill missing (NaN) values with this value. If both DataFrame + locations are missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Notes +----- +Mismatched indices will be unioned together + +Returns +------- +result : DataFrame + +See also +-------- +DataFrame.%s +""" + + +def _align_method_FRAME(left, right, axis): + """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ + + def to_series(right): + msg = 'Unable to coerce to Series, length must be {0}: given {1}' + if axis is not None and left._get_axis_name(axis) == 'index': + if len(left.index) != len(right): + raise ValueError(msg.format(len(left.index), len(right))) + right = left._constructor_sliced(right, index=left.index) + else: + if len(left.columns) != len(right): + raise ValueError(msg.format(len(left.columns), len(right))) + right = left._constructor_sliced(right, index=left.columns) + return right + + if isinstance(right, (list, tuple)): + right = to_series(right) + + elif isinstance(right, np.ndarray) and right.ndim: # skips np scalar + + if right.ndim == 1: + right = to_series(right) + + elif right.ndim == 2: + if left.shape != right.shape: + msg = ("Unable to coerce to DataFrame, " + "shape must be {0}: given {1}") + raise ValueError(msg.format(left.shape, right.shape)) + + right = left._constructor(right, index=left.index, + columns=left.columns) + else: + msg = 'Unable to coerce to Series/DataFrame, dim must be <= 2: {0}' + raise ValueError(msg.format(right.shape, )) + + return right + def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns', fill_zeros=None, **eval_kwargs): @@ -1027,75 +1110,20 @@ def na_op(x, y): else: equiv = 'dataframe ' + op_desc['op'] + ' other' - doc = """ - %s of dataframe and other, element-wise (binary operator `%s`). - - Equivalent to ``%s``, but with support to substitute a fill_value for - missing data in one of the inputs. - - Parameters - ---------- - other : Series, DataFrame, or constant - axis : {0, 1, 'index', 'columns'} - For Series input, axis to match Series index on - fill_value : None or float value, default None - Fill missing (NaN) values with this value. If both DataFrame - locations are missing, the result will be missing - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - - Notes - ----- - Mismatched indices will be unioned together - - Returns - ------- - result : DataFrame - - See also - -------- - DataFrame.%s - """ % (op_desc['desc'], op_name, equiv, op_desc['reverse']) + doc = _flex_doc_FRAME % (op_desc['desc'], op_name, equiv, + op_desc['reverse']) else: doc = _arith_doc_FRAME % name @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): + + other = _align_method_FRAME(self, other, axis) + if isinstance(other, pd.DataFrame): # Another DataFrame return self._combine_frame(other, na_op, fill_value, level) elif isinstance(other, ABCSeries): return self._combine_series(other, na_op, fill_value, axis, level) - elif isinstance(other, (list, tuple)): - if axis is not None and self._get_axis_name(axis) == 'index': - # TODO: Get all of these to use _constructor_sliced - # casted = self._constructor_sliced(other, index=self.index) - casted = pd.Series(other, index=self.index) - else: - # casted = self._constructor_sliced(other, index=self.columns) - casted = pd.Series(other, index=self.columns) - return self._combine_series(casted, na_op, fill_value, axis, level) - elif isinstance(other, np.ndarray) and other.ndim: # skips np scalar - if other.ndim == 1: - if axis is not None and self._get_axis_name(axis) == 'index': - # casted = self._constructor_sliced(other, - # index=self.index) - casted = pd.Series(other, index=self.index) - else: - # casted = self._constructor_sliced(other, - # index=self.columns) - casted = pd.Series(other, index=self.columns) - return self._combine_series(casted, na_op, fill_value, axis, - level) - elif other.ndim == 2: - # casted = self._constructor(other, index=self.index, - # columns=self.columns) - casted = pd.DataFrame(other, index=self.index, - columns=self.columns) - return self._combine_frame(casted, na_op, fill_value, level) - else: - raise ValueError("Incompatible argument shape: %s" % - (other.shape, )) else: if fill_value is not None: self = self.fillna(fill_value) @@ -1135,39 +1163,14 @@ def na_op(x, y): @Appender('Wrapper for flexible comparison methods %s' % name) def f(self, other, axis=default_axis, level=None): + + other = _align_method_FRAME(self, other, axis) + if isinstance(other, pd.DataFrame): # Another DataFrame return self._flex_compare_frame(other, na_op, str_rep, level) elif isinstance(other, ABCSeries): return self._combine_series(other, na_op, None, axis, level) - - elif isinstance(other, (list, tuple)): - if axis is not None and self._get_axis_name(axis) == 'index': - casted = pd.Series(other, index=self.index) - else: - casted = pd.Series(other, index=self.columns) - - return self._combine_series(casted, na_op, None, axis, level) - - elif isinstance(other, np.ndarray): - if other.ndim == 1: - if axis is not None and self._get_axis_name(axis) == 'index': - casted = pd.Series(other, index=self.index) - else: - casted = pd.Series(other, index=self.columns) - - return self._combine_series(casted, na_op, None, axis, level) - - elif other.ndim == 2: - casted = pd.DataFrame(other, index=self.index, - columns=self.columns) - - return self._flex_compare_frame(casted, na_op, str_rep, level) - - else: - raise ValueError("Incompatible argument shape: %s" % - (other.shape, )) - else: return self._combine_const(other, na_op) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index ee7c296f563f0..e2e0f568e4098 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -417,10 +417,11 @@ def test_arith_flex_frame(self): # ndim >= 3 ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) - with assertRaisesRegexp(ValueError, 'shape'): + msg = "Unable to coerce to Series/DataFrame" + with assertRaisesRegexp(ValueError, msg): f(self.frame, ndim_5) - with assertRaisesRegexp(ValueError, 'shape'): + with assertRaisesRegexp(ValueError, msg): getattr(self.frame, op)(ndim_5) # res_add = self.frame.add(self.frame) @@ -581,8 +582,9 @@ def _check_unaligned_frame(meth, op, df, other): # scalar assert_frame_equal(f(0), o(df, 0)) # NAs + msg = "Unable to coerce to Series/DataFrame" assert_frame_equal(f(np.nan), o(df, np.nan)) - with assertRaisesRegexp(ValueError, 'shape'): + with assertRaisesRegexp(ValueError, msg): f(ndim_5) # Series @@ -662,6 +664,17 @@ def _test_seq(df, idx_ser, col_ser): exp = DataFrame({'col': [False, True, False]}) assert_frame_equal(result, exp) + def test_dti_tz_convert_to_utc(self): + base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', + '2011-01-03'], tz='UTC') + idx1 = base.tz_convert('Asia/Tokyo')[:2] + idx2 = base.tz_convert('US/Eastern')[1:] + + df1 = DataFrame({'A': [1, 2]}, index=idx1) + df2 = DataFrame({'A': [1, 1]}, index=idx2) + exp = DataFrame({'A': [np.nan, 3, np.nan]}, index=base) + assert_frame_equal(df1 + df2, exp) + def test_arith_flex_series(self): df = self.simple @@ -1176,6 +1189,53 @@ def test_inplace_ops_identity(self): assert_frame_equal(df2, expected) self.assertIs(df._data, df2._data) + def test_alignment_non_pandas(self): + index = ['A', 'B', 'C'] + columns = ['X', 'Y', 'Z'] + df = pd.DataFrame(np.random.randn(3, 3), index=index, columns=columns) + + align = pd.core.ops._align_method_FRAME + + for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3])]: + + tm.assert_series_equal(align(df, val, 'index'), + Series([1, 2, 3], index=df.index)) + tm.assert_series_equal(align(df, val, 'columns'), + Series([1, 2, 3], index=df.columns)) + + # length mismatch + msg = 'Unable to coerce to Series, length must be 3: given 2' + for val in [[1, 2], (1, 2), np.array([1, 2])]: + with tm.assertRaisesRegexp(ValueError, msg): + align(df, val, 'index') + + with tm.assertRaisesRegexp(ValueError, msg): + align(df, val, 'columns') + + val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + tm.assert_frame_equal(align(df, val, 'index'), + DataFrame(val, index=df.index, + columns=df.columns)) + tm.assert_frame_equal(align(df, val, 'columns'), + DataFrame(val, index=df.index, + columns=df.columns)) + + # shape mismatch + msg = 'Unable to coerce to DataFrame, shape must be' + val = np.array([[1, 2, 3], [4, 5, 6]]) + with tm.assertRaisesRegexp(ValueError, msg): + align(df, val, 'index') + + with tm.assertRaisesRegexp(ValueError, msg): + align(df, val, 'columns') + + val = np.zeros((3, 3, 3)) + with tm.assertRaises(ValueError): + align(df, val, 'index') + with tm.assertRaises(ValueError): + align(df, val, 'columns') + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 9c401e9ce6da8..5ebe528ff8cab 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -571,11 +571,11 @@ def run_ops(ops, get_ser, test_ser): td2 / td1 # ## datetime64 ### - dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), Timestamp( - '20120103')]) + dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), + Timestamp('20120103')]) dt1.iloc[2] = np.nan - dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), Timestamp( - '20120104')]) + dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), + Timestamp('20120104')]) ops = ['__add__', '__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__', '__radd__', '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', '__rpow__'] @@ -607,9 +607,10 @@ def run_ops(ops, get_ser, test_ser): ops = ['__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__', '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', '__rpow__'] - dt1 = Series( - date_range('2000-01-01 09:00:00', periods=5, - tz='US/Eastern'), name='foo') + + tz = 'US/Eastern' + dt1 = Series(date_range('2000-01-01 09:00:00', periods=5, + tz=tz), name='foo') dt2 = dt1.copy() dt2.iloc[2] = np.nan td1 = Series(timedelta_range('1 days 1 min', periods=5, freq='H')) @@ -618,58 +619,48 @@ def run_ops(ops, get_ser, test_ser): run_ops(ops, dt1, td1) result = dt1 + td1[0] - expected = ( - dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt2 + td2[0] - expected = ( - dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) # odd numpy behavior with scalar timedeltas if not _np_version_under1p8: result = td1[0] + dt1 - expected = ( - dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = td2[0] + dt2 - expected = ( - dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt1 - td1[0] - expected = ( - dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) self.assertRaises(TypeError, lambda: td1[0] - dt1) result = dt2 - td2[0] - expected = ( - dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) self.assertRaises(TypeError, lambda: td2[0] - dt2) result = dt1 + td1 - expected = ( - dt1.dt.tz_localize(None) + td1).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) + td1).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt2 + td2 - expected = ( - dt2.dt.tz_localize(None) + td2).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) + td2).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt1 - td1 - expected = ( - dt1.dt.tz_localize(None) - td1).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) - td1).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt2 - td2 - expected = ( - dt2.dt.tz_localize(None) - td2).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) - td2).dt.tz_localize(tz) + assert_series_equal(result, exp) self.assertRaises(TypeError, lambda: td1 - dt1) self.assertRaises(TypeError, lambda: td2 - dt2) @@ -1555,3 +1546,12 @@ def test_datetime64_with_index(self): df['expected'] = df['date'] - df.index.to_series() df['result'] = df['date'] - df.index assert_series_equal(df['result'], df['expected'], check_names=False) + + def test_dti_tz_convert_to_utc(self): + base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + tz='UTC') + idx1 = base.tz_convert('Asia/Tokyo')[:2] + idx2 = base.tz_convert('US/Eastern')[1:] + + res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) + assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))