diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 3db2dd849ccee..ba8a15b60ba56 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -39,7 +39,8 @@ ABCSeries, ABCDataFrame, ABCIndex, - ABCPeriodIndex) + ABCPeriodIndex, + ABCSparseSeries) def _gen_eval_kwargs(name): @@ -109,6 +110,31 @@ def _gen_fill_zeros(name): return fill_value +def _get_frame_op_default_axis(name): + """ + Only DataFrame cares about default_axis, specifically: + special methods have default_axis=None and flex methods + have default_axis='columns'. + + Parameters + ---------- + name : str + + Returns + ------- + default_axis: str or None + """ + if name.replace('__r', '__') in ['__and__', '__or__', '__xor__']: + # bool methods + return 'columns' + elif name.startswith('__'): + # __add__, __mul__, ... + return None + else: + # add, mul, ... + return 'columns' + + # ----------------------------------------------------------------------------- # Docstring Generation and Templates @@ -281,17 +307,17 @@ def _gen_fill_zeros(name): _agg_doc_PANEL = """ -Wrapper method for {wrp_method} +Wrapper method for {op_name} Parameters ---------- -other : {construct} or {cls_name} -axis : {{{axis_order}}} +other : DataFrame or Panel +axis : {{items, major_axis, minor_axis}} Axis to broadcast over Returns ------- -{cls_name} +Panel """ @@ -337,14 +363,18 @@ def _make_flex_doc(op_name, typ): # methods -def _create_methods(arith_method, comp_method, bool_method, - use_numexpr, special=False, default_axis='columns', - have_divmod=False): +def _create_methods(cls, arith_method, comp_method, bool_method, + special=False): # creates actual methods based upon arithmetic, comp and bool method # constructors. - # NOTE: Only frame cares about default_axis, specifically: special methods - # have default axis None, whereas flex methods have default axis 'columns' + # numexpr is available for non-sparse classes + subtyp = getattr(cls, '_subtyp', '') + use_numexpr = 'sparse' not in subtyp + + have_divmod = issubclass(cls, ABCSeries) + # divmod is available for Series and SparseSeries + # if we're not using numexpr, then don't pass a str_rep if use_numexpr: op = lambda x: x @@ -360,44 +390,28 @@ def names(x): else: names = lambda x: x - # Inframe, all special methods have default_axis=None, flex methods have - # default_axis set to the default (columns) # yapf: disable new_methods = dict( - add=arith_method(operator.add, names('add'), op('+'), - default_axis=default_axis), - radd=arith_method(lambda x, y: y + x, names('radd'), op('+'), - default_axis=default_axis), - sub=arith_method(operator.sub, names('sub'), op('-'), - default_axis=default_axis), - mul=arith_method(operator.mul, names('mul'), op('*'), - default_axis=default_axis), - truediv=arith_method(operator.truediv, names('truediv'), op('/'), - default_axis=default_axis), - floordiv=arith_method(operator.floordiv, names('floordiv'), op('//'), - default_axis=default_axis), + add=arith_method(operator.add, names('add'), op('+')), + radd=arith_method(lambda x, y: y + x, names('radd'), op('+')), + sub=arith_method(operator.sub, names('sub'), op('-')), + mul=arith_method(operator.mul, names('mul'), op('*')), + truediv=arith_method(operator.truediv, names('truediv'), op('/')), + floordiv=arith_method(operator.floordiv, names('floordiv'), op('//')), # Causes a floating point exception in the tests when numexpr enabled, # so for now no speedup - mod=arith_method(operator.mod, names('mod'), None, - default_axis=default_axis), - pow=arith_method(operator.pow, names('pow'), op('**'), - default_axis=default_axis), + mod=arith_method(operator.mod, names('mod'), None), + pow=arith_method(operator.pow, names('pow'), op('**')), # not entirely sure why this is necessary, but previously was included # so it's here to maintain compatibility - rmul=arith_method(operator.mul, names('rmul'), op('*'), - default_axis=default_axis), - rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-'), - default_axis=default_axis), + rmul=arith_method(operator.mul, names('rmul'), op('*')), + rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-')), rtruediv=arith_method(lambda x, y: operator.truediv(y, x), - names('rtruediv'), op('/'), - default_axis=default_axis), + names('rtruediv'), op('/')), rfloordiv=arith_method(lambda x, y: operator.floordiv(y, x), - names('rfloordiv'), op('//'), - default_axis=default_axis), - rpow=arith_method(lambda x, y: y**x, names('rpow'), op('**'), - default_axis=default_axis), - rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'), - default_axis=default_axis)) + names('rfloordiv'), op('//')), + rpow=arith_method(lambda x, y: y**x, names('rpow'), op('**')), + rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'))) # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] @@ -425,10 +439,7 @@ def names(x): names('rxor'), op('^')))) if have_divmod: # divmod doesn't have an op that is supported by numexpr - new_methods['divmod'] = arith_method(divmod, - names('divmod'), - None, - default_axis=default_axis) + new_methods['divmod'] = arith_method(divmod, names('divmod'), None) new_methods = {names(k): v for k, v in new_methods.items()} return new_methods @@ -444,8 +455,7 @@ def add_methods(cls, new_methods, force): # Arithmetic def add_special_arithmetic_methods(cls, arith_method=None, comp_method=None, bool_method=None, - use_numexpr=True, force=False, - have_divmod=False): + force=False): """ Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. @@ -454,27 +464,17 @@ def add_special_arithmetic_methods(cls, arith_method=None, ---------- arith_method : function (optional) factory for special arithmetic methods, with op string: - f(op, name, str_rep, default_axis=None) + f(op, name, str_rep) comp_method : function (optional) factory for rich comparison - signature: f(op, name, str_rep) bool_method : function (optional) factory for boolean methods - signature: f(op, name, str_rep) - use_numexpr : bool, default True - whether to accelerate with numexpr, defaults to True force : bool, default False if False, checks whether function is defined **on ``cls.__dict__``** before defining if True, always defines functions on class base - have_divmod : bool, (optional) - should a divmod method be added? this method is special because it - returns a tuple of cls instead of a single element of type cls """ - - # in frame, special methods have default_axis = None, comp methods use - # 'columns' - - new_methods = _create_methods(arith_method, comp_method, - bool_method, use_numexpr, default_axis=None, - special=True, have_divmod=have_divmod) + new_methods = _create_methods(cls, arith_method, comp_method, bool_method, + special=True) # inplace operators (I feel like these should get passed an `inplace=True` # or just be removed @@ -517,7 +517,7 @@ def f(self, other): def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None, flex_bool_method=None, - use_numexpr=True, force=False): + force=False): """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. @@ -525,20 +525,16 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, Parameters ---------- flex_arith_method : function - factory for special arithmetic methods, with op string: - f(op, name, str_rep, default_axis=None) + factory for flex arithmetic methods, with op string: + f(op, name, str_rep) flex_comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) - use_numexpr : bool, default True - whether to accelerate with numexpr, defaults to True force : bool, default False if False, checks whether function is defined **on ``cls.__dict__``** before defining if True, always defines functions on class base """ - # in frame, default axis is 'columns', doesn't matter for series and panel - new_methods = _create_methods(flex_arith_method, + new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, flex_bool_method, - use_numexpr, default_axis='columns', special=False) new_methods.update(dict(multiply=new_methods['mul'], subtract=new_methods['sub'], @@ -597,7 +593,7 @@ def _construct_divmod_result(left, result, index, name, dtype): ) -def _arith_method_SERIES(op, name, str_rep, default_axis=None): +def _arith_method_SERIES(op, name, str_rep): """ Wrapper function for Series arithmetic operations, to avoid code duplication. @@ -637,15 +633,9 @@ def safe_na_op(lvalues, rvalues): with np.errstate(all='ignore'): return na_op(lvalues, rvalues) except Exception: - if isinstance(rvalues, ABCSeries): - if is_object_dtype(rvalues): - # if dtype is object, try elementwise op - return libalgos.arrmap_object(rvalues, - lambda x: op(lvalues, x)) - else: - if is_object_dtype(lvalues): - return libalgos.arrmap_object(lvalues, - lambda x: op(x, rvalues)) + if is_object_dtype(lvalues): + return libalgos.arrmap_object(lvalues, + lambda x: op(x, rvalues)) raise def wrapper(left, right, name=name, na_op=na_op): @@ -671,7 +661,7 @@ def wrapper(left, right, name=name, na_op=na_op): lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): - rvalues = getattr(rvalues, 'values', rvalues) + rvalues = rvalues.values result = safe_na_op(lvalues, rvalues) return construct_result(left, result, @@ -933,7 +923,7 @@ def wrapper(self, other): return wrapper -def _flex_method_SERIES(op, name, str_rep, default_axis=None): +def _flex_method_SERIES(op, name, str_rep): doc = _make_flex_doc(name, 'series') @Appender(doc) @@ -964,8 +954,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): series_special_funcs = dict(arith_method=_arith_method_SERIES, comp_method=_comp_method_SERIES, - bool_method=_bool_method_SERIES, - have_divmod=True) + bool_method=_bool_method_SERIES) # ----------------------------------------------------------------------------- @@ -1015,9 +1004,10 @@ def to_series(right): return right -def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns'): +def _arith_method_FRAME(op, name, str_rep=None): eval_kwargs = _gen_eval_kwargs(name) fill_zeros = _gen_fill_zeros(name) + default_axis = _get_frame_op_default_axis(name) def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -1088,7 +1078,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f -def _flex_comp_method_FRAME(op, name, str_rep=None, default_axis='columns'): +def _flex_comp_method_FRAME(op, name, str_rep=None): + default_axis = _get_frame_op_default_axis(name) def na_op(x, y): try: @@ -1167,8 +1158,7 @@ def f(self, other): # ----------------------------------------------------------------------------- # Panel -def _arith_method_PANEL(op, name, str_rep=None, default_axis=None): - +def _arith_method_PANEL(op, name, str_rep=None): # work only for scalars def f(self, other): if not is_scalar(other): @@ -1228,6 +1218,122 @@ def f(self, other, axis=None): return f +def _flex_method_PANEL(op, name, str_rep=None): + eval_kwargs = _gen_eval_kwargs(name) + fill_zeros = _gen_fill_zeros(name) + + def na_op(x, y): + import pandas.core.computation.expressions as expressions + + try: + result = expressions.evaluate(op, str_rep, x, y, + errors='raise', + **eval_kwargs) + except TypeError: + result = op(x, y) + + # handles discrepancy between numpy and numexpr on division/mod + # by 0 though, given that these are generally (always?) + # non-scalars, I'm not sure whether it's worth it at the moment + result = missing.fill_zeros(result, x, y, name, fill_zeros) + return result + + if name in _op_descriptions: + doc = _make_flex_doc(name, 'panel') + else: + # doc strings substitors + doc = _agg_doc_PANEL.format(op_name=name) + + @Appender(doc) + def f(self, other, axis=0): + return self._combine(other, na_op, axis=axis) + + f.__name__ = name + return f + + panel_special_funcs = dict(arith_method=_arith_method_PANEL, comp_method=_comp_method_PANEL, bool_method=_arith_method_PANEL) + + +# ----------------------------------------------------------------------------- +# Sparse + + +def _arith_method_SPARSE_SERIES(op, name, str_rep=None): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + + str_rep is not used, but is present for compatibility. + """ + + def wrapper(self, other): + if isinstance(other, ABCDataFrame): + return NotImplemented + elif isinstance(other, ABCSeries): + if not isinstance(other, ABCSparseSeries): + other = other.to_sparse(fill_value=self.fill_value) + return _sparse_series_op(self, other, op, name) + elif is_scalar(other): + with np.errstate(all='ignore'): + new_values = op(self.values, other) + return self._constructor(new_values, + index=self.index, + name=self.name) + else: # pragma: no cover + raise TypeError('operation with {other} not supported' + .format(other=type(other))) + + wrapper.__name__ = name + if name.startswith("__"): + # strip special method names, e.g. `__add__` needs to be `add` when + # passed to _sparse_series_op + name = name[2:-2] + return wrapper + + +def _sparse_series_op(left, right, op, name): + left, right = left.align(right, join='outer', copy=False) + new_index = left.index + new_name = com._maybe_match_name(left, right) + + from pandas.core.sparse.array import _sparse_array_op + result = _sparse_array_op(left.values, right.values, op, name, + series=True) + return left._constructor(result, index=new_index, name=new_name) + + +def _arith_method_SPARSE_ARRAY(op, name, str_rep=None): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + """ + + def wrapper(self, other): + from pandas.core.sparse.array import ( + SparseArray, _sparse_array_op, _wrap_result, _get_fill) + if isinstance(other, np.ndarray): + if len(self) != len(other): + raise AssertionError("length mismatch: {self} vs. {other}" + .format(self=len(self), other=len(other))) + if not isinstance(other, SparseArray): + dtype = getattr(other, 'dtype', None) + other = SparseArray(other, fill_value=self.fill_value, + dtype=dtype) + return _sparse_array_op(self, other, op, name) + elif is_scalar(other): + with np.errstate(all='ignore'): + fill = op(_get_fill(self), np.asarray(other)) + result = op(self.sp_values, other) + + return _wrap_result(name, result, self.sp_index, fill) + else: # pragma: no cover + raise TypeError('operation with {other} not supported' + .format(other=type(other))) + + if name.startswith("__"): + name = name[2:-2] + wrapper.__name__ = name + return wrapper diff --git a/pandas/core/panel.py b/pandas/core/panel.py index afdd9bae3006f..2cb80e938afb9 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -16,7 +16,6 @@ from pandas.core.dtypes.missing import notna import pandas.core.ops as ops -import pandas.core.missing as missing import pandas.core.common as com from pandas import compat from pandas.compat import (map, zip, range, u, OrderedDict) @@ -1521,52 +1520,6 @@ def _extract_axis(self, data, axis=0, intersect=False): return _ensure_index(index) - @classmethod - def _add_aggregate_operations(cls, use_numexpr=True): - """ add the operations to the cls; evaluate the doc strings again """ - - def _panel_arith_method(op, name, str_rep=None, default_axis=None): - - eval_kwargs = ops._gen_eval_kwargs(name) - fill_zeros = ops._gen_fill_zeros(name) - - def na_op(x, y): - import pandas.core.computation.expressions as expressions - - try: - result = expressions.evaluate(op, str_rep, x, y, - errors='raise', - **eval_kwargs) - except TypeError: - result = op(x, y) - - # handles discrepancy between numpy and numexpr on division/mod - # by 0 though, given that these are generally (always?) - # non-scalars, I'm not sure whether it's worth it at the moment - result = missing.fill_zeros(result, x, y, name, fill_zeros) - return result - - if name in ops._op_descriptions: - doc = ops._make_flex_doc(name, 'panel') - else: - # doc strings substitors - doc = ops._agg_doc_PANEL.format( - construct=cls._constructor_sliced.__name__, - cls_name=cls.__name__, wrp_method=name, - axis_order=', '.join(cls._AXIS_ORDERS)) - - @Appender(doc) - def f(self, other, axis=0): - return self._combine(other, na_op, axis=axis) - - f.__name__ = name - return f - - # add `div`, `mul`, `pow`, etc.. - ops.add_flex_arithmetic_methods( - cls, _panel_arith_method, use_numexpr=use_numexpr, - flex_comp_method=ops._comp_method_PANEL) - Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, stat_axis=1, aliases={'major': 'major_axis', @@ -1575,7 +1528,8 @@ def f(self, other, axis=0): 'minor_axis': 'columns'}) ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) -Panel._add_aggregate_operations() +ops.add_flex_arithmetic_methods(Panel, ops._flex_method_PANEL, + flex_comp_method=ops._comp_method_PANEL) Panel._add_numeric_operations() diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 059e399593971..fa07400a0706e 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -14,8 +14,7 @@ from pandas.compat import range from pandas.compat.numpy import function as nv -from pandas.core.dtypes.generic import ( - ABCSparseArray, ABCSparseSeries) +from pandas.core.dtypes.generic import ABCSparseSeries from pandas.core.dtypes.common import ( _ensure_platform_int, is_float, is_integer, @@ -43,38 +42,6 @@ _sparray_doc_kwargs = dict(klass='SparseArray') -def _arith_method_SPARSE_ARRAY(op, name, str_rep=None, default_axis=None): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. - """ - - def wrapper(self, other): - if isinstance(other, np.ndarray): - if len(self) != len(other): - raise AssertionError("length mismatch: {self} vs. {other}" - .format(self=len(self), other=len(other))) - if not isinstance(other, ABCSparseArray): - dtype = getattr(other, 'dtype', None) - other = SparseArray(other, fill_value=self.fill_value, - dtype=dtype) - return _sparse_array_op(self, other, op, name) - elif is_scalar(other): - with np.errstate(all='ignore'): - fill = op(_get_fill(self), np.asarray(other)) - result = op(self.sp_values, other) - - return _wrap_result(name, result, self.sp_index, fill) - else: # pragma: no cover - raise TypeError('operation with {other} not supported' - .format(other=type(other))) - - if name.startswith("__"): - name = name[2:-2] - wrapper.__name__ = name - return wrapper - - def _get_fill(arr): # coerce fill_value to arr dtype if possible # int64 SparseArray can have NaN as fill_value if there is no missing @@ -864,7 +831,6 @@ def _make_index(length, indices, kind): ops.add_special_arithmetic_methods(SparseArray, - arith_method=_arith_method_SPARSE_ARRAY, - comp_method=_arith_method_SPARSE_ARRAY, - bool_method=_arith_method_SPARSE_ARRAY, - use_numexpr=False) + arith_method=ops._arith_method_SPARSE_ARRAY, + comp_method=ops._arith_method_SPARSE_ARRAY, + bool_method=ops._arith_method_SPARSE_ARRAY) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index c7f5b0ba67c19..cc08ccf77ad26 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -981,7 +981,5 @@ def homogenize(series_dict): # use unaccelerated ops for sparse objects -ops.add_flex_arithmetic_methods(SparseDataFrame, use_numexpr=False, - **ops.frame_flex_funcs) -ops.add_special_arithmetic_methods(SparseDataFrame, use_numexpr=False, - **ops.frame_special_funcs) +ops.add_flex_arithmetic_methods(SparseDataFrame, **ops.frame_flex_funcs) +ops.add_special_arithmetic_methods(SparseDataFrame, **ops.frame_special_funcs) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 2c8fd20f8eab1..4e207f9d1838c 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -9,12 +9,10 @@ import warnings from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.common import is_scalar from pandas.compat.numpy import function as nv from pandas.core.index import Index, _ensure_index, InvalidIndexError from pandas.core.series import Series -from pandas.core.frame import DataFrame from pandas.core.internals import SingleBlockManager from pandas.core import generic import pandas.core.common as com @@ -23,7 +21,7 @@ from pandas.util._decorators import Appender from pandas.core.sparse.array import ( - make_sparse, _sparse_array_op, SparseArray, + make_sparse, SparseArray, _make_index) from pandas._libs.sparse import BlockIndex, IntIndex import pandas._libs.sparse as splib @@ -37,53 +35,6 @@ axes_single_arg="{0, 'index'}", optional_labels='', optional_axis='') -# ----------------------------------------------------------------------------- -# Wrapper function for Series arithmetic methods - - -def _arith_method_SPARSE_SERIES(op, name, str_rep=None, default_axis=None): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. - - str_rep and default_axis are not used, but are - present for compatibility. - """ - - def wrapper(self, other): - if isinstance(other, Series): - if not isinstance(other, SparseSeries): - other = other.to_sparse(fill_value=self.fill_value) - return _sparse_series_op(self, other, op, name) - elif isinstance(other, DataFrame): - return NotImplemented - elif is_scalar(other): - with np.errstate(all='ignore'): - new_values = op(self.values, other) - return self._constructor(new_values, - index=self.index, - name=self.name) - else: # pragma: no cover - raise TypeError('operation with {other} not supported' - .format(other=type(other))) - - wrapper.__name__ = name - if name.startswith("__"): - # strip special method names, e.g. `__add__` needs to be `add` when - # passed to _sparse_series_op - name = name[2:-2] - return wrapper - - -def _sparse_series_op(left, right, op, name): - left, right = left.align(right, join='outer', copy=False) - new_index = left.index - new_name = com._maybe_match_name(left, right) - - result = _sparse_array_op(left.values, right.values, op, name, - series=True) - return left._constructor(result, index=new_index, name=new_name) - class SparseSeries(Series): """Data structure for labeled, sparse floating point data @@ -861,14 +812,11 @@ def from_coo(cls, A, dense_index=False): # overwrite series methods with unaccelerated versions -ops.add_special_arithmetic_methods(SparseSeries, use_numexpr=False, - **ops.series_special_funcs) -ops.add_flex_arithmetic_methods(SparseSeries, use_numexpr=False, - **ops.series_flex_funcs) +ops.add_special_arithmetic_methods(SparseSeries, **ops.series_special_funcs) +ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs) # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. ops.add_special_arithmetic_methods(SparseSeries, - arith_method=_arith_method_SPARSE_SERIES, - comp_method=_arith_method_SPARSE_SERIES, - bool_method=None, use_numexpr=False, - force=True) + ops._arith_method_SPARSE_SERIES, + comp_method=ops._arith_method_SPARSE_SERIES, + bool_method=None, force=True)