From 47a0352314e2710bb70c09fa36a769623fa8aee4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 25 Jan 2018 09:42:32 -0800 Subject: [PATCH 1/5] make more ops kwargs explicit, move flex_method_PANEL --- pandas/core/ops.py | 182 ++++++++++++++++++++--------------- pandas/core/panel.py | 50 +--------- pandas/core/sparse/array.py | 5 +- pandas/core/sparse/frame.py | 6 +- pandas/core/sparse/series.py | 14 +-- 5 files changed, 116 insertions(+), 141 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 3db2dd849ccee..2f093d63842e6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -109,6 +109,31 @@ def _gen_fill_zeros(name): return fill_value +def _get_frame_op_default_axis(name): + """ + Only DataFrame cares about default_axis, specifically: + special methods have default_axis=None and flex methods + have default_axis='columns'. + + Parameters + ---------- + name : str + + Returns + ------- + default_axis: str or None + """ + if name.replace('__r', '__') in ['__and__', '__or__', '__xor__']: + # bool methods + return 'columns' + elif name.startswith('__'): + # __add__, __mul__, ... + return None + else: + # add, mul, ... + return 'columns' + + # ----------------------------------------------------------------------------- # Docstring Generation and Templates @@ -281,17 +306,17 @@ def _gen_fill_zeros(name): _agg_doc_PANEL = """ -Wrapper method for {wrp_method} +Wrapper method for {op_name} Parameters ---------- -other : {construct} or {cls_name} -axis : {{{axis_order}}} +other : DataFrame or Panel +axis : {{items, major_axis, minor_axis}} Axis to broadcast over Returns ------- -{cls_name} +Panel """ @@ -338,13 +363,10 @@ def _make_flex_doc(op_name, typ): def _create_methods(arith_method, comp_method, bool_method, - use_numexpr, special=False, default_axis='columns', - have_divmod=False): + use_numexpr, special=False, have_divmod=False): # creates actual methods based upon arithmetic, comp and bool method # constructors. - # NOTE: Only frame cares about default_axis, specifically: special methods - # have default axis None, whereas flex methods have default axis 'columns' # if we're not using numexpr, then don't pass a str_rep if use_numexpr: op = lambda x: x @@ -360,44 +382,28 @@ def names(x): else: names = lambda x: x - # Inframe, all special methods have default_axis=None, flex methods have - # default_axis set to the default (columns) # yapf: disable new_methods = dict( - add=arith_method(operator.add, names('add'), op('+'), - default_axis=default_axis), - radd=arith_method(lambda x, y: y + x, names('radd'), op('+'), - default_axis=default_axis), - sub=arith_method(operator.sub, names('sub'), op('-'), - default_axis=default_axis), - mul=arith_method(operator.mul, names('mul'), op('*'), - default_axis=default_axis), - truediv=arith_method(operator.truediv, names('truediv'), op('/'), - default_axis=default_axis), - floordiv=arith_method(operator.floordiv, names('floordiv'), op('//'), - default_axis=default_axis), + add=arith_method(operator.add, names('add'), op('+')), + radd=arith_method(lambda x, y: y + x, names('radd'), op('+')), + sub=arith_method(operator.sub, names('sub'), op('-')), + mul=arith_method(operator.mul, names('mul'), op('*')), + truediv=arith_method(operator.truediv, names('truediv'), op('/')), + floordiv=arith_method(operator.floordiv, names('floordiv'), op('//')), # Causes a floating point exception in the tests when numexpr enabled, # so for now no speedup - mod=arith_method(operator.mod, names('mod'), None, - default_axis=default_axis), - pow=arith_method(operator.pow, names('pow'), op('**'), - default_axis=default_axis), + mod=arith_method(operator.mod, names('mod'), None), + pow=arith_method(operator.pow, names('pow'), op('**')), # not entirely sure why this is necessary, but previously was included # so it's here to maintain compatibility - rmul=arith_method(operator.mul, names('rmul'), op('*'), - default_axis=default_axis), - rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-'), - default_axis=default_axis), + rmul=arith_method(operator.mul, names('rmul'), op('*')), + rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-')), rtruediv=arith_method(lambda x, y: operator.truediv(y, x), - names('rtruediv'), op('/'), - default_axis=default_axis), + names('rtruediv'), op('/')), rfloordiv=arith_method(lambda x, y: operator.floordiv(y, x), - names('rfloordiv'), op('//'), - default_axis=default_axis), - rpow=arith_method(lambda x, y: y**x, names('rpow'), op('**'), - default_axis=default_axis), - rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'), - default_axis=default_axis)) + names('rfloordiv'), op('//')), + rpow=arith_method(lambda x, y: y**x, names('rpow'), op('**')), + rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'))) # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] @@ -425,10 +431,7 @@ def names(x): names('rxor'), op('^')))) if have_divmod: # divmod doesn't have an op that is supported by numexpr - new_methods['divmod'] = arith_method(divmod, - names('divmod'), - None, - default_axis=default_axis) + new_methods['divmod'] = arith_method(divmod, names('divmod'), None) new_methods = {names(k): v for k, v in new_methods.items()} return new_methods @@ -444,8 +447,7 @@ def add_methods(cls, new_methods, force): # Arithmetic def add_special_arithmetic_methods(cls, arith_method=None, comp_method=None, bool_method=None, - use_numexpr=True, force=False, - have_divmod=False): + force=False): """ Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. @@ -454,26 +456,24 @@ def add_special_arithmetic_methods(cls, arith_method=None, ---------- arith_method : function (optional) factory for special arithmetic methods, with op string: - f(op, name, str_rep, default_axis=None) + f(op, name, str_rep) comp_method : function (optional) factory for rich comparison - signature: f(op, name, str_rep) bool_method : function (optional) factory for boolean methods - signature: f(op, name, str_rep) - use_numexpr : bool, default True - whether to accelerate with numexpr, defaults to True force : bool, default False if False, checks whether function is defined **on ``cls.__dict__``** before defining if True, always defines functions on class base - have_divmod : bool, (optional) - should a divmod method be added? this method is special because it - returns a tuple of cls instead of a single element of type cls """ + subtyp = getattr(cls, '_subtyp', '') + use_numexpr = 'sparse' not in subtyp + # numexpr is available for non-sparse classes - # in frame, special methods have default_axis = None, comp methods use - # 'columns' + have_divmod = issubclass(cls, ABCSeries) + # divmod is available for Series and SparseSeries new_methods = _create_methods(arith_method, comp_method, - bool_method, use_numexpr, default_axis=None, + bool_method, use_numexpr, special=True, have_divmod=have_divmod) # inplace operators (I feel like these should get passed an `inplace=True` @@ -517,7 +517,7 @@ def f(self, other): def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None, flex_bool_method=None, - use_numexpr=True, force=False): + force=False): """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. @@ -526,19 +526,19 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, ---------- flex_arith_method : function factory for special arithmetic methods, with op string: - f(op, name, str_rep, default_axis=None) + f(op, name, str_rep) flex_comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) - use_numexpr : bool, default True - whether to accelerate with numexpr, defaults to True force : bool, default False if False, checks whether function is defined **on ``cls.__dict__``** before defining if True, always defines functions on class base """ - # in frame, default axis is 'columns', doesn't matter for series and panel + subtyp = getattr(cls, '_subtyp', '') + use_numexpr = 'sparse' not in subtyp + new_methods = _create_methods(flex_arith_method, flex_comp_method, flex_bool_method, - use_numexpr, default_axis='columns', + use_numexpr, special=False) new_methods.update(dict(multiply=new_methods['mul'], subtract=new_methods['sub'], @@ -597,7 +597,7 @@ def _construct_divmod_result(left, result, index, name, dtype): ) -def _arith_method_SERIES(op, name, str_rep, default_axis=None): +def _arith_method_SERIES(op, name, str_rep): """ Wrapper function for Series arithmetic operations, to avoid code duplication. @@ -637,15 +637,9 @@ def safe_na_op(lvalues, rvalues): with np.errstate(all='ignore'): return na_op(lvalues, rvalues) except Exception: - if isinstance(rvalues, ABCSeries): - if is_object_dtype(rvalues): - # if dtype is object, try elementwise op - return libalgos.arrmap_object(rvalues, - lambda x: op(lvalues, x)) - else: - if is_object_dtype(lvalues): - return libalgos.arrmap_object(lvalues, - lambda x: op(x, rvalues)) + if is_object_dtype(lvalues): + return libalgos.arrmap_object(lvalues, + lambda x: op(x, rvalues)) raise def wrapper(left, right, name=name, na_op=na_op): @@ -671,7 +665,7 @@ def wrapper(left, right, name=name, na_op=na_op): lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): - rvalues = getattr(rvalues, 'values', rvalues) + rvalues = rvalues.values result = safe_na_op(lvalues, rvalues) return construct_result(left, result, @@ -933,7 +927,7 @@ def wrapper(self, other): return wrapper -def _flex_method_SERIES(op, name, str_rep, default_axis=None): +def _flex_method_SERIES(op, name, str_rep): doc = _make_flex_doc(name, 'series') @Appender(doc) @@ -964,8 +958,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): series_special_funcs = dict(arith_method=_arith_method_SERIES, comp_method=_comp_method_SERIES, - bool_method=_bool_method_SERIES, - have_divmod=True) + bool_method=_bool_method_SERIES) # ----------------------------------------------------------------------------- @@ -1015,9 +1008,10 @@ def to_series(right): return right -def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns'): +def _arith_method_FRAME(op, name, str_rep=None): eval_kwargs = _gen_eval_kwargs(name) fill_zeros = _gen_fill_zeros(name) + default_axis = _get_frame_op_default_axis(name) def na_op(x, y): import pandas.core.computation.expressions as expressions @@ -1088,7 +1082,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f -def _flex_comp_method_FRAME(op, name, str_rep=None, default_axis='columns'): +def _flex_comp_method_FRAME(op, name, str_rep=None): + default_axis = _get_frame_op_default_axis(name) def na_op(x, y): try: @@ -1167,8 +1162,7 @@ def f(self, other): # ----------------------------------------------------------------------------- # Panel -def _arith_method_PANEL(op, name, str_rep=None, default_axis=None): - +def _arith_method_PANEL(op, name, str_rep=None): # work only for scalars def f(self, other): if not is_scalar(other): @@ -1228,6 +1222,40 @@ def f(self, other, axis=None): return f +def _flex_method_PANEL(op, name, str_rep=None): + eval_kwargs = _gen_eval_kwargs(name) + fill_zeros = _gen_fill_zeros(name) + + def na_op(x, y): + import pandas.core.computation.expressions as expressions + + try: + result = expressions.evaluate(op, str_rep, x, y, + errors='raise', + **eval_kwargs) + except TypeError: + result = op(x, y) + + # handles discrepancy between numpy and numexpr on division/mod + # by 0 though, given that these are generally (always?) + # non-scalars, I'm not sure whether it's worth it at the moment + result = missing.fill_zeros(result, x, y, name, fill_zeros) + return result + + if name in _op_descriptions: + doc = _make_flex_doc(name, 'panel') + else: + # doc strings substitors + doc = _agg_doc_PANEL.format(op_name=name) + + @Appender(doc) + def f(self, other, axis=0): + return self._combine(other, na_op, axis=axis) + + f.__name__ = name + return f + + panel_special_funcs = dict(arith_method=_arith_method_PANEL, comp_method=_comp_method_PANEL, bool_method=_arith_method_PANEL) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index afdd9bae3006f..2cb80e938afb9 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -16,7 +16,6 @@ from pandas.core.dtypes.missing import notna import pandas.core.ops as ops -import pandas.core.missing as missing import pandas.core.common as com from pandas import compat from pandas.compat import (map, zip, range, u, OrderedDict) @@ -1521,52 +1520,6 @@ def _extract_axis(self, data, axis=0, intersect=False): return _ensure_index(index) - @classmethod - def _add_aggregate_operations(cls, use_numexpr=True): - """ add the operations to the cls; evaluate the doc strings again """ - - def _panel_arith_method(op, name, str_rep=None, default_axis=None): - - eval_kwargs = ops._gen_eval_kwargs(name) - fill_zeros = ops._gen_fill_zeros(name) - - def na_op(x, y): - import pandas.core.computation.expressions as expressions - - try: - result = expressions.evaluate(op, str_rep, x, y, - errors='raise', - **eval_kwargs) - except TypeError: - result = op(x, y) - - # handles discrepancy between numpy and numexpr on division/mod - # by 0 though, given that these are generally (always?) - # non-scalars, I'm not sure whether it's worth it at the moment - result = missing.fill_zeros(result, x, y, name, fill_zeros) - return result - - if name in ops._op_descriptions: - doc = ops._make_flex_doc(name, 'panel') - else: - # doc strings substitors - doc = ops._agg_doc_PANEL.format( - construct=cls._constructor_sliced.__name__, - cls_name=cls.__name__, wrp_method=name, - axis_order=', '.join(cls._AXIS_ORDERS)) - - @Appender(doc) - def f(self, other, axis=0): - return self._combine(other, na_op, axis=axis) - - f.__name__ = name - return f - - # add `div`, `mul`, `pow`, etc.. - ops.add_flex_arithmetic_methods( - cls, _panel_arith_method, use_numexpr=use_numexpr, - flex_comp_method=ops._comp_method_PANEL) - Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, stat_axis=1, aliases={'major': 'major_axis', @@ -1575,7 +1528,8 @@ def f(self, other, axis=0): 'minor_axis': 'columns'}) ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) -Panel._add_aggregate_operations() +ops.add_flex_arithmetic_methods(Panel, ops._flex_method_PANEL, + flex_comp_method=ops._comp_method_PANEL) Panel._add_numeric_operations() diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 059e399593971..37c0ae3db535c 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -43,7 +43,7 @@ _sparray_doc_kwargs = dict(klass='SparseArray') -def _arith_method_SPARSE_ARRAY(op, name, str_rep=None, default_axis=None): +def _arith_method_SPARSE_ARRAY(op, name, str_rep=None): """ Wrapper function for Series arithmetic operations, to avoid code duplication. @@ -866,5 +866,4 @@ def _make_index(length, indices, kind): ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method_SPARSE_ARRAY, comp_method=_arith_method_SPARSE_ARRAY, - bool_method=_arith_method_SPARSE_ARRAY, - use_numexpr=False) + bool_method=_arith_method_SPARSE_ARRAY) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index c7f5b0ba67c19..cc08ccf77ad26 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -981,7 +981,5 @@ def homogenize(series_dict): # use unaccelerated ops for sparse objects -ops.add_flex_arithmetic_methods(SparseDataFrame, use_numexpr=False, - **ops.frame_flex_funcs) -ops.add_special_arithmetic_methods(SparseDataFrame, use_numexpr=False, - **ops.frame_special_funcs) +ops.add_flex_arithmetic_methods(SparseDataFrame, **ops.frame_flex_funcs) +ops.add_special_arithmetic_methods(SparseDataFrame, **ops.frame_special_funcs) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 2c8fd20f8eab1..1f9a259a3a62e 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -41,13 +41,12 @@ # Wrapper function for Series arithmetic methods -def _arith_method_SPARSE_SERIES(op, name, str_rep=None, default_axis=None): +def _arith_method_SPARSE_SERIES(op, name, str_rep=None): """ Wrapper function for Series arithmetic operations, to avoid code duplication. - str_rep and default_axis are not used, but are - present for compatibility. + str_rep is not used, but is present for compatibility. """ def wrapper(self, other): @@ -861,14 +860,11 @@ def from_coo(cls, A, dense_index=False): # overwrite series methods with unaccelerated versions -ops.add_special_arithmetic_methods(SparseSeries, use_numexpr=False, - **ops.series_special_funcs) -ops.add_flex_arithmetic_methods(SparseSeries, use_numexpr=False, - **ops.series_flex_funcs) +ops.add_special_arithmetic_methods(SparseSeries, **ops.series_special_funcs) +ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs) # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. ops.add_special_arithmetic_methods(SparseSeries, arith_method=_arith_method_SPARSE_SERIES, comp_method=_arith_method_SPARSE_SERIES, - bool_method=None, use_numexpr=False, - force=True) + bool_method=None, force=True) From 7108faab2c01da995477a3efaae7feb49f875d9b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 25 Jan 2018 09:50:05 -0800 Subject: [PATCH 2/5] centralize class inspection in _create_methods --- pandas/core/ops.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2f093d63842e6..165bd69bd457c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -362,11 +362,18 @@ def _make_flex_doc(op_name, typ): # methods -def _create_methods(arith_method, comp_method, bool_method, - use_numexpr, special=False, have_divmod=False): +def _create_methods(cls, arith_method, comp_method, bool_method, + special=False): # creates actual methods based upon arithmetic, comp and bool method # constructors. + subtyp = getattr(cls, '_subtyp', '') + use_numexpr = 'sparse' not in subtyp + # numexpr is available for non-sparse classes + + have_divmod = issubclass(cls, ABCSeries) + # divmod is available for Series and SparseSeries + # if we're not using numexpr, then don't pass a str_rep if use_numexpr: op = lambda x: x @@ -465,16 +472,8 @@ def add_special_arithmetic_methods(cls, arith_method=None, if False, checks whether function is defined **on ``cls.__dict__``** before defining if True, always defines functions on class base """ - subtyp = getattr(cls, '_subtyp', '') - use_numexpr = 'sparse' not in subtyp - # numexpr is available for non-sparse classes - - have_divmod = issubclass(cls, ABCSeries) - # divmod is available for Series and SparseSeries - - new_methods = _create_methods(arith_method, comp_method, - bool_method, use_numexpr, - special=True, have_divmod=have_divmod) + new_methods = _create_methods(cls, arith_method, comp_method, bool_method, + special=True) # inplace operators (I feel like these should get passed an `inplace=True` # or just be removed @@ -533,12 +532,8 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, if False, checks whether function is defined **on ``cls.__dict__``** before defining if True, always defines functions on class base """ - subtyp = getattr(cls, '_subtyp', '') - use_numexpr = 'sparse' not in subtyp - - new_methods = _create_methods(flex_arith_method, + new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, flex_bool_method, - use_numexpr, special=False) new_methods.update(dict(multiply=new_methods['mul'], subtract=new_methods['sub'], From 1ee3ce56bc5056cd584e4e3293b155dca2848b75 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 25 Jan 2018 09:52:05 -0800 Subject: [PATCH 3/5] Docstring fixup --- pandas/core/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 165bd69bd457c..2449c9965542c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -524,7 +524,7 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, Parameters ---------- flex_arith_method : function - factory for special arithmetic methods, with op string: + factory for flex arithmetic methods, with op string: f(op, name, str_rep) flex_comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) From c9a9a2a065a1d8acfa72f0da9c278ce7cad14da1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 25 Jan 2018 21:56:36 -0800 Subject: [PATCH 4/5] centralize remaining ops _foo_method_BAR functions in ops --- pandas/core/ops.py | 85 +++++++++++++++++++++++++++++++++++- pandas/core/sparse/array.py | 41 ++--------------- pandas/core/sparse/series.py | 54 ++--------------------- 3 files changed, 91 insertions(+), 89 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 2449c9965542c..81a10c7acb30e 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -39,7 +39,8 @@ ABCSeries, ABCDataFrame, ABCIndex, - ABCPeriodIndex) + ABCPeriodIndex, + ABCSparseSeries) def _gen_eval_kwargs(name): @@ -1254,3 +1255,85 @@ def f(self, other, axis=0): panel_special_funcs = dict(arith_method=_arith_method_PANEL, comp_method=_comp_method_PANEL, bool_method=_arith_method_PANEL) + + +# ----------------------------------------------------------------------------- +# Sparse + + +def _arith_method_SPARSE_SERIES(op, name, str_rep=None): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + + str_rep is not used, but is present for compatibility. + """ + + def wrapper(self, other): + if isinstance(other, ABCDataFrame): + return NotImplemented + elif isinstance(other, ABCSeries): + if not isinstance(other, ABCSparseSeries): + other = other.to_sparse(fill_value=self.fill_value) + return _sparse_series_op(self, other, op, name) + elif is_scalar(other): + with np.errstate(all='ignore'): + new_values = op(self.values, other) + return self._constructor(new_values, + index=self.index, + name=self.name) + else: # pragma: no cover + raise TypeError('operation with {other} not supported' + .format(other=type(other))) + + wrapper.__name__ = name + if name.startswith("__"): + # strip special method names, e.g. `__add__` needs to be `add` when + # passed to _sparse_series_op + name = name[2:-2] + return wrapper + + +def _sparse_series_op(left, right, op, name): + left, right = left.align(right, join='outer', copy=False) + new_index = left.index + new_name = com._maybe_match_name(left, right) + + from pandas.core.sparse.array import _sparse_array_op + result = _sparse_array_op(left.values, right.values, op, name, + series=True) + return left._constructor(result, index=new_index, name=new_name) + + +def _arith_method_SPARSE_ARRAY(op, name, str_rep=None): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + """ + + def wrapper(self, other): + from pandas.core.sparse.array import ( + SparseArray, _sparse_array_op, _wrap_result, _get_fill) + if isinstance(other, np.ndarray): + if len(self) != len(other): + raise AssertionError("length mismatch: {self} vs. {other}" + .format(self=len(self), other=len(other))) + if not isinstance(other, SparseArray): + dtype = getattr(other, 'dtype', None) + other = SparseArray(other, fill_value=self.fill_value, + dtype=dtype) + return _sparse_array_op(self, other, op, name) + elif is_scalar(other): + with np.errstate(all='ignore'): + fill = op(_get_fill(self), np.asarray(other)) + result = op(self.sp_values, other) + + return _wrap_result(name, result, self.sp_index, fill) + else: # pragma: no cover + raise TypeError('operation with {other} not supported' + .format(other=type(other))) + + if name.startswith("__"): + name = name[2:-2] + wrapper.__name__ = name + return wrapper diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 37c0ae3db535c..fa07400a0706e 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -14,8 +14,7 @@ from pandas.compat import range from pandas.compat.numpy import function as nv -from pandas.core.dtypes.generic import ( - ABCSparseArray, ABCSparseSeries) +from pandas.core.dtypes.generic import ABCSparseSeries from pandas.core.dtypes.common import ( _ensure_platform_int, is_float, is_integer, @@ -43,38 +42,6 @@ _sparray_doc_kwargs = dict(klass='SparseArray') -def _arith_method_SPARSE_ARRAY(op, name, str_rep=None): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. - """ - - def wrapper(self, other): - if isinstance(other, np.ndarray): - if len(self) != len(other): - raise AssertionError("length mismatch: {self} vs. {other}" - .format(self=len(self), other=len(other))) - if not isinstance(other, ABCSparseArray): - dtype = getattr(other, 'dtype', None) - other = SparseArray(other, fill_value=self.fill_value, - dtype=dtype) - return _sparse_array_op(self, other, op, name) - elif is_scalar(other): - with np.errstate(all='ignore'): - fill = op(_get_fill(self), np.asarray(other)) - result = op(self.sp_values, other) - - return _wrap_result(name, result, self.sp_index, fill) - else: # pragma: no cover - raise TypeError('operation with {other} not supported' - .format(other=type(other))) - - if name.startswith("__"): - name = name[2:-2] - wrapper.__name__ = name - return wrapper - - def _get_fill(arr): # coerce fill_value to arr dtype if possible # int64 SparseArray can have NaN as fill_value if there is no missing @@ -864,6 +831,6 @@ def _make_index(length, indices, kind): ops.add_special_arithmetic_methods(SparseArray, - arith_method=_arith_method_SPARSE_ARRAY, - comp_method=_arith_method_SPARSE_ARRAY, - bool_method=_arith_method_SPARSE_ARRAY) + arith_method=ops._arith_method_SPARSE_ARRAY, + comp_method=ops._arith_method_SPARSE_ARRAY, + bool_method=ops._arith_method_SPARSE_ARRAY) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 1f9a259a3a62e..4e207f9d1838c 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -9,12 +9,10 @@ import warnings from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.common import is_scalar from pandas.compat.numpy import function as nv from pandas.core.index import Index, _ensure_index, InvalidIndexError from pandas.core.series import Series -from pandas.core.frame import DataFrame from pandas.core.internals import SingleBlockManager from pandas.core import generic import pandas.core.common as com @@ -23,7 +21,7 @@ from pandas.util._decorators import Appender from pandas.core.sparse.array import ( - make_sparse, _sparse_array_op, SparseArray, + make_sparse, SparseArray, _make_index) from pandas._libs.sparse import BlockIndex, IntIndex import pandas._libs.sparse as splib @@ -37,52 +35,6 @@ axes_single_arg="{0, 'index'}", optional_labels='', optional_axis='') -# ----------------------------------------------------------------------------- -# Wrapper function for Series arithmetic methods - - -def _arith_method_SPARSE_SERIES(op, name, str_rep=None): - """ - Wrapper function for Series arithmetic operations, to avoid - code duplication. - - str_rep is not used, but is present for compatibility. - """ - - def wrapper(self, other): - if isinstance(other, Series): - if not isinstance(other, SparseSeries): - other = other.to_sparse(fill_value=self.fill_value) - return _sparse_series_op(self, other, op, name) - elif isinstance(other, DataFrame): - return NotImplemented - elif is_scalar(other): - with np.errstate(all='ignore'): - new_values = op(self.values, other) - return self._constructor(new_values, - index=self.index, - name=self.name) - else: # pragma: no cover - raise TypeError('operation with {other} not supported' - .format(other=type(other))) - - wrapper.__name__ = name - if name.startswith("__"): - # strip special method names, e.g. `__add__` needs to be `add` when - # passed to _sparse_series_op - name = name[2:-2] - return wrapper - - -def _sparse_series_op(left, right, op, name): - left, right = left.align(right, join='outer', copy=False) - new_index = left.index - new_name = com._maybe_match_name(left, right) - - result = _sparse_array_op(left.values, right.values, op, name, - series=True) - return left._constructor(result, index=new_index, name=new_name) - class SparseSeries(Series): """Data structure for labeled, sparse floating point data @@ -865,6 +817,6 @@ def from_coo(cls, A, dense_index=False): # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. ops.add_special_arithmetic_methods(SparseSeries, - arith_method=_arith_method_SPARSE_SERIES, - comp_method=_arith_method_SPARSE_SERIES, + ops._arith_method_SPARSE_SERIES, + comp_method=ops._arith_method_SPARSE_SERIES, bool_method=None, force=True) From 89692f74b95d03df5b07b6af3669089fb59c92f1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 26 Jan 2018 08:57:03 -0800 Subject: [PATCH 5/5] comment above code --- pandas/core/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 81a10c7acb30e..ba8a15b60ba56 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -368,9 +368,9 @@ def _create_methods(cls, arith_method, comp_method, bool_method, # creates actual methods based upon arithmetic, comp and bool method # constructors. + # numexpr is available for non-sparse classes subtyp = getattr(cls, '_subtyp', '') use_numexpr = 'sparse' not in subtyp - # numexpr is available for non-sparse classes have_divmod = issubclass(cls, ABCSeries) # divmod is available for Series and SparseSeries