From ea519e9edf1c4e00bbe5965252190dc5c806ef60 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 21 Feb 2018 13:31:33 -0800 Subject: [PATCH 1/3] make ops.add_foo take just class --- pandas/core/frame.py | 4 +- pandas/core/ops.py | 204 +++++++++++++++++++++++------------ pandas/core/panel.py | 4 +- pandas/core/series.py | 4 +- pandas/core/sparse/array.py | 14 +-- pandas/core/sparse/frame.py | 4 +- pandas/core/sparse/series.py | 5 +- 7 files changed, 147 insertions(+), 92 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efd6814ba04c5..b24ed77a6ad25 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6106,8 +6106,8 @@ def isin(self, values): DataFrame._add_numeric_operations() DataFrame._add_series_or_dataframe_operations() -ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs) -ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs) +ops.add_flex_arithmetic_methods(DataFrame) +ops.add_special_arithmetic_methods(DataFrame) def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index ad6102eb6ad0f..be6f11e0c7d75 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -37,7 +37,7 @@ construct_1d_object_array_from_listlike) from pandas.core.dtypes.generic import ( ABCSeries, - ABCDataFrame, + ABCDataFrame, ABCPanel, ABCIndex, ABCSparseSeries, ABCSparseArray) @@ -567,6 +567,84 @@ def mask_cmp_op(x, y, op, allowed_types): # Functions that add arithmetic methods to objects, given arithmetic factory # methods +def _get_flex_wrappers(cls): + """ + Find the appropriate operation-wrappers to use when defining flex + arithmetic or boolean operations with the given class. + + Parameters + ---------- + cls : class + + Returns + ------- + arith_method : function or None + comp_method : function or None + + Notes + ----- + None is only returned for SparseArray + """ + if issubclass(cls, ABCSeries): + # Same for Series and SparseSeries + # Note this check must come before check for ABCSparseArray + # or else if will incorrectly catch SparseSeries. + arith_method = _flex_method_SERIES + comp_method = _flex_method_SERIES + elif issubclass(cls, ABCSparseArray): + arith_method = None + comp_method = None + elif issubclass(cls, ABCPanel): + arith_method = _flex_method_PANEL + comp_method = _comp_method_PANEL + elif issubclass(cls, ABCDataFrame): + # Same for DataFrame and SparseDataFrame + arith_method = _arith_method_FRAME + comp_method = _flex_comp_method_FRAME + return arith_method, comp_method + + +def _get_special_wrappers(cls): + """ + Find the appropriate operation-wrappers to use when defining special + arithmetic, comparison, or boolean operations with the given class. + + Parameters + ---------- + cls : class + + Returns + ------- + arith_method : function + comp_method : function + bool_method : function + """ + if issubclass(cls, ABCSparseSeries): + # Be sure to catch this before ABCSeries and ABCSparseArray, + # as they will both come see SparseSeries as a subclass + arith_method = _arith_method_SPARSE_SERIES + comp_method = _arith_method_SPARSE_SERIES + bool_method = _bool_method_SERIES + # TODO: I don't think the functions defined by bool_method are tested + elif issubclass(cls, ABCSparseArray): + arith_method = _arith_method_SPARSE_ARRAY + comp_method = _arith_method_SPARSE_ARRAY + bool_method = _arith_method_SPARSE_ARRAY + elif issubclass(cls, ABCPanel): + arith_method = _arith_method_PANEL + comp_method = _comp_method_PANEL + bool_method = _arith_method_PANEL + elif issubclass(cls, ABCDataFrame): + # Same for DataFrame and SparseDataFrame + arith_method = _arith_method_FRAME + comp_method = _comp_method_FRAME + bool_method = _arith_method_FRAME + elif issubclass(cls, ABCSeries): + arith_method = _arith_method_SERIES + comp_method = _comp_method_SERIES + bool_method = _bool_method_SERIES + return arith_method, comp_method, bool_method + def _create_methods(cls, arith_method, comp_method, bool_method, special=False): @@ -599,16 +677,18 @@ def _create_methods(cls, arith_method, comp_method, bool_method, # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] + if have_divmod: + # divmod doesn't have an op that is supported by numexpr + new_methods['divmod'] = arith_method(cls, divmod, special) + + new_methods.update(dict( + eq=comp_method(cls, operator.eq, special), + ne=comp_method(cls, operator.ne, special), + lt=comp_method(cls, operator.lt, special), + gt=comp_method(cls, operator.gt, special), + le=comp_method(cls, operator.le, special), + ge=comp_method(cls, operator.ge, special))) - # Comp methods never had a default axis set - if comp_method: - new_methods.update(dict( - eq=comp_method(cls, operator.eq, special), - ne=comp_method(cls, operator.ne, special), - lt=comp_method(cls, operator.lt, special), - gt=comp_method(cls, operator.gt, special), - le=comp_method(cls, operator.le, special), - ge=comp_method(cls, operator.ge, special))) if bool_method: new_methods.update( dict(and_=bool_method(cls, operator.and_, special), @@ -618,9 +698,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, rand_=bool_method(cls, rand_, special), ror_=bool_method(cls, ror_, special), rxor=bool_method(cls, rxor, special))) - if have_divmod: - # divmod doesn't have an op that is supported by numexpr - new_methods['divmod'] = arith_method(cls, divmod, special) if special: dunderize = lambda x: '__{name}__'.format(name=x.strip('_')) @@ -644,22 +721,17 @@ def add_methods(cls, new_methods): # ---------------------------------------------------------------------- # Arithmetic -def add_special_arithmetic_methods(cls, arith_method=None, - comp_method=None, bool_method=None): +def add_special_arithmetic_methods(cls): """ Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. Parameters ---------- - arith_method : function (optional) - factory for special arithmetic methods: - f(cls, op, special) - comp_method : function (optional) - factory for rich comparison - signature: f(cls, op, special) - bool_method : function (optional) - factory for boolean methods - signature: f(cls, op, special) + cls : class + special methods will be defined and pinned to this class """ + arith_method, comp_method, bool_method = _get_special_wrappers(cls) new_methods = _create_methods(cls, arith_method, comp_method, bool_method, special=True) # inplace operators (I feel like these should get passed an `inplace=True` @@ -692,28 +764,26 @@ def f(self, other): __ipow__=_wrap_inplace_method(new_methods["__pow__"]))) if not compat.PY3: new_methods["__idiv__"] = _wrap_inplace_method(new_methods["__div__"]) - if bool_method: - new_methods.update( - dict(__iand__=_wrap_inplace_method(new_methods["__and__"]), - __ior__=_wrap_inplace_method(new_methods["__or__"]), - __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) + + new_methods.update( + dict(__iand__=_wrap_inplace_method(new_methods["__and__"]), + __ior__=_wrap_inplace_method(new_methods["__or__"]), + __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) add_methods(cls, new_methods=new_methods) -def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None): +def add_flex_arithmetic_methods(cls): """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. Parameters ---------- - flex_arith_method : function - factory for flex arithmetic methods: - f(cls, op, special) - flex_comp_method : function, optional, - factory for rich comparison - signature: f(cls, op, special) + cls : class + flex methods will be defined and pinned to this class """ + flex_arith_method, flex_comp_method = _get_flex_wrappers(cls) new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, bool_method=None, special=False) @@ -1149,14 +1219,6 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): return flex_wrapper -series_flex_funcs = dict(flex_arith_method=_flex_method_SERIES, - flex_comp_method=_flex_method_SERIES) - -series_special_funcs = dict(arith_method=_arith_method_SERIES, - comp_method=_comp_method_SERIES, - bool_method=_bool_method_SERIES) - - # ----------------------------------------------------------------------------- # DataFrame @@ -1398,14 +1460,6 @@ def f(self, other): return f -frame_flex_funcs = dict(flex_arith_method=_arith_method_FRAME, - flex_comp_method=_flex_comp_method_FRAME) - -frame_special_funcs = dict(arith_method=_arith_method_FRAME, - comp_method=_comp_method_FRAME, - bool_method=_arith_method_FRAME) - - # ----------------------------------------------------------------------------- # Panel @@ -1494,16 +1548,38 @@ def f(self, other, axis=0): return f -panel_special_funcs = dict(arith_method=_arith_method_PANEL, - comp_method=_comp_method_PANEL, - bool_method=_arith_method_PANEL) - -panel_flex_funcs = dict(flex_arith_method=_flex_method_PANEL, - flex_comp_method=_comp_method_PANEL) - # ----------------------------------------------------------------------------- # Sparse +def _cast_sparse_series_op(left, right, opname): + """ + For SparseSeries operation, coerce to float64 if the result is expected + to have NaN or inf values + + Parameters + ---------- + left : SparseArray + right : SparseArray + opname : str + + Returns + ------- + left : SparseArray + right : SparseArray + """ + opname = opname.strip('_') + + if is_integer_dtype(left) and is_integer_dtype(right): + # series coerces to float64 if result should have NaN/inf + if opname in ('floordiv', 'mod') and (right.values == 0).any(): + left = left.astype(np.float64) + right = right.astype(np.float64) + elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any(): + left = left.astype(np.float64) + right = right.astype(np.float64) + + return left, right + def _arith_method_SPARSE_SERIES(cls, op, special): """ @@ -1539,8 +1615,8 @@ def _sparse_series_op(left, right, op, name): new_name = com._maybe_match_name(left, right) from pandas.core.sparse.array import _sparse_array_op - result = _sparse_array_op(left.values, right.values, op, name, - series=True) + lvalues, rvalues = _cast_sparse_series_op(left.values, right.values, name) + result = _sparse_array_op(lvalues, rvalues, op, name) return left._constructor(result, index=new_index, name=new_name) @@ -1562,7 +1638,7 @@ def wrapper(self, other): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, dtype=dtype) - return _sparse_array_op(self, other, op, name, series=False) + return _sparse_array_op(self, other, op, name) elif is_scalar(other): with np.errstate(all='ignore'): fill = op(_get_fill(self), np.asarray(other)) @@ -1575,13 +1651,3 @@ def wrapper(self, other): wrapper.__name__ = name return wrapper - - -sparse_array_special_funcs = dict(arith_method=_arith_method_SPARSE_ARRAY, - comp_method=_arith_method_SPARSE_ARRAY, - bool_method=_arith_method_SPARSE_ARRAY) - -sparse_series_special_funcs = dict(arith_method=_arith_method_SPARSE_SERIES, - comp_method=_arith_method_SPARSE_SERIES, - bool_method=_bool_method_SERIES) -# TODO: I don't think the functions defined by bool_method are tested diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 3be1e3ef8734d..fc7fad861df44 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1527,8 +1527,8 @@ def _extract_axis(self, data, axis=0, intersect=False): slicers={'major_axis': 'index', 'minor_axis': 'columns'}) -ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) -ops.add_flex_arithmetic_methods(Panel, **ops.panel_flex_funcs) +ops.add_special_arithmetic_methods(Panel) +ops.add_flex_arithmetic_methods(Panel) Panel._add_numeric_operations() diff --git a/pandas/core/series.py b/pandas/core/series.py index 90dc14836ab55..72e3891e0db9a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3092,8 +3092,8 @@ def to_period(self, freq=None, copy=True): Series._add_series_or_dataframe_operations() # Add arithmetic! -ops.add_flex_arithmetic_methods(Series, **ops.series_flex_funcs) -ops.add_special_arithmetic_methods(Series, **ops.series_special_funcs) +ops.add_flex_arithmetic_methods(Series) +ops.add_special_arithmetic_methods(Series) # ----------------------------------------------------------------------------- diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 92c4fe932f066..5532d7522cd2d 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -53,20 +53,11 @@ def _get_fill(arr): return np.asarray(arr.fill_value) -def _sparse_array_op(left, right, op, name, series=False): +def _sparse_array_op(left, right, op, name): if name.startswith('__'): # For lookups in _libs.sparse we need non-dunder op name name = name[2:-2] - if series and is_integer_dtype(left) and is_integer_dtype(right): - # series coerces to float64 if result should have NaN/inf - if name in ('floordiv', 'mod') and (right.values == 0).any(): - left = left.astype(np.float64) - right = right.astype(np.float64) - elif name in ('rfloordiv', 'rmod') and (left.values == 0).any(): - left = left.astype(np.float64) - right = right.astype(np.float64) - # dtype used to find corresponding sparse method if not is_dtype_equal(left.dtype, right.dtype): dtype = find_common_type([left.dtype, right.dtype]) @@ -850,5 +841,4 @@ def _make_index(length, indices, kind): return index -ops.add_special_arithmetic_methods(SparseArray, - **ops.sparse_array_special_funcs) +ops.add_special_arithmetic_methods(SparseArray) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 872a17d8dbabe..d89b1d681c478 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -1014,5 +1014,5 @@ def homogenize(series_dict): # use unaccelerated ops for sparse objects -ops.add_flex_arithmetic_methods(SparseDataFrame, **ops.frame_flex_funcs) -ops.add_special_arithmetic_methods(SparseDataFrame, **ops.frame_special_funcs) +ops.add_flex_arithmetic_methods(SparseDataFrame) +ops.add_special_arithmetic_methods(SparseDataFrame) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 26cf9dbadbbf2..7a1496bf11117 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -812,6 +812,5 @@ def from_coo(cls, A, dense_index=False): # overwrite series methods with unaccelerated Sparse-specific versions -ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs) -ops.add_special_arithmetic_methods(SparseSeries, - **ops.sparse_series_special_funcs) +ops.add_flex_arithmetic_methods(SparseSeries) +ops.add_special_arithmetic_methods(SparseSeries) From 042b57d096c9d62e7ecff9e0f7760f724e4a0cdc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 23 Feb 2018 09:56:27 -0800 Subject: [PATCH 2/3] combine get-wrapper functions --- pandas/core/ops.py | 96 ++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 58 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index be6f11e0c7d75..a0f46af2359d7 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -567,10 +567,10 @@ def mask_cmp_op(x, y, op, allowed_types): # Functions that add arithmetic methods to objects, given arithmetic factory # methods -def _get_flex_wrappers(cls): +def _get_method_wrappers(cls): """ - Find the appropriate operation-wrappers to use when defining flex - arithmetic or boolean operations with the given class. + Find the appropriate operation-wrappers to use when defining flex/special + arithmetic, boolean, and comparison operations with the given class. Parameters ---------- @@ -578,72 +578,52 @@ def _get_flex_wrappers(cls): Returns ------- - arith_method : function or None - comp_method : function or None + arith_flex : function or None + comp_flex : function or None + arith_special : function + comp_special : function + bool_special : function Notes ----- None is only returned for SparseArray """ - if issubclass(cls, ABCSeries): - # Same for Series and SparseSeries - # Note this check must come before check for ABCSparseArray - # or else if will incorrectly catch SparseSeries. - arith_method = _flex_method_SERIES - comp_method = _flex_method_SERIES - elif issubclass(cls, ABCSparseArray): - arith_method = None - comp_method = None - elif issubclass(cls, ABCPanel): - arith_method = _flex_method_PANEL - comp_method = _comp_method_PANEL - elif issubclass(cls, ABCDataFrame): - # Same for DataFrame and SparseDataFrame - arith_method = _arith_method_FRAME - comp_method = _flex_comp_method_FRAME - return arith_method, comp_method - - -def _get_special_wrappers(cls): - """ - Find the appropriate operation-wrappers to use when defining special - arithmetic, comparison, or boolean operations with the given class. - - Parameters - ---------- - cls : class - - Returns - ------- - arith_method : function - comp_method : function - bool_method : function - """ if issubclass(cls, ABCSparseSeries): # Be sure to catch this before ABCSeries and ABCSparseArray, # as they will both come see SparseSeries as a subclass - arith_method = _arith_method_SPARSE_SERIES - comp_method = _arith_method_SPARSE_SERIES - bool_method = _bool_method_SERIES + arith_flex = _flex_method_SERIES + comp_flex = _flex_method_SERIES + arith_special = _arith_method_SPARSE_SERIES + comp_special = _arith_method_SPARSE_SERIES + bool_special = _bool_method_SERIES # TODO: I don't think the functions defined by bool_method are tested + elif issubclass(cls, ABCSeries): + # Just Series; SparseSeries is caught above + arith_flex = _flex_method_SERIES + comp_flex = _flex_method_SERIES + arith_special = _arith_method_SERIES + comp_special = _comp_method_SERIES + bool_special = _bool_method_SERIES elif issubclass(cls, ABCSparseArray): - arith_method = _arith_method_SPARSE_ARRAY - comp_method = _arith_method_SPARSE_ARRAY - bool_method = _arith_method_SPARSE_ARRAY + arith_flex = None + comp_flex = None + arith_special = _arith_method_SPARSE_ARRAY + comp_special = _arith_method_SPARSE_ARRAY + bool_special = _arith_method_SPARSE_ARRAY elif issubclass(cls, ABCPanel): - arith_method = _arith_method_PANEL - comp_method = _comp_method_PANEL - bool_method = _arith_method_PANEL + arith_flex = _flex_method_PANEL + comp_flex = _comp_method_PANEL + arith_special = _arith_method_PANEL + comp_special = _comp_method_PANEL + bool_special = _arith_method_PANEL elif issubclass(cls, ABCDataFrame): # Same for DataFrame and SparseDataFrame - arith_method = _arith_method_FRAME - comp_method = _comp_method_FRAME - bool_method = _arith_method_FRAME - elif issubclass(cls, ABCSeries): - arith_method = _arith_method_SERIES - comp_method = _comp_method_SERIES - bool_method = _bool_method_SERIES - return arith_method, comp_method, bool_method + arith_flex = _arith_method_FRAME + comp_flex = _flex_comp_method_FRAME + arith_special = _arith_method_FRAME + comp_special = _comp_method_FRAME + bool_special = _arith_method_FRAME + return arith_flex, comp_flex, arith_special, comp_special, bool_special def _create_methods(cls, arith_method, comp_method, bool_method, @@ -731,7 +711,7 @@ def add_special_arithmetic_methods(cls): cls : class special methods will be defined and pinned to this class """ - arith_method, comp_method, bool_method = _get_special_wrappers(cls) + arith_method, comp_method, bool_method = _get_method_wrappers(cls)[2:] new_methods = _create_methods(cls, arith_method, comp_method, bool_method, special=True) # inplace operators (I feel like these should get passed an `inplace=True` @@ -783,7 +763,7 @@ def add_flex_arithmetic_methods(cls): cls : class flex methods will be defined and pinned to this class """ - flex_arith_method, flex_comp_method = _get_flex_wrappers(cls) + flex_arith_method, flex_comp_method = _get_method_wrappers(cls)[:2] new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, bool_method=None, special=False) From 978a5197dcea5d05c0304c8ea502e2cc08528112 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 24 Feb 2018 11:19:40 -0800 Subject: [PATCH 3/3] underscores --- pandas/core/ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 9ec396decdc7f..7bdbac66b4f31 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -855,7 +855,7 @@ def add_special_arithmetic_methods(cls): cls : class special methods will be defined and pinned to this class """ - arith_method, comp_method, bool_method = _get_method_wrappers(cls)[2:] + _, _, arith_method, comp_method, bool_method = _get_method_wrappers(cls) new_methods = _create_methods(cls, arith_method, comp_method, bool_method, special=True) # inplace operators (I feel like these should get passed an `inplace=True` @@ -907,7 +907,7 @@ def add_flex_arithmetic_methods(cls): cls : class flex methods will be defined and pinned to this class """ - flex_arith_method, flex_comp_method = _get_method_wrappers(cls)[:2] + flex_arith_method, flex_comp_method, _, _, _ = _get_method_wrappers(cls) new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, bool_method=None, special=False)