From 8a63ad210c85f3372d2336414f604dbe842b0a1d Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Nov 2020 16:01:58 -0800 Subject: [PATCH 1/5] REF: Share code between NumericIndex subclasses --- pandas/core/indexes/base.py | 13 +++++- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/numeric.py | 72 +++++++---------------------- pandas/core/indexes/period.py | 2 +- 4 files changed, 30 insertions(+), 59 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7b72196c3c2f3..7827cd9bfe332 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5145,7 +5145,7 @@ def isin(self, values, level=None): """ if level is not None: self._validate_index_level(level) - return algos.isin(self, values) + return algos.isin(self._values, values) def _get_string_slice(self, key: str_t): # this is for partial string indexing, @@ -5523,6 +5523,17 @@ def _cmp_method(self, other, op): """ Wrapper used to dispatch comparison operations. """ + if self.is_(other): + # fastpath + if op in {operator.eq, operator.le, operator.ge}: + arr = np.ones(len(self), dtype=bool) + if self._can_hold_na and not isinstance(self, ABCMultiIndex): + # TODO: should set MultiIndex._can_hold_na = False? + arr[self.isna()] = False + return arr + elif op in {operator.ne, operator.lt, operator.gt}: + return np.zeros(len(self), dtype=bool) + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): if len(self) != len(other): raise ValueError("Lengths must match to compare") diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ce5d62aec4f9f..d7a6849da924c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -681,7 +681,7 @@ def _convert_arr_indexer(self, keyarr): return com.asarray_tuplesafe(keyarr) -class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): +class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin): """ Mixin class for methods shared by DatetimeIndex and TimedeltaIndex, but not PeriodIndex diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 24aaf5885fe0e..8a1bcc7146616 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -1,4 +1,3 @@ -import operator from typing import Any import warnings @@ -6,7 +5,7 @@ from pandas._libs import index as libindex, lib from pandas._typing import Dtype, Label -from pandas.util._decorators import cache_readonly, doc +from pandas.util._decorators import doc from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( @@ -27,7 +26,6 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna -from pandas.core import algorithms import pandas.core.common as com from pandas.core.indexes.base import Index, maybe_extract_name @@ -189,18 +187,6 @@ def _union(self, other, sort): else: return super()._union(other, sort) - def _cmp_method(self, other, op): - if self.is_(other): # fastpath - if op in {operator.eq, operator.le, operator.ge}: - arr = np.ones(len(self), dtype=bool) - if self._can_hold_na: - arr[self.isna()] = False - return arr - elif op in {operator.ne, operator.lt, operator.gt}: - return np.zeros(len(self), dtype=bool) - - return super()._cmp_method(other, op) - _num_index_shared_docs[ "class_descr" @@ -244,6 +230,20 @@ class IntegerIndex(NumericIndex): """ _default_dtype: np.dtype + _can_hold_na = False + + @classmethod + def _assert_safe_casting(cls, data, subarr): + """ + Ensure incoming data can be represented with matching signed-ness. + """ + if data.dtype.kind != cls._default_dtype.kind: + if not np.array_equal(data, subarr): + raise TypeError("Unsafe NumPy casting, you must explicitly cast") + + def _can_union_without_object_cast(self, other) -> bool: + # See GH#26778, further casting may occur in NumericIndex._union + return other.dtype == "f8" or other.dtype == self.dtype def __contains__(self, key) -> bool: """ @@ -279,23 +279,9 @@ class Int64Index(IntegerIndex): __doc__ = _num_index_shared_docs["class_descr"] % _int64_descr_args _typ = "int64index" - _can_hold_na = False _engine_type = libindex.Int64Engine _default_dtype = np.dtype(np.int64) - @classmethod - def _assert_safe_casting(cls, data, subarr): - """ - Ensure incoming data can be represented as ints. - """ - if not issubclass(data.dtype.type, np.signedinteger): - if not np.array_equal(data, subarr): - raise TypeError("Unsafe NumPy casting, you must explicitly cast") - - def _can_union_without_object_cast(self, other) -> bool: - # See GH#26778, further casting may occur in NumericIndex._union - return other.dtype == "f8" or other.dtype == self.dtype - _uint64_descr_args = dict( klass="UInt64Index", ltype="unsigned integer", dtype="uint64", extra="" @@ -306,7 +292,6 @@ class UInt64Index(IntegerIndex): __doc__ = _num_index_shared_docs["class_descr"] % _uint64_descr_args _typ = "uint64index" - _can_hold_na = False _engine_type = libindex.UInt64Engine _default_dtype = np.dtype(np.uint64) @@ -325,21 +310,6 @@ def _convert_arr_indexer(self, keyarr): return com.asarray_tuplesafe(keyarr, dtype=dtype) - # ---------------------------------------------------------------- - - @classmethod - def _assert_safe_casting(cls, data, subarr): - """ - Ensure incoming data can be represented as uints. - """ - if not issubclass(data.dtype.type, np.unsignedinteger): - if not np.array_equal(data, subarr): - raise TypeError("Unsafe NumPy casting, you must explicitly cast") - - def _can_union_without_object_cast(self, other) -> bool: - # See GH#26778, further casting may occur in NumericIndex._union - return other.dtype == "f8" or other.dtype == self.dtype - _float64_descr_args = dict( klass="Float64Index", dtype="float64", ltype="float", extra="" @@ -351,7 +321,7 @@ class Float64Index(NumericIndex): _typ = "float64index" _engine_type = libindex.Float64Engine - _default_dtype = np.float64 + _default_dtype = np.dtype(np.float64) @property def inferred_type(self) -> str: @@ -430,16 +400,6 @@ def __contains__(self, other: Any) -> bool: return is_float(other) and np.isnan(other) and self.hasnans - @cache_readonly - def is_unique(self) -> bool: - return super().is_unique and self._nan_idxs.size < 2 - - @doc(Index.isin) - def isin(self, values, level=None): - if level is not None: - self._validate_index_level(level) - return algorithms.isin(np.array(self), values) - def _can_union_without_object_cast(self, other) -> bool: # See GH#26778, further casting may occur in NumericIndex._union return is_numeric_dtype(other.dtype) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e25119162368f..9851ed103f639 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -65,7 +65,7 @@ def _new_PeriodIndex(cls, **d): wrap=True, ) @inherit_names(["is_leap_year", "_format_native_types"], PeriodArray) -class PeriodIndex(DatetimeIndexOpsMixin, Int64Index): +class PeriodIndex(DatetimeIndexOpsMixin): """ Immutable ndarray holding ordinal values indicating regular periods in time. From db0a9de86593ec01839fbd1dcc336cb3cae88aff Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Nov 2020 16:08:22 -0800 Subject: [PATCH 2/5] restore --- pandas/core/indexes/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 9851ed103f639..e25119162368f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -65,7 +65,7 @@ def _new_PeriodIndex(cls, **d): wrap=True, ) @inherit_names(["is_leap_year", "_format_native_types"], PeriodArray) -class PeriodIndex(DatetimeIndexOpsMixin): +class PeriodIndex(DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in time. From 084a5b2c556f5331766d2fbd664f6b7d54749bba Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Nov 2020 19:41:06 -0800 Subject: [PATCH 3/5] mypy fixup --- pandas/core/indexes/datetimelike.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 87efa66084408..28e6117b5aa27 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -853,11 +853,7 @@ def _union(self, other, sort): i8self = Int64Index._simple_new(self.asi8) i8other = Int64Index._simple_new(other.asi8) i8result = i8self._union(i8other, sort=sort) - # pandas\core\indexes\datetimelike.py:887: error: Unexpected - # keyword argument "freq" for "DatetimeTimedeltaMixin" [call-arg] - result = type(self)( - i8result, dtype=self.dtype, freq="infer" # type: ignore[call-arg] - ) + result = type(self)(i8result, dtype=self.dtype, freq="infer") return result # -------------------------------------------------------------------- From 730c816ec072caf8942afe65ab0bd108dca8d2e3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Nov 2020 21:09:14 -0800 Subject: [PATCH 4/5] fix pytables tests --- pandas/core/computation/pytables.py | 6 ++++++ pandas/core/indexes/datetimelike.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 6ec637a8b4845..302e2119775b2 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -430,6 +430,12 @@ def visit_Subscript(self, node, **kwargs): except AttributeError: pass + if isinstance(slobj, Term) and slobj.type is int: + # Avoid IndexError: only integers, slices (`:`), ellipsis (`...`), + # numpy.newaxis (`None`) and integer or boolean arrays are valid + # indices + slobj = slobj.value + try: return self.const_type(value[slobj], self.env) except TypeError as err: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 28e6117b5aa27..9e487051f37f7 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -628,7 +628,7 @@ def _convert_arr_indexer(self, keyarr): return com.asarray_tuplesafe(keyarr) -class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin): +class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): """ Mixin class for methods shared by DatetimeIndex and TimedeltaIndex, but not PeriodIndex From ffb01017228561306135be45e31a806f2bf3c1c8 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 24 Nov 2020 07:39:52 -0800 Subject: [PATCH 5/5] mypy fixup --- pandas/core/indexes/datetimelike.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9e487051f37f7..57f6a8ea0cca5 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -157,16 +157,8 @@ def equals(self, other: object) -> bool: elif other.dtype.kind in ["f", "i", "u", "c"]: return False elif not isinstance(other, type(self)): - inferrable = [ - "timedelta", - "timedelta64", - "datetime", - "datetime64", - "date", - "period", - ] - should_try = False + inferrable = self._data._infer_matches if other.dtype == object: should_try = other.inferred_type in inferrable elif is_categorical_dtype(other.dtype): @@ -648,6 +640,9 @@ def _has_complex_internals(self) -> bool: # used to avoid libreduction code paths, which raise or require conversion return False + def is_type_compatible(self, kind: str) -> bool: + return kind in self._data._infer_matches + # -------------------------------------------------------------------- # Set Operation Methods @@ -853,7 +848,11 @@ def _union(self, other, sort): i8self = Int64Index._simple_new(self.asi8) i8other = Int64Index._simple_new(other.asi8) i8result = i8self._union(i8other, sort=sort) - result = type(self)(i8result, dtype=self.dtype, freq="infer") + # pandas\core\indexes\datetimelike.py:887: error: Unexpected + # keyword argument "freq" for "DatetimeTimedeltaMixin" [call-arg] + result = type(self)( + i8result, dtype=self.dtype, freq="infer" # type: ignore[call-arg] + ) return result # --------------------------------------------------------------------