Skip to content
14 changes: 5 additions & 9 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@
RangeIndex,
Series,
)
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin


__all__ = ["Index"]
Expand Down Expand Up @@ -305,7 +304,7 @@ def _outer_indexer(

_typ = "index"
_data: Union[ExtensionArray, np.ndarray]
_id: Optional[_Identity] = None
_id: Optional[object] = None
_name: Hashable = None
# MultiIndex.levels previously allowed setting the index name. We
# don't allow this anymore, and raise if it happens rather than
Expand Down Expand Up @@ -711,7 +710,7 @@ def _reset_identity(self) -> None:
"""
Initializes or resets ``_id`` attribute with new object.
"""
self._id = _Identity(object())
self._id = object()

@final
def _cleanup(self) -> None:
Expand Down Expand Up @@ -1717,7 +1716,7 @@ def sortlevel(self, level=None, ascending=True, sort_remaining=None):

return self.sort_values(return_indexer=True, ascending=ascending)

def _get_level_values(self, level):
def _get_level_values(self, level) -> Index:
"""
Return an Index of values for requested level.

Expand Down Expand Up @@ -2977,11 +2976,8 @@ def _union(self, other: Index, sort):
return result

@final
def _wrap_setop_result(self, other, result):
if needs_i8_conversion(self.dtype) and isinstance(result, np.ndarray):
self = cast("DatetimeIndexOpsMixin", self)
result = type(self._data)._simple_new(result, dtype=self.dtype)
elif is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
def _wrap_setop_result(self, other: Index, result) -> Index:
if is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
result = Categorical(result, dtype=self.dtype)

name = get_op_result_name(self, other)
Expand Down
13 changes: 1 addition & 12 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ class CategoricalIndex(NDArrayBackedExtensionIndex, accessor.PandasDelegate):
"""

_typ = "categoricalindex"
_data_cls = Categorical

@property
def _can_hold_strings(self):
Expand Down Expand Up @@ -225,18 +226,6 @@ def __new__(

return cls._simple_new(data, name=name)

@classmethod
def _simple_new(cls, values: Categorical, name: Optional[Hashable] = None):
    """
    Build an instance of ``cls`` directly around ``values``.

    The instance is allocated with ``object.__new__`` and its attributes
    are assigned by hand.

    Parameters
    ----------
    values : Categorical
        Stored unchanged on the new instance as ``_data``.
    name : Hashable, optional
        Stored on the new instance as ``_name``.

    Returns
    -------
    instance of ``cls``

    Raises
    ------
    AssertionError
        If ``values`` is not a ``Categorical``.
    """
    assert isinstance(values, Categorical), type(values)

    new_index = object.__new__(cls)
    new_index._data = values
    new_index._name = name
    # every new instance starts with an empty cache dict
    new_index._cache = {}

    new_index._reset_identity()
    return new_index

# --------------------------------------------------------------------

@doc(Index._shallow_copy)
Expand Down
29 changes: 2 additions & 27 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
from typing import (
TYPE_CHECKING,
Any,
Hashable,
List,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
Expand Down Expand Up @@ -44,7 +42,6 @@
is_integer,
is_list_like,
is_period_dtype,
is_scalar,
)
from pandas.core.dtypes.concat import concat_compat

Expand Down Expand Up @@ -119,7 +116,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):

_can_hold_strings = False
_data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
_data_cls: Union[Type[DatetimeArray], Type[TimedeltaArray], Type[PeriodArray]]
freq: Optional[BaseOffset]
freqstr: Optional[str]
_resolution_obj: Resolution
Expand All @@ -132,25 +128,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
)
_hasnans = hasnans # for index / array -agnostic code

@classmethod
def _simple_new(
    cls,
    values: Union[DatetimeArray, TimedeltaArray, PeriodArray],
    name: Optional[Hashable] = None,
):
    """
    Build an instance of ``cls`` directly around ``values``.

    The instance is allocated with ``object.__new__`` and its attributes
    are assigned by hand.

    Parameters
    ----------
    values : DatetimeArray, TimedeltaArray or PeriodArray
        Must be an instance of ``cls._data_cls``; stored unchanged as
        ``_data``.
    name : Hashable, optional
        Stored on the new instance as ``_name``.

    Returns
    -------
    instance of ``cls``

    Raises
    ------
    AssertionError
        If ``values`` is not an instance of ``cls._data_cls``.
    """
    assert isinstance(values, cls._data_cls), type(values)

    new_index = object.__new__(cls)
    new_index._data = values
    new_index._name = name
    new_index._cache = {}

    # For groupby perf. See note in indexes/base about _index_data
    new_index._index_data = values._ndarray

    new_index._reset_identity()
    return new_index

@property
def _is_all_dates(self) -> bool:
    """
    Whether the index holds only dates; unconditionally True here.
    """
    return True
Expand Down Expand Up @@ -219,12 +196,10 @@ def equals(self, other: Any) -> bool:
def __contains__(self, key: Any) -> bool:
hash(key)
try:
res = self.get_loc(key)
self.get_loc(key)
except (KeyError, TypeError, ValueError):
return False
return bool(
is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is some crazy condition; good to remove it.

)
return True

@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
Expand Down
36 changes: 35 additions & 1 deletion pandas/core/indexes/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
Shared methods for Index subclasses backed by ExtensionArray.
"""
from typing import (
Hashable,
List,
Type,
TypeVar,
Union,
)
Expand Down Expand Up @@ -30,7 +32,13 @@
ABCSeries,
)

from pandas.core.arrays import IntervalArray
from pandas.core.arrays import (
Categorical,
DatetimeArray,
IntervalArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.indexers import deprecate_ndim_indexing
from pandas.core.indexes.base import Index
Expand Down Expand Up @@ -352,6 +360,32 @@ class NDArrayBackedExtensionIndex(ExtensionIndex):

_data: NDArrayBackedExtensionArray

_data_cls: Union[
Type[Categorical],
Type[DatetimeArray],
Type[TimedeltaArray],
Type[PeriodArray],
]

@classmethod
def _simple_new(
    cls,
    values: NDArrayBackedExtensionArray,
    name: Hashable = None,
):
    """
    Build an instance of ``cls`` directly around ``values``.

    The instance is allocated with ``object.__new__`` and its attributes
    are assigned by hand.

    Parameters
    ----------
    values : NDArrayBackedExtensionArray
        Must be an instance of ``cls._data_cls``; stored unchanged as
        ``_data``.
    name : Hashable, default None
        Stored on the new instance as ``_name``.

    Returns
    -------
    instance of ``cls``

    Raises
    ------
    AssertionError
        If ``values`` is not an instance of ``cls._data_cls``.
    """
    assert isinstance(values, cls._data_cls), type(values)

    obj = object.__new__(cls)
    obj._data = values
    obj._name = name
    obj._cache = {}

    # For groupby perf. See note in indexes/base about _index_data
    obj._index_data = values._ndarray

    obj._reset_identity()
    return obj

def _get_engine_target(self) -> np.ndarray:
    """
    Return the ``_ndarray`` attribute of the backing ``_data`` array.
    """
    return self._data._ndarray

Expand Down
38 changes: 21 additions & 17 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
TYPE_CHECKING,
Any,
Callable,
Collection,
Hashable,
Iterable,
List,
Expand Down Expand Up @@ -98,6 +99,7 @@
if TYPE_CHECKING:
from pandas import (
CategoricalIndex,
DataFrame,
Series,
)

Expand Down Expand Up @@ -323,7 +325,7 @@ def __new__(
if len(levels) == 0:
raise ValueError("Must pass non-zero number of levels/codes")

result = object.__new__(MultiIndex)
result = object.__new__(cls)
result._cache = {}

# we've already validated levels and codes, so shortcut here
Expand Down Expand Up @@ -503,7 +505,7 @@ def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex
@names_compat
def from_tuples(
cls,
tuples,
tuples: Iterable[Tuple[Hashable, ...]],
sortorder: Optional[int] = None,
names: Optional[Sequence[Hashable]] = None,
) -> MultiIndex:
Expand Down Expand Up @@ -546,6 +548,7 @@ def from_tuples(
raise TypeError("Input must be a list / sequence of tuple-likes.")
elif is_iterator(tuples):
tuples = list(tuples)
tuples = cast(Collection[Tuple[Hashable, ...]], tuples)

arrays: List[Sequence[Hashable]]
if len(tuples) == 0:
Expand All @@ -560,7 +563,8 @@ def from_tuples(
elif isinstance(tuples, list):
arrays = list(lib.to_object_array_tuples(tuples).T)
else:
arrays = zip(*tuples)
arrs = zip(*tuples)
arrays = cast(List[Sequence[Hashable]], arrs)

return cls.from_arrays(arrays, sortorder=sortorder, names=names)

Expand Down Expand Up @@ -626,7 +630,7 @@ def from_product(
return cls(levels, codes, sortorder=sortorder, names=names)

@classmethod
def from_frame(cls, df, sortorder=None, names=None) -> MultiIndex:
def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
"""
Make a MultiIndex from a DataFrame.

Expand Down Expand Up @@ -762,7 +766,7 @@ def __len__(self) -> int:
# Levels Methods

@cache_readonly
def levels(self):
def levels(self) -> FrozenList:
# Use cache_readonly to ensure that self.get_locs doesn't repeatedly
# create new IndexEngine
# https://github.com/pandas-dev/pandas/issues/31648
Expand Down Expand Up @@ -1293,7 +1297,7 @@ def _formatter_func(self, tup):
formatter_funcs = [level._formatter_func for level in self.levels]
return tuple(func(val) for func, val in zip(formatter_funcs, tup))

def _format_data(self, name=None):
def _format_data(self, name=None) -> str:
"""
Return the formatted data as a unicode string
"""
Expand Down Expand Up @@ -1419,10 +1423,10 @@ def format(
# --------------------------------------------------------------------
# Names Methods

def _get_names(self):
def _get_names(self) -> FrozenList:
return FrozenList(self._names)

def _set_names(self, names, level=None, validate=True):
def _set_names(self, names, level=None, validate: bool = True):
"""
Set new names on index. Each name has to be a hashable type.

Expand All @@ -1433,7 +1437,7 @@ def _set_names(self, names, level=None, validate=True):
level : int, level name, or sequence of int/level names (default None)
If the index is a MultiIndex (hierarchical), level(s) to set (None
for all levels). Otherwise level must be None
validate : boolean, default True
validate : bool, default True
validate that the names match level lengths

Raises
Expand Down Expand Up @@ -1712,7 +1716,7 @@ def unique(self, level=None):
level = self._get_level_number(level)
return self._get_level_values(level=level, unique=True)

def to_frame(self, index=True, name=None):
def to_frame(self, index=True, name=None) -> DataFrame:
"""
Create a DataFrame with the levels of the MultiIndex as columns.

Expand Down Expand Up @@ -2109,8 +2113,8 @@ def take(

na_value = -1

taken = [lab.take(indices) for lab in self.codes]
if allow_fill:
taken = [lab.take(indices) for lab in self.codes]
mask = indices == -1
if mask.any():
masked = []
Expand All @@ -2119,8 +2123,6 @@ def take(
label_values[mask] = na_value
masked.append(np.asarray(label_values))
taken = masked
else:
taken = [lab.take(indices) for lab in self.codes]

return MultiIndex(
levels=self.levels, codes=taken, names=self.names, verify_integrity=False
Expand Down Expand Up @@ -2644,7 +2646,9 @@ def _get_partial_string_timestamp_match_key(self, key):

return key

def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None):
def _get_indexer(
self, target: Index, method=None, limit=None, tolerance=None
) -> np.ndarray:

# empty indexer
if not len(target):
Expand Down Expand Up @@ -3521,7 +3525,7 @@ def equals(self, other: object) -> bool:

return True

def equal_levels(self, other) -> bool:
def equal_levels(self, other: MultiIndex) -> bool:
"""
Return True if the levels of both MultiIndex objects are the same

Expand All @@ -3537,7 +3541,7 @@ def equal_levels(self, other) -> bool:
# --------------------------------------------------------------------
# Set Methods

def _union(self, other, sort):
def _union(self, other, sort) -> MultiIndex:
other, result_names = self._convert_can_do_setop(other)

# We could get here with CategoricalIndex other
Expand Down Expand Up @@ -3579,7 +3583,7 @@ def _maybe_match_names(self, other):
names.append(None)
return names

def _intersection(self, other, sort=False):
def _intersection(self, other, sort=False) -> MultiIndex:
other, result_names = self._convert_can_do_setop(other)

lvals = self._values
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ def __contains__(self, key) -> bool:
hash(key)
try:
if is_float(key) and int(key) != key:
# otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
return False
return key in self._engine
except (OverflowError, TypeError, ValueError):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class RangeIndex(Int64Index):

Parameters
----------
start : int (default: 0), or other RangeIndex instance
start : int (default: 0), range, or other RangeIndex instance
If int and "stop" is not given, interpreted as "stop" instead.
stop : int (default: 0)
step : int (default: 1)
Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,6 @@ def test_intercept_builtin_sum():
tm.assert_series_equal(result2, expected)


# @pytest.mark.parametrize("f", [max, min, sum])
# def test_builtins_apply(f):


@pytest.mark.parametrize("f", [max, min, sum])
@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key
def test_builtins_apply(keys, f):
Expand Down