-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: NumericIndex for any numpy int/uint/float dtype #41153
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9739f85
88a7858
7ccb1b7
b45500d
5ef35f5
d8f6c22
1c65a0b
f3e13aa
4e17485
c1e801d
aa0cea7
1f5f922
132ce44
058cd2e
1c7f23f
07a097c
ff6cfb4
fe7b97c
86f3960
2424c0d
a515bba
341fc2f
c2d8884
5a56b1a
a497d57
6557689
5bc4c2c
84bf540
69953b4
b4be77d
bb42e2d
bafa9b3
35b0e71
ed4730b
6a32788
47e208c
d6a03a0
ec003ed
7ddee71
2bb282f
c1633fb
9c7d57b
f6dccc1
3630fc7
bfe6895
8532ddb
186de8e
ead8f57
2a850ea
d04da70
4b8385c
1f52f8b
951c5f7
7c7c0dd
bb72c68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -265,6 +265,9 @@ ctypedef fused join_t: | |
int16_t | ||
int32_t | ||
int64_t | ||
uint8_t | ||
uint16_t | ||
uint32_t | ||
uint64_t | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,7 @@ | |
Int64Index, | ||
IntervalIndex, | ||
MultiIndex, | ||
NumericIndex, | ||
PeriodIndex, | ||
RangeIndex, | ||
TimedeltaIndex, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,6 +80,7 @@ | |
is_interval_dtype, | ||
is_iterator, | ||
is_list_like, | ||
is_numeric_dtype, | ||
is_object_dtype, | ||
is_scalar, | ||
is_signed_integer_dtype, | ||
|
@@ -359,6 +360,11 @@ def _outer_indexer( | |
_can_hold_na: bool = True | ||
_can_hold_strings: bool = True | ||
|
||
# Whether this index is a NumericIndex, but not an Int64Index, Float64Index, | ||
# UInt64Index or RangeIndex. Needed for backwards compat. Remove this attribute and | ||
# associated code in pandas 2.0. | ||
_is_backward_compat_public_numeric_index: bool = False | ||
|
||
_engine_type: type[libindex.IndexEngine] = libindex.ObjectEngine | ||
# whether we support partial string indexing. Overridden | ||
# in DatetimeIndex and PeriodIndex | ||
|
@@ -436,6 +442,12 @@ def __new__( | |
return Index._simple_new(data, name=name) | ||
|
||
# index-like | ||
elif ( | ||
isinstance(data, Index) | ||
and data._is_backward_compat_public_numeric_index | ||
and dtype is None | ||
): | ||
return data._constructor(data, name=name, copy=copy) | ||
elif isinstance(data, (np.ndarray, Index, ABCSeries)): | ||
|
||
if isinstance(data, ABCMultiIndex): | ||
|
@@ -5719,6 +5731,11 @@ def map(self, mapper, na_action=None): | |
# empty | ||
attributes["dtype"] = self.dtype | ||
|
||
if self._is_backward_compat_public_numeric_index and is_numeric_dtype( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why can't this be handled in the Index constructor itself? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's the same thing as the others: The Index constructor returns Int64Index etc. in order to maintain backward compat. When we remove Int64Index etc. in pandas 2.0, this block can be removed, see comment on line 363 regarding this. |
||
new_values.dtype | ||
): | ||
return self._constructor(new_values, **attributes) | ||
|
||
return Index(new_values, **attributes) | ||
|
||
# TODO: De-duplicate with map, xref GH#32349 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
from pandas.core.dtypes.common import ( | ||
is_categorical_dtype, | ||
is_scalar, | ||
pandas_dtype, | ||
) | ||
from pandas.core.dtypes.missing import ( | ||
is_valid_na_for_dtype, | ||
|
@@ -280,6 +281,30 @@ def _is_dtype_compat(self, other) -> Categorical: | |
|
||
return other | ||
|
||
@doc(Index.astype) | ||
def astype(self, dtype: Dtype, copy: bool = True) -> Index: | ||
from pandas.core.api import NumericIndex | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can annotate dtype as Dtype There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we change/deprecate Index.astype(np.int32), also Index(foo, dtype=np.int32)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doing that as a follow-up sounds good |
||
|
||
dtype = pandas_dtype(dtype) | ||
|
||
categories = self.categories | ||
# the super method always returns Int64Index, UInt64Index and Float64Index | ||
# but if the categories are a NumericIndex with dtype float32, we want to | ||
# return an index with the same dtype as self.categories. | ||
if categories._is_backward_compat_public_numeric_index: | ||
assert isinstance(categories, NumericIndex) # mypy complaint fix | ||
try: | ||
categories._validate_dtype(dtype) | ||
except ValueError: | ||
pass | ||
else: | ||
new_values = self._data.astype(dtype, copy=copy) | ||
# pass copy=False because any copying has been done in the | ||
# _data.astype call above | ||
return categories._constructor(new_values, name=self.name, copy=False) | ||
|
||
return super().astype(dtype, copy=copy) | ||
|
||
def equals(self, other: object) -> bool: | ||
""" | ||
Determine if two CategoricalIndex objects contain the same elements. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -97,6 +97,7 @@ class NumericIndex(Index): | |
) | ||
_is_numeric_dtype = True | ||
_can_hold_strings = False | ||
_is_backward_compat_public_numeric_index: bool = True | ||
|
||
@cache_readonly | ||
def _can_hold_na(self) -> bool: | ||
|
@@ -165,7 +166,15 @@ def _ensure_array(cls, data, dtype, copy: bool): | |
dtype = cls._ensure_dtype(dtype) | ||
|
||
if copy or not is_dtype_equal(data.dtype, dtype): | ||
subarr = np.array(data, dtype=dtype, copy=copy) | ||
# TODO: the try/except below is because it's difficult to predict the error | ||
# and/or error message from different combinations of data and dtype. | ||
# Efforts to avoid this try/except welcome. | ||
# See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222 | ||
try: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do you think you need this try/except? e.g. validate_dtype should raise, correct? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are a lot of combinations of data and dtype, and calls such as >>> data = [pd.Timestamp("20130101")]
>>> NumericIndex(data)
>>> NumericIndex(data, dtype="float64")
>>> NumericIndex(np.array(data))
>>> Float64Index(data) may not give the same error or error message. I've tried again now and can't seem to find anything clearer. I'm not so happy about it myself, but it just seems to be very difficult to improve (for me, maybe someone else can find something better). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, ok, can you comment on this (as we may want to try to figure this out in the future) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a "TODO" to this comment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. |
||
subarr = np.array(data, dtype=dtype, copy=copy) | ||
cls._validate_dtype(subarr.dtype) | ||
except (TypeError, ValueError): | ||
raise ValueError(f"data is not compatible with {cls.__name__}") | ||
cls._assert_safe_casting(data, subarr) | ||
else: | ||
subarr = data | ||
|
@@ -189,12 +198,24 @@ def _validate_dtype(cls, dtype: Dtype | None) -> None: | |
) | ||
|
||
@classmethod | ||
def _ensure_dtype( | ||
cls, | ||
dtype: Dtype | None, | ||
) -> np.dtype | None: | ||
"""Ensure int64 dtype for Int64Index, etc. Assumed dtype is validated.""" | ||
return cls._default_dtype | ||
def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: | ||
""" | ||
Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex. | ||
|
||
Assumes dtype has already been validated. | ||
""" | ||
if dtype is None: | ||
return cls._default_dtype | ||
|
||
dtype = pandas_dtype(dtype) | ||
assert isinstance(dtype, np.dtype) | ||
|
||
if cls._is_backward_compat_public_numeric_index: | ||
# dtype for NumericIndex | ||
return dtype | ||
else: | ||
# dtype for Int64Index, UInt64Index etc. Needed for backwards compat. | ||
return cls._default_dtype | ||
|
||
def __contains__(self, key) -> bool: | ||
""" | ||
|
@@ -214,8 +235,8 @@ def __contains__(self, key) -> bool: | |
|
||
@doc(Index.astype) | ||
def astype(self, dtype, copy=True): | ||
dtype = pandas_dtype(dtype) | ||
if is_float_dtype(self.dtype): | ||
dtype = pandas_dtype(dtype) | ||
if needs_i8_conversion(dtype): | ||
raise TypeError( | ||
f"Cannot convert Float64Index to dtype {dtype}; integer " | ||
|
@@ -225,7 +246,16 @@ def astype(self, dtype, copy=True): | |
# TODO(jreback); this can change once we have an EA Index type | ||
# GH 13149 | ||
arr = astype_nansafe(self._values, dtype=dtype) | ||
return Int64Index(arr, name=self.name) | ||
if isinstance(self, Float64Index): | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return Int64Index(arr, name=self.name) | ||
else: | ||
return NumericIndex(arr, name=self.name, dtype=dtype) | ||
elif self._is_backward_compat_public_numeric_index: | ||
# this block is needed so e.g. NumericIndex[int8].astype("int32") returns | ||
# NumericIndex[int32] and not Int64Index with dtype int64. | ||
# When Int64Index etc. are removed from the code base, remove this also. | ||
if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The super method is Index.astype, which returns Int64Index etc. for backward compat. So without the lines above e.g. NumericIndex[int8].astype("int32") would return an Int64Index with dtype int64 instead of a NumericIndex[int32]. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. kk can you leave a comment to this effect |
||
return self._constructor(self, dtype=dtype, copy=copy) | ||
|
||
return super().astype(dtype, copy=copy) | ||
|
||
|
@@ -335,6 +365,8 @@ class IntegerIndex(NumericIndex): | |
This is an abstract class for Int64Index, UInt64Index. | ||
""" | ||
|
||
_is_backward_compat_public_numeric_index: bool = False | ||
|
||
@property | ||
def asi8(self) -> np.ndarray: | ||
# do not cache or you'll create a memory leak | ||
|
@@ -399,3 +431,4 @@ class Float64Index(NumericIndex): | |
_engine_type = libindex.Float64Engine | ||
_default_dtype = np.dtype(np.float64) | ||
_dtype_validation_metadata = (is_float_dtype, "float") | ||
_is_backward_compat_public_numeric_index: bool = False |
Uh oh!
There was an error while loading. Please reload this page.