-
-
Notifications
You must be signed in to change notification settings - Fork 19k
TYP: ExtensionArray delete() and searchsorted() #41513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
1bf4b5d
16bcc1a
c0d426f
cce79a9
0a9cd7a
f37d215
22b147a
065454f
cde198e
b986eba
2d85ddb
493ab41
f8f8b93
704abef
f8d4267
9c391a0
4e60e04
6a888ef
2370384
42802ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,6 +91,20 @@ | |
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] | ||
Scalar = Union[PythonScalar, PandasScalar] | ||
|
||
# numpy compatible types | ||
NumpyValueArrayLike = Union[PythonScalar, Sequence[PythonScalar]] | ||
NumpySorter = Union[ | ||
int, | ||
np.integer, | ||
bool, | ||
np.ndarray, | ||
Sequence[int], | ||
Sequence[np.integer], | ||
Sequence[bool], | ||
None, | ||
] | ||
|
||
|
||
|
||
# timestamp and timedelta convertible types | ||
|
||
TimestampConvertibleTypes = Union[ | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -30,6 +30,8 @@ | |||||
ArrayLike, | ||||||
DtypeObj, | ||||||
FrameOrSeriesUnion, | ||||||
NumpySorter, | ||||||
NumpyValueArrayLike, | ||||||
Scalar, | ||||||
) | ||||||
from pandas.util._decorators import doc | ||||||
|
@@ -1507,7 +1509,12 @@ def take( | |||||
# ------------ # | ||||||
|
||||||
|
||||||
def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: | ||||||
def searchsorted( | ||||||
arr: ArrayLike, | ||||||
value: NumpyValueArrayLike, | ||||||
side: Literal["left", "right"] = "left", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is npt._SortSide? we probably don't want to use private aliases in the codebase, but maybe OK to do so in pandas._typing. thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't use |
||||||
sorter: NumpySorter = None, | ||||||
) -> np.ndarray: | ||||||
|
) -> np.ndarray: | |
) -> np.ndarray | int: |
although if we return the value from numpy unchanged, the return type is np.intp
?
docstring also needs updating?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is np.intp
in next commit. Will fix docstring.
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
from functools import wraps | ||
from typing import ( | ||
TYPE_CHECKING, | ||
Any, | ||
Sequence, | ||
TypeVar, | ||
|
@@ -13,8 +14,11 @@ | |
from pandas._libs import lib | ||
from pandas._libs.arrays import NDArrayBacked | ||
from pandas._typing import ( | ||
ArrayLike, | ||
F, | ||
NumpySorter, | ||
PositionalIndexer2D, | ||
PythonScalar, | ||
Shape, | ||
type_t, | ||
) | ||
|
@@ -45,6 +49,9 @@ | |
"NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray" | ||
) | ||
|
||
if TYPE_CHECKING: | ||
from typing import Literal | ||
|
||
|
||
def ravel_compat(meth: F) -> F: | ||
""" | ||
|
@@ -176,12 +183,26 @@ def _concat_same_type( | |
return to_concat[0]._from_backing_data(new_values) # type: ignore[arg-type] | ||
|
||
@doc(ExtensionArray.searchsorted) | ||
def searchsorted(self, value, side="left", sorter=None): | ||
value = self._validate_searchsorted_value(value) | ||
return self._ndarray.searchsorted(value, side=side, sorter=sorter) | ||
|
||
def _validate_searchsorted_value(self, value): | ||
return value | ||
def searchsorted( | ||
self, | ||
value: ArrayLike | object, | ||
|
||
side: Literal["left", "right"] = "left", | ||
sorter: NumpySorter = None, | ||
) -> np.ndarray: | ||
|
||
npvalue = self._validate_searchsorted_value(value) | ||
return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter) | ||
|
||
def _validate_searchsorted_value( | ||
self, value: ArrayLike | object | ||
|
||
) -> np.ndarray | PythonScalar: | ||
if isinstance(value, ExtensionArray): | ||
return value.to_numpy() | ||
elif isinstance(value, np.ndarray): | ||
return value | ||
|
||
else: | ||
# While we also can support DatetimeLikeScalar, numpy.searchsorted | ||
# does not accept that as an argument | ||
|
||
return cast(PythonScalar, value) | ||
|
||
|
||
@doc(ExtensionArray.shift) | ||
def shift(self, periods=1, fill_value=None, axis=0): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
ArrayLike, | ||
Dtype, | ||
FillnaOptions, | ||
NumpySorter, | ||
PositionalIndexer, | ||
Shape, | ||
) | ||
|
@@ -812,7 +813,12 @@ def unique(self: ExtensionArrayT) -> ExtensionArrayT: | |
uniques = unique(self.astype(object)) | ||
return self._from_sequence(uniques, dtype=self.dtype) | ||
|
||
def searchsorted(self, value, side="left", sorter=None): | ||
def searchsorted( | ||
self, | ||
value: ArrayLike | object, | ||
side: Literal["left", "right"] = "left", | ||
sorter: NumpySorter = None, | ||
) -> np.ndarray: | ||
|
||
""" | ||
Find indices where elements should be inserted to maintain order. | ||
|
||
|
@@ -833,7 +839,7 @@ def searchsorted(self, value, side="left", sorter=None): | |
|
||
Parameters | ||
---------- | ||
value : array_like | ||
value : array_like or a single value | ||
Values to insert into `self`. | ||
side : {'left', 'right'}, optional | ||
If 'left', the index of the first suitable location found is given. | ||
|
@@ -1307,7 +1313,7 @@ def __hash__(self) -> int: | |
# ------------------------------------------------------------------------ | ||
# Non-Optimized Default Methods | ||
|
||
def delete(self: ExtensionArrayT, loc) -> ExtensionArrayT: | ||
def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is in the 'public' base EA class. it was added in #39405. @jbrockmendel is this method public? Is it part of the EA interface? does it need a docstring? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch ill make a note to add a docstring |
||
indexer = np.delete(np.arange(len(self)), loc) | ||
return self.take(indexer) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
Any, | ||
Callable, | ||
Sequence, | ||
cast, | ||
) | ||
|
||
import numpy as np | ||
|
@@ -39,8 +40,10 @@ | |
) | ||
from pandas._typing import ( | ||
AnyArrayLike, | ||
ArrayLike, | ||
Dtype, | ||
NpDtype, | ||
NumpySorter, | ||
) | ||
from pandas.util._decorators import ( | ||
cache_readonly, | ||
|
@@ -74,6 +77,8 @@ | |
import pandas.core.common as com | ||
|
||
if TYPE_CHECKING: | ||
from typing import Literal | ||
|
||
from pandas.core.arrays import DatetimeArray | ||
|
||
_shared_doc_kwargs = { | ||
|
@@ -642,12 +647,19 @@ def astype(self, dtype, copy: bool = True): | |
return self.asfreq(dtype.freq) | ||
return super().astype(dtype, copy=copy) | ||
|
||
def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: | ||
value = self._validate_searchsorted_value(value).view("M8[ns]") | ||
def searchsorted( | ||
self, | ||
value: ArrayLike | object, | ||
|
||
side: Literal["left", "right"] = "left", | ||
sorter: NumpySorter = None, | ||
) -> np.ndarray: | ||
npvalue = cast( | ||
np.ndarray, self._validate_searchsorted_value(value).view("M8[ns]") | ||
) | ||
|
||
|
||
# Cast to M8 to get datetime-like NaT placement | ||
m8arr = self._ndarray.view("M8[ns]") | ||
return m8arr.searchsorted(value, side=side, sorter=sorter) | ||
return m8arr.searchsorted(npvalue, side=side, sorter=sorter) | ||
|
||
def fillna(self, value=None, method=None, limit=None) -> PeriodArray: | ||
if method is not None: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,8 @@ | |
DtypeObj, | ||
FrameOrSeries, | ||
IndexLabel, | ||
NumpySorter, | ||
NumpyValueArrayLike, | ||
Shape, | ||
final, | ||
) | ||
|
@@ -1226,7 +1228,12 @@ def factorize(self, sort: bool = False, na_sentinel: int | None = -1): | |
""" | ||
|
||
@doc(_shared_docs["searchsorted"], klass="Index") | ||
def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: | ||
def searchsorted( | ||
self, | ||
value: NumpyValueArrayLike, | ||
side: Literal["left", "right"] = "left", | ||
sorter: NumpySorter = None, | ||
) -> np.ndarray: | ||
|
||
return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) | ||
|
||
def drop_duplicates(self, keep="first"): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The typing definition of
Sequence
does not include numpy arrays, EAs, Series, Index etc. #28770from numpy types
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Next commit includes that change.