-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
REF: separate indexer utilities from indexing.py #27229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bfd1b38
746a3c0
0233d42
35d3055
c3eb149
b1ce472
7ee551c
b2a101a
51ceb24
7efe2d1
82561aa
a827ac2
3322f4e
5b6335b
74c3d36
f8b5264
42bdf94
0fafcc4
8b8442c
d478037
741c4d2
39fa014
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
""" | ||
Low-dependency indexing utilities. | ||
""" | ||
import numpy as np | ||
|
||
from pandas.core.dtypes.common import is_list_like | ||
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries | ||
|
||
# ----------------------------------------------------------- | ||
# Indexer Identification | ||
|
||
|
||
def is_list_like_indexer(key) -> bool:
    """
    Report whether ``key`` is a list-like indexer other than a NamedTuple.

    Parameters
    ----------
    key : object
        Candidate indexer.

    Returns
    -------
    bool
        True when ``key`` is list-like and is not an instance of a tuple
        subclass; False otherwise.
    """
    if not is_list_like(key):
        return False

    # A plain tuple counts as list-like here, but tuple *subclasses*
    # (NamedTuples) are excluded because they can be indexers themselves.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass
|
||
|
||
def is_scalar_indexer(indexer, arr_value) -> bool:
    """
    Return True if ``indexer`` addresses exactly one element of ``arr_value``.

    Parameters
    ----------
    indexer : object
        A single indexer or a tuple of per-dimension indexers.
    arr_value : object with an ``ndim`` attribute
        The array being indexed; only its ``ndim`` is consulted.

    Returns
    -------
    bool
        True when there is one integer indexer for every dimension of
        ``arr_value``; False otherwise (slices, arrays, lists, booleans or
        a mismatched number of indexers).
    """
    # A bare (non-tuple) indexer is treated as a 1-tuple, so a single
    # integer is a scalar indexer only for a 1-dimensional array.
    if not isinstance(indexer, tuple):
        indexer = (indexer,)

    if len(indexer) != arr_value.ndim:
        return False

    # bool is a subclass of int but does not select a single position,
    # so it is explicitly excluded.
    return all(
        isinstance(idx, (int, np.integer)) and not isinstance(idx, bool)
        for idx in indexer
    )
|
||
|
||
def is_empty_indexer(indexer, arr_value) -> bool:
    """
    Return True if ``indexer`` is an empty indexer.

    Parameters
    ----------
    indexer : object
        A single indexer or a tuple of indexers.
    arr_value : object with an ``ndim`` attribute
        The array being indexed.

    Returns
    -------
    bool
        True when ``indexer`` is a zero-length list-like, or when
        ``arr_value`` is 1-dimensional and any component of ``indexer``
        is a zero-length ndarray; False otherwise.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    if arr_value.ndim != 1:
        return False

    # Normalise a bare indexer to a 1-tuple so both cases share one scan.
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False
|
||
|
||
# ----------------------------------------------------------- | ||
# Indexer Validation | ||
|
||
|
||
def check_setitem_lengths(indexer, value, values) -> None:
    """
    Validate that ``value`` and ``indexer`` have compatible lengths.

    A special case is allowed when the indexer is a boolean ndarray and
    the number of True entries equals the length of ``value``; no
    exception is raised in that case.

    Parameters
    ----------
    indexer : sequence
        The key for the setitem. Only ndarray, list and slice indexers
        are checked; any other type passes through unchecked.
    value : array-like
        The value for the setitem.
    values : array-like
        The values being set into.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice and its length does
        not match the length of a list-like ``value``.
    """
    if isinstance(indexer, slice):
        # Slices are only checked against list-like values being set into
        # a non-empty target.
        if not (is_list_like(value) and len(values)):
            return
        if len(value) != length_of_indexer(indexer, values):
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return

    if not isinstance(indexer, (np.ndarray, list)):
        return

    if not is_list_like(value) or len(indexer) == len(value):
        return

    # boolean with truth values == len of the value is ok too
    is_bool_mask = isinstance(indexer, np.ndarray) and indexer.dtype == np.bool_
    if is_bool_mask and len(indexer[indexer]) == len(value):
        return

    raise ValueError(
        "cannot set using a list-like indexer "
        "with a different length than the value"
    )
|
||
|
||
def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Bounds-check an array of positional indices.

    -1 is permitted as a marker for missing values; anything smaller is
    rejected.

    Parameters
    ----------
    indices : ndarray
        The indices to check.
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If any index is less than -1.
    IndexError
        If any index is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    # Nothing to validate for an empty indexer (min/max would fail anyway).
    if len(indices) == 0:
        return

    lowest = indices.min()
    if lowest < -1:
        raise ValueError(
            "'indices' contains values less than allowed ({} < {})".format(
                lowest, -1
            )
        )

    highest = indices.max()
    if highest >= n:
        raise IndexError("indices are out-of-bounds")
|
||
|
||
# ----------------------------------------------------------- | ||
# Indexer Conversion | ||
|
||
|
||
def maybe_convert_indices(indices, n: int):
    """
    Translate possibly-negative indices into valid non-negative positions.

    Negative entries are shifted by ``n``; anything that is still negative
    or is greater than or equal to ``n`` afterwards is out of bounds.

    Parameters
    ----------
    indices : array-like
        The array of indices that we are to convert.
    n : int
        The number of elements in the array that we are indexing.

    Returns
    -------
    valid_indices : array-like
        An array-like of non-negative indices corresponding to the ones
        passed in. The input is copied before modification, so the
        caller's array is not changed in place.

    Raises
    ------
    IndexError
        If one of the converted indices either exceeds the number of
        elements (specified by `n`) or is still negative.
    """
    if isinstance(indices, list):
        if not indices:
            # np.array([]) would be float64 and cause indexing errors,
            # so hand back an explicitly integer-typed empty array.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    negative = indices < 0
    if negative.any():
        # Copy first so that the caller's array is left untouched.
        indices = indices.copy()
        indices[negative] += n

    out_of_bounds = (indices >= n) | (indices < 0)
    if out_of_bounds.any():
        raise IndexError("indices are out-of-bounds")
    return indices
|
||
|
||
# ----------------------------------------------------------- | ||
# Unsorted | ||
|
||
|
||
def length_of_indexer(indexer, target=None) -> int:
    """
    Return the length of a single non-tuple indexer, which may be a slice.

    Parameters
    ----------
    indexer : slice, Series, Index, ndarray, list or scalar
        The indexer whose length is wanted.
    target : sized object, optional
        The object being indexed. When given together with a slice, the
        slice is resolved against ``len(target)``.

    Returns
    -------
    int
        Number of elements the indexer selects. A slice resolved against
        ``target`` is counted exactly as Python slicing would select
        (including negative steps and empty selections); Series, Index,
        ndarray and list indexers report ``len(indexer)``; anything that
        is not a list-like indexer counts as 1.

    Raises
    ------
    AssertionError
        If ``indexer`` is a list-like indexer that is not a Series, Index,
        ndarray or list.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices normalises None/negative/out-of-range bounds and
        # negative steps exactly as real slicing does, so the resulting
        # range has the same length as target[indexer].
        start, stop, step = indexer.indices(len(target))
        return len(range(start, stop, step))
    elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)):
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")
Uh oh!
There was an error while loading. Please reload this page.