Skip to content

Commit 298c7cc

Browse files
jbrockmendel and jreback
authored and committed
REF: separate indexer utilities from indexing.py (#27229)
1 parent 823af76 commit 298c7cc

File tree

8 files changed

+285
-275
lines changed

8 files changed

+285
-275
lines changed

pandas/core/algorithms.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
from pandas.core.dtypes.missing import isna, na_value_for_dtype
5151

5252
from pandas.core import common as com
53+
from pandas.core.indexers import validate_indices
5354

5455
_shared_docs = {} # type: Dict[str, str]
5556

@@ -1587,8 +1588,6 @@ def take(arr, indices, axis=0, allow_fill=False, fill_value=None):
15871588
... fill_value=-10)
15881589
array([ 10, 10, -10])
15891590
"""
1590-
from pandas.core.indexing import validate_indices
1591-
15921591
if not is_array_like(arr):
15931592
arr = np.asarray(arr)
15941593

pandas/core/indexers.py

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
"""
2+
Low-dependency indexing utilities.
3+
"""
4+
import numpy as np
5+
6+
from pandas.core.dtypes.common import is_list_like
7+
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
8+
9+
# -----------------------------------------------------------
10+
# Indexer Identification
11+
12+
13+
def is_list_like_indexer(key) -> bool:
    """
    Determine whether ``key`` is a list-like indexer other than a NamedTuple.

    Parameters
    ----------
    key : object

    Returns
    -------
    bool
        True when ``key`` is list-like and is not an instance of a ``tuple``
        subclass; False otherwise.
    """
    if not is_list_like(key):
        return False

    # Plain tuples are accepted; tuple subclasses (e.g. NamedTuples) are
    # excluded because they can themselves be used as indexers.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass
27+
28+
29+
def is_scalar_indexer(indexer, arr_value) -> bool:
    """
    Check whether every component of ``indexer`` is an integer scalar.

    Parameters
    ----------
    indexer : object
        A single indexer, or a tuple of indexers.
    arr_value : object
        Must expose an ``ndim`` attribute. Only ``ndim == 1`` is considered;
        for any other dimensionality the result is False.

    Returns
    -------
    bool
        True if ``arr_value`` is one-dimensional and every component of
        ``indexer`` is an integer (bools are not treated as integers).
    """
    if arr_value.ndim != 1:
        return False

    if not isinstance(indexer, tuple):
        indexer = (indexer,)

    # The previous body tested for zero-length ndarrays, which is the check
    # is_empty_indexer performs, so genuine scalar indexers were never
    # recognised. Test each component for being an integer scalar instead.
    return len(indexer) > 0 and all(
        isinstance(idx, (int, np.integer)) and not isinstance(idx, bool)
        for idx in indexer
    )
37+
38+
39+
def is_empty_indexer(indexer, arr_value) -> bool:
    """
    Report whether ``indexer`` is empty.

    Parameters
    ----------
    indexer : object
        A single indexer, or a tuple of indexers.
    arr_value : object
        Must expose an ``ndim`` attribute.

    Returns
    -------
    bool
        True if ``indexer`` is a zero-length list-like, or if ``arr_value``
        is one-dimensional and any component of ``indexer`` is a zero-length
        ndarray; False otherwise.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    if arr_value.ndim != 1:
        return False

    # Normalise to a tuple so single and multi-component indexers share
    # the same scan.
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False
49+
50+
51+
# -----------------------------------------------------------
52+
# Indexer Validation
53+
54+
55+
def check_setitem_lengths(indexer, value, values) -> None:
    """
    Ensure the indexer and the value being assigned have matching lengths.

    A special case is permitted: when the indexer is a boolean ndarray whose
    number of True entries equals ``len(value)``, no exception is raised.

    Parameters
    ----------
    indexer : sequence
        The key for the setitem.
    value : array-like
        The value for the setitem.
    values : array-like
        The values being set into.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice and its length does
        not match the length of a list-like ``value``.
    """
    if isinstance(indexer, (np.ndarray, list)):
        # Scalars broadcast, and equal lengths are always fine.
        if not is_list_like(value) or len(indexer) == len(value):
            return

        # A boolean mask selecting exactly len(value) positions is ok too.
        mask_matches = (
            isinstance(indexer, np.ndarray)
            and indexer.dtype == np.bool_
            and len(indexer[indexer]) == len(value)
        )
        if not mask_matches:
            raise ValueError(
                "cannot set using a list-like indexer "
                "with a different length than the value"
            )

    elif isinstance(indexer, slice):
        # Only compare when there is something to set into.
        if (
            is_list_like(value)
            and len(values)
            and len(value) != length_of_indexer(indexer, values)
        ):
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
103+
104+
105+
def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray or array-like of int
        Coerced with ``np.asarray`` before checking, so plain lists are
        accepted as well.
    n : int
        Length of the array being indexed.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If any index is less than -1.
    IndexError
        If any index is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK
    >>> validate_indices([1, -2], 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)
    >>> validate_indices([1, 2, 3], 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    >>> validate_indices([-1, -1], 0)  # OK
    >>> validate_indices([0, 1], 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    # Lists have no .min()/.max(); coercing makes the documented list inputs
    # work and is a no-op for an existing ndarray.
    indices = np.asarray(indices)

    if len(indices) == 0:
        # Nothing to validate.
        return

    min_idx = indices.min()
    if min_idx < -1:
        msg = "'indices' contains values less than allowed ({} < {})".format(
            min_idx, -1
        )
        raise ValueError(msg)

    max_idx = indices.max()
    if max_idx >= n:
        raise IndexError("indices are out-of-bounds")
145+
146+
147+
# -----------------------------------------------------------
148+
# Indexer Conversion
149+
150+
151+
def maybe_convert_indices(indices, n: int):
    """
    Translate possibly-negative indices into valid, non-negative ones.

    Negative entries are shifted by ``n``. Any entry that is still negative
    afterwards, or that is greater than or equal to ``n``, is out of bounds.

    Parameters
    ----------
    indices : array-like
        The array of indices that we are to convert.
    n : int
        The number of elements in the array that we are indexing.

    Returns
    -------
    valid_indices : array-like
        An array-like of non-negative indices corresponding to the ones
        passed in. The input is returned as-is when it contains no negative
        entries; otherwise a modified copy is returned.

    Raises
    ------
    IndexError
        If one of the converted indices either exceeds the number of
        elements (specified by `n`) or is still negative.
    """
    if isinstance(indices, list):
        if not indices:
            # np.array([]) would come back as float and cause indexing
            # errors, so hand back an empty integer array directly.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    negative = indices < 0
    if negative.any():
        # Work on a copy so the caller's array is left untouched.
        indices = indices.copy()
        indices[negative] += n

    out_of_bounds = (indices < 0) | (indices >= n)
    if out_of_bounds.any():
        raise IndexError("indices are out-of-bounds")
    return indices
193+
194+
195+
# -----------------------------------------------------------
196+
# Unsorted
197+
198+
199+
def length_of_indexer(indexer, target=None) -> int:
    """
    Return the length of a single non-tuple indexer, which could be a slice.

    Parameters
    ----------
    indexer : object
        A slice, a Series/Index/ndarray/list, or a non-list-like object.
    target : sized object, optional
        The object being indexed. Slices are only resolved against a
        length when ``target`` is given.

    Returns
    -------
    int
        For a slice with a ``target``: the number of positions the slice
        selects from a sequence of ``len(target)`` elements. For a Series,
        Index, ndarray or list: ``len(indexer)``. For anything that is not a
        list-like indexer: 1.

    Raises
    ------
    AssertionError
        If the indexer is list-like but of none of the handled types.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices applies the same clamping and negative-index rules
        # as real slicing, so this is also correct for negative steps and
        # out-of-range bounds, and never yields a negative length (the
        # previous hand-rolled arithmetic got those cases wrong).
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)):
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")

pandas/core/indexes/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
from pandas.core.arrays import ExtensionArray
6767
from pandas.core.base import IndexOpsMixin, PandasObject
6868
import pandas.core.common as com
69+
from pandas.core.indexers import maybe_convert_indices
6970
from pandas.core.indexes.frozen import FrozenList
7071
import pandas.core.missing as missing
7172
from pandas.core.ops import get_op_result_name, make_invalid_op
@@ -3318,7 +3319,6 @@ def _convert_list_indexer(self, keyarr, kind=None):
33183319
# values outside the range of indices so as to trigger an
33193320
# IndexError in maybe_convert_indices
33203321
indexer[indexer < 0] = len(self)
3321-
from pandas.core.indexing import maybe_convert_indices
33223322

33233323
return maybe_convert_indices(indexer, len(self))
33243324

0 commit comments

Comments
 (0)