Skip to content

REF: separate indexer utilities from indexing.py #27229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jul 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
bfd1b38
implement indexers.py
jbrockmendel Jul 4, 2019
746a3c0
add types
jbrockmendel Jul 4, 2019
0233d42
cleanup
jbrockmendel Jul 4, 2019
35d3055
separate out helper method; add types, remove redundnat checks
jbrockmendel Jul 4, 2019
c3eb149
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Jul 4, 2019
b1ce472
blackify
jbrockmendel Jul 4, 2019
7ee551c
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Jul 4, 2019
b2a101a
types
jbrockmendel Jul 4, 2019
51ceb24
move indexers
jbrockmendel Jul 4, 2019
7efe2d1
organize file
jbrockmendel Jul 4, 2019
82561aa
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Jul 5, 2019
a827ac2
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Jul 5, 2019
3322f4e
isort
jbrockmendel Jul 6, 2019
5b6335b
lint+isort compat
jbrockmendel Jul 6, 2019
74c3d36
blackify
jbrockmendel Jul 7, 2019
f8b5264
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Jul 7, 2019
42bdf94
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Jul 8, 2019
0fafcc4
remove assertion
jbrockmendel Jul 8, 2019
8b8442c
add comment for GH#27259
jbrockmendel Jul 8, 2019
d478037
docstring
jbrockmendel Jul 8, 2019
741c4d2
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Jul 8, 2019
39fa014
revert isinstance to type checks
jbrockmendel Jul 8, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from pandas.core.dtypes.missing import isna, na_value_for_dtype

from pandas.core import common as com
from pandas.core.indexers import validate_indices

_shared_docs = {} # type: Dict[str, str]

Expand Down Expand Up @@ -1587,8 +1588,6 @@ def take(arr, indices, axis=0, allow_fill=False, fill_value=None):
... fill_value=-10)
array([ 10, 10, -10])
"""
from pandas.core.indexing import validate_indices

if not is_array_like(arr):
arr = np.asarray(arr)

Expand Down
225 changes: 225 additions & 0 deletions pandas/core/indexers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
"""
Low-dependency indexing utilities.
"""
import numpy as np

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries

# -----------------------------------------------------------
# Indexer Identification


def is_list_like_indexer(key) -> bool:
    """
    Check if we have a list-like indexer that is *not* a NamedTuple.

    Parameters
    ----------
    key : object
        The candidate indexer.

    Returns
    -------
    bool
        True when ``is_list_like(key)`` holds and ``key`` is not an instance
        of a ``tuple`` subclass.
    """
    if not is_list_like(key):
        return False
    # A NamedTuple is a tuple *subclass*; a plain tuple has exactly type tuple.
    # NamedTuples can themselves be indexers, so they must not count as list-like.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass


def is_scalar_indexer(indexer, arr_value) -> bool:
    """
    Return True if we are all scalar indexers.

    Parameters
    ----------
    indexer : object
        The indexer to inspect. A non-tuple is wrapped in a 1-tuple first.
    arr_value : object
        Only its ``ndim`` attribute is read.

    Returns
    -------
    bool
        False unless ``arr_value.ndim == 1``.
    """
    # Review thread attached to this function in the pull request:
    #   Contributor: if possible, please add types here (may be tricky). Use
    #       an explicit ``Any`` where that really is the type, so it is clear
    #       the function has been typed; ``reveal_type`` can help.
    #   Author: types were added only where known with sufficient certainty;
    #       leaving the rest clearly unfinished is preferred over ``Any``.
    #   Contributor: fix this docstring in a follow-up and type as much as
    #       possible here.
    #
    # NOTE(review): the test below looks for a zero-length ndarray, the same
    # expression is_empty_indexer uses, rather than checking for scalars.
    # Indentation was lost in this copy of the diff; the ``return any(...)``
    # line is reconstructed as nested under the non-tuple branch. Confirm both
    # points against the repository before relying on this function.
    if arr_value.ndim == 1:
        if not isinstance(indexer, tuple):
            indexer = (indexer,)
            return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer)
    return False


def is_empty_indexer(indexer, arr_value) -> bool:
    """
    Check if we have an empty indexer.

    Parameters
    ----------
    indexer : object
        The indexer to inspect.
    arr_value : object
        Only its ``ndim`` attribute is read.

    Returns
    -------
    bool
        True if ``indexer`` is a zero-length list-like, or if ``arr_value`` is
        1-dimensional and any component of ``indexer`` is a zero-length
        ndarray. False otherwise.
    """
    if is_list_like(indexer) and not len(indexer):
        return True
    if arr_value.ndim == 1:
        # Treat a bare indexer as a 1-tuple so both shapes share one check.
        if not isinstance(indexer, tuple):
            indexer = (indexer,)
        return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer)
    return False


# -----------------------------------------------------------
# Indexer Validation


def check_setitem_lengths(indexer, value, values) -> None:
    """
    Validate that value and indexer are the same length.

    A special case is allowed for when the indexer is a boolean array
    and the number of true values equals the length of ``value``. In
    this case, no exception is raised.

    Parameters
    ----------
    indexer : sequence
        The key for the setitem
    value : array-like
        The value for the setitem
    values : array-like
        The values being set into

    Returns
    -------
    None

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice and the lengths
        don't match.
    """
    # TODO: a reviewer asked for the parameters of this function to be typed.
    if isinstance(indexer, (np.ndarray, list)):
        if is_list_like(value) and len(indexer) != len(value):
            # A boolean mask may differ in length from ``value`` as long as
            # it selects exactly len(value) positions.
            mask_selects_value = (
                isinstance(indexer, np.ndarray)
                and indexer.dtype == np.bool_
                and len(indexer[indexer]) == len(value)
            )
            if not mask_selects_value:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )

    elif isinstance(indexer, slice):
        # Only compare lengths when there is something to set into.
        if is_list_like(value) and len(values):
            if len(value) != length_of_indexer(indexer, values):
                raise ValueError(
                    "cannot set using a slice indexer with a "
                    "different length than the value"
                )


def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
        Other array-likes (e.g. a list) are converted with ``np.asarray``.
    n : int
        length of the array being indexed

    Raises
    ------
    ValueError
        If any index is less than -1.
    IndexError
        If any index is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices([1, 2], 3)
    # OK
    >>> validate_indices([1, -2], 3)
    ValueError
    >>> validate_indices([1, 2, 3], 3)
    IndexError
    >>> validate_indices([-1, -1], 0)
    # OK
    >>> validate_indices([0, 1], 0)
    IndexError
    """
    # .min()/.max() below need an ndarray; this is a no-op for ndarray input.
    indices = np.asarray(indices)
    if len(indices):
        min_idx = indices.min()
        if min_idx < -1:
            msg = "'indices' contains values less than allowed ({} < {})".format(
                min_idx, -1
            )
            raise ValueError(msg)

        max_idx = indices.max()
        if max_idx >= n:
            raise IndexError("indices are out-of-bounds")


# -----------------------------------------------------------
# Indexer Conversion


def maybe_convert_indices(indices, n: int):
    """
    Attempt to convert indices into valid, non-negative indices.

    If we have negative indices, translate to non-negative here.
    If we have indices that are out-of-bounds, raise an IndexError.

    Parameters
    ----------
    indices : array-like
        The array of indices that we are to convert.
    n : int
        The number of elements in the array that we are indexing.

    Returns
    -------
    valid_indices : array-like
        An array-like of non-negative indices that correspond to the ones
        that were passed in initially to this function. The input is
        returned as-is when it contains no negative entries; otherwise a
        modified copy is returned.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`) OR was still negative.
    """
    if isinstance(indices, list):
        indices = np.array(indices)
        if len(indices) == 0:
            # If list is empty, np.array will return float and cause indexing
            # errors.
            return np.empty(0, dtype=np.intp)

    negative = indices < 0
    if negative.any():
        # Copy so the caller's array is not modified in place.
        indices = indices.copy()
        indices[negative] += n

    out_of_bounds = (indices >= n) | (indices < 0)
    if out_of_bounds.any():
        raise IndexError("indices are out-of-bounds")
    return indices


# -----------------------------------------------------------
# Unsorted


def length_of_indexer(indexer, target=None) -> int:
    """
    Return the length of a single non-tuple indexer which could be a slice.

    Parameters
    ----------
    indexer : object
        A slice, an ABCSeries / ABCIndexClass instance, an ndarray, a list,
        or a non-list-like object.
    target : sized object, optional
        The object the slice is applied to. Only consulted when ``indexer``
        is a slice.

    Returns
    -------
    int
        For a slice with a target, the number of positions the slice selects
        from ``target``. For a Series, Index, ndarray or list, its ``len``.
        For anything that is not a list-like indexer, 1.

    Raises
    ------
    AssertionError
        If ``indexer`` is a list-like indexer of any other type.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices clips start/stop to the target bounds and resolves
        # negative values and negative steps, so empty and reversed slices
        # yield their true (never negative) length.
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)):
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
from pandas.core.arrays import ExtensionArray
from pandas.core.base import IndexOpsMixin, PandasObject
import pandas.core.common as com
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.frozen import FrozenList
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name, make_invalid_op
Expand Down Expand Up @@ -3318,7 +3319,6 @@ def _convert_list_indexer(self, keyarr, kind=None):
# values outside the range of indices so as to trigger an
# IndexError in maybe_convert_indices
indexer[indexer < 0] = len(self)
from pandas.core.indexing import maybe_convert_indices

return maybe_convert_indices(indexer, len(self))

Expand Down
Loading