|
| 1 | +""" |
| 2 | +Low-dependency indexing utilities. |
| 3 | +""" |
| 4 | +import numpy as np |
| 5 | + |
| 6 | +from pandas.core.dtypes.common import is_list_like |
| 7 | +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries |
| 8 | + |
| 9 | +# ----------------------------------------------------------- |
| 10 | +# Indexer Identification |
| 11 | + |
| 12 | + |
def is_list_like_indexer(key) -> bool:
    """
    Decide whether ``key`` counts as a list-like indexer.

    Any list-like qualifies, except instances of tuple subclasses such as
    NamedTuples, which are deliberately excluded.

    Parameters
    ----------
    key : object
        The candidate indexer.

    Returns
    -------
    bool
        True if ``key`` is list-like and is not a tuple subclass.
    """
    if not is_list_like(key):
        return False
    if isinstance(key, tuple):
        # A plain tuple is accepted; a tuple *subclass* (e.g. a NamedTuple)
        # can itself be an indexer, so it is not treated as a list-like one.
        return type(key) is tuple
    return True
| 27 | + |
| 28 | + |
def is_scalar_indexer(indexer, arr_value) -> bool:
    """
    Return True if we are all scalar indexers.

    NOTE(review): despite the name and the summary above, the only positive
    condition tested below is "an ndarray indexer of length 0" -- the same
    expression ``is_empty_indexer`` uses. Nothing here checks for scalars;
    confirm the intended behavior.

    Parameters
    ----------
    indexer : object
        The indexer to inspect.
    arr_value : object with an ``ndim`` attribute
        Only ``arr_value.ndim`` is read.

    Returns
    -------
    bool
    """
    # return True if we are all scalar indexers

    # NOTE(review): the indentation of this function was not recoverable from
    # the view under review. The ``return any(...)`` line is assumed to sit
    # inside the non-tuple branch -- confirm against the real file.
    if arr_value.ndim == 1:
        if not isinstance(indexer, tuple):
            # wrap the lone indexer so it can be scanned like a tuple
            indexer = tuple([indexer])
            return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer)
    return False
| 37 | + |
| 38 | + |
def is_empty_indexer(indexer, arr_value) -> bool:
    """
    Report whether ``indexer`` is an empty indexer.

    Parameters
    ----------
    indexer : object
        A single indexer or a tuple of indexers.
    arr_value : object with an ``ndim`` attribute
        Only ``arr_value.ndim`` is read.

    Returns
    -------
    bool
        True if ``indexer`` is a zero-length list-like, or if ``arr_value``
        is one-dimensional and ``indexer`` (or any member of a tuple
        ``indexer``) is a zero-length ndarray. False otherwise.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True
    if arr_value.ndim != 1:
        return False

    # Treat a lone indexer as a one-element tuple so both shapes share a path.
    candidates = indexer if isinstance(indexer, tuple) else (indexer,)
    for candidate in candidates:
        if isinstance(candidate, np.ndarray) and len(candidate) == 0:
            return True
    return False
| 49 | + |
| 50 | + |
| 51 | +# ----------------------------------------------------------- |
| 52 | +# Indexer Validation |
| 53 | + |
| 54 | + |
def check_setitem_lengths(indexer, value, values) -> None:
    """
    Validate that value and indexer are the same length.

    A special case is allowed when the indexer is a boolean mask (a boolean
    ndarray, or a list made up only of booleans) and the number of True
    entries equals the length of ``value``. In this case, no exception is
    raised.

    Parameters
    ----------
    indexer : sequence
        The key for the setitem
    value : array-like
        The value for the setitem
    values : array-like
        The values being set into

    Returns
    -------
    None

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice, ``value`` is
        list-like, and the lengths don't match.
    """
    if isinstance(indexer, (np.ndarray, list)):
        if is_list_like(value) and len(indexer) != len(value):
            mask = indexer
            # A list of booleans acts as a mask just like a boolean ndarray,
            # so give it the same special case. Only all-boolean lists are
            # converted; anything else is left alone and rejected below.
            if isinstance(mask, list) and all(
                isinstance(flag, (bool, np.bool_)) for flag in mask
            ):
                mask = np.array(mask, dtype=np.bool_)

            # boolean with truth values == len of the value is ok too
            mask_matches = (
                isinstance(mask, np.ndarray)
                and mask.dtype == np.bool_
                and np.count_nonzero(mask) == len(value)
            )
            if not mask_matches:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )

    elif isinstance(indexer, slice):
        # The slice length is only compared when ``values`` is non-empty.
        if is_list_like(value) and len(values):
            if len(value) != length_of_indexer(indexer, values):
                raise ValueError(
                    "cannot set using a slice indexer with a "
                    "different length than the value"
                )
| 103 | + |
| 104 | + |
def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
        Plain sequences are also accepted; they are coerced with
        ``np.asarray`` before checking.
    n : int
        length of the array being indexed

    Raises
    ------
    ValueError
        If any index is less than -1.
    IndexError
        If any index is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices([1, 2], 3)
    # OK
    >>> validate_indices([1, -2], 3)
    ValueError
    >>> validate_indices([1, 2, 3], 3)
    IndexError
    >>> validate_indices([-1, -1], 0)
    # OK
    >>> validate_indices([0, 1], 0)
    IndexError
    """
    # Coerce so that plain sequences (as used in the examples above) gain the
    # ndarray ``min``/``max`` methods; an ndarray passes through unchanged.
    indices = np.asarray(indices)

    if len(indices):
        min_idx = indices.min()
        if min_idx < -1:
            msg = "'indices' contains values less than allowed ({} < {})".format(
                min_idx, -1
            )
            raise ValueError(msg)

        max_idx = indices.max()
        if max_idx >= n:
            raise IndexError("indices are out-of-bounds")
| 145 | + |
| 146 | + |
| 147 | +# ----------------------------------------------------------- |
| 148 | +# Indexer Conversion |
| 149 | + |
| 150 | + |
def maybe_convert_indices(indices, n: int):
    """
    Translate possibly-negative indices into valid non-negative ones.

    Negative entries are shifted by ``n``. If any entry is still negative
    afterwards, or is greater than or equal to ``n``, an IndexError is
    raised.

    Parameters
    ----------
    indices : array-like
        The indices to convert. A list is converted to an ndarray first.
    n : int
        The number of elements in the array being indexed.

    Returns
    -------
    valid_indices : array-like
        The converted indices. When the input is already an array with no
        negative entries it is returned as-is; when a shift is needed the
        work is done on a copy, so the caller's array is not modified.

    Raises
    ------
    IndexError
        If one of the converted indices either exceeded the number of
        elements (specified by `n`) OR was still negative.
    """
    if isinstance(indices, list):
        if not indices:
            # np.array([]) would come back as float and cause indexing
            # errors, so hand back an explicitly integer-typed empty array.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    negative = indices < 0
    if negative.any():
        # Shift on a copy so the caller's array stays untouched.
        indices = indices.copy()
        indices[negative] += n

    out_of_bounds = (indices < 0) | (indices >= n)
    if out_of_bounds.any():
        raise IndexError("indices are out-of-bounds")
    return indices
| 193 | + |
| 194 | + |
| 195 | +# ----------------------------------------------------------- |
| 196 | +# Unsorted |
| 197 | + |
| 198 | + |
def length_of_indexer(indexer, target=None) -> int:
    """
    Return the length of a single non-tuple indexer, which could be a slice.

    Parameters
    ----------
    indexer : object
        A slice, a Series/Index/ndarray/list, or a non-list-like key.
    target : sized object, optional
        The object being indexed. A slice is only resolved against a
        length when ``target`` is given.

    Returns
    -------
    int
        For a slice with a ``target``, the number of positions the slice
        selects from a sequence of ``len(target)`` elements. For a Series,
        Index, ndarray or list, its ``len``. For any other indexer that is
        not a list-like indexer (including a slice when ``target`` is
        None), 1.

    Raises
    ------
    AssertionError
        If none of the cases above applies.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices resolves start/stop/step against the target length
        # with the same rules sequence slicing uses, so negative steps,
        # out-of-range bounds and empty slices are all counted correctly.
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)):
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")
0 commit comments