
Commit f9a3fe9

Merge branch 'master' into misc/remove-docs
2 parents: 5d63447 + 8945a42


47 files changed: +585, -374 lines

.github/workflows/ci.yml

Lines changed: 1 addition & 4 deletions
@@ -163,10 +163,7 @@ jobs:
     pytest pandas/tests/resample/
     pytest pandas/tests/reshape/merge
     pytest pandas/tests/series/
-
-    # indexing subset (temporary since other tests don't pass yet)
-    pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
-    pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
+    pytest pandas/tests/indexing/

     pytest pandas/tests/api/
     pytest pandas/tests/apply/

pandas/_libs/hashing.pyi

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+import numpy as np
+
+def hash_object_array(
+    arr: np.ndarray,  # np.ndarray[object]
+    key: str,
+    encoding: str = ...,
+) -> np.ndarray: ...  # np.ndarray[np.uint64]

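For orientation, a small usage sketch of what this stub annotates: hashing an object ndarray to one uint64 per element. It goes through the public pandas.util.hash_array wrapper rather than calling the Cython module directly, so treat it as illustrative of the signature, not of the internals.

    import numpy as np
    import pandas as pd

    arr = np.array(["a", "b", "c"], dtype=object)
    hashed = pd.util.hash_array(arr)  # np.ndarray of uint64, one hash per element
    print(hashed.dtype)               # uint64
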
pandas/_libs/index.pyx

Lines changed: 14 additions & 17 deletions
@@ -259,11 +259,11 @@ cdef class IndexEngine:
         self.monotonic_inc = 0
         self.monotonic_dec = 0

-    def get_indexer(self, values):
+    def get_indexer(self, ndarray values):
         self._ensure_mapping_populated()
         return self.mapping.lookup(values)

-    def get_indexer_non_unique(self, targets):
+    def get_indexer_non_unique(self, ndarray targets):
         """
         Return an indexer suitable for taking from a non unique index
         return the labels in the same order as the target
@@ -451,11 +451,11 @@ cdef class DatetimeEngine(Int64Engine):
         except KeyError:
             raise KeyError(val)

-    def get_indexer_non_unique(self, targets):
+    def get_indexer_non_unique(self, ndarray targets):
         # we may get datetime64[ns] or timedelta64[ns], cast these to int64
         return super().get_indexer_non_unique(targets.view("i8"))

-    def get_indexer(self, values):
+    def get_indexer(self, ndarray values):
         self._ensure_mapping_populated()
         if values.dtype != self._get_box_dtype():
             return np.repeat(-1, len(values)).astype('i4')
@@ -594,15 +594,15 @@ cdef class BaseMultiIndexCodesEngine:
                        in zip(self.levels, zip(*target))]
         return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)

-    def get_indexer_no_fill(self, object target) -> np.ndarray:
+    def get_indexer(self, ndarray[object] target) -> np.ndarray:
         """
         Returns an array giving the positions of each value of `target` in
         `self.values`, where -1 represents a value in `target` which does not
         appear in `self.values`

         Parameters
         ----------
-        target : list-like of keys
+        target : ndarray[object]
             Each key is a tuple, with a label for each level of the index

         Returns
@@ -613,8 +613,8 @@ cdef class BaseMultiIndexCodesEngine:
         lab_ints = self._extract_level_codes(target)
         return self._base.get_indexer(self, lab_ints)

-    def get_indexer(self, object target, object values = None,
-                    object method = None, object limit = None) -> np.ndarray:
+    def get_indexer_with_fill(self, ndarray target, ndarray values,
+                              str method, object limit) -> np.ndarray:
         """
         Returns an array giving the positions of each value of `target` in
         `values`, where -1 represents a value in `target` which does not
@@ -630,25 +630,22 @@ cdef class BaseMultiIndexCodesEngine:

         Parameters
         ----------
-        target: list-like of tuples
+        target: ndarray[object] of tuples
             need not be sorted, but all must have the same length, which must be
             the same as the length of all tuples in `values`
-        values : list-like of tuples
+        values : ndarray[object] of tuples
             must be sorted and all have the same length. Should be the set of
             the MultiIndex's values. Needed only if `method` is not None
         method: string
             "backfill" or "pad"
-        limit: int, optional
+        limit: int or None
             if provided, limit the number of fills to this value

         Returns
         -------
         np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`,
         filled with the `method` (and optionally `limit`) specified
         """
-        if method is None:
-            return self.get_indexer_no_fill(target)
-
         assert method in ("backfill", "pad")
         cdef:
             int64_t i, j, next_code
@@ -658,8 +655,8 @@ cdef class BaseMultiIndexCodesEngine:
             ndarray[int64_t, ndim=1] new_codes, new_target_codes
            ndarray[int64_t, ndim=1] sorted_indexer

-        target_order = np.argsort(target.values).astype('int64')
-        target_values = target.values[target_order]
+        target_order = np.argsort(target).astype('int64')
+        target_values = target[target_order]
        num_values, num_target_values = len(values), len(target_values)
        new_codes, new_target_codes = (
            np.empty((num_values,)).astype('int64'),
@@ -718,7 +715,7 @@ cdef class BaseMultiIndexCodesEngine:

        return self._base.get_loc(self, lab_int)

-    def get_indexer_non_unique(self, object target):
+    def get_indexer_non_unique(self, ndarray target):
        # This needs to be overridden just because the default one works on
        # target._values, and target can be itself a MultiIndex.

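The engine-level get_indexer contract these signatures tighten (positions of each target value in the index, with -1 for misses) is visible through the public Index API; a minimal sketch:

    import numpy as np
    import pandas as pd

    idx = pd.Index(["a", "b", "c"])
    idx.get_indexer(np.array(["b", "z"], dtype=object))  # array([ 1, -1])
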
pandas/_libs/ops.pyi

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+from typing import (
+    Any,
+    Callable,
+)
+
+import numpy as np
+
+_BinOp = Callable[[Any, Any], Any]
+_BoolOp = Callable[[Any, Any], bool]
+
+
+def scalar_compare(
+    values: np.ndarray,  # object[:]
+    val: object,
+    op: _BoolOp,  # {operator.eq, operator.ne, ...}
+) -> np.ndarray: ...  # np.ndarray[bool]
+
+def vec_compare(
+    left: np.ndarray,  # np.ndarray[object]
+    right: np.ndarray,  # np.ndarray[object]
+    op: _BoolOp,  # {operator.eq, operator.ne, ...}
+) -> np.ndarray: ...  # np.ndarray[bool]
+
+
+def scalar_binop(
+    values: np.ndarray,  # object[:]
+    val: object,
+    op: _BinOp,  # binary operator
+) -> np.ndarray: ...
+
+
+def vec_binop(
+    left: np.ndarray,  # object[:]
+    right: np.ndarray,  # object[:]
+    op: _BinOp,  # binary operator
+) -> np.ndarray: ...
+
+
+def maybe_convert_bool(
+    arr: np.ndarray,  # np.ndarray[object]
+    true_values=...,
+    false_values=...
+) -> np.ndarray: ...

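As a rough guide to what these annotations describe: scalar_compare applies a comparison operator elementwise over an object array against a single value and returns a boolean ndarray. A pure-Python stand-in for that behavior (a hypothetical helper, not the Cython implementation):

    import operator
    import numpy as np

    def scalar_compare_sketch(values: np.ndarray, val: object, op) -> np.ndarray:
        # elementwise object comparison; the real code also special-cases NaN/None
        return np.array([op(v, val) for v in values], dtype=bool)

    values = np.array(["x", "y", "x"], dtype=object)
    scalar_compare_sketch(values, "x", operator.eq)  # array([ True, False,  True])
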
pandas/_libs/ops_dispatch.pyi

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+import numpy as np
+
+def maybe_dispatch_ufunc_to_dunder_op(
+    self, ufunc: np.ufunc, method: str, *inputs, **kwargs
+): ...

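The stubbed helper is called from __array_ufunc__ implementations to route a NumPy ufunc call back to the matching dunder method. A self-contained sketch of that dispatch pattern; the class and names are illustrative, not pandas internals:

    import numpy as np

    class Boxed:
        def __init__(self, values):
            self.values = np.asarray(values)

        def __add__(self, other):
            return Boxed(self.values + other)

        def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
            # map np.add(self, other) back to self.__add__(other)
            if ufunc is np.add and method == "__call__" and inputs[0] is self:
                return self.__add__(inputs[1])
            return NotImplemented

    np.add(Boxed([1, 2]), 1)  # dispatches to Boxed.__add__
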
pandas/_libs/reshape.pyi

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+import numpy as np
+
+def unstack(
+    values: np.ndarray,  # reshape_t[:, :]
+    mask: np.ndarray,  # const uint8_t[:]
+    stride: int,
+    length: int,
+    width: int,
+    new_values: np.ndarray,  # reshape_t[:, :]
+    new_mask: np.ndarray,  # uint8_t[:, :]
+) -> None: ...
+
+
+def explode(
+    values: np.ndarray,  # np.ndarray[object]
+) -> tuple[
+    np.ndarray,  # np.ndarray[object]
+    np.ndarray,  # np.ndarray[np.int64]
+]: ...

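The explode signature (object values in, flattened values plus per-row counts out) backs the public Series.explode; a quick look at the user-facing behavior:

    import pandas as pd

    s = pd.Series([[1, 2], [], [3]])
    s.explode()
    # 0      1
    # 0      2
    # 1    NaN
    # 2      3
    # dtype: object
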
pandas/_libs/writers.pyx

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ def write_csv_rows(
     data_index : ndarray
     nlevels : int
     cols : ndarray
-    writer : object
+    writer : _csv.writer
     """
     # In crude testing, N>100 yields little marginal improvement
     cdef:

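The docstring now names the concrete type: the writer argument is the object returned by csv.writer, whose C-level type is _csv.writer. A quick check, with writerows as an illustration of the kind of row writing this routine performs:

    import csv
    import io

    w = csv.writer(io.StringIO())
    type(w)                            # <class '_csv.writer'>
    w.writerows([["a", 1], ["b", 2]])
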
pandas/core/array_algos/quantile.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -142,17 +142,10 @@ def quantile_ea_compat(
142142
mask = np.asarray(values.isna())
143143
mask = np.atleast_2d(mask)
144144

145-
# error: Incompatible types in assignment (expression has type "ndarray", variable
146-
# has type "ExtensionArray")
147-
values, fill_value = values._values_for_factorize() # type: ignore[assignment]
148-
# error: No overload variant of "atleast_2d" matches argument type "ExtensionArray"
149-
values = np.atleast_2d(values) # type: ignore[call-overload]
150-
151-
# error: Argument 1 to "quantile_with_mask" has incompatible type "ExtensionArray";
152-
# expected "ndarray"
153-
result = quantile_with_mask(
154-
values, mask, fill_value, qs, interpolation, axis # type: ignore[arg-type]
155-
)
145+
arr, fill_value = values._values_for_factorize()
146+
arr = np.atleast_2d(arr)
147+
148+
result = quantile_with_mask(arr, mask, fill_value, qs, interpolation, axis)
156149

157150
if not is_sparse(orig.dtype):
158151
# shape[0] should be 1 as long as EAs are 1D

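quantile_ea_compat is the extension-array compatibility path for quantiles; from the user side, quantiles on an extension dtype such as nullable Int64 are the kind of call that exercises it (whether this exact helper is hit depends on the internal path taken):

    import pandas as pd

    s = pd.Series([1, 2, 3, None], dtype="Int64")
    s.quantile(0.5)           # 2.0
    s.quantile([0.25, 0.75])  # Series of interpolated quantiles
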
pandas/core/array_algos/replace.py

Lines changed: 1 addition & 3 deletions
@@ -152,8 +152,6 @@ def re_replacer(s):
     f = np.vectorize(re_replacer, otypes=[values.dtype])

     if mask is None:
-        # error: Invalid index type "slice" for "ExtensionArray"; expected type
-        # "Union[int, ndarray]"
-        values[:] = f(values)  # type: ignore[index]
+        values[:] = f(values)
     else:
         values[mask] = f(values[mask])

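re_replacer is part of the regex replacement machinery; at the user level, a regex-based Series.replace is the kind of operation that reaches it:

    import pandas as pd

    s = pd.Series(["foo", "bar", "baz"], dtype=object)
    s.replace(to_replace=r"^ba.$", value="X", regex=True)
    # 0    foo
    # 1      X
    # 2      X
    # dtype: object
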
pandas/core/array_algos/take.py

Lines changed: 0 additions & 3 deletions
@@ -177,9 +177,6 @@ def take_1d(

     Note: similarly to `take_nd`, this function assumes that the indexer is
     a valid(ated) indexer with no out of bound indices.
-
-    TODO(ArrayManager): mainly useful for ArrayManager, otherwise can potentially
-    be removed again if we don't end up with ArrayManager.
     """
     if not isinstance(arr, np.ndarray):
         # ExtensionArray -> dispatch to their method

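take_1d dispatches to the extension array's own take for non-ndarray inputs; the public equivalent of that operation, including filling for -1 positions, looks like:

    import pandas as pd

    arr = pd.array([1, 2, 3], dtype="Int64")
    arr.take([2, 0])                    # -> [3, 1]
    arr.take([1, -1], allow_fill=True)  # -> [2, <NA>]
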
pandas/core/arrays/base.py

Lines changed: 1 addition & 1 deletion
@@ -326,7 +326,7 @@ def __getitem__(
         """
         raise AbstractMethodError(self)

-    def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
+    def __setitem__(self, key: Union[int, slice, np.ndarray], value: Any) -> None:
         """
         Set one or more values inplace.

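The widened annotation reflects key types that ExtensionArray.__setitem__ implementations already accept; a short sketch against a concrete extension array:

    import numpy as np
    import pandas as pd

    arr = pd.array(["a", "b", "c"], dtype="string")
    arr[0] = "x"                               # int key
    arr[1:] = "y"                              # slice key, now covered by the annotation
    arr[np.array([True, False, True])] = "z"   # boolean ndarray key
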
pandas/core/arrays/datetimes.py

Lines changed: 19 additions & 0 deletions
@@ -7,9 +7,11 @@
     tzinfo,
 )
 from typing import (
+    TYPE_CHECKING,
     Optional,
     Union,
     cast,
+    overload,
 )
 import warnings

@@ -79,6 +81,9 @@
     Tick,
 )

+if TYPE_CHECKING:
+    from typing import Literal
+
 _midnight = time(0, 0)


@@ -1909,6 +1914,20 @@ def std(
 # Constructor Helpers


+@overload
+def sequence_to_datetimes(
+    data, allow_object: Literal[False] = ..., require_iso8601: bool = ...
+) -> DatetimeArray:
+    ...
+
+
+@overload
+def sequence_to_datetimes(
+    data, allow_object: Literal[True] = ..., require_iso8601: bool = ...
+) -> Union[np.ndarray, DatetimeArray]:
+    ...
+
+
 def sequence_to_datetimes(
     data, allow_object: bool = False, require_iso8601: bool = False
 ) -> Union[np.ndarray, DatetimeArray]:

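The two overloads let a type checker narrow the return type from the allow_object flag: Literal[False] promises a DatetimeArray, Literal[True] allows a plain ndarray as well. A self-contained sketch of the same pattern, with illustrative names rather than pandas code:

    from typing import Literal, Union, overload

    import numpy as np

    @overload
    def parse(data, strict: Literal[True] = ...) -> np.ndarray: ...
    @overload
    def parse(data, strict: Literal[False] = ...) -> Union[np.ndarray, list]: ...

    def parse(data, strict: bool = True) -> Union[np.ndarray, list]:
        # with strict=True a checker infers np.ndarray; with strict=False, the union
        return np.asarray(data) if strict else list(data)
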
pandas/core/arrays/string_arrow.py

Lines changed: 8 additions & 6 deletions
@@ -9,6 +9,7 @@
     Tuple,
     Type,
     Union,
+    cast,
 )

 import numpy as np
@@ -485,7 +486,7 @@ def _cmp_method(self, other, op):
         # TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray
         return BooleanArray._from_sequence(result.to_pandas().values)

-    def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
+    def __setitem__(self, key: Union[int, slice, np.ndarray], value: Any) -> None:
         """Set one or more values inplace.

         Parameters
@@ -509,6 +510,8 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
         key = check_array_indexer(self, key)

         if is_integer(key):
+            key = cast(int, key)
+
             if not is_scalar(value):
                 raise ValueError("Must pass scalars with scalar indexer")
             elif isna(value):
@@ -518,8 +521,7 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:

             # Slice data and insert in-between
             new_data = [
-                # error: Slice index must be an integer or None
-                *self._data[0:key].chunks,  # type: ignore[misc]
+                *self._data[0:key].chunks,
                 pa.array([value], type=pa.string()),
                 *self._data[(key + 1) :].chunks,
             ]
@@ -530,11 +532,11 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
             # This is probably extremely slow.

             # Convert all possible input key types to an array of integers
-            if is_bool_dtype(key):
+            if isinstance(key, slice):
+                key_array = np.array(range(len(self))[key])
+            elif is_bool_dtype(key):
                 # TODO(ARROW-9430): Directly support setitem(booleans)
                 key_array = np.argwhere(key).flatten()
-            elif isinstance(key, slice):
-                key_array = np.array(range(len(self))[key])
             else:
                 # TODO(ARROW-9431): Directly support setitem(integers)
                 key_array = np.asanyarray(key)

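The reordered slice branch relies on a small trick: indexing a range with the slice yields exactly the integer positions the slice selects, which then become key_array. For example:

    import numpy as np

    n = 5  # stand-in for len(self)
    np.array(range(n)[slice(1, 4)])           # array([1, 2, 3])
    np.array(range(n)[slice(None, None, 2)])  # array([0, 2, 4])
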
pandas/core/arrays/timedeltas.py

Lines changed: 5 additions & 3 deletions
@@ -42,7 +42,6 @@
 from pandas.core.dtypes.common import (
     DT64NS_DTYPE,
     TD64NS_DTYPE,
-    is_categorical_dtype,
     is_dtype_equal,
     is_float_dtype,
     is_integer_dtype,
@@ -53,7 +52,10 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
-from pandas.core.dtypes.generic import ABCMultiIndex
+from pandas.core.dtypes.generic import (
+    ABCCategorical,
+    ABCMultiIndex,
+)
 from pandas.core.dtypes.missing import isna

 from pandas.core import nanops
@@ -970,7 +972,7 @@ def sequence_to_td64ns(
     elif not isinstance(data, (np.ndarray, ExtensionArray)):
         # GH#24539 e.g. xarray, dask object
         data = np.asarray(data)
-    elif is_categorical_dtype(data.dtype):
+    elif isinstance(data, ABCCategorical):
         data = data.categories.take(data.codes, fill_value=NaT)._values
         copy = False

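The new branch detects Categorical input by its ABC instead of inspecting the dtype; the code path it serves is building timedelta data from a Categorical, roughly as below (that this routes through the branch above is an assumption about the internals):

    import pandas as pd

    cat = pd.Categorical(pd.to_timedelta(["1 day", "2 days", "1 day"]))
    pd.TimedeltaIndex(cat)  # codes are mapped through the categories to rebuild td64 values
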