Skip to content

Commit 1e98a41

Browse files
committed
Merge remote-tracking branch 'upstream/main' into ref/is_range_indexer/step
2 parents c9339d5 + f5d754d commit 1e98a41

File tree

31 files changed

+209
-188
lines changed

31 files changed

+209
-188
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,11 @@ Performance improvements
272272
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
273273
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
274274
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
275-
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
275+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
276276
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
277-
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`)
278-
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`)
279-
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`)
277+
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
278+
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
279+
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
280280
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
281281
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
282282

@@ -289,6 +289,7 @@ Bug fixes
289289
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
290290
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
291291
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
292+
- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
292293
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
293294
- Fixed bug in :meth:`Series.rank` that didn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
294295

@@ -393,6 +394,7 @@ Other
393394
^^^^^
394395
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
395396
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
397+
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
396398
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning :class:`RangeIndex` columns (:issue:`57293`)
397399
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
398400
- Bug in the DataFrame Interchange Protocol implementation that was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ dependencies:
6262
# downstream packages
6363
- dask-core
6464
- seaborn-base
65+
- dask-expr
6566

6667
# local testing dependencies
6768
- moto

pandas/core/algorithms.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,10 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None):
439439
# Dispatch to extension dtype's unique.
440440
return values.unique()
441441

442+
if isinstance(values, ABCIndex):
443+
# Dispatch to Index's unique.
444+
return values.unique()
445+
442446
original = values
443447
hashtable, values = _get_hashtable_algo(values)
444448

pandas/core/groupby/generic.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,8 +1642,11 @@ def _wrap_applied_output(
16421642
first_not_none = next(com.not_none(*values), None)
16431643

16441644
if first_not_none is None:
1645-
# GH9684 - All values are None, return an empty frame.
1646-
return self.obj._constructor()
1645+
# GH9684 - All values are None, return an empty frame
1646+
# GH57775 - Ensure that columns and dtypes from original frame are kept.
1647+
result = self.obj._constructor(columns=data.columns)
1648+
result = result.astype(data.dtypes)
1649+
return result
16471650
elif isinstance(first_not_none, DataFrame):
16481651
return self._concat_objects(
16491652
values,

pandas/core/groupby/groupby.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,14 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
16361636
a 5
16371637
b 2
16381638
dtype: int64
1639+
1640+
Example 4: The function passed to ``apply`` returns ``None`` for one of the
1641+
groups. This group is filtered from the result:
1642+
1643+
>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
1644+
B C
1645+
0 1 4
1646+
1 2 6
16391647
"""
16401648
if isinstance(func, str):
16411649
if hasattr(self, func):

pandas/core/indexes/base.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4235,7 +4235,6 @@ def join(
42354235

42364236
return self._join_via_get_indexer(other, how, sort)
42374237

4238-
@final
42394238
def _join_empty(
42404239
self, other: Index, how: JoinHow, sort: bool
42414240
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:

pandas/core/indexes/multi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2063,7 +2063,7 @@ def remove_unused_levels(self) -> MultiIndex:
20632063
20642064
>>> mi2 = mi[2:].remove_unused_levels()
20652065
>>> mi2.levels
2066-
(Index([1], dtype='int64'), Index(['a', 'b'], dtype='object'))
2066+
(RangeIndex(start=1, stop=2, step=1), Index(['a', 'b'], dtype='object'))
20672067
"""
20682068
new_levels = []
20692069
new_codes = []

pandas/core/indexes/range.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -472,9 +472,15 @@ def _shallow_copy(self, values, name: Hashable = no_default):
472472

473473
if values.dtype.kind == "f":
474474
return Index(values, name=name, dtype=np.float64)
475-
if values.dtype.kind == "i" and values.ndim == 1 and len(values) > 1:
475+
if values.dtype.kind == "i" and values.ndim == 1:
476476
# GH 46675 & 43885: If values is equally spaced, return a
477477
# more memory-compact RangeIndex instead of Index with 64-bit dtype
478+
if len(values) == 0:
479+
return type(self)._simple_new(_empty_range, name=name)
480+
elif len(values) == 1:
481+
start = values[0]
482+
new_range = range(start, start + self.step, self.step)
483+
return type(self)._simple_new(new_range, name=name)
478484
diff = values[1] - values[0]
479485
if not missing.isna(diff) and lib.is_range(values, diff):
480486
new_range = range(values[0], values[-1] + diff, diff)
@@ -889,12 +895,19 @@ def symmetric_difference(
889895
result = result.rename(result_name)
890896
return result
891897

898+
def _join_empty(
899+
self, other: Index, how: JoinHow, sort: bool
900+
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
901+
if other.dtype.kind == "i":
902+
other = self._shallow_copy(other._values, name=other.name)
903+
return super()._join_empty(other, how=how, sort=sort)
904+
892905
def _join_monotonic(
893906
self, other: Index, how: JoinHow = "left"
894907
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
895908
# This currently only gets called for the monotonic increasing case
896909
if not isinstance(other, type(self)):
897-
maybe_ri = self._shallow_copy(other._values)
910+
maybe_ri = self._shallow_copy(other._values, name=other.name)
898911
if not isinstance(maybe_ri, type(self)):
899912
return super()._join_monotonic(other, how=how)
900913
other = maybe_ri
@@ -1070,6 +1083,8 @@ def __getitem__(self, key):
10701083
"""
10711084
Conserve RangeIndex type for scalar and slice keys.
10721085
"""
1086+
if key is Ellipsis:
1087+
key = slice(None)
10731088
if isinstance(key, slice):
10741089
return self._getitem_slice(key)
10751090
elif is_integer(key):
@@ -1089,17 +1104,20 @@ def __getitem__(self, key):
10891104
)
10901105
elif com.is_bool_indexer(key):
10911106
if isinstance(getattr(key, "dtype", None), ExtensionDtype):
1092-
np_key = key.to_numpy(dtype=bool, na_value=False)
1107+
key = key.to_numpy(dtype=bool, na_value=False)
10931108
else:
1094-
np_key = np.asarray(key, dtype=bool)
1095-
check_array_indexer(self._range, np_key) # type: ignore[arg-type]
1109+
key = np.asarray(key, dtype=bool)
1110+
check_array_indexer(self._range, key) # type: ignore[arg-type]
10961111
# Short circuit potential _shallow_copy check
1097-
if np_key.all():
1112+
if key.all():
10981113
return self._simple_new(self._range, name=self.name)
1099-
elif not np_key.any():
1114+
elif not key.any():
11001115
return self._simple_new(_empty_range, name=self.name)
1101-
return self.take(np.flatnonzero(np_key))
1102-
return super().__getitem__(key)
1116+
key = np.flatnonzero(key)
1117+
try:
1118+
return self.take(key)
1119+
except (TypeError, ValueError):
1120+
return super().__getitem__(key)
11031121

11041122
def _getitem_slice(self, slobj: slice) -> Self:
11051123
"""

pandas/core/internals/__init__.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ def __getattr__(name: str):
3535
return create_block_manager_from_blocks
3636

3737
if name in [
38-
"NumericBlock",
39-
"ObjectBlock",
4038
"Block",
4139
"ExtensionBlock",
4240
"DatetimeTZBlock",
@@ -49,25 +47,17 @@ def __getattr__(name: str):
4947
# on hard-coding stacklevel
5048
stacklevel=2,
5149
)
52-
if name == "NumericBlock":
53-
from pandas.core.internals.blocks import NumericBlock
54-
55-
return NumericBlock
56-
elif name == "DatetimeTZBlock":
50+
if name == "DatetimeTZBlock":
5751
from pandas.core.internals.blocks import DatetimeTZBlock
5852

5953
return DatetimeTZBlock
6054
elif name == "ExtensionBlock":
6155
from pandas.core.internals.blocks import ExtensionBlock
6256

6357
return ExtensionBlock
64-
elif name == "Block":
58+
else:
6559
from pandas.core.internals.blocks import Block
6660

6761
return Block
68-
else:
69-
from pandas.core.internals.blocks import ObjectBlock
70-
71-
return ObjectBlock
7262

7363
raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'")

pandas/core/internals/blocks.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2148,18 +2148,6 @@ def is_numeric(self) -> bool: # type: ignore[override]
21482148
return kind in "fciub"
21492149

21502150

2151-
class NumericBlock(NumpyBlock):
2152-
# this Block type is kept for backwards-compatibility
2153-
# TODO(3.0): delete and remove deprecation in __init__.py.
2154-
__slots__ = ()
2155-
2156-
2157-
class ObjectBlock(NumpyBlock):
2158-
# this Block type is kept for backwards-compatibility
2159-
# TODO(3.0): delete and remove deprecation in __init__.py.
2160-
__slots__ = ()
2161-
2162-
21632151
class NDArrayBackedExtensionBlock(EABackedBlock):
21642152
"""
21652153
Block backed by an NDArrayBackedExtensionArray

0 commit comments

Comments
 (0)