Skip to content

Commit ea0d6ec

Browse files
committed
Merge branch 'master' into misc/remove-docs
2 parents 8d8cee4 + ff2884a commit ea0d6ec

39 files changed

+758
-463
lines changed

doc/source/user_guide/window.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ be calculated with :meth:`~Rolling.apply` by specifying a separate column of wei
101101
102102
All windowing operations support a ``min_periods`` argument that dictates the minimum number of
103103
non-``np.nan`` values a window must have; otherwise, the resulting value is ``np.nan``.
104-
``min_peridos`` defaults to 1 for time-based windows and ``window`` for fixed windows
104+
``min_periods`` defaults to 1 for time-based windows and to ``window`` for fixed windows.
105105

106106
.. ipython:: python
107107

doc/source/whatsnew/v1.3.0.rst

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,38 @@ cast to ``dtype=object`` (:issue:`38709`)
302302
ser2
303303
304304
305+
.. _whatsnew_130.notable_bug_fixes.rolling_groupby_column:
306+
307+
GroupBy.rolling no longer returns grouped-by column in values
308+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
309+
310+
The group-by column will now be dropped from the result of a
311+
``groupby.rolling`` operation (:issue:`32262`)
312+
313+
.. ipython:: python
314+
315+
df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
316+
df
317+
318+
*Previous behavior*:
319+
320+
.. code-block:: ipython
321+
322+
In [1]: df.groupby("A").rolling(2).sum()
323+
Out[1]:
324+
A B
325+
A
326+
1 0 NaN NaN
327+
1 2.0 1.0
328+
2 2 NaN NaN
329+
3 3 NaN NaN
330+
331+
*New behavior*:
332+
333+
.. ipython:: python
334+
335+
df.groupby("A").rolling(2).sum()
336+
305337
.. _whatsnew_130.notable_bug_fixes.rolling_var_precision:
306338

307339
Removed artificial truncation in rolling variance and standard deviation
@@ -428,7 +460,7 @@ Deprecations
428460
- Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
429461
- Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`)
430462
- Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`, :issue:`40425`)
431-
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`)
463+
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and one of the functions raises an exception other than ``TypeError``; in a future version, any exception other than ``TypeError`` raised by ``func`` will be raised instead of being ignored (:issue:`40211`)
432464
- Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`)
433465
- Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
434466

@@ -506,6 +538,7 @@ Numeric
506538
- Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`)
507539
- Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
508540
- Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`)
541+
- Bug in ``rank`` method for :class:`Series`, :class:`DataFrame`, :class:`DataFrameGroupBy`, and :class:`SeriesGroupBy` treating the most negative ``int64`` value as missing (:issue:`32859`)
509542
- Bug in :func:`select_dtypes` behaving differently on Windows and Linux with ``include="int"`` (:issue:`36569`)
510543
- Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` where passing ``func="size"`` would operate on the entire ``DataFrame`` instead of on rows or columns (:issue:`39934`)
511544
- Bug in :meth:`DataFrame.transform` would raise ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`)

pandas/_libs/algos.pyx

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarra
490490
int64_t total_discordant = 0
491491
float64_t kendall_tau
492492
int64_t n_obs
493-
const int64_t[:] labels_n
493+
const intp_t[:] labels_n
494494

495495
N, K = (<object>mat).shape
496496

@@ -499,7 +499,7 @@ def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarra
499499

500500
ranked_mat = np.empty((N, K), dtype=np.float64)
501501
# For compatibility when calling rank_1d
502-
labels_n = np.zeros(N, dtype=np.int64)
502+
labels_n = np.zeros(N, dtype=np.intp)
503503

504504
for i in range(K):
505505
ranked_mat[:, i] = rank_1d(mat[:, i], labels_n)
@@ -591,16 +591,17 @@ def validate_limit(nobs: int, limit=None) -> int:
591591

592592
@cython.boundscheck(False)
593593
@cython.wraparound(False)
594-
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
594+
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
595+
# -> ndarray[intp_t, ndim=1]
595596
cdef:
596597
Py_ssize_t i, j, nleft, nright
597-
ndarray[int64_t, ndim=1] indexer
598+
ndarray[intp_t, ndim=1] indexer
598599
algos_t cur, next_val
599600
int lim, fill_count = 0
600601

601602
nleft = len(old)
602603
nright = len(new)
603-
indexer = np.empty(nright, dtype=np.int64)
604+
indexer = np.empty(nright, dtype=np.intp)
604605
indexer[:] = -1
605606

606607
lim = validate_limit(nright, limit)
@@ -737,15 +738,16 @@ D
737738
@cython.boundscheck(False)
738739
@cython.wraparound(False)
739740
def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
741+
# -> ndarray[intp_t, ndim=1]
740742
cdef:
741743
Py_ssize_t i, j, nleft, nright
742-
ndarray[int64_t, ndim=1] indexer
744+
ndarray[intp_t, ndim=1] indexer
743745
algos_t cur, prev
744746
int lim, fill_count = 0
745747

746748
nleft = len(old)
747749
nright = len(new)
748-
indexer = np.empty(nright, dtype=np.int64)
750+
indexer = np.empty(nright, dtype=np.intp)
749751
indexer[:] = -1
750752

751753
lim = validate_limit(nright, limit)
@@ -959,7 +961,8 @@ ctypedef fused rank_t:
959961
@cython.boundscheck(False)
960962
def rank_1d(
961963
ndarray[rank_t, ndim=1] values,
962-
const int64_t[:] labels,
964+
const intp_t[:] labels,
965+
bint is_datetimelike=False,
963966
ties_method="average",
964967
bint ascending=True,
965968
bint pct=False,
@@ -971,9 +974,12 @@ def rank_1d(
971974
Parameters
972975
----------
973976
values : array of rank_t values to be ranked
974-
labels : array containing unique label for each group, with its ordering
977+
labels : np.ndarray[np.intp]
978+
Array containing unique label for each group, with its ordering
975979
matching up to the corresponding record in `values`. If not called
976980
from a groupby operation, will be an array of 0's
981+
is_datetimelike : bool, default False
982+
True if `values` contains datetime-like entries.
977983
ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
978984
'average'
979985
* average: average rank of group
@@ -1029,7 +1035,7 @@ def rank_1d(
10291035

10301036
if rank_t is object:
10311037
mask = missing.isnaobj(masked_vals)
1032-
elif rank_t is int64_t:
1038+
elif rank_t is int64_t and is_datetimelike:
10331039
mask = (masked_vals == NPY_NAT).astype(np.uint8)
10341040
elif rank_t is float64_t:
10351041
mask = np.isnan(masked_vals).astype(np.uint8)
@@ -1056,7 +1062,7 @@ def rank_1d(
10561062
if rank_t is object:
10571063
nan_fill_val = NegInfinity()
10581064
elif rank_t is int64_t:
1059-
nan_fill_val = np.iinfo(np.int64).min
1065+
nan_fill_val = NPY_NAT
10601066
elif rank_t is uint64_t:
10611067
nan_fill_val = 0
10621068
else:
@@ -1272,6 +1278,7 @@ def rank_1d(
12721278
def rank_2d(
12731279
ndarray[rank_t, ndim=2] in_arr,
12741280
int axis=0,
1281+
bint is_datetimelike=False,
12751282
ties_method="average",
12761283
bint ascending=True,
12771284
na_option="keep",
@@ -1296,7 +1303,9 @@ def rank_2d(
12961303
tiebreak = tiebreakers[ties_method]
12971304

12981305
keep_na = na_option == 'keep'
1299-
check_mask = rank_t is not uint64_t
1306+
1307+
# For cases where a mask is not possible, we can avoid mask checks
1308+
check_mask = not (rank_t is uint64_t or (rank_t is int64_t and not is_datetimelike))
13001309

13011310
if axis == 0:
13021311
values = np.asarray(in_arr).T.copy()
@@ -1307,28 +1316,34 @@ def rank_2d(
13071316
if values.dtype != np.object_:
13081317
values = values.astype('O')
13091318

1310-
if rank_t is not uint64_t:
1319+
if check_mask:
13111320
if ascending ^ (na_option == 'top'):
13121321
if rank_t is object:
13131322
nan_value = Infinity()
13141323
elif rank_t is float64_t:
13151324
nan_value = np.inf
1316-
elif rank_t is int64_t:
1325+
1326+
# int64 and datetimelike
1327+
else:
13171328
nan_value = np.iinfo(np.int64).max
13181329

13191330
else:
13201331
if rank_t is object:
13211332
nan_value = NegInfinity()
13221333
elif rank_t is float64_t:
13231334
nan_value = -np.inf
1324-
elif rank_t is int64_t:
1335+
1336+
# int64 and datetimelike
1337+
else:
13251338
nan_value = NPY_NAT
13261339

13271340
if rank_t is object:
13281341
mask = missing.isnaobj2d(values)
13291342
elif rank_t is float64_t:
13301343
mask = np.isnan(values)
1331-
elif rank_t is int64_t:
1344+
1345+
# int64 and datetimelike
1346+
else:
13321347
mask = values == NPY_NAT
13331348

13341349
np.putmask(values, mask, nan_value)

pandas/_libs/algos_take_helper.pxi.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
219219
fill_value=np.nan):
220220
cdef:
221221
Py_ssize_t i, j, k, n, idx
222-
ndarray[int64_t] idx0 = indexer[0]
223-
ndarray[int64_t] idx1 = indexer[1]
222+
ndarray[intp_t] idx0 = indexer[0]
223+
ndarray[intp_t] idx1 = indexer[1]
224224
{{c_type_out}} fv
225225

226226
n = len(idx0)

0 commit comments

Comments
 (0)