Skip to content

Commit f912217

Browse files
committed
Merge branch 'master' into enh-bool-index
2 parents 7ef2dba + c3d3357 commit f912217

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+901
-415
lines changed

doc/source/whatsnew/v1.4.0.rst

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ Additionally there are specific enhancements to the HTML specific rendering:
111111
- :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`).
112112
- :meth:`.Styler.to_html` omits CSSStyle rules for hidden table elements as a performance enhancement (:issue:`43619`)
113113
- Custom CSS classes can now be directly specified without string replacement (:issue:`43686`)
114+
- Ability to render hyperlinks automatically via a new ``hyperlinks`` formatting keyword argument (:issue:`45058`)
114115

115116
There are also some LaTeX specific enhancements:
116117

@@ -364,10 +365,29 @@ second column is instead renamed to ``a.2``.
364365
365366
res
366367
367-
.. _whatsnew_140.notable_bug_fixes.notable_bug_fix3:
368+
.. _whatsnew_140.notable_bug_fixes.unstack_pivot_int32_limit:
368369

369-
notable_bug_fix3
370-
^^^^^^^^^^^^^^^^
370+
unstack and pivot_table no longer raise ValueError for results that would exceed int32 limit
371+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
372+
373+
Previously :meth:`DataFrame.pivot_table` and :meth:`DataFrame.unstack` would raise a ``ValueError`` if the operation
374+
could produce a result with more than ``2**31 - 1`` elements. This operation now raises a :class:`errors.PerformanceWarning`
375+
instead (:issue:`26314`).
376+
377+
*Previous behavior*:
378+
379+
.. code-block:: ipython
380+
381+
In [3]: df = DataFrame({"ind1": np.arange(2 ** 16), "ind2": np.arange(2 ** 16), "count": 0})
382+
In [4]: df.pivot_table(index="ind1", columns="ind2", values="count", aggfunc="count")
383+
ValueError: Unstacked DataFrame is too big, causing int32 overflow
384+
385+
*New behavior*:
386+
387+
.. code-block:: ipython
388+
389+
In [4]: df.pivot_table(index="ind1", columns="ind2", values="count", aggfunc="count")
390+
PerformanceWarning: The following operation may generate 4294967296 cells in the resulting pandas object.
371391
372392
.. ---------------------------------------------------------------------------
373393
@@ -590,6 +610,7 @@ Other Deprecations
590610
- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
591611
- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`)
592612
- Deprecated ``numeric_only=None`` in :meth:`DataFrame.rank`; in a future version ``numeric_only`` must be either ``True`` or ``False`` (the default) (:issue:`45036`)
613+
- Deprecated the behavior of :meth:`Timestamp.utcfromtimestamp`; in the future it will return a timezone-aware UTC :class:`Timestamp` (:issue:`22451`)
593614
- Deprecated :meth:`NaT.freq` (:issue:`45071`)
594615
-
595616

@@ -682,6 +703,7 @@ Datetimelike
682703
- Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`)
683704
- Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`)
684705
- Bug in :class:`DateOffset` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`)
706+
- Bug in :meth:`Timestamp.fromtimestamp` not supporting the ``tz`` argument (:issue:`45083`)
685707
-
686708

687709
Timedelta

pandas/_libs/tslibs/nattype.pyx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -378,9 +378,6 @@ class NaTType(_NaT):
378378
def __reduce__(self):
379379
return (__nat_unpickle, (None, ))
380380

381-
def __rdiv__(self, other):
382-
return _nat_rdivide_op(self, other)
383-
384381
def __rtruediv__(self, other):
385382
return _nat_rdivide_op(self, other)
386383

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,10 +1164,20 @@ class Timestamp(_Timestamp):
11641164
>>> pd.Timestamp.utcfromtimestamp(1584199972)
11651165
Timestamp('2020-03-14 15:32:52')
11661166
"""
1167+
# GH#22451
1168+
warnings.warn(
1169+
"The behavior of Timestamp.utcfromtimestamp is deprecated, in a "
1170+
"future version will return a timezone-aware Timestamp with UTC "
1171+
"timezone. To keep the old behavior, use "
1172+
"Timestamp.utcfromtimestamp(ts).tz_localize(None). "
1173+
"To get the future behavior, use Timestamp.fromtimestamp(ts, 'UTC')",
1174+
FutureWarning,
1175+
stacklevel=1,
1176+
)
11671177
return cls(datetime.utcfromtimestamp(ts))
11681178

11691179
@classmethod
1170-
def fromtimestamp(cls, ts):
1180+
def fromtimestamp(cls, ts, tz=None):
11711181
"""
11721182
Timestamp.fromtimestamp(ts)
11731183
@@ -1180,7 +1190,8 @@ class Timestamp(_Timestamp):
11801190
11811191
Note that the output may change depending on your local time.
11821192
"""
1183-
return cls(datetime.fromtimestamp(ts))
1193+
tz = maybe_get_tz(tz)
1194+
return cls(datetime.fromtimestamp(ts, tz))
11841195

11851196
def strftime(self, format):
11861197
"""

pandas/core/algorithms.py

Lines changed: 14 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,10 @@
6363
needs_i8_conversion,
6464
)
6565
from pandas.core.dtypes.concat import concat_compat
66-
from pandas.core.dtypes.dtypes import PandasDtype
66+
from pandas.core.dtypes.dtypes import (
67+
ExtensionDtype,
68+
PandasDtype,
69+
)
6770
from pandas.core.dtypes.generic import (
6871
ABCDatetimeArray,
6972
ABCExtensionArray,
@@ -489,7 +492,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:
489492
elif needs_i8_conversion(values.dtype):
490493
return isin(comps, values.astype(object))
491494

492-
elif is_extension_array_dtype(values.dtype):
495+
elif isinstance(values.dtype, ExtensionDtype):
493496
return isin(np.asarray(comps), np.asarray(values))
494497

495498
# GH16012
@@ -508,19 +511,7 @@ def f(c, v):
508511
f = np.in1d
509512

510513
else:
511-
# error: List item 0 has incompatible type "Union[Any, dtype[Any],
512-
# ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
513-
# Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
514-
# Any]]"
515-
# error: List item 1 has incompatible type "Union[Any, ExtensionDtype]";
516-
# expected "Union[dtype[Any], None, type, _SupportsDType, str, Tuple[Any,
517-
# Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]"
518-
# error: List item 1 has incompatible type "Union[dtype[Any], ExtensionDtype]";
519-
# expected "Union[dtype[Any], None, type, _SupportsDType, str, Tuple[Any,
520-
# Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]"
521-
common = np.find_common_type(
522-
[values.dtype, comps.dtype], [] # type: ignore[list-item]
523-
)
514+
common = np.find_common_type([values.dtype, comps.dtype], [])
524515
values = values.astype(common, copy=False)
525516
comps = comps.astype(common, copy=False)
526517
f = htable.ismember
@@ -932,7 +923,7 @@ def duplicated(
932923
return htable.duplicated(values, keep=keep)
933924

934925

935-
def mode(values, dropna: bool = True) -> Series:
926+
def mode(values: ArrayLike, dropna: bool = True) -> ArrayLike:
936927
"""
937928
Returns the mode(s) of an array.
938929
@@ -945,27 +936,17 @@ def mode(values, dropna: bool = True) -> Series:
945936
946937
Returns
947938
-------
948-
mode : Series
939+
np.ndarray or ExtensionArray
949940
"""
950-
from pandas import Series
951-
from pandas.core.indexes.api import default_index
952-
953941
values = _ensure_arraylike(values)
954942
original = values
955943

956-
# categorical is a fast-path
957-
if is_categorical_dtype(values.dtype):
958-
if isinstance(values, Series):
959-
# TODO: should we be passing `name` below?
960-
return Series(values._values.mode(dropna=dropna), name=values.name)
961-
return values.mode(dropna=dropna)
962-
963944
if needs_i8_conversion(values.dtype):
964-
if dropna:
965-
mask = values.isna()
966-
values = values[~mask]
967-
modes = mode(values.view("i8"))
968-
return modes.view(original.dtype)
945+
# Got here with ndarray; dispatch to DatetimeArray/TimedeltaArray.
946+
values = ensure_wrapped_if_datetimelike(values)
947+
# error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
948+
# ndarray[Any, Any]]" has no attribute "_mode"
949+
return values._mode(dropna=dropna) # type: ignore[union-attr]
969950

970951
values = _ensure_data(values)
971952

@@ -976,8 +957,7 @@ def mode(values, dropna: bool = True) -> Series:
976957
warn(f"Unable to sort modes: {err}")
977958

978959
result = _reconstruct_data(npresult, original.dtype, original)
979-
# Ensure index is type stable (should always use int index)
980-
return Series(result, index=default_index(len(result)))
960+
return result
981961

982962

983963
def rank(

pandas/core/arraylike.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -521,10 +521,12 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar
521521

522522
if "axis" not in kwargs:
523523
# For DataFrame reductions we don't want the default axis=0
524-
# FIXME: DataFrame.min ignores axis=None
525-
# FIXME: np.minimum.reduce(df) gets here bc axis is not in kwargs,
526-
# but np.minimum.reduce(df.values) behaves as if axis=0
527-
kwargs["axis"] = None
524+
# Note: np.min is not a ufunc, but uses array_function_dispatch,
525+
# so calls DataFrame.min (without ever getting here) with the np.min
526+
# default of axis=None, which DataFrame.min catches and changes to axis=0.
527+
# np.minimum.reduce(df) gets here bc axis is not in kwargs,
528+
# so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
529+
kwargs["axis"] = 0
528530

529531
# By default, numpy's reductions do not skip NaNs, so we have to
530532
# pass skipna=False

pandas/core/arrays/base.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
from pandas.core.algorithms import (
7474
factorize_array,
7575
isin,
76+
mode,
7677
rank,
7778
unique,
7879
)
@@ -1578,6 +1579,26 @@ def _quantile(
15781579

15791580
return result
15801581

1582+
def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT:
1583+
"""
1584+
Returns the mode(s) of the ExtensionArray.
1585+
1586+
Always returns `ExtensionArray` even if only one value.
1587+
1588+
Parameters
1589+
----------
1590+
dropna : bool, default True
1591+
Don't consider counts of NA values.
1592+
1593+
Returns
1594+
-------
1595+
same type as self
1596+
Sorted, if possible.
1597+
"""
1598+
# error: Incompatible return value type (got "Union[ExtensionArray,
1599+
# ndarray[Any, Any]]", expected "ExtensionArrayT")
1600+
return mode(self, dropna=dropna) # type: ignore[return-value]
1601+
15811602
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
15821603
if any(
15831604
isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs

pandas/core/arrays/categorical.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2229,7 +2229,7 @@ def max(self, *, skipna=True, **kwargs):
22292229
pointer = self._codes.max()
22302230
return self._wrap_reduction_result(None, pointer)
22312231

2232-
def mode(self, dropna=True):
2232+
def mode(self, dropna: bool = True) -> Categorical:
22332233
"""
22342234
Returns the mode(s) of the Categorical.
22352235
@@ -2244,6 +2244,15 @@ def mode(self, dropna=True):
22442244
-------
22452245
modes : `Categorical` (sorted)
22462246
"""
2247+
warn(
2248+
"Categorical.mode is deprecated and will be removed in a future version. "
2249+
"Use Series.mode instead.",
2250+
FutureWarning,
2251+
stacklevel=find_stack_level(),
2252+
)
2253+
return self._mode(dropna=dropna)
2254+
2255+
def _mode(self, dropna: bool = True) -> Categorical:
22472256
codes = self._codes
22482257
if dropna:
22492258
good = self._codes != -1

pandas/core/arrays/datetimelike.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
from pandas.core.algorithms import (
100100
checked_add_with_arr,
101101
isin,
102+
mode,
102103
unique1d,
103104
)
104105
from pandas.core.arraylike import OpsMixin
@@ -1531,6 +1532,17 @@ def median(self, *, axis: int | None = None, skipna: bool = True, **kwargs):
15311532
result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
15321533
return self._wrap_reduction_result(axis, result)
15331534

1535+
def _mode(self, dropna: bool = True):
1536+
values = self
1537+
if dropna:
1538+
mask = values.isna()
1539+
values = values[~mask]
1540+
1541+
i8modes = mode(values.view("i8"))
1542+
npmodes = i8modes.view(self._ndarray.dtype)
1543+
npmodes = cast(np.ndarray, npmodes)
1544+
return self._from_backing_data(npmodes)
1545+
15341546

15351547
class DatelikeOps(DatetimeLikeArrayMixin):
15361548
"""

pandas/core/arrays/floating.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def coerce_to_array(
127127
return values, mask
128128

129129
values = np.array(values, copy=copy)
130-
if is_object_dtype(values):
130+
if is_object_dtype(values.dtype):
131131
inferred_type = lib.infer_dtype(values, skipna=True)
132132
if inferred_type == "empty":
133133
pass

pandas/core/arrays/integer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def coerce_to_array(
177177

178178
values = np.array(values, copy=copy)
179179
inferred_type = None
180-
if is_object_dtype(values) or is_string_dtype(values):
180+
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
181181
inferred_type = lib.infer_dtype(values, skipna=True)
182182
if inferred_type == "empty":
183183
pass

pandas/core/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555

5656
from pandas.core import (
5757
algorithms,
58+
nanops,
5859
ops,
5960
)
6061
from pandas.core.accessor import DirNamesMixin
@@ -70,7 +71,6 @@
7071
ensure_wrapped_if_datetimelike,
7172
extract_array,
7273
)
73-
import pandas.core.nanops as nanops
7474

7575
if TYPE_CHECKING:
7676

pandas/core/common.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,14 @@ def require_length_match(data, index: Index):
562562
)
563563

564564

565-
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}
565+
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
566+
# whereas np.min and np.max (which directly call obj.min and obj.max)
567+
# default to axis=None.
568+
_builtin_table = {
569+
builtins.sum: np.sum,
570+
builtins.max: np.maximum.reduce,
571+
builtins.min: np.minimum.reduce,
572+
}
566573

567574
_cython_table = {
568575
builtins.sum: "sum",

0 commit comments

Comments
 (0)