12 changes: 10 additions & 2 deletions pandas/core/frame.py
@@ -77,6 +77,7 @@
ensure_platform_int,
infer_dtype_from_object,
is_bool_dtype,
is_datetime64_any_dtype,
is_dict_like,
is_dtype_equal,
is_extension_array_dtype,
@@ -88,6 +89,7 @@
is_list_like,
is_named_tuple,
is_object_dtype,
is_period_dtype,
is_scalar,
is_sequence,
needs_i8_conversion,
@@ -7789,11 +7791,13 @@ def _reduce(
self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
):

dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M")
dtype_is_dt = self.dtypes.apply(
lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x)
)
if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any():
warnings.warn(
"DataFrame.mean and DataFrame.median with numeric_only=None "
"will include datetime64 and datetime64tz columns in a "
"will include datetime64, datetime64tz, and PeriodDtype columns in a "
"future version.",
FutureWarning,
stacklevel=3,
@@ -7854,6 +7858,10 @@ def blk_func(values):
assert len(res) == max(list(res.keys())) + 1, res.keys()
out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype)
out.index = df.columns
if axis == 0 and df.dtypes.apply(needs_i8_conversion).any():
# FIXME: needs_i8_conversion check is a kludge, not sure
# why it is necessary in this case and this case alone
out[:] = coerce_to_dtypes(out.values, df.dtypes)
return out

if numeric_only is None:
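For context, a minimal sketch (made-up data, not taken from the PR) of the situation the widened check above targets: with `numeric_only=None`, `DataFrame.mean` currently drops datetime64 and period columns and emits the FutureWarning, since that exclusion is slated to change.

```python
import pandas as pd

df = pd.DataFrame(
    {
        "a": [1.0, 2.0, 3.0],
        "b": pd.date_range("2020-01-01", periods=3),
        "c": pd.period_range("2020-01", periods=3, freq="M"),
    }
)

# With numeric_only left as None (the default), the datetime64 and period
# columns are dropped from the reduction and the FutureWarning added above
# is emitted; previously only datetime64/datetime64tz columns triggered it.
df.mean()
```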
49 changes: 26 additions & 23 deletions pandas/core/nanops.py
@@ -7,7 +7,7 @@

from pandas._config import get_option

from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib
from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib
from pandas._typing import Dtype, Scalar
from pandas.compat._optional import import_optional_dependency

@@ -17,9 +17,7 @@
is_any_int_dtype,
is_bool_dtype,
is_complex,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_datetime64_any_dtype,
is_float,
is_float_dtype,
is_integer,
@@ -28,8 +26,10 @@
is_object_dtype,
is_scalar,
is_timedelta64_dtype,
needs_i8_conversion,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna

from pandas.core.construction import extract_array
@@ -134,10 +134,8 @@ def f(


def _bn_ok_dtype(dtype: Dtype, name: str) -> bool:
# Bottleneck chokes on datetime64
if not is_object_dtype(dtype) and not (
is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype)
):
# Bottleneck chokes on datetime64, PeriodDtype (or any EA)
if not is_object_dtype(dtype) and not needs_i8_conversion(dtype):

# GH 15507
# bottleneck does not properly upcast during the sum
@@ -283,17 +281,16 @@ def _get_values(
# with scalar fill_value. This guarantee is important for the
# maybe_upcast_putmask call below
assert is_scalar(fill_value)
values = extract_array(values, extract_numpy=True)

mask = _maybe_get_mask(values, skipna, mask)

values = extract_array(values, extract_numpy=True)
dtype = values.dtype

if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
if needs_i8_conversion(values):
# changing timedelta64/datetime64 to int64 needs to happen after
# finding `mask` above
values = getattr(values, "asi8", values)
values = values.view(np.int64)
values = np.asarray(values.view("i8"))

dtype_ok = _na_ok_dtype(dtype)

@@ -307,7 +304,8 @@

if skipna and copy:
values = values.copy()
if dtype_ok:
assert mask is not None # for mypy
if dtype_ok and mask.any():
np.putmask(values, mask, fill_value)

# promote if needed
@@ -325,13 +323,14 @@


def _na_ok_dtype(dtype) -> bool:
# TODO: what about datetime64tz? PeriodDtype?
return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64))
if needs_i8_conversion(dtype):
return False
return not issubclass(dtype.type, np.integer)


def _wrap_results(result, dtype: Dtype, fill_value=None):
""" wrap our results if needed """
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
if is_datetime64_any_dtype(dtype):
if fill_value is None:
# GH#24293
fill_value = iNaT
@@ -342,7 +341,8 @@ def _wrap_results(result, dtype: Dtype, fill_value=None):
result = np.nan
result = Timestamp(result, tz=tz)
else:
result = result.view(dtype)
# If we have float dtype, taking a view will give the wrong result
result = result.astype(dtype)
elif is_timedelta64_dtype(dtype):
if not isinstance(result, np.ndarray):
if result == fill_value:
@@ -356,6 +356,14 @@
else:
result = result.astype("m8[ns]").view(dtype)

elif isinstance(dtype, PeriodDtype):
if is_float(result) and result.is_integer():
result = int(result)
if is_integer(result):
result = Period._from_ordinal(result, freq=dtype.freq)
else:
raise NotImplementedError(type(result), result)

return result


@@ -542,12 +550,7 @@ def nanmean(values, axis=None, skipna=True, mask=None):
)
dtype_sum = dtype_max
dtype_count = np.float64
if (
is_integer_dtype(dtype)
or is_timedelta64_dtype(dtype)
or is_datetime64_dtype(dtype)
or is_datetime64tz_dtype(dtype)
):
if is_integer_dtype(dtype) or needs_i8_conversion(dtype):
dtype_sum = np.float64
elif is_float_dtype(dtype):
dtype_sum = dtype
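Taken together, the nanops changes funnel every dtype that needs i8 conversion through its int64 view and then box the reduced value back into the original type in `_wrap_results`. A rough sketch of that round trip (illustrative only, not the library's internal code path):

```python
import pandas as pd

# Datetime-like reductions work on the int64 ("i8") view of the data ...
dti = pd.date_range("2020-01-01", periods=4)
mean_i8 = dti.asi8.mean()

# ... and the result is boxed back into the original type afterwards.
print(pd.Timestamp(int(mean_i8)))

# The new PeriodDtype branch does the analogous thing with period ordinals.
pi = pd.period_range("2020-01", periods=3, freq="M")
mean_ordinal = pi.asi8.mean()
print(pd.Period._from_ordinal(int(mean_ordinal), freq=pi.freq))
```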
6 changes: 3 additions & 3 deletions pandas/core/series.py
@@ -1984,7 +1984,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs):
nan
"""
skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
i = nanops.nanargmin(self._values, skipna=skipna)
if i == -1:
return np.nan
return self.index[i]
Expand Down Expand Up @@ -2055,7 +2055,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs):
nan
"""
skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
i = nanops.nanargmax(self._values, skipna=skipna)
Review comment (Member):
nanargmax/nanargmin expect to get an ndarray. Due to this change, it is no longer guaranteed to be an ndarray. Reported this as #32749

So those lines should either be reverted, or another "convert to ndarray" function should be used (or nanargmax/nanargmin could be rewritten to support EAs, but personally I think it is much cleaner to keep those algos based on numpy arrays)

Review comment (Member):
@jbrockmendel can you respond to this?

Reply (Member, PR author):
> (or nanargmax/nanargmin could be rewritten to support EAs, but personally I think it is much cleaner to keep those algos based on numpy arrays)

I'd be fine with either of these options. Probably prefer both, actually: an EA-supporting public method and an ndarray-only private method for each of the relevant nanops funcs.

if i == -1:
return np.nan
return self.index[i]
@@ -2093,7 +2093,7 @@ def round(self, decimals=0, *args, **kwargs) -> "Series":
dtype: float64
"""
nv.validate_round(args, kwargs)
result = com.values_from_object(self).round(decimals)
result = self._values.round(decimals)
result = self._constructor(result, index=self.index).__finalize__(self)

return result
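As the review thread above points out, `self._values` is not guaranteed to be an ndarray the way `com.values_from_object(self)` was; for extension dtypes it returns the ExtensionArray itself. A small illustration (hypothetical data) of what nanargmin/nanargmax now receive:

```python
import pandas as pd

s = pd.Series(pd.period_range("2020-01", periods=3, freq="M"))

# .values densifies to an object-dtype ndarray of Period scalars, while
# ._values keeps the backing ExtensionArray (here a PeriodArray), which is
# what nanargmin/nanargmax are handed after this change.
print(type(s.values))
print(type(s._values))
```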
5 changes: 0 additions & 5 deletions pandas/tests/frame/test_analytics.py
@@ -875,11 +875,6 @@ def test_mean_datetimelike(self):
expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]})
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(
Review comment (Contributor):
I guess you should technically have a whatsnew note as this 'bug' is fixed (do in a follow-on)

reason="casts to object-dtype and then tries to add timestamps",
raises=TypeError,
strict=True,
)
def test_mean_datetimelike_numeric_only_false(self):
df = pd.DataFrame(
{
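For reference, the formerly-xfailed test exercises `mean(numeric_only=False)` on a frame with datetime-like columns; a hedged sketch of that kind of call (made-up data, not the test's exact fixture):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "A": [1.0, 2.0, 3.0],
        "B": pd.date_range("2020-01-01", periods=3),
    }
)

# Previously this path cast to object dtype and tried to add Timestamps,
# raising TypeError; with this change the datetime column is reduced via
# its int64 view and boxed back to a Timestamp.
result = df.mean(numeric_only=False)
print(result["B"])
```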