Skip to content

BUG: Resampler.aggregate raising TypeError instead of SpecificationError when missing keys have mixed dtypes #39028

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ Groupby/resample/rolling
- Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`)
- Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan summation (:issue:`38778`)
- Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`)
- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when the missing keys have mixed dtypes (:issue:`39025`)

Reshaping
^^^^^^^^^
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from pandas.core.dtypes.common import is_dict_like, is_list_like
from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries

from pandas.core.algorithms import safe_sort
from pandas.core.base import DataError, SpecificationError
import pandas.core.common as com
from pandas.core.indexes.api import Index
Expand Down Expand Up @@ -482,9 +483,10 @@ def transform_dict_like(

if obj.ndim != 1:
# Check for missing columns on a frame
cols = sorted(set(func.keys()) - set(obj.columns))
cols = set(func.keys()) - set(obj.columns)
if len(cols) > 0:
raise SpecificationError(f"Column(s) {cols} do not exist")
cols_sorted = list(safe_sort(list(cols)))
raise SpecificationError(f"Column(s) {cols_sorted} do not exist")

# Can't use func.values(); wouldn't work for a Series
if any(is_dict_like(v) for _, v in func.items()):
Expand Down Expand Up @@ -738,7 +740,11 @@ def agg_dict_like(
if isinstance(selected_obj, ABCDataFrame) and len(
selected_obj.columns.intersection(keys)
) != len(keys):
cols = sorted(set(keys) - set(selected_obj.columns.intersection(keys)))
cols = list(
safe_sort(
list(set(keys) - set(selected_obj.columns.intersection(keys))),
)
)
raise SpecificationError(f"Column(s) {cols} do not exist")

from pandas.core.reshape.concat import concat
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/frame/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,24 @@ def f(x, a, b, c):


def test_transform_missing_columns(axis):
# GH 35964
# GH#35964
df = DataFrame({"A": [1, 2], "B": [3, 4]})
match = re.escape("Column(s) ['C'] do not exist")
with pytest.raises(SpecificationError, match=match):
df.transform({"C": "cumsum"})


def test_transform_none_to_type():
# GH#34377
# Check that DataFrame.transform raises ValueError when the mapped function
# is ``int`` and the only value in the column is None.
df = DataFrame({"a": [None]})
# Expected (sub)string of the error message; pytest.raises uses it as a regex.
msg = "Transform function failed"
with pytest.raises(ValueError, match=msg):
df.transform({"a": int})


def test_transform_mixed_column_name_dtypes():
# GH#39025
# Check that DataFrame.transform raises SpecificationError when the dict
# passed to it contains keys of mixed types (an int and a str) that are not
# columns of the frame.
# The frame only has column "a"; keys 1 and "b" below are the missing ones.
df = DataFrame({"a": ["1"]})
# Escaped regex for the literal message "Column(s) [1, 'b'] do not exist".
msg = r"Column\(s\) \[1, 'b'\] do not exist"
with pytest.raises(SpecificationError, match=msg):
df.transform({"a": int, 1: str, "b": int})
15 changes: 15 additions & 0 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,21 @@ def test_agg_consistency():
r.agg({"r1": "mean", "r2": "sum"})


def test_agg_consistency_int_str_column_mix():
# GH#39025
# Check that Resampler.agg raises SpecificationError when the dict passed to
# it refers to keys of mixed types (an int and a str) that are not columns
# of the resampled frame.
# Frame with 1000 rows and two columns labelled 1 (int) and "a" (str).
df = DataFrame(
np.random.randn(1000, 2),
index=pd.date_range("1/1/2012", freq="S", periods=1000),
columns=[1, "a"],
)

r = df.resample("3T")

# Neither 2 nor "b" is a column of df, so both must appear in the message.
# Escaped regex for the literal message "Column(s) [2, 'b'] do not exist".
msg = r"Column\(s\) \[2, 'b'\] do not exist"
with pytest.raises(pd.core.base.SpecificationError, match=msg):
r.agg({2: "mean", "b": "sum"})


# TODO: once GH 14008 is fixed, move these tests into
# `Base` test class

Expand Down
10 changes: 1 addition & 9 deletions pandas/tests/series/apply/test_series_transform.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas import DataFrame, Series, concat
from pandas import Series, concat
import pandas._testing as tm
from pandas.core.base import SpecificationError
from pandas.core.groupby.base import transformation_kernels
Expand Down Expand Up @@ -65,14 +65,6 @@ def test_transform_wont_agg(string_series):
string_series.transform(["sqrt", "max"])


def test_transform_none_to_type():
# GH34377
df = DataFrame({"a": [None]})
msg = "Transform function failed"
with pytest.raises(ValueError, match=msg):
df.transform({"a": int})


def test_transform_axis_1_raises():
# GH 35964
msg = "No axis named 1 for object type Series"
Expand Down