From ee16f26f49801741b2fc943b013b1f29e176fa92 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 7 Jan 2021 22:45:10 +0100 Subject: [PATCH 01/11] Fix resample missing columns bug --- pandas/core/aggregation.py | 10 ++++++---- pandas/tests/resample/test_resample_api.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index c64f0bd71cf84..1d619d0b8fa24 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -735,10 +735,12 @@ def agg_dict_like( # deprecation of renaming keys # GH 15931 keys = list(arg.keys()) - if isinstance(selected_obj, ABCDataFrame) and len( - selected_obj.columns.intersection(keys) - ) != len(keys): - cols = sorted(set(keys) - set(selected_obj.columns.intersection(keys))) + common_keys = selected_obj.columns.intersection(keys) + if isinstance(selected_obj, ABCDataFrame) and len(common_keys) != len(keys): + cols = sorted( + set(keys) - set(common_keys), + key=lambda col: (isinstance(col, str), col), + ) raise SpecificationError(f"Column(s) {cols} do not exist") from pandas.core.reshape.concat import concat diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 2cd9bb70385bf..d217957cbe08a 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -297,6 +297,21 @@ def test_agg_consistency(): r.agg({"r1": "mean", "r2": "sum"}) +def test_agg_consistency_int_str_column_mix(): + # GH#39025 + df = DataFrame( + np.random.randn(1000, 2), + index=pd.date_range("1/1/2012", freq="S", periods=1000), + columns=[1, "a"], + ) + + r = df.resample("3T") + + msg = r"Column\(s\) \[2, 'b'\] do not exist" + with pytest.raises(pd.core.base.SpecificationError, match=msg): + r.agg({2: "mean", "b": "sum"}) + + # TODO: once GH 14008 is fixed, move these tests into # `Base` test class From 4f9c2826a2b38de383841e49ac354e90e7dc1624 Mon Sep 17 00:00:00 2001 From: phofl 
Date: Thu, 7 Jan 2021 22:54:39 +0100 Subject: [PATCH 02/11] Add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 886469837d184..5c93f14032849 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -306,6 +306,7 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`) - Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan summation (:issue:`38778`) - Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`) +- Bug in :meth:`.Resampler.aggregate` raising ``TypeError`` instead of ``SpecificationError`` when columns and missing keys had mixed dtypes (:issue:`39025`) Reshaping ^^^^^^^^^ From 2c7e58eefeaff33ca8f37735df46a84a66760681 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 7 Jan 2021 22:56:51 +0100 Subject: [PATCH 03/11] Change whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 5c93f14032849..1ea6ab0ded2c1 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -306,7 +306,7 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`) - Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan summation (:issue:`38778`) - Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through 
using Kahan summation (:issue:`38934`) -- Bug in :meth:`.Resampler.aggregate` raising ``TypeError`` instead of ``SpecificationError`` when columns and missing keys had mixed dtypes (:issue:`39025`) +- Bug in :meth:`.Resampler.aggregate` raising ``TypeError`` instead of ``SpecificationError`` when missing keys having mixed dtypes (:issue:`39025`) Reshaping ^^^^^^^^^ From d5c3a66c1168d86549ebdfb6f18fb99c1e062bc3 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 7 Jan 2021 23:48:13 +0100 Subject: [PATCH 04/11] Fix bug --- pandas/core/aggregation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 1d619d0b8fa24..e58f4efc3d0aa 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -735,10 +735,11 @@ def agg_dict_like( # deprecation of renaming keys # GH 15931 keys = list(arg.keys()) - common_keys = selected_obj.columns.intersection(keys) - if isinstance(selected_obj, ABCDataFrame) and len(common_keys) != len(keys): + if isinstance(selected_obj, ABCDataFrame) and len( + selected_obj.columns.intersection(keys) + ) != len(keys): cols = sorted( - set(keys) - set(common_keys), + set(keys) - set(selected_obj.columns.intersection(keys)), key=lambda col: (isinstance(col, str), col), ) raise SpecificationError(f"Column(s) {cols} do not exist") From f612899512379b9be1728deeb30867ac1660244a Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 8 Jan 2021 10:02:41 +0100 Subject: [PATCH 05/11] Add safe sort --- pandas/core/aggregation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index e58f4efc3d0aa..f19ab576fc848 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -35,6 +35,7 @@ from pandas.core.dtypes.common import is_dict_like, is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries +from pandas.core.algorithms import safe_sort from 
pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.indexes.api import Index @@ -738,9 +739,8 @@ def agg_dict_like( if isinstance(selected_obj, ABCDataFrame) and len( selected_obj.columns.intersection(keys) ) != len(keys): - cols = sorted( + cols = safe_sort( set(keys) - set(selected_obj.columns.intersection(keys)), - key=lambda col: (isinstance(col, str), col), ) raise SpecificationError(f"Column(s) {cols} do not exist") From 7ab627df8922765a304cea3df0cc21536b783be7 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 8 Jan 2021 11:09:18 +0100 Subject: [PATCH 06/11] Add list calls --- pandas/core/aggregation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index f19ab576fc848..44b1d97842669 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -739,8 +739,10 @@ def agg_dict_like( if isinstance(selected_obj, ABCDataFrame) and len( selected_obj.columns.intersection(keys) ) != len(keys): - cols = safe_sort( - set(keys) - set(selected_obj.columns.intersection(keys)), + cols = list( + safe_sort( + list(set(keys) - set(selected_obj.columns.intersection(keys))), + ) ) raise SpecificationError(f"Column(s) {cols} do not exist") From e40a9d2c5f9a4defef931721de00191398b7b5db Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 8 Jan 2021 21:45:08 +0100 Subject: [PATCH 07/11] Fix other bug --- pandas/core/aggregation.py | 3 ++- pandas/tests/series/apply/test_series_transform.py | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 44b1d97842669..88c1c4d7353bc 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -483,8 +483,9 @@ def transform_dict_like( if obj.ndim != 1: # Check for missing columns on a frame - cols = sorted(set(func.keys()) - set(obj.columns)) + cols = set(func.keys()) - set(obj.columns) if len(cols) > 0: + cols 
= list(safe_sort(list(cols))) raise SpecificationError(f"Column(s) {cols} do not exist") # Can't use func.values(); wouldn't work for a Series diff --git a/pandas/tests/series/apply/test_series_transform.py b/pandas/tests/series/apply/test_series_transform.py index 992aaa540a65f..af8712db03e4f 100644 --- a/pandas/tests/series/apply/test_series_transform.py +++ b/pandas/tests/series/apply/test_series_transform.py @@ -73,6 +73,14 @@ def test_transform_none_to_type(): df.transform({"a": int}) +def test_transform_mixed_column_name_dtypes(): + # GH39025 + df = DataFrame({"a": ["1"]}) + msg = r"Column\(s\) \[1, 'b'\] do not exist" + with pytest.raises(SpecificationError, match=msg): + df.transform({"a": int, 1: str, "b": int}) + + def test_transform_axis_1_raises(): # GH 35964 msg = "No axis named 1 for object type Series" From ea32dbafe5c36ef6198126c4ef0fa32371f4c3ec Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 8 Jan 2021 21:47:46 +0100 Subject: [PATCH 08/11] Move tests --- .../tests/frame/apply/test_frame_transform.py | 16 ++++++++++++++++ .../series/apply/test_series_transform.py | 18 +----------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pandas/tests/frame/apply/test_frame_transform.py b/pandas/tests/frame/apply/test_frame_transform.py index db5b2f3d86dfe..74378e6821cd0 100644 --- a/pandas/tests/frame/apply/test_frame_transform.py +++ b/pandas/tests/frame/apply/test_frame_transform.py @@ -258,3 +258,19 @@ def test_transform_missing_columns(axis): match = re.escape("Column(s) ['C'] do not exist") with pytest.raises(SpecificationError, match=match): df.transform({"C": "cumsum"}) + + +def test_transform_none_to_type(): + # GH34377 + df = DataFrame({"a": [None]}) + msg = "Transform function failed" + with pytest.raises(ValueError, match=msg): + df.transform({"a": int}) + + +def test_transform_mixed_column_name_dtypes(): + # GH39025 + df = DataFrame({"a": ["1"]}) + msg = r"Column\(s\) \[1, 'b'\] do not exist" + with 
pytest.raises(SpecificationError, match=msg): + df.transform({"a": int, 1: str, "b": int}) diff --git a/pandas/tests/series/apply/test_series_transform.py b/pandas/tests/series/apply/test_series_transform.py index af8712db03e4f..73cc789c6eb3a 100644 --- a/pandas/tests/series/apply/test_series_transform.py +++ b/pandas/tests/series/apply/test_series_transform.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series, concat +from pandas import Series, concat import pandas._testing as tm from pandas.core.base import SpecificationError from pandas.core.groupby.base import transformation_kernels @@ -65,22 +65,6 @@ def test_transform_wont_agg(string_series): string_series.transform(["sqrt", "max"]) -def test_transform_none_to_type(): - # GH34377 - df = DataFrame({"a": [None]}) - msg = "Transform function failed" - with pytest.raises(ValueError, match=msg): - df.transform({"a": int}) - - -def test_transform_mixed_column_name_dtypes(): - # GH39025 - df = DataFrame({"a": ["1"]}) - msg = r"Column\(s\) \[1, 'b'\] do not exist" - with pytest.raises(SpecificationError, match=msg): - df.transform({"a": int, 1: str, "b": int}) - - def test_transform_axis_1_raises(): # GH 35964 msg = "No axis named 1 for object type Series" From 21c498687699c2592a605896f362bd68d92e7d49 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 8 Jan 2021 21:48:17 +0100 Subject: [PATCH 09/11] Add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1ea6ab0ded2c1..61e747e1d5a53 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -306,7 +306,7 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`) - Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan 
summation (:issue:`38778`) - Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`) -- Bug in :meth:`.Resampler.aggregate` raising ``TypeError`` instead of ``SpecificationError`` when missing keys having mixed dtypes (:issue:`39025`) +- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`) Reshaping ^^^^^^^^^ From d1ae40e8b454cdd1ed2ac77a2053b495e4f8ec06 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 8 Jan 2021 21:51:10 +0100 Subject: [PATCH 10/11] Add hash --- pandas/tests/frame/apply/test_frame_transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/apply/test_frame_transform.py b/pandas/tests/frame/apply/test_frame_transform.py index 74378e6821cd0..bff0306a50ee6 100644 --- a/pandas/tests/frame/apply/test_frame_transform.py +++ b/pandas/tests/frame/apply/test_frame_transform.py @@ -253,7 +253,7 @@ def f(x, a, b, c): def test_transform_missing_columns(axis): - # GH 35964 + # GH#35964 df = DataFrame({"A": [1, 2], "B": [3, 4]}) match = re.escape("Column(s) ['C'] do not exist") with pytest.raises(SpecificationError, match=match): @@ -261,7 +261,7 @@ def test_transform_missing_columns(axis): def test_transform_none_to_type(): - # GH34377 + # GH#34377 df = DataFrame({"a": [None]}) msg = "Transform function failed" with pytest.raises(ValueError, match=msg): From d67bc4263eeb53ff65f8bb13bce052a984c03670 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 8 Jan 2021 22:45:45 +0100 Subject: [PATCH 11/11] Fix mypy problem. 
--- pandas/core/aggregation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 88c1c4d7353bc..cd169a250b49b 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -485,8 +485,8 @@ def transform_dict_like( # Check for missing columns on a frame cols = set(func.keys()) - set(obj.columns) if len(cols) > 0: - cols = list(safe_sort(list(cols))) - raise SpecificationError(f"Column(s) {cols} do not exist") + cols_sorted = list(safe_sort(list(cols))) + raise SpecificationError(f"Column(s) {cols_sorted} do not exist") # Can't use func.values(); wouldn't work for a Series if any(is_dict_like(v) for _, v in func.items()):