From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/16] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 1bcf32565417ce6e54f6787435021e09b2175259 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 11 Aug 2019 17:00:26 +0200 Subject: [PATCH 02/16] Fix issue 27472 --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/core/groupby/base.py | 4 +++- pandas/tests/groupby/test_transform.py | 10 ++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index fb67decb46b64..3b75c0b973072 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -123,7 +123,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) - -- +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``cumcount`` fails (:issue:`27472`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index fc3bb69afd0cb..527f31eb80c80 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -100,7 +100,9 @@ def _gotitem(self, key, ndim, subset=None): # cythonized transformations or canned "agg+broadcast", which do not # require postprocessing of the result by transform. -cythonized_kernels = frozenset(["cumprod", "cumsum", "shift", "cummin", "cummax"]) +cythonized_kernels = frozenset( + ["cumprod", "cumsum", "shift", "cummin", "cummax", "cumcount"] +) cython_cast_blacklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"]) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index d3972e6ba9008..a795d66e8228e 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1074,3 +1074,13 @@ def test_transform_lambda_with_datetimetz(): name="time", ) assert_series_equal(result, expected) + + +def test_transform_cumcount(): + # GH 27472 + df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))) + g = df.groupby(np.repeat([0, 1], 3)) + + result = g.transform("cumcount") + expected = Series([0, 1, 2, 0, 1, 2], dtype="int64") + assert_series_equal(result, expected) From 42fdb0b6f7ae2a4193c5324e4f31b2d53c3197be Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 11 Aug 2019 17:28:55 +0200 Subject: [PATCH 03/16] prettier --- pandas/tests/groupby/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index a795d66e8228e..8358e5f9ff59d 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1082,5 +1082,5 @@ def test_transform_cumcount(): g = df.groupby(np.repeat([0, 1], 3)) result = g.transform("cumcount") - expected = Series([0, 1, 2, 0, 1, 2], dtype="int64") + expected = g.cumcount() assert_series_equal(result, expected) From 9b1af14b3ff7b985338fd69e24dcd7a2b168267e Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 11 Aug 2019 18:41:30 +0200 Subject: [PATCH 04/16] Fix issue 27468 --- pandas/core/groupby/base.py | 2 +- pandas/core/groupby/generic.py | 2 +- pandas/tests/groupby/test_transform.py | 41 +++++++++++++++++++++++--- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 527f31eb80c80..d67f8fad4f15c 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -122,7 +122,6 @@ def _gotitem(self, key, ndim, subset=None): "mean", "median", "min", - "ngroup", "nth", "nunique", "prod", @@ -160,6 +159,7 @@ def _gotitem(self, key, ndim, subset=None): "rank", "shift", "tshift", + "ngroup" ] ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ea2bd22cccc3d..2b37ca5d26abe 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -583,7 +583,7 @@ def transform(self, func, *args, **kwargs): if not (func in base.transform_kernel_whitelist): msg = "'{func}' is not a valid function name for transform(name)" raise ValueError(msg.format(func=func)) - if func in base.cythonized_kernels: + if func in base.cythonized_kernels or func in base.transformation_kernels: # cythonized transformation or canned "reduction+broadcast" return getattr(self, func)(*args, **kwargs) else: diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 8358e5f9ff59d..8aa91730b49bc 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1034,8 +1034,6 @@ def test_transform_agg_by_name(reduction_func, obj): func = reduction_func g = obj.groupby(np.repeat([0, 1], 3)) - if func == "ngroup": # GH#27468 - pytest.xfail("TODO: g.transform('ngroup') doesn't work") if func == "size": # GH#27469 pytest.xfail("TODO: g.transform('size') doesn't work") @@ -1076,11 +1074,46 @@ def test_transform_lambda_with_datetimetz(): assert_series_equal(result, expected) -def test_transform_cumcount(): - # GH 27472 +def test_transform_cumcount_ngroup(): df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))) g = df.groupby(np.repeat([0, 1], 3)) + # GH 27472 result = g.transform("cumcount") expected = g.cumcount() assert_series_equal(result, expected) + + # GH 27468 + result = g.transform("ngroup") + expected = g.ngroup() + assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func", + [ + "backfill", + "bfill", + "cumcount", + "cummax", + "cummin", + "cumprod", + "cumsum", + "diff", + "ffill", + "pad", + "pct_change", + "rank", + "shift", + "ngroup", + ], +) +def test_transformation_kernels_length(func): + # This test is to evaluate if after transformation, the index + # of transformed data is still the same with original DataFrame + # TODO: exceptions are fillna, tshfit and corrwith + df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))) + g = df.groupby(np.repeat([0, 1], 3)) + + result = g.transform(func) + assert (result.index == df.index).all() From d520bccc180bf354f561b6c56c8e3241e70263f9 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 11 Aug 2019 18:43:03 +0200 Subject: [PATCH 05/16] Add code commit --- pandas/core/groupby/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2b37ca5d26abe..c4257917fcb91 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -583,6 +583,7 @@ def transform(self, func, *args, **kwargs): if not (func in base.transform_kernel_whitelist): msg = "'{func}' is not a valid function name for transform(name)" raise ValueError(msg.format(func=func)) + # transformation are added as well since they are broadcasted already if func in base.cythonized_kernels or func in base.transformation_kernels: # cythonized transformation or canned "reduction+broadcast" return getattr(self, func)(*args, **kwargs) From c298eafaf608ea12b62e9738b2e7ba28110792b8 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 11 Aug 2019 18:44:44 +0200 Subject: [PATCH 06/16] Update whatsnew --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index e7e05af378232..b7a0256d7c100 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -121,7 +121,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) - Bug in windowing over read-only arrays (:issue:`27766`) - -- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``cumcount`` fails (:issue:`27472`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``cumcount`` and ``ngroup`` fail (:issue:`27472` and :issue:`27468`) Reshaping ^^^^^^^^^ From c66ec838701aa709ab54801b56ec332029483bd2 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 12 Aug 2019 20:04:51 +0200 Subject: [PATCH 07/16] revert change --- doc/source/whatsnew/v0.25.1.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index a81815bd25cd0..1e03e7aa38055 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -120,9 +120,8 @@ Groupby/resample/rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) - Bug in windowing over read-only arrays (:issue:`27766`) -- -- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``cumcount`` and ``ngroup`` fail (:issue:`27472` and :issue:`27468`) - Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``cumcount`` and ``ngroup`` fail (:issue:`27472` and :issue:`27468`) Reshaping ^^^^^^^^^ From 4dc07ebac14707a46bc8e36861462430b43fdbda Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 12 Aug 2019 20:37:44 +0200 Subject: [PATCH 08/16] Code change based on review --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/tests/groupby/test_transform.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 1e03e7aa38055..447b165cef308 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -121,7 +121,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) - Bug in windowing over read-only arrays (:issue:`27766`) - Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`) -- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``cumcount`` and ``ngroup`` fail (:issue:`27472` and :issue:`27468`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``'cumcount'`` and ``'ngroup'`` fail (:issue:`27472` and :issue:`27468`) Reshaping ^^^^^^^^^ diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 8aa91730b49bc..1dc7441162f96 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1106,12 +1106,17 @@ def test_transform_cumcount_ngroup(): "rank", "shift", "ngroup", + "fillna", + "tshift", + "corrwith", ], ) +@pytest.param("fillna", marks=pytest.mark.xfail("TODO: potential bug")) +@pytest.param("tshift", marks=pytest.mark.xfail("should apply to ts data")) +@pytest.param("corrwith", marks=pytest.mark.xfail("Inapplicable to the data")) def test_transformation_kernels_length(func): # This test is to evaluate if after transformation, the index # of transformed data is still the same with original DataFrame - # TODO: exceptions are fillna, tshfit and corrwith df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))) g = df.groupby(np.repeat([0, 1], 3)) From 9d60bbb177918ce983247068037e2b9400ebf628 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 12 Aug 2019 21:00:14 +0200 Subject: [PATCH 09/16] Fix test --- pandas/tests/groupby/test_transform.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 1dc7441162f96..d4f6deb89bbba 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1106,14 +1106,15 @@ def test_transform_cumcount_ngroup(): "rank", "shift", "ngroup", - "fillna", - "tshift", - "corrwith", + pytest.param("fillna", marks=pytest.mark.xfail(reason="TODO: potential bug")), + pytest.param( + "tshift", marks=pytest.mark.xfail(reason="should apply to ts data") + ), + pytest.param( + "corrwith", marks=pytest.mark.xfail(reason="Inapplicable to the data") + ), ], ) -@pytest.param("fillna", marks=pytest.mark.xfail("TODO: potential bug")) -@pytest.param("tshift", marks=pytest.mark.xfail("should apply to ts data")) -@pytest.param("corrwith", marks=pytest.mark.xfail("Inapplicable to the data")) def test_transformation_kernels_length(func): # This test is to evaluate if after transformation, the index # of transformed data is still the same with original DataFrame From 0378a7445b68f839505461482db10e5202a5680b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 12 Aug 2019 22:17:21 +0200 Subject: [PATCH 10/16] Fix linting error --- pandas/core/groupby/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index d67f8fad4f15c..889fb2fbb75ae 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -159,7 +159,7 @@ def _gotitem(self, key, ndim, subset=None): "rank", "shift", "tshift", - "ngroup" + "ngroup", ] ) From 2a8e1ede0d784484b6ab38c8b0472cd58a560db9 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 13 Aug 2019 19:22:27 +0200 Subject: [PATCH 11/16] try to push again and pass flaky test --- pandas/tests/groupby/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index d4f6deb89bbba..5394a1a809f89 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1108,7 +1108,7 @@ def test_transform_cumcount_ngroup(): "ngroup", pytest.param("fillna", marks=pytest.mark.xfail(reason="TODO: potential bug")), pytest.param( - "tshift", marks=pytest.mark.xfail(reason="should apply to ts data") + "tshift", marks=pytest.mark.xfail(reason="Untested, should apply to ts data") ), pytest.param( "corrwith", marks=pytest.mark.xfail(reason="Inapplicable to the data") From 99203443e3c4e77e1f8d695e2b0ff04c7d905b42 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 13 Aug 2019 19:39:30 +0200 Subject: [PATCH 12/16] resumbit pr --- pandas/tests/groupby/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 5394a1a809f89..e6dddc9b18205 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1108,7 +1108,7 @@ def test_transform_cumcount_ngroup(): "ngroup", pytest.param("fillna", marks=pytest.mark.xfail(reason="TODO: potential bug")), pytest.param( - "tshift", marks=pytest.mark.xfail(reason="Untested, should apply to ts data") + "tshift", marks=pytest.mark.xfail(reason="Should apply to ts data") ), pytest.param( "corrwith", marks=pytest.mark.xfail(reason="Inapplicable to the data") From 9012e5337ba2190f6c7bcd0ea8b1cf3e47aef1cc Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 13 Aug 2019 21:50:15 +0200 Subject: [PATCH 13/16] Code change based on review --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/tests/groupby/test_transform.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 447b165cef308..ae5b1ec892aa9 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -121,7 +121,7 @@ Groupby/resample/rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) - Bug in windowing over read-only arrays (:issue:`27766`) - Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`) -- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``'cumcount'`` and ``'ngroup'`` fail (:issue:`27472` and :issue:`27468`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``'cumcount'`` and ``'ngroup'`` fail (:issue:`27472`, :issue:`27468`) Reshaping ^^^^^^^^^ diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index e6dddc9b18205..7b212b4a09dc2 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -1106,12 +1106,15 @@ def test_transform_cumcount_ngroup(): "rank", "shift", "ngroup", - pytest.param("fillna", marks=pytest.mark.xfail(reason="TODO: potential bug")), pytest.param( - "tshift", marks=pytest.mark.xfail(reason="Should apply to ts data") + "fillna", marks=pytest.mark.xfail(reason="GH27905: potential bug") ), pytest.param( - "corrwith", marks=pytest.mark.xfail(reason="Inapplicable to the data") + "tshift", marks=pytest.mark.xfail(reason="GH27905: Should apply to ts data") + ), + pytest.param( + "corrwith", + marks=pytest.mark.xfail(reason="GH27905: Inapplicable to the data"), ), ], ) From 8d360a2664f6c654e3e3e2d2147c10aa0f34507d Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 15 Aug 2019 09:15:45 +0200 Subject: [PATCH 14/16] Code change based on review --- pandas/core/groupby/generic.py | 1 + pandas/tests/groupby/test_transform.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c4257917fcb91..9f350411084f3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -583,6 +583,7 @@ def transform(self, func, *args, **kwargs): if not (func in base.transform_kernel_whitelist): msg = "'{func}' is not a valid function name for transform(name)" raise ValueError(msg.format(func=func)) + # transformation are added as well since they are broadcasted already if func in base.cythonized_kernels or func in base.transformation_kernels: # cythonized transformation or canned "reduction+broadcast" diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 7b212b4a09dc2..f41120a146aae 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -20,7 +20,11 @@ ) from pandas.core.groupby.groupby import DataError from pandas.util import testing as tm -from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.util.testing import ( + assert_frame_equal, + assert_series_equal, + assert_index_equal, +) def assert_fp_equal(a, b): @@ -1107,7 +1111,8 @@ def test_transform_cumcount_ngroup(): "shift", "ngroup", pytest.param( - "fillna", marks=pytest.mark.xfail(reason="GH27905: potential bug") + "fillna", + marks=pytest.mark.xfail(reason="GH27905: 'fillna' get empty DataFrame now"), ), pytest.param( "tshift", marks=pytest.mark.xfail(reason="GH27905: Should apply to ts data") @@ -1125,4 +1130,4 @@ def test_transformation_kernels_length(func): g = df.groupby(np.repeat([0, 1], 3)) result = g.transform(func) - assert (result.index == df.index).all() + assert_index_equal(result.index, df.index) From e54f024abadc7ddae7804241bef621811f0733dd Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 15 Aug 2019 09:43:19 +0200 Subject: [PATCH 15/16] Fix linting --- pandas/tests/groupby/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index f41120a146aae..63104e978839f 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -22,8 +22,8 @@ from pandas.util import testing as tm from pandas.util.testing import ( assert_frame_equal, - assert_series_equal, assert_index_equal, + assert_series_equal, ) From 45c533977f8149bba87c37049c3e266aa0fb9297 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 23 Aug 2019 21:38:15 +0200 Subject: [PATCH 16/16] remove from whatsnew --- doc/source/whatsnew/v0.25.1.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 1bf0058589cc0..6bf9a3b705527 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -120,7 +120,6 @@ Groupby/resample/rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) - Bug in windowing over read-only arrays (:issue:`27766`) - Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`) -- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where ``'cumcount'`` and ``'ngroup'`` fail (:issue:`27472`, :issue:`27468`) Reshaping ^^^^^^^^^