From d653961913b2a8f3188f5a678ec95a15ac1f8bad Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Sat, 2 Jul 2022 01:12:47 +0530 Subject: [PATCH 1/6] testcase for #33168 Quantile function fails when performing groupby on Time Zone Aware Timestamps --- pandas/tests/groupby/test_quantile.py | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 20328426a69b2..47785b218c6b1 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -343,3 +343,50 @@ def test_columns_groupby_quantile(): ) tm.assert_frame_equal(result, expected) + +def test_timestamp_groupby_quantile(): + # GH 33168 + df = DataFrame( + { + 'timestamp': pd.date_range(start='2020-04-19 00:00:00', freq='1T', periods=200, tz='UTC').floor('1H'), + 'category': list(range(1, 201)), + 'value': list(range(201, 401)) + } + ) + + result = df.groupby('timestamp').quantile([0.1, 0.5, 0.9]) + + expected = DataFrame( + [ + {'category': 6.9, 'value': 206.9}, + {'category': 30.5, 'value': 230.5}, + {'category': 54.1, 'value': 254.1}, + {'category': 66.9, 'value': 266.9}, + {'category': 90.5, 'value': 290.5}, + {'category': 114.1, 'value': 314.1}, + {'category': 126.9, 'value': 326.9}, + {'category': 150.5, 'value': 350.5}, + {'category': 174.1, 'value': 374.1}, + {'category': 182.9, 'value': 382.9}, + {'category': 190.5, 'value': 390.5}, + {'category': 198.1, 'value': 398.1} + ], + index=pd.MultiIndex.from_tuples( + [ + ( pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.1), + ( pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.5), + ( pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.9), + ( pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.1), + ( pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.5), + ( pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.9), + ( pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.1), + ( pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.5), + ( pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.9), + ( pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.1), + ( pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.5), + ( pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.9) + ], names=('timestamp', None) + ) + ) + + tm.assert_frame_equal(result, expected) \ No newline at end of file From 4daed962aa6a3c07d6df433cde4c0ab5a68959da Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Sat, 2 Jul 2022 01:34:01 +0530 Subject: [PATCH 2/6] testcase for #33168 Quantile function fails when performing groupby on Time Zone Aware Timestamps pre check changes --- pandas/tests/groupby/test_quantile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 47785b218c6b1..e27b5e29296fc 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -389,4 +389,4 @@ def test_timestamp_groupby_quantile(): ) ) - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected) From f7556be7269211c123e8c9ae3b306fc079e672f7 Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Sat, 2 Jul 2022 01:49:12 +0530 Subject: [PATCH 3/6] testcase for #33168 code style changes --- pandas/tests/groupby/test_quantile.py | 60 ++++++++++++++------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index e27b5e29296fc..05e1feb16eef9 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -344,49 +344,51 @@ def test_columns_groupby_quantile(): tm.assert_frame_equal(result, expected) + def test_timestamp_groupby_quantile(): # GH 33168 df = DataFrame( { - 'timestamp': pd.date_range(start='2020-04-19 00:00:00', freq='1T', periods=200, tz='UTC').floor('1H'), + 'timestamp': pd.date_range(start='2020-04-19 00:00:00', + freq='1T', periods=200, tz='UTC').floor('1H'), 'category': list(range(1, 201)), 'value': list(range(201, 401)) } ) - + result = df.groupby('timestamp').quantile([0.1, 0.5, 0.9]) - + expected = DataFrame( [ - {'category': 6.9, 'value': 206.9}, - {'category': 30.5, 'value': 230.5}, - {'category': 54.1, 'value': 254.1}, - {'category': 66.9, 'value': 266.9}, - {'category': 90.5, 'value': 290.5}, - {'category': 114.1, 'value': 314.1}, - {'category': 126.9, 'value': 326.9}, - {'category': 150.5, 'value': 350.5}, - {'category': 174.1, 'value': 374.1}, - {'category': 182.9, 'value': 382.9}, - {'category': 190.5, 'value': 390.5}, - {'category': 198.1, 'value': 398.1} - ], + {'category': 6.9, 'value': 206.9}, + {'category': 30.5, 'value': 230.5}, + {'category': 54.1, 'value': 254.1}, + {'category': 66.9, 'value': 266.9}, + {'category': 90.5, 'value': 290.5}, + {'category': 114.1, 'value': 314.1}, + {'category': 126.9, 'value': 326.9}, + {'category': 150.5, 'value': 350.5}, + {'category': 174.1, 'value': 374.1}, + {'category': 182.9, 'value': 382.9}, + {'category': 190.5, 'value': 390.5}, + {'category': 198.1, 'value': 398.1} + ], index=pd.MultiIndex.from_tuples( [ - ( pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.1), - ( pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.5), - ( pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.9), - ( pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.1), - ( pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.5), - ( pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.9), - ( pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.1), - ( pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.5), - ( pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.9), - ( pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.1), - ( pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.5), - ( pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.9) + (pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.1), + (pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.5), + (pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.9), + (pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.1), + (pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.5), + (pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.9), + (pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.1), + (pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.5), + (pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.9), + (pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.1), + (pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.5), + (pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.9) ], names=('timestamp', None) ) ) - + tm.assert_frame_equal(result, expected) From afcb9a676cd4e8be6e873396597ad980211664de Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Sat, 2 Jul 2022 02:05:12 +0530 Subject: [PATCH 4/6] testcase for #33168 code style changes --- pandas/tests/groupby/test_quantile.py | 61 ++++++++++++++------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 05e1feb16eef9..268c103b97b69 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -349,45 +349,46 @@ def test_timestamp_groupby_quantile(): # GH 33168 df = DataFrame( { - 'timestamp': pd.date_range(start='2020-04-19 00:00:00', - freq='1T', periods=200, tz='UTC').floor('1H'), - 'category': list(range(1, 201)), - 'value': list(range(201, 401)) + "timestamp": pd.date_range( + start="2020-04-19 00:00:00", freq="1T", periods=200, tz="UTC" + ).floor("1H"), + "category": list(range(1, 201)), + "value": list(range(201, 401)), } ) - result = df.groupby('timestamp').quantile([0.1, 0.5, 0.9]) + result = df.groupby("timestamp").quantile([0.1, 0.5, 0.9]) expected = DataFrame( [ - {'category': 6.9, 'value': 206.9}, - {'category': 30.5, 'value': 230.5}, - {'category': 54.1, 'value': 254.1}, - {'category': 66.9, 'value': 266.9}, - {'category': 90.5, 'value': 290.5}, - {'category': 114.1, 'value': 314.1}, - {'category': 126.9, 'value': 326.9}, - {'category': 150.5, 'value': 350.5}, - {'category': 174.1, 'value': 374.1}, - {'category': 182.9, 'value': 382.9}, - {'category': 190.5, 'value': 390.5}, - {'category': 198.1, 'value': 398.1} + {"category": 6.9, "value": 206.9}, + {"category": 30.5, "value": 230.5}, + {"category": 54.1, "value": 254.1}, + {"category": 66.9, "value": 266.9}, + {"category": 90.5, "value": 290.5}, + {"category": 114.1, "value": 314.1}, + {"category": 126.9, "value": 326.9}, + {"category": 150.5, "value": 350.5}, + {"category": 174.1, "value": 374.1}, + {"category": 182.9, "value": 382.9}, + {"category": 190.5, "value": 390.5}, + {"category": 198.1, "value": 398.1} ], index=pd.MultiIndex.from_tuples( [ - (pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.1), - (pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.5), - (pd.Timestamp('2020-04-19 00:00:00+00:00'), 0.9), - (pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.1), - (pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.5), - (pd.Timestamp('2020-04-19 01:00:00+00:00'), 0.9), - (pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.1), - (pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.5), - (pd.Timestamp('2020-04-19 02:00:00+00:00'), 0.9), - (pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.1), - (pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.5), - (pd.Timestamp('2020-04-19 03:00:00+00:00'), 0.9) - ], names=('timestamp', None) + (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.1), + (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.5), + (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.9), + (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.1), + (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.5), + (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.9), + (pd.Timestamp("2020-04-19 02:00:00+00:00"), 0.1), + (pd.Timestamp("2020-04-19 02:00:00+00:00"), 0.5), + (pd.Timestamp("2020-04-19 02:00:00+00:00"), 0.9), + (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.1), + (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.5), + (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.9) + ], names=("timestamp", None) ) ) From 8b331a4ec098d21b9b2ad473be891ffdfca5e016 Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Sat, 2 Jul 2022 12:29:21 +0530 Subject: [PATCH 5/6] testcase for #33168 code style changes --- pandas/tests/groupby/test_quantile.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 268c103b97b69..75b5fa3614b81 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -372,7 +372,7 @@ def test_timestamp_groupby_quantile(): {"category": 174.1, "value": 374.1}, {"category": 182.9, "value": 382.9}, {"category": 190.5, "value": 390.5}, - {"category": 198.1, "value": 398.1} + {"category": 198.1, "value": 398.1}, ], index=pd.MultiIndex.from_tuples( [ @@ -387,9 +387,10 @@ def test_timestamp_groupby_quantile(): (pd.Timestamp("2020-04-19 02:00:00+00:00"), 0.9), (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.1), (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.5), - (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.9) - ], names=("timestamp", None) - ) + (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.9), + ], + names=("timestamp", None), + ), ) tm.assert_frame_equal(result, expected) From ebf1d364f825cd0de854dca4a9411b710801c6b6 Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Sat, 2 Jul 2022 18:36:40 +0530 Subject: [PATCH 6/6] testcase for #33168 addressing review comments --- pandas/tests/groupby/test_quantile.py | 40 ++++++++------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 75b5fa3614b81..2b7e71d9619a4 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -350,44 +350,28 @@ def test_timestamp_groupby_quantile(): df = DataFrame( { "timestamp": pd.date_range( - start="2020-04-19 00:00:00", freq="1T", periods=200, tz="UTC" + start="2020-04-19 00:00:00", freq="1T", periods=100, tz="UTC" ).floor("1H"), - "category": list(range(1, 201)), - "value": list(range(201, 401)), + "category": list(range(1, 101)), + "value": list(range(101, 201)), } ) - result = df.groupby("timestamp").quantile([0.1, 0.5, 0.9]) + result = df.groupby("timestamp").quantile([0.2, 0.8]) expected = DataFrame( [ - {"category": 6.9, "value": 206.9}, - {"category": 30.5, "value": 230.5}, - {"category": 54.1, "value": 254.1}, - {"category": 66.9, "value": 266.9}, - {"category": 90.5, "value": 290.5}, - {"category": 114.1, "value": 314.1}, - {"category": 126.9, "value": 326.9}, - {"category": 150.5, "value": 350.5}, - {"category": 174.1, "value": 374.1}, - {"category": 182.9, "value": 382.9}, - {"category": 190.5, "value": 390.5}, - {"category": 198.1, "value": 398.1}, + {"category": 12.8, "value": 112.8}, + {"category": 48.2, "value": 148.2}, + {"category": 68.8, "value": 168.8}, + {"category": 92.2, "value": 192.2}, ], index=pd.MultiIndex.from_tuples( [ - (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.1), - (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.5), - (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.9), - (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.1), - (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.5), - (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.9), - (pd.Timestamp("2020-04-19 02:00:00+00:00"), 0.1), - (pd.Timestamp("2020-04-19 02:00:00+00:00"), 0.5), - (pd.Timestamp("2020-04-19 02:00:00+00:00"), 0.9), - (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.1), - (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.5), - (pd.Timestamp("2020-04-19 03:00:00+00:00"), 0.9), + (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.2), + (pd.Timestamp("2020-04-19 00:00:00+00:00"), 0.8), + (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.2), + (pd.Timestamp("2020-04-19 01:00:00+00:00"), 0.8), ], names=("timestamp", None), ),