From ed06c2b0d0da41f9b2052d8e0390c2fc9b1f0ac4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 15 May 2021 11:13:51 -0700 Subject: [PATCH 01/10] Add test for GH 14467 --- pandas/tests/frame/test_constructors.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 03376bdce26f8..6e9991ff17ac3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2681,3 +2681,14 @@ def test_from_out_of_bounds_timedelta(self, constructor, cls): result = constructor(scalar) assert type(get1(result)) is cls + + def test_nested_list_columns(self): + # GH 14467 + result = DataFrame( + [[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]] + ) + expected = DataFrame( + [[1, 2, 3], [4, 5, 6]], + columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]), + ) + tm.assert_frame_equal(result, expected) From f8b4be184999c61e9152b7f9c594273a6cd0f1d9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 15 May 2021 11:49:28 -0700 Subject: [PATCH 02/10] Add tests for GH 14564 --- pandas/tests/frame/methods/test_quantile.py | 57 +++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index dbb5cb357de47..925c071cb320a 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -4,6 +4,7 @@ import pandas as pd from pandas import ( DataFrame, + Index, Series, Timestamp, ) @@ -650,3 +651,59 @@ def test_quantile_ea_scalar(self, index, frame_or_series): assert result == expected else: tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected_data, expected_index, axis", + [ + ["float64", [], [], 1], + ["int64", [], [], 1], + ["float64", [np.nan, np.nan], ["a", "b"], 0], + ["int64", [np.nan, np.nan], ["a", "b"], 0], + ], + ) + def 
test_empty_numeric(self, dtype, expected_data, expected_index, axis): + # GH 14564 + df = DataFrame(columns=["a", "b"], dtype=dtype) + result = df.quantile(0.5, axis=axis) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype="float64" + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected_data, expected_index, axis, expected_dtype", + [ + ["datetime64[ns]", [], [], 1, "float64"], + ["datetime64[ns]", [pd.NaT, pd.NaT], ["a", "b"], 0, "datetime64[ns]"], + ], + ) + def test_empty_datelike( + self, dtype, expected_data, expected_index, axis, expected_dtype + ): + # GH 14564 + df = DataFrame(columns=["a", "b"], dtype=dtype) + result = df.quantile(0.5, axis=axis, numeric_only=False) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype=expected_dtype + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "expected_data, expected_index, axis", + [ + [[np.nan, np.nan], range(2), 1], + [[], [], 0], + ], + ) + def test_datelike_numeric_only(self, expected_data, expected_index, axis): + # GH 14564 + df = DataFrame( + { + "a": pd.to_datetime(["2010", "2011"]), + "b": [0, 5], + "c": pd.to_datetime(["2011", "2012"]), + } + ) + result = df[["a", "c"]].quantile(0.5, axis=axis) + expected = Series(expected_data, name=0.5, index=Index(expected_index)) + tm.assert_series_equal(result, expected) From ba6907dcb0a82bf3b633bafcf38501897c37a81a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 15 May 2021 11:54:31 -0700 Subject: [PATCH 03/10] Add test for GH 14773 --- pandas/tests/frame/methods/test_diff.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 75d93ed2aafc6..037bcdbc5934c 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -285,3 +285,11 @@ def test_diff_readonly(self): result = df.diff() expected = 
DataFrame(np.array(df)).diff() tm.assert_frame_equal(result, expected) + + def test_diff_all_int_dtype(self, any_int_or_nullable_int_dtype): + # GH 14773 + df = DataFrame(range(5)) + df = df.astype(np.int8) + result = df.diff() + expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype="float32") + tm.assert_frame_equal(result, expected) From c7cef6966486a8ffc0f44c191f4d5ef2e793d07e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 15 May 2021 12:01:15 -0700 Subject: [PATCH 04/10] Add test for GH 15036 --- pandas/tests/groupby/test_groupby.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 83aeb29ec53df..de3c3abc7b78e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2265,3 +2265,11 @@ def test_groupby_mean_duplicate_index(rand_series_with_duplicate_datetimeindex): result = dups.groupby(level=0).mean() expected = dups.groupby(dups.index).mean() tm.assert_series_equal(result, expected) + + +def test_groupby_all_nan_groups_drop(): + # GH 15036 + s = Series([1, 2, 3], [np.nan, np.nan, np.nan]) + result = s.groupby(s.index).sum() + expected = Series([], index=Index([], dtype=np.float64), dtype=np.int64) + tm.assert_series_equal(result, expected) From a4fd0a4e97acf631d6b135afbf5612c8d0ce0d7f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 15 May 2021 12:08:08 -0700 Subject: [PATCH 05/10] Add test for GH 15106 --- pandas/tests/groupby/test_groupby.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index de3c3abc7b78e..d256b19dbb148 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2273,3 +2273,12 @@ def test_groupby_all_nan_groups_drop(): result = s.groupby(s.index).sum() expected = Series([], index=Index([], dtype=np.float64), dtype=np.int64) tm.assert_series_equal(result, expected) 
+ + +def test_groupby_empty_multi_column(): + # GH 15106 + result = DataFrame(data=[], columns=["A", "B", "C"]).groupby(["A", "B"]).sum() + expected = DataFrame( + [], columns=["C"], index=MultiIndex([[], []], [[], []], names=["A", "B"]) + ) + tm.assert_frame_equal(result, expected) From 9217c436984a455379dd2565abc04dda7a261c9a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 15 May 2021 12:15:39 -0700 Subject: [PATCH 06/10] Add test for GH 15239 --- pandas/tests/frame/test_stack_unstack.py | 33 ++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 365d8abcb6bac..9ecd677c0c6d8 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1999,6 +1999,39 @@ def test_stack_nan_in_multiindex_columns(self): ) tm.assert_frame_equal(result, expected) + def test_multi_level_stack_categorical(self): + # GH 15239 + midx = MultiIndex.from_arrays( + [ + ["A"] * 2 + ["B"] * 2, + pd.Categorical(list("abab")), + pd.Categorical(list("ccdd")), + ] + ) + df = DataFrame(np.arange(8).reshape(2, 4), columns=midx) + result = df.stack([1, 2]) + expected = DataFrame( + [ + [0, np.nan], + [np.nan, 2], + [1, np.nan], + [np.nan, 3], + [4, np.nan], + [np.nan, 6], + [5, np.nan], + [np.nan, 7], + ], + columns=["A", "B"], + index=MultiIndex.from_arrays( + [ + [0] * 4 + [1] * 4, + pd.Categorical(list("aabbaabb")), + pd.Categorical(list("cdcdcdcd")), + ] + ), + ) + tm.assert_frame_equal(result, expected) + def test_stack_nan_level(self): # GH 9406 df_nan = DataFrame( From ecd8c0e93f7cc994ea8c02e822d83458c4e0cb4e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 15 May 2021 12:20:19 -0700 Subject: [PATCH 07/10] Add test for GH 15273 --- pandas/tests/io/json/test_pandas.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 
3cc77aa723fe9..0ffc6044a5897 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1750,3 +1750,23 @@ def test_readjson_bool_series(self): result = read_json("[true, true, false]", typ="series") expected = Series([True, True, False]) tm.assert_series_equal(result, expected) + + def test_to_json_multiindex_escape(self): + # GH 15273 + df = DataFrame( + True, + index=pd.date_range("2017-01-20", "2017-01-23"), + columns=["foo", "bar"], + ).stack() + result = df.to_json() + expected = ( + "{\"(Timestamp('2017-01-20 00:00:00'), 'foo')\":true," + "\"(Timestamp('2017-01-20 00:00:00'), 'bar')\":true," + "\"(Timestamp('2017-01-21 00:00:00'), 'foo')\":true," + "\"(Timestamp('2017-01-21 00:00:00'), 'bar')\":true," + "\"(Timestamp('2017-01-22 00:00:00'), 'foo')\":true," + "\"(Timestamp('2017-01-22 00:00:00'), 'bar')\":true," + "\"(Timestamp('2017-01-23 00:00:00'), 'foo')\":true," + "\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}" + ) + assert result == expected From b427b5cfd5e977e068b5aa7bb9d5c8f583114d27 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 15 May 2021 14:55:27 -0700 Subject: [PATCH 08/10] Add dtype to suppress warning --- pandas/tests/frame/methods/test_quantile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 925c071cb320a..08368d58bb317 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -705,5 +705,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis): } ) result = df[["a", "c"]].quantile(0.5, axis=axis) - expected = Series(expected_data, name=0.5, index=Index(expected_index)) + expected = Series( + expected_data, name=0.5, index=Index(expected_index), dtype=np.float64 + ) tm.assert_series_equal(result, expected) From 239a2db32acb293f0a89adf5bc280d5e985a22d9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke 
Date: Sat, 15 May 2021 19:26:00 -0700 Subject: [PATCH 09/10] Ensure fixture is used --- pandas/tests/frame/methods/test_diff.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 037bcdbc5934c..0a3d2e1c9a8fc 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -286,10 +286,11 @@ def test_diff_readonly(self): expected = DataFrame(np.array(df)).diff() tm.assert_frame_equal(result, expected) - def test_diff_all_int_dtype(self, any_int_or_nullable_int_dtype): + def test_diff_all_int_dtype(self, any_int_dtype): # GH 14773 df = DataFrame(range(5)) - df = df.astype(np.int8) + df = df.astype(any_int_dtype) result = df.diff() - expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype="float32") + expected_dtype = "float32" if any_int_dtype in ("int8", "int16") else "float64" + expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype) tm.assert_frame_equal(result, expected) From aea4e324134f1374df2d9dbc3ac79b385b988601 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 18 May 2021 09:00:20 -0700 Subject: [PATCH 10/10] xfail incorrect return --- pandas/tests/frame/methods/test_quantile.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 08368d58bb317..aca061cdd197b 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -673,7 +673,14 @@ def test_empty_numeric(self, dtype, expected_data, expected_index, axis): @pytest.mark.parametrize( "dtype, expected_data, expected_index, axis, expected_dtype", [ - ["datetime64[ns]", [], [], 1, "float64"], + pytest.param( + "datetime64[ns]", + [], + [], + 1, + "datetime64[ns]", + marks=pytest.mark.xfail(reason="#GH 41544"), + ), ["datetime64[ns]", [pd.NaT, pd.NaT], ["a", "b"], 0, 
"datetime64[ns]"], ], )