diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index b84ff38b43ae7..a8df09d479f22 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -626,6 +626,18 @@ def test_setitem_iloc_two_dimensional_generator(self): expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]}) tm.assert_frame_equal(df, expected) + def test_setitem_dtypes_bytes_type_to_object(self): + # GH 20734 + index = Series(name="id", dtype="S24") + df = DataFrame(index=index) + df["a"] = Series(name="a", index=index, dtype=np.uint32) + df["b"] = Series(name="b", index=index, dtype="S64") + df["c"] = Series(name="c", index=index, dtype="S64") + df["d"] = Series(name="d", index=index, dtype=np.uint8) + result = df.dtypes + expected = Series([np.uint32, object, object, np.uint8], index=list("abcd")) + tm.assert_series_equal(result, expected) + class TestSetitemTZAwareValues: @pytest.fixture diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index cf4127da79bf9..2007e60dbc5d0 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1146,3 +1146,35 @@ def test_apply_as_index_constant_lambda(as_index, expected): df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 1, 2, 2], "c": [1, 1, 1, 1]}) result = df.groupby(["a", "b"], as_index=as_index).apply(lambda x: 1) tm.assert_equal(result, expected) + + +def test_sort_index_groups(): + # GH 20420 + df = DataFrame( + {"A": [1, 2, 3, 4, 5], "B": [6, 7, 8, 9, 0], "C": [1, 1, 1, 2, 2]}, + index=range(5), + ) + result = df.groupby("C").apply(lambda x: x.A.sort_index()) + expected = Series( + range(1, 6), + index=MultiIndex.from_tuples( + [(1, 0), (1, 1), (1, 2), (2, 3), (2, 4)], names=["C", None] + ), + name="A", + ) + tm.assert_series_equal(result, expected) + + +def test_positional_slice_groups_datetimelike(): + # GH 21651 + expected = DataFrame( + { + "date": pd.date_range("2010-01-01", freq="12H", periods=5), + "vals": range(5), + "let": list("abcde"), + } + ) + result = expected.groupby([expected.let, expected.date.dt.date]).apply( + lambda x: x.iloc[0:] + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 1b74096cbfbdf..dfbf1a5b2cdc2 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -641,3 +641,25 @@ def test_nth_nan_in_grouper(dropna): ) tm.assert_frame_equal(result, expected) + + +def test_first_categorical_and_datetime_data_nat(): + # GH 20520 + df = DataFrame( + { + "group": ["first", "first", "second", "third", "third"], + "time": 5 * [np.datetime64("NaT")], + "categories": Series(["a", "b", "c", "a", "b"], dtype="category"), + } + ) + result = df.groupby("group").first() + expected = DataFrame( + { + "time": 3 * [np.datetime64("NaT")], + "categories": Series(["a", "c", "a"]).astype( + pd.CategoricalDtype(["a", "b", "c"]) + ), + } + ) + expected.index = Index(["first", "second", "third"], name="group") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index b5092f83e1a9f..dae5c7274ffc5 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1259,3 +1259,11 @@ def test_categorical_and_not_categorical_key(observed): tm.assert_series_equal(result, expected) expected_explicit = Series([4, 2, 4], name="B") tm.assert_series_equal(result, expected_explicit) + + +def test_string_rank_grouping(): + # GH 19354 + df = DataFrame({"A": [1, 1, 2], "B": [1, 2, 3]}) + result = df.groupby("A").transform("rank") + expected = DataFrame({"B": [1.0, 2.0, 1.0]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index f0018c8a82453..afcff6db5e3dd 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -866,3 +866,27 @@ def test_loc_get_scalar_casting_to_float(): result = df.loc[[(3, 4)], "b"].iloc[0] assert result == 2 assert isinstance(result, np.int64) + + +def test_loc_empty_single_selector_with_names(): + # GH 19517 + idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0]) + s2 = Series(index=idx, dtype=np.float64) + result = s2.loc["a"] + expected = Series([np.nan, np.nan], index=Index(["A", "B"], name=0)) + tm.assert_series_equal(result, expected) + + +def test_loc_keyerror_rightmost_key_missing(): + # GH 20951 + + df = DataFrame( + { + "A": [100, 100, 200, 200, 300, 300], + "B": [10, 10, 20, 21, 31, 33], + "C": range(6), + } + ) + df = df.set_index(["A", "B"]) + with pytest.raises(KeyError, match="^1$"): + df.loc[(100, 1)] diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 880fa6398d25a..aac26c13c2a7c 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -910,3 +910,26 @@ def test_none_comparison(series_with_simple_index): result = series < None assert not result.iat[0] assert not result.iat[1] + + +def test_series_varied_multiindex_alignment(): + # GH 20414 + s1 = Series( + range(8), + index=pd.MultiIndex.from_product( + [list("ab"), list("xy"), [1, 2]], names=["ab", "xy", "num"] + ), + ) + s2 = Series( + [1000 * i for i in range(1, 5)], + index=pd.MultiIndex.from_product([list("xy"), [1, 2]], names=["xy", "num"]), + ) + result = s1.loc[pd.IndexSlice["a", :, :]] + s2 + expected = Series( + [1000, 2001, 3002, 4003], + index=pd.MultiIndex.from_tuples( + [("a", "x", 1), ("a", "x", 2), ("a", "y", 1), ("a", "y", 2)], + names=["ab", "xy", "num"], + ), + ) + tm.assert_series_equal(result, expected)