diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index d19b59debfdea..86639065ba5c2 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -1,12 +1,39 @@ import numpy as np import pytest +import pytz import pandas as pd -from pandas import DataFrame, Index, Series +from pandas import DataFrame, Index, Series, date_range import pandas._testing as tm class TestDataFrameAlign: + def test_frame_align_aware(self): + idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") + idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern") + df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) + df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) + new1, new2 = df1.align(df2) + assert df1.index.tz == new1.index.tz + assert df2.index.tz == new2.index.tz + + # different timezones convert to UTC + + # frame with frame + df1_central = df1.tz_convert("US/Central") + new1, new2 = df1.align(df1_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + # frame with Series + new1, new2 = df1.align(df1_central[0], axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + df1[0].align(df1_central, axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + def test_align_float(self, float_frame): af, bf = float_frame.align(float_frame) assert af._mgr is not float_frame._mgr diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/methods/test_reindex.py similarity index 92% rename from pandas/tests/frame/test_axis_select_reindex.py rename to pandas/tests/frame/methods/test_reindex.py index 12945533b17ae..99a3bbdf5ffe3 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas import Categorical, DataFrame, Index, MultiIndex, Series, date_range, isna +from pandas import Categorical, DataFrame, Index, Series, date_range, isna import pandas._testing as tm @@ -12,30 +12,6 @@ class TestDataFrameSelectReindex: # These are specific reindex-based tests; other indexing tests should go in # test_indexing - def test_merge_join_different_levels(self): - # GH 9455 - - # first dataframe - df1 = DataFrame(columns=["a", "b"], data=[[1, 11], [0, 22]]) - - # second dataframe - columns = MultiIndex.from_tuples([("a", ""), ("c", "c1")]) - df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]]) - - # merge - columns = ["a", "b", ("c", "c1")] - expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]]) - with tm.assert_produces_warning(UserWarning): - result = pd.merge(df1, df2, on="a") - tm.assert_frame_equal(result, expected) - - # join, see discussion in GH 12219 - columns = ["a", "b", ("a", ""), ("c", "c1")] - expected = DataFrame(columns=columns, data=[[1, 11, 0, 44], [0, 22, 1, 33]]) - with tm.assert_produces_warning(UserWarning): - result = df1.join(df2, on="a") - tm.assert_frame_equal(result, expected) - def test_reindex(self, float_frame): datetime_series = tm.makeTimeSeries(nper=30) @@ -382,20 +358,6 @@ def test_reindex_api_equivalence(self): for res in [res2, res3]: tm.assert_frame_equal(res1, res) - def test_align_int_fill_bug(self): - # GH #910 - X = np.arange(10 * 10, dtype="float64").reshape(10, 10) - Y = np.ones((10, 1), dtype=int) - - df1 = DataFrame(X) - df1["0.X"] = Y.squeeze() - - df2 = df1.astype(float) - - result = df1 - df1.mean() - expected = df2 - df2.mean() - tm.assert_frame_equal(result, expected) - def test_reindex_boolean(self): frame = DataFrame( np.ones((10, 2), dtype=bool), index=np.arange(0, 20, 2), columns=[0, 2] diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py new file mode 100644 index 0000000000000..cd6c5da8dd3a0 --- /dev/null +++ b/pandas/tests/frame/methods/test_values.py @@ -0,0 +1,53 @@ +import numpy as np + +from pandas import DataFrame, Timestamp, date_range +import pandas._testing as tm + + +class TestDataFrameValues: + def test_values_duplicates(self): + df = DataFrame( + [[1, 2, "a", "b"], [1, 2, "a", "b"]], columns=["one", "one", "two", "two"] + ) + + result = df.values + expected = np.array([[1, 2, "a", "b"], [1, 2, "a", "b"]], dtype=object) + + tm.assert_numpy_array_equal(result, expected) + + def test_frame_values_with_tz(self): + tz = "US/Central" + df = DataFrame({"A": date_range("2000", periods=4, tz=tz)}) + result = df.values + expected = np.array( + [ + [Timestamp("2000-01-01", tz=tz)], + [Timestamp("2000-01-02", tz=tz)], + [Timestamp("2000-01-03", tz=tz)], + [Timestamp("2000-01-04", tz=tz)], + ] + ) + tm.assert_numpy_array_equal(result, expected) + + # two columns, homogenous + + df["B"] = df["A"] + result = df.values + expected = np.concatenate([expected, expected], axis=1) + tm.assert_numpy_array_equal(result, expected) + + # three columns, heterogeneous + est = "US/Eastern" + df["C"] = df["A"].dt.tz_convert(est) + + new = np.array( + [ + [Timestamp("2000-01-01T01:00:00", tz=est)], + [Timestamp("2000-01-02T01:00:00", tz=est)], + [Timestamp("2000-01-03T01:00:00", tz=est)], + [Timestamp("2000-01-04T01:00:00", tz=est)], + ] + ) + expected = np.concatenate([expected, new], axis=1) + result = df.values + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 2c04473d50851..de56625209160 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1493,6 +1493,20 @@ def test_dunder_methods_binary(self, all_arithmetic_operators): with pytest.raises(TypeError, match="takes 2 positional arguments"): getattr(df, all_arithmetic_operators)(b, 0) + def test_align_int_fill_bug(self): + # GH#910 + X = np.arange(10 * 10, dtype="float64").reshape(10, 10) + Y = np.ones((10, 1), dtype=int) + + df1 = DataFrame(X) + df1["0.X"] = Y.squeeze() + + df2 = df1.astype(float) + + result = df1 - df1.mean() + expected = df2 - df2.mean() + tm.assert_frame_equal(result, expected) + def test_pow_with_realignment(): # GH#32685 pow has special semantics for operating with null values diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 4d6e675c6765f..07cd307c8cc54 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, period_range +from pandas import DataFrame, Index, MultiIndex, period_range import pandas._testing as tm @@ -292,3 +292,27 @@ def test_join_multiindex_leftright(self): tm.assert_frame_equal(df1.join(df2, how="right"), exp) tm.assert_frame_equal(df2.join(df1, how="left"), exp[["value2", "value1"]]) + + def test_merge_join_different_levels(self): + # GH#9455 + + # first dataframe + df1 = DataFrame(columns=["a", "b"], data=[[1, 11], [0, 22]]) + + # second dataframe + columns = MultiIndex.from_tuples([("a", ""), ("c", "c1")]) + df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]]) + + # merge + columns = ["a", "b", ("c", "c1")] + expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]]) + with tm.assert_produces_warning(UserWarning): + result = pd.merge(df1, df2, on="a") + tm.assert_frame_equal(result, expected) + + # join, see discussion in GH#12219 + columns = ["a", "b", ("a", ""), ("c", "c1")] + expected = DataFrame(columns=columns, data=[[1, 11, 0, 44], [0, 22, 1, 33]]) + with tm.assert_produces_warning(UserWarning): + result = df1.join(df2, on="a") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index a8b76f4d85f49..c5b923f9a0c1c 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -488,16 +488,6 @@ def test_columns_with_dups(self): xp.columns = ["A", "A", "B"] tm.assert_frame_equal(rs, xp) - def test_values_duplicates(self): - df = DataFrame( - [[1, 2, "a", "b"], [1, 2, "a", "b"]], columns=["one", "one", "two", "two"] - ) - - result = df.values - expected = np.array([[1, 2, "a", "b"], [1, 2, "a", "b"]], dtype=object) - - tm.assert_numpy_array_equal(result, expected) - def test_set_value_by_index(self): # See gh-12344 df = DataFrame(np.arange(9).reshape(3, 3).T) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index dfd4fb1855383..bb4e7a157f53e 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -3,7 +3,6 @@ """ import numpy as np import pytest -import pytz from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -14,43 +13,6 @@ class TestDataFrameTimezones: - def test_frame_values_with_tz(self): - tz = "US/Central" - df = DataFrame({"A": date_range("2000", periods=4, tz=tz)}) - result = df.values - expected = np.array( - [ - [pd.Timestamp("2000-01-01", tz=tz)], - [pd.Timestamp("2000-01-02", tz=tz)], - [pd.Timestamp("2000-01-03", tz=tz)], - [pd.Timestamp("2000-01-04", tz=tz)], - ] - ) - tm.assert_numpy_array_equal(result, expected) - - # two columns, homogenous - - df = df.assign(B=df.A) - result = df.values - expected = np.concatenate([expected, expected], axis=1) - tm.assert_numpy_array_equal(result, expected) - - # three columns, heterogeneous - est = "US/Eastern" - df = df.assign(C=df.A.dt.tz_convert(est)) - - new = np.array( - [ - [pd.Timestamp("2000-01-01T01:00:00", tz=est)], - [pd.Timestamp("2000-01-02T01:00:00", tz=est)], - [pd.Timestamp("2000-01-03T01:00:00", tz=est)], - [pd.Timestamp("2000-01-04T01:00:00", tz=est)], - ] - ) - expected = np.concatenate([expected, new], axis=1) - result = df.values - tm.assert_numpy_array_equal(result, expected) - def test_frame_join_tzaware(self): test1 = DataFrame( np.zeros((6, 3)), @@ -72,32 +34,6 @@ def test_frame_join_tzaware(self): tm.assert_index_equal(result.index, ex_index) assert result.index.tz.zone == "US/Central" - def test_frame_align_aware(self): - idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") - idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern") - df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) - df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) - new1, new2 = df1.align(df2) - assert df1.index.tz == new1.index.tz - assert df2.index.tz == new2.index.tz - - # different timezones convert to UTC - - # frame with frame - df1_central = df1.tz_convert("US/Central") - new1, new2 = df1.align(df1_central) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC - - # frame with Series - new1, new2 = df1.align(df1_central[0], axis=0) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC - - df1[0].align(df1_central, axis=0) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) def test_frame_no_datetime64_dtype(self, tz): # after GH#7822