From c1289e9e63c0b831a5111b31d8c7ff539d9c8603 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 20 Nov 2021 17:36:33 +0000 Subject: [PATCH 01/29] DEPR: Series/DataFrame.append (#35407) --- doc/source/whatsnew/v1.4.0.rst | 55 +++++++++++ pandas/core/frame.py | 8 ++ pandas/core/series.py | 8 ++ pandas/tests/frame/methods/test_append.py | 109 +++++++++++++-------- pandas/tests/series/methods/test_append.py | 83 ++++++++++------ 5 files changed, 195 insertions(+), 68 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 8b51fe2db7641..00f2de1588cd6 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -438,6 +438,61 @@ when given numeric data, but in the future, a :class:`NumericIndex` will be retu Out [4]: NumericIndex([1, 2, 3], dtype='uint64') +.. _whatsnew_140.deprecations.frame_series_append: + +Deprecated Frame.append and Series.append +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.append` and :meth:`Series.append` have been deprecated and will be removed in Pandas 2.0. +Use :func:`pandas.core.reshape.concat` instead (:issue:`35407`). + +*Deprecated syntax* + +.. code-block:: ipython + + In [1]: pd.Series([1, 2]).append(pd.Series([3, 4]) + Out [1]: + :1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.core.reshape.concat instead. + 0 1 + 1 2 + 0 3 + 1 4 + dtype: int64 + + In [2]: df1 = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB')) + In [3]: df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) + In [4]: df1.append(df2) + Out [4]: + :1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.core.reshape.concat instead. + A B + 0 1 2 + 1 3 4 + 0 5 6 + 1 7 8 + +*Recommended syntax* + +.. 
code-block:: ipython + + In [1]: pd.core.reshape.concat.concat([pd.Series([1, 2]), pd.Series([3, 4])]) + Out [1]: + 0 1 + 1 2 + 0 3 + 1 4 + dtype: int64 + + In [2]: df1 = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB')) + In [3]: df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) + In [4]: pd.core.reshape.concat.concat([df1, df2]) + Out [4]: + A B + 0 1 2 + 1 3 4 + 0 5 6 + 1 7 8 + + .. _whatsnew_140.deprecations.other: Other Deprecations diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0960ab4a81149..1ca19f22607ac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9114,6 +9114,14 @@ def append( 3 3 4 4 """ + warnings.warn( + "The frame.append method is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.core.reshape.concat instead.", + FutureWarning, + stacklevel=2, + ) + combined_columns = None if isinstance(other, (Series, dict)): if isinstance(other, dict): diff --git a/pandas/core/series.py b/pandas/core/series.py index e0a63b8e35105..51f83fc7016ca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2888,6 +2888,14 @@ def append( ... ValueError: Indexes have overlapping values: [0, 1, 2] """ + warnings.warn( + "The frame.append method is deprecated " + "and will be removed from pandas in a future version. 
" + "Use pandas.core.reshape.concat instead.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.reshape.concat import concat if isinstance(to_append, (list, tuple)): diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index c29b247cc6e17..265c40218d595 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -21,19 +21,22 @@ def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_seri a = obj[:5] b = obj[5:] - result = a.append(b) + with tm.assert_produces_warning(FutureWarning): + result = a.append(b) tm.assert_equal(result, obj) def test_append_empty_list(self): # GH 28769 df = DataFrame() - result = df.append([]) + with tm.assert_produces_warning(FutureWarning): + result = df.append([]) expected = df tm.assert_frame_equal(result, expected) assert result is not df df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - result = df.append([]) + with tm.assert_produces_warning(FutureWarning): + result = df.append([]) expected = df tm.assert_frame_equal(result, expected) assert result is not df # .append() should return a new object @@ -43,39 +46,49 @@ def test_append_series_dict(self): series = df.loc[4] msg = "Indexes have overlapping values" - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match=msg), tm.assert_produces_warning( + FutureWarning + ): df.append(series, verify_integrity=True) series.name = None msg = "Can only append a Series if ignore_index=True" - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( + FutureWarning + ): df.append(series, verify_integrity=True) - result = df.append(series[::-1], ignore_index=True) - expected = df.append( - DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True - ) + with tm.assert_produces_warning(FutureWarning): + result = df.append(series[::-1], ignore_index=True) + 
expected = df.append( + DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True + ) tm.assert_frame_equal(result, expected) # dict - result = df.append(series.to_dict(), ignore_index=True) + with tm.assert_produces_warning(FutureWarning): + result = df.append(series.to_dict(), ignore_index=True) tm.assert_frame_equal(result, expected) - result = df.append(series[::-1][:3], ignore_index=True) - expected = df.append( - DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True - ) + with tm.assert_produces_warning(FutureWarning): + result = df.append(series[::-1][:3], ignore_index=True) + expected = df.append( + DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True + ) tm.assert_frame_equal(result, expected.loc[:, result.columns]) msg = "Can only append a dict if ignore_index=True" - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( + FutureWarning + ): df.append(series.to_dict()) # can append when name set row = df.loc[4] row.name = 5 - result = df.append(row) - expected = df.append(df[-1:], ignore_index=True) + with tm.assert_produces_warning(FutureWarning): + result = df.append(row) + expected = df.append(df[-1:], ignore_index=True) tm.assert_frame_equal(result, expected) def test_append_list_of_series_dicts(self): @@ -83,8 +96,9 @@ def test_append_list_of_series_dicts(self): dicts = [x.to_dict() for idx, x in df.iterrows()] - result = df.append(dicts, ignore_index=True) - expected = df.append(df, ignore_index=True) + with tm.assert_produces_warning(FutureWarning): + result = df.append(dicts, ignore_index=True) + expected = df.append(df, ignore_index=True) tm.assert_frame_equal(result, expected) # different columns @@ -92,8 +106,9 @@ def test_append_list_of_series_dicts(self): {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, ] - result = df.append(dicts, ignore_index=True, sort=True) - expected = df.append(DataFrame(dicts), 
ignore_index=True, sort=True) + with tm.assert_produces_warning(FutureWarning): + result = df.append(dicts, ignore_index=True, sort=True) + expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_list_retain_index_name(self): @@ -109,11 +124,13 @@ def test_append_list_retain_index_name(self): ) # append series - result = df.append(serc) + with tm.assert_produces_warning(FutureWarning): + result = df.append(serc) tm.assert_frame_equal(result, expected) # append list of series - result = df.append([serc]) + with tm.assert_produces_warning(FutureWarning): + result = df.append([serc]) tm.assert_frame_equal(result, expected) def test_append_missing_cols(self): @@ -124,10 +141,10 @@ def test_append_missing_cols(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) dicts = [{"foo": 9}, {"bar": 10}] - with tm.assert_produces_warning(None): + with tm.assert_produces_warning(FutureWarning): result = df.append(dicts, ignore_index=True, sort=True) - expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_empty_dataframe(self): @@ -135,28 +152,32 @@ def test_append_empty_dataframe(self): # Empty df append empty df df1 = DataFrame() df2 = DataFrame() - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Non-empty df append empty df df1 = DataFrame(np.random.randn(5, 2)) df2 = DataFrame() - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Empty df with columns append empty df df1 = DataFrame(columns=["bar", "foo"]) df2 = DataFrame() - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = 
df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Non-Empty df with columns append empty df df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) df2 = DataFrame() - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) @@ -168,19 +189,22 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) df2 = DataFrame() - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) @@ -189,7 +213,8 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) @@ -198,7 +223,8 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": np.nan}, index=range(1)) df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = DataFrame( {"bar": Series([np.nan, 
Timestamp("20130101")], dtype="M8[ns]")} ) @@ -207,7 +233,8 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) - result = df1.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = df1.append(df2) expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) tm.assert_frame_equal(result, expected) @@ -218,7 +245,8 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): # GH 30238 tz = tz_naive_fixture df = DataFrame([Timestamp(timestamp, tz=tz)]) - result = df.append(df.iloc[0]).iloc[-1] + with tm.assert_produces_warning(FutureWarning): + result = df.append(df.iloc[0]).iloc[-1] expected = Series(Timestamp(timestamp, tz=tz), name=0) tm.assert_series_equal(result, expected) @@ -234,7 +262,8 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): ) def test_other_dtypes(self, data, dtype): df = DataFrame(data, dtype=dtype) - result = df.append(df.iloc[0]).iloc[-1] + with tm.assert_produces_warning(FutureWarning): + result = df.append(df.iloc[0]).iloc[-1] expected = Series(data, name=0, dtype=dtype) tm.assert_series_equal(result, expected) @@ -249,7 +278,8 @@ def test_append_numpy_bug_1681(self, dtype): df = DataFrame() other = DataFrame({"A": "foo", "B": index}, index=index) - result = df.append(other) + with tm.assert_produces_warning(FutureWarning): + result = df.append(other) assert (result["B"] == index).all() @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") @@ -264,7 +294,8 @@ def test_multiindex_column_append_multiple(self): df2 = df.copy() for i in range(1, 10): df[i, "colA"] = 10 - df = df.append(df2, ignore_index=True) + with tm.assert_produces_warning(FutureWarning): + df = df.append(df2, ignore_index=True) result = df["multi"] expected = DataFrame( {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} diff --git 
a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py index 2081e244b4e6c..70ef982923a76 100644 --- a/pandas/tests/series/methods/test_append.py +++ b/pandas/tests/series/methods/test_append.py @@ -15,11 +15,13 @@ class TestSeriesAppend: def test_append_preserve_name(self, datetime_series): - result = datetime_series[:5].append(datetime_series[5:]) + with tm.assert_produces_warning(FutureWarning): + result = datetime_series[:5].append(datetime_series[5:]) assert result.name == datetime_series.name def test_append(self, datetime_series, string_series, object_series): - appended_series = string_series.append(object_series) + with tm.assert_produces_warning(FutureWarning): + appended_series = string_series.append(object_series) for idx, value in appended_series.items(): if idx in string_series.index: assert value == string_series[idx] @@ -29,13 +31,16 @@ def test_append(self, datetime_series, string_series, object_series): raise AssertionError("orphaned index!") msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match=msg), tm.assert_produces_warning( + FutureWarning + ): datetime_series.append(datetime_series, verify_integrity=True) def test_append_many(self, datetime_series): pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] - result = pieces[0].append(pieces[1:]) + with tm.assert_produces_warning(FutureWarning): + result = pieces[0].append(pieces[1:]) tm.assert_series_equal(result, datetime_series) def test_append_duplicates(self): @@ -43,20 +48,24 @@ def test_append_duplicates(self): s1 = Series([1, 2, 3]) s2 = Series([4, 5, 6]) exp = Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) - tm.assert_series_equal(s1.append(s2), exp) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal(s1.append(s2), exp) tm.assert_series_equal(pd.concat([s1, s2]), exp) # the result must have RangeIndex exp = Series([1, 2, 3, 4, 5, 
6]) - tm.assert_series_equal( - s1.append(s2, ignore_index=True), exp, check_index_type=True - ) + with tm.assert_produces_warning(FutureWarning): + tm.assert_series_equal( + s1.append(s2, ignore_index=True), exp, check_index_type=True + ) tm.assert_series_equal( pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True ) msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match=msg), tm.assert_produces_warning( + FutureWarning + ): s1.append(s2, verify_integrity=True) with pytest.raises(ValueError, match=msg): pd.concat([s1, s2], verify_integrity=True) @@ -67,8 +76,9 @@ def test_append_tuples(self): list_input = [s, s] tuple_input = (s, s) - expected = s.append(list_input) - result = s.append(tuple_input) + with tm.assert_produces_warning(FutureWarning): + expected = s.append(list_input) + result = s.append(tuple_input) tm.assert_series_equal(expected, result) @@ -77,9 +87,13 @@ def test_append_dataframe_raises(self): df = DataFrame({"A": [1, 2], "B": [3, 4]}) msg = "to_append should be a Series or list/tuple of Series, got DataFrame" - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( + FutureWarning + ): df.A.append(df) - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( + FutureWarning + ): df.A.append([df]) @@ -89,8 +103,9 @@ def test_append(self): ts = Series(np.random.randn(len(rng)), rng) df = DataFrame(np.random.randn(len(rng), 4), index=rng) - result = ts.append(ts) - result_df = df.append(df) + with tm.assert_produces_warning(FutureWarning): + result = ts.append(ts) + result_df = df.append(df) ex_index = DatetimeIndex(np.tile(rng.values, 2)) tm.assert_index_equal(result.index, ex_index) tm.assert_index_equal(result_df.index, ex_index) @@ -107,6 +122,7 @@ def test_append(self): rng2 = rng.copy() rng1.name = "foo" rng2.name = "bar" + assert 
rng1.append(rng1).name == "foo" assert rng1.append(rng2).name is None @@ -120,8 +136,9 @@ def test_append_tz(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - result = ts.append(ts2) - result_df = df.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = ts.append(ts2) + result_df = df.append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -146,8 +163,9 @@ def test_append_tz_explicit_pytz(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - result = ts.append(ts2) - result_df = df.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = ts.append(ts2) + result_df = df.append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -170,8 +188,9 @@ def test_append_tz_dateutil(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - result = ts.append(ts2) - result_df = df.append(df2) + with tm.assert_produces_warning(FutureWarning): + result = ts.append(ts2) + result_df = df.append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -183,7 +202,8 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) + with tm.assert_produces_warning(FutureWarning): + ts_result = ser1.append(ser2) exp_index = DatetimeIndex( ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern", freq="H" @@ -196,7 +216,8 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) + with tm.assert_produces_warning(FutureWarning): + ts_result = ser1.append(ser2) exp_index 
= DatetimeIndex( ["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC", freq="H" @@ -212,7 +233,8 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) + with tm.assert_produces_warning(FutureWarning): + ts_result = ser1.append(ser2) exp_index = Index( [ Timestamp("1/1/2011 01:00", tz="US/Eastern"), @@ -227,9 +249,10 @@ def test_series_append_aware_naive(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") ser1 = Series(np.random.randn(len(rng1)), index=rng1) ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1.append(ser2) + with tm.assert_produces_warning(FutureWarning): + ts_result = ser1.append(ser2) - expected = ser1.index.astype(object).append(ser2.index.astype(object)) + expected = ser1.index.astype(object).append(ser2.index.astype(object)) assert ts_result.index.equals(expected) # mixed @@ -237,9 +260,10 @@ def test_series_append_aware_naive(self): rng2 = range(100) ser1 = Series(np.random.randn(len(rng1)), index=rng1) ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1.append(ser2) + with tm.assert_produces_warning(FutureWarning): + ts_result = ser1.append(ser2) - expected = ser1.index.astype(object).append(ser2.index) + expected = ser1.index.astype(object).append(ser2.index) assert ts_result.index.equals(expected) def test_series_append_dst(self): @@ -247,7 +271,8 @@ def test_series_append_dst(self): rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") ser1 = Series([1, 2, 3], index=rng1) ser2 = Series([10, 11, 12], index=rng2) - ts_result = ser1.append(ser2) + with tm.assert_produces_warning(FutureWarning): + ts_result = ser1.append(ser2) exp_index = DatetimeIndex( [ From 7d96d25249ab89967eff05cd90cdc74a855a3caf Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 20 Nov 2021 19:11:09 +0000 Subject: [PATCH 02/29] 
Refer to pandas.concat instead of pandas.core.reshape.concat --- doc/source/whatsnew/v1.4.0.rst | 6 +++--- pandas/core/frame.py | 4 ++-- pandas/core/series.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 00f2de1588cd6..269d2c4e56a42 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -463,7 +463,7 @@ Use :func:`pandas.core.reshape.concat` instead (:issue:`35407`). In [3]: df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) In [4]: df1.append(df2) Out [4]: - :1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.core.reshape.concat instead. + :1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. A B 0 1 2 1 3 4 @@ -474,7 +474,7 @@ Use :func:`pandas.core.reshape.concat` instead (:issue:`35407`). .. code-block:: ipython - In [1]: pd.core.reshape.concat.concat([pd.Series([1, 2]), pd.Series([3, 4])]) + In [1]: pd.concat([pd.Series([1, 2]), pd.Series([3, 4])]) Out [1]: 0 1 1 2 @@ -484,7 +484,7 @@ Use :func:`pandas.core.reshape.concat` instead (:issue:`35407`). In [2]: df1 = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB')) In [3]: df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) - In [4]: pd.core.reshape.concat.concat([df1, df2]) + In [4]: pd.concat([df1, df2]) Out [4]: A B 0 1 2 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1ca19f22607ac..8954ef4c47a3f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9117,9 +9117,9 @@ def append( warnings.warn( "The frame.append method is deprecated " "and will be removed from pandas in a future version. 
" - "Use pandas.core.reshape.concat instead.", + "Use pandas.concat instead.", FutureWarning, - stacklevel=2, + stacklevel=find_stack_level(), ) combined_columns = None diff --git a/pandas/core/series.py b/pandas/core/series.py index 51f83fc7016ca..5e46915e093ad 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2891,9 +2891,9 @@ def append( warnings.warn( "The frame.append method is deprecated " "and will be removed from pandas in a future version. " - "Use pandas.core.reshape.concat instead.", + "Use pandas.concat instead.", FutureWarning, - stacklevel=2, + stacklevel=find_stack_level(), ) from pandas.core.reshape.concat import concat From 963d151c9f7e6022369f909796ef0fe0148be72e Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 20 Nov 2021 19:46:14 +0000 Subject: [PATCH 03/29] Add explicit test for warning, ignore in the remainder of the tests --- pandas/tests/frame/methods/test_append.py | 117 ++++++++------------- pandas/tests/series/methods/test_append.py | 88 ++++++---------- 2 files changed, 81 insertions(+), 124 deletions(-) diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index 265c40218d595..9e9d0d305a842 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -12,6 +12,7 @@ import pandas._testing as tm +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestDataFrameAppend: def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series): obj = multiindex_dataframe_random_data @@ -21,22 +22,19 @@ def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_seri a = obj[:5] b = obj[5:] - with tm.assert_produces_warning(FutureWarning): - result = a.append(b) + result = a.append(b) tm.assert_equal(result, obj) def test_append_empty_list(self): # GH 28769 df = DataFrame() - with tm.assert_produces_warning(FutureWarning): - result = df.append([]) + result = 
df.append([]) expected = df tm.assert_frame_equal(result, expected) assert result is not df df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - with tm.assert_produces_warning(FutureWarning): - result = df.append([]) + result = df.append([]) expected = df tm.assert_frame_equal(result, expected) assert result is not df # .append() should return a new object @@ -46,49 +44,39 @@ def test_append_series_dict(self): series = df.loc[4] msg = "Indexes have overlapping values" - with pytest.raises(ValueError, match=msg), tm.assert_produces_warning( - FutureWarning - ): + with pytest.raises(ValueError, match=msg): df.append(series, verify_integrity=True) series.name = None msg = "Can only append a Series if ignore_index=True" - with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( - FutureWarning - ): + with pytest.raises(TypeError, match=msg): df.append(series, verify_integrity=True) - with tm.assert_produces_warning(FutureWarning): - result = df.append(series[::-1], ignore_index=True) - expected = df.append( - DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True - ) + result = df.append(series[::-1], ignore_index=True) + expected = df.append( + DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True + ) tm.assert_frame_equal(result, expected) # dict - with tm.assert_produces_warning(FutureWarning): - result = df.append(series.to_dict(), ignore_index=True) + result = df.append(series.to_dict(), ignore_index=True) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - result = df.append(series[::-1][:3], ignore_index=True) - expected = df.append( - DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True - ) + result = df.append(series[::-1][:3], ignore_index=True) + expected = df.append( + DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True + ) tm.assert_frame_equal(result, expected.loc[:, result.columns]) msg = "Can only append a dict if 
ignore_index=True" - with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( - FutureWarning - ): + with pytest.raises(TypeError, match=msg): df.append(series.to_dict()) # can append when name set row = df.loc[4] row.name = 5 - with tm.assert_produces_warning(FutureWarning): - result = df.append(row) - expected = df.append(df[-1:], ignore_index=True) + result = df.append(row) + expected = df.append(df[-1:], ignore_index=True) tm.assert_frame_equal(result, expected) def test_append_list_of_series_dicts(self): @@ -96,9 +84,8 @@ def test_append_list_of_series_dicts(self): dicts = [x.to_dict() for idx, x in df.iterrows()] - with tm.assert_produces_warning(FutureWarning): - result = df.append(dicts, ignore_index=True) - expected = df.append(df, ignore_index=True) + result = df.append(dicts, ignore_index=True) + expected = df.append(df, ignore_index=True) tm.assert_frame_equal(result, expected) # different columns @@ -106,9 +93,8 @@ def test_append_list_of_series_dicts(self): {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, ] - with tm.assert_produces_warning(FutureWarning): - result = df.append(dicts, ignore_index=True, sort=True) - expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + result = df.append(dicts, ignore_index=True, sort=True) + expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_list_retain_index_name(self): @@ -124,13 +110,11 @@ def test_append_list_retain_index_name(self): ) # append series - with tm.assert_produces_warning(FutureWarning): - result = df.append(serc) + result = df.append(serc) tm.assert_frame_equal(result, expected) # append list of series - with tm.assert_produces_warning(FutureWarning): - result = df.append([serc]) + result = df.append([serc]) tm.assert_frame_equal(result, expected) def test_append_missing_cols(self): @@ -141,10 +125,9 @@ def test_append_missing_cols(self): df = 
DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) dicts = [{"foo": 9}, {"bar": 10}] - with tm.assert_produces_warning(FutureWarning): - result = df.append(dicts, ignore_index=True, sort=True) + result = df.append(dicts, ignore_index=True, sort=True) - expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_empty_dataframe(self): @@ -152,32 +135,28 @@ def test_append_empty_dataframe(self): # Empty df append empty df df1 = DataFrame() df2 = DataFrame() - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Non-empty df append empty df df1 = DataFrame(np.random.randn(5, 2)) df2 = DataFrame() - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Empty df with columns append empty df df1 = DataFrame(columns=["bar", "foo"]) df2 = DataFrame() - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Non-Empty df with columns append empty df df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) df2 = DataFrame() - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) @@ -189,22 +168,19 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) df2 = DataFrame() - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": "foo"}, 
index=range(1, 2)) - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) @@ -213,8 +189,7 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) @@ -223,8 +198,7 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": np.nan}, index=range(1)) df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = DataFrame( {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} ) @@ -233,8 +207,7 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2) + result = df1.append(df2) expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) tm.assert_frame_equal(result, expected) @@ -245,8 +218,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): # GH 30238 tz = tz_naive_fixture df = DataFrame([Timestamp(timestamp, tz=tz)]) - with tm.assert_produces_warning(FutureWarning): - result = df.append(df.iloc[0]).iloc[-1] + result = df.append(df.iloc[0]).iloc[-1] expected = 
Series(Timestamp(timestamp, tz=tz), name=0) tm.assert_series_equal(result, expected) @@ -262,8 +234,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): ) def test_other_dtypes(self, data, dtype): df = DataFrame(data, dtype=dtype) - with tm.assert_produces_warning(FutureWarning): - result = df.append(df.iloc[0]).iloc[-1] + result = df.append(df.iloc[0]).iloc[-1] expected = Series(data, name=0, dtype=dtype) tm.assert_series_equal(result, expected) @@ -278,8 +249,7 @@ def test_append_numpy_bug_1681(self, dtype): df = DataFrame() other = DataFrame({"A": "foo", "B": index}, index=index) - with tm.assert_produces_warning(FutureWarning): - result = df.append(other) + result = df.append(other) assert (result["B"] == index).all() @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") @@ -294,10 +264,15 @@ def test_multiindex_column_append_multiple(self): df2 = df.copy() for i in range(1, 10): df[i, "colA"] = 10 - with tm.assert_produces_warning(FutureWarning): - df = df.append(df2, ignore_index=True) + df = df.append(df2, ignore_index=True) result = df["multi"] expected = DataFrame( {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} ) tm.assert_frame_equal(result, expected) + + def test_append_raises_future_warning(self): + df1 = DataFrame([[1, 2], [3, 4]]) + df2 = DataFrame([[5, 6], [7, 8]]) + with tm.assert_produces_warning(FutureWarning): + df1.append(df2) diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py index 70ef982923a76..df86d8b1f8f14 100644 --- a/pandas/tests/series/methods/test_append.py +++ b/pandas/tests/series/methods/test_append.py @@ -13,15 +13,14 @@ import pandas._testing as tm +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestSeriesAppend: def test_append_preserve_name(self, datetime_series): - with tm.assert_produces_warning(FutureWarning): - result = datetime_series[:5].append(datetime_series[5:]) 
+ result = datetime_series[:5].append(datetime_series[5:]) assert result.name == datetime_series.name def test_append(self, datetime_series, string_series, object_series): - with tm.assert_produces_warning(FutureWarning): - appended_series = string_series.append(object_series) + appended_series = string_series.append(object_series) for idx, value in appended_series.items(): if idx in string_series.index: assert value == string_series[idx] @@ -31,16 +30,13 @@ def test_append(self, datetime_series, string_series, object_series): raise AssertionError("orphaned index!") msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg), tm.assert_produces_warning( - FutureWarning - ): + with pytest.raises(ValueError, match=msg): datetime_series.append(datetime_series, verify_integrity=True) def test_append_many(self, datetime_series): pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] - with tm.assert_produces_warning(FutureWarning): - result = pieces[0].append(pieces[1:]) + result = pieces[0].append(pieces[1:]) tm.assert_series_equal(result, datetime_series) def test_append_duplicates(self): @@ -48,24 +44,20 @@ def test_append_duplicates(self): s1 = Series([1, 2, 3]) s2 = Series([4, 5, 6]) exp = Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(s1.append(s2), exp) + tm.assert_series_equal(s1.append(s2), exp) tm.assert_series_equal(pd.concat([s1, s2]), exp) # the result must have RangeIndex exp = Series([1, 2, 3, 4, 5, 6]) - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal( - s1.append(s2, ignore_index=True), exp, check_index_type=True - ) + tm.assert_series_equal( + s1.append(s2, ignore_index=True), exp, check_index_type=True + ) tm.assert_series_equal( pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True ) msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg), 
tm.assert_produces_warning( - FutureWarning - ): + with pytest.raises(ValueError, match=msg): s1.append(s2, verify_integrity=True) with pytest.raises(ValueError, match=msg): pd.concat([s1, s2], verify_integrity=True) @@ -76,9 +68,8 @@ def test_append_tuples(self): list_input = [s, s] tuple_input = (s, s) - with tm.assert_produces_warning(FutureWarning): - expected = s.append(list_input) - result = s.append(tuple_input) + expected = s.append(list_input) + result = s.append(tuple_input) tm.assert_series_equal(expected, result) @@ -87,25 +78,25 @@ def test_append_dataframe_raises(self): df = DataFrame({"A": [1, 2], "B": [3, 4]}) msg = "to_append should be a Series or list/tuple of Series, got DataFrame" - with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( - FutureWarning - ): + with pytest.raises(TypeError, match=msg): df.A.append(df) - with pytest.raises(TypeError, match=msg), tm.assert_produces_warning( - FutureWarning - ): + with pytest.raises(TypeError, match=msg): df.A.append([df]) + def test_append_raises_future_warning(self): + with tm.assert_produces_warning(FutureWarning): + Series([1, 2]).append(Series([3, 4])) + +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestSeriesAppendWithDatetimeIndex: def test_append(self): rng = date_range("5/8/2012 1:45", periods=10, freq="5T") ts = Series(np.random.randn(len(rng)), rng) df = DataFrame(np.random.randn(len(rng), 4), index=rng) - with tm.assert_produces_warning(FutureWarning): - result = ts.append(ts) - result_df = df.append(df) + result = ts.append(ts) + result_df = df.append(df) ex_index = DatetimeIndex(np.tile(rng.values, 2)) tm.assert_index_equal(result.index, ex_index) tm.assert_index_equal(result_df.index, ex_index) @@ -136,9 +127,8 @@ def test_append_tz(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - with tm.assert_produces_warning(FutureWarning): - result = ts.append(ts2) - 
result_df = df.append(df2) + result = ts.append(ts2) + result_df = df.append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -163,9 +153,8 @@ def test_append_tz_explicit_pytz(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - with tm.assert_produces_warning(FutureWarning): - result = ts.append(ts2) - result_df = df.append(df2) + result = ts.append(ts2) + result_df = df.append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -188,9 +177,8 @@ def test_append_tz_dateutil(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - with tm.assert_produces_warning(FutureWarning): - result = ts.append(ts2) - result_df = df.append(df2) + result = ts.append(ts2) + result_df = df.append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -202,8 +190,7 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - with tm.assert_produces_warning(FutureWarning): - ts_result = ser1.append(ser2) + ts_result = ser1.append(ser2) exp_index = DatetimeIndex( ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern", freq="H" @@ -216,8 +203,7 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - with tm.assert_produces_warning(FutureWarning): - ts_result = ser1.append(ser2) + ts_result = ser1.append(ser2) exp_index = DatetimeIndex( ["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC", freq="H" @@ -233,8 +219,7 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - with 
tm.assert_produces_warning(FutureWarning): - ts_result = ser1.append(ser2) + ts_result = ser1.append(ser2) exp_index = Index( [ Timestamp("1/1/2011 01:00", tz="US/Eastern"), @@ -249,10 +234,9 @@ def test_series_append_aware_naive(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") ser1 = Series(np.random.randn(len(rng1)), index=rng1) ser2 = Series(np.random.randn(len(rng2)), index=rng2) - with tm.assert_produces_warning(FutureWarning): - ts_result = ser1.append(ser2) + ts_result = ser1.append(ser2) - expected = ser1.index.astype(object).append(ser2.index.astype(object)) + expected = ser1.index.astype(object).append(ser2.index.astype(object)) assert ts_result.index.equals(expected) # mixed @@ -260,10 +244,9 @@ def test_series_append_aware_naive(self): rng2 = range(100) ser1 = Series(np.random.randn(len(rng1)), index=rng1) ser2 = Series(np.random.randn(len(rng2)), index=rng2) - with tm.assert_produces_warning(FutureWarning): - ts_result = ser1.append(ser2) + ts_result = ser1.append(ser2) - expected = ser1.index.astype(object).append(ser2.index) + expected = ser1.index.astype(object).append(ser2.index) assert ts_result.index.equals(expected) def test_series_append_dst(self): @@ -271,8 +254,7 @@ def test_series_append_dst(self): rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") ser1 = Series([1, 2, 3], index=rng1) ser2 = Series([10, 11, 12], index=rng2) - with tm.assert_produces_warning(FutureWarning): - ts_result = ser1.append(ser2) + ts_result = ser1.append(ser2) exp_index = DatetimeIndex( [ From 2b818bba8465622858560ccf6834f56d86d6199e Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 20 Nov 2021 19:50:37 +0000 Subject: [PATCH 04/29] Refer to concat in root namespace --- doc/source/whatsnew/v1.4.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 269d2c4e56a42..453545aed6339 100644 --- 
a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -444,7 +444,7 @@ Deprecated Frame.append and Series.append ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :meth:`DataFrame.append` and :meth:`Series.append` have been deprecated and will be removed in Pandas 2.0. -Use :func:`pandas.core.reshape.concat` instead (:issue:`35407`). +Use :func:`pandas.concat` instead (:issue:`35407`). *Deprecated syntax* @@ -452,7 +452,7 @@ Use :func:`pandas.core.reshape.concat` instead (:issue:`35407`). In [1]: pd.Series([1, 2]).append(pd.Series([3, 4]) Out [1]: - :1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.core.reshape.concat instead. + :1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. 0 1 1 2 0 3 From 153a7840e17a68e691eafcd351e51321e0581db0 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 20 Nov 2021 20:50:39 +0000 Subject: [PATCH 05/29] Fix copy-paste errors --- doc/source/whatsnew/v1.4.0.rst | 4 ++-- pandas/core/series.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 453545aed6339..35e8de774c6f3 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -452,7 +452,7 @@ Use :func:`pandas.concat` instead (:issue:`35407`). In [1]: pd.Series([1, 2]).append(pd.Series([3, 4]) Out [1]: - :1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. + :1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. 0 1 1 2 0 3 @@ -463,7 +463,7 @@ Use :func:`pandas.concat` instead (:issue:`35407`). 
In [3]: df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) In [4]: df1.append(df2) Out [4]: - :1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. + :1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. A B 0 1 2 1 3 4 diff --git a/pandas/core/series.py b/pandas/core/series.py index 5e46915e093ad..6adf3bc71ff24 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2889,7 +2889,7 @@ def append( ValueError: Indexes have overlapping values: [0, 1, 2] """ warnings.warn( - "The frame.append method is deprecated " + "The series.append method is deprecated " "and will be removed from pandas in a future version. " "Use pandas.concat instead.", FutureWarning, From 98d0f9c3657f62cc66cc240e6386da43d153b1e2 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sun, 21 Nov 2021 13:10:48 +0000 Subject: [PATCH 06/29] Ignore warning in reshape/concat test_append and add issue reference --- pandas/tests/frame/methods/test_append.py | 1 + pandas/tests/reshape/concat/test_append.py | 3 ++- pandas/tests/series/methods/test_append.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index 9e9d0d305a842..6117c02c3f653 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -272,6 +272,7 @@ def test_multiindex_column_append_multiple(self): tm.assert_frame_equal(result, expected) def test_append_raises_future_warning(self): + # GH#35407 df1 = DataFrame([[1, 2], [3, 4]]) df2 = DataFrame([[5, 6], [7, 8]]) with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 061afb3a7e0f5..1df598f0830e4 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ 
b/pandas/tests/reshape/concat/test_append.py @@ -109,7 +109,8 @@ def test_append_sorts(self, sort): df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3]) - with tm.assert_produces_warning(None): + # GH#35407 + with tm.assert_produces_warning(FutureWarning): result = df1.append(df2, sort=sort) # for None / True diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py index df86d8b1f8f14..7cee9ed9518e8 100644 --- a/pandas/tests/series/methods/test_append.py +++ b/pandas/tests/series/methods/test_append.py @@ -84,6 +84,7 @@ def test_append_dataframe_raises(self): df.A.append([df]) def test_append_raises_future_warning(self): + # GH#35407 with tm.assert_produces_warning(FutureWarning): Series([1, 2]).append(Series([3, 4])) From 10ef0b679f993aed15361f60fdc6575a356a0d26 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sun, 21 Nov 2021 14:14:19 +0000 Subject: [PATCH 07/29] Ignore warnings in reshape test_crosstab --- pandas/tests/reshape/test_crosstab.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 74beda01e4b8a..3b5422269da51 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -14,6 +14,7 @@ import pandas._testing as tm +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestCrosstab: def setup_method(self, method): df = DataFrame( @@ -796,6 +797,7 @@ def test_margin_normalize_multiple_columns(self): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize("a_dtype", ["category", "int64"]) @pytest.mark.parametrize("b_dtype", ["category", "int64"]) def test_categoricals(a_dtype, b_dtype): From becc29f57693ab2d9f441c85512420917b8aebd0 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Mon, 29 
Nov 2021 21:58:19 +0000 Subject: [PATCH 08/29] Replace further appends with concat --- .../comparison_with_spreadsheets.rst | 7 +- doc/source/user_guide/cookbook.rst | 4 +- doc/source/user_guide/merging.rst | 105 ------------------ doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/frame.py | 19 +++- pandas/core/indexing.py | 9 +- pandas/core/reshape/pivot.py | 6 +- 7 files changed, 33 insertions(+), 119 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_spreadsheets.rst b/doc/source/getting_started/comparison/comparison_with_spreadsheets.rst index bdd0f7d8cfddf..19999be9b461f 100644 --- a/doc/source/getting_started/comparison/comparison_with_spreadsheets.rst +++ b/doc/source/getting_started/comparison/comparison_with_spreadsheets.rst @@ -435,13 +435,14 @@ The equivalent in pandas: Adding a row ~~~~~~~~~~~~ -Assuming we are using a :class:`~pandas.RangeIndex` (numbered ``0``, ``1``, etc.), we can use :meth:`DataFrame.append` to add a row to the bottom of a ``DataFrame``. +Assuming we are using a :class:`~pandas.RangeIndex` (numbered ``0``, ``1``, etc.), we can use :func:`concat` to add a row to the bottom of a ``DataFrame``. .. ipython:: python df - new_row = {"class": "E", "student_count": 51, "all_pass": True} - df.append(new_row, ignore_index=True) + new_row = pd.DataFrame([["E", 51, True]], + columns=["class", "student_count", "all_pass"]) + pd.concat([df, new_row]) Find and Replace diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 03221e71ea32a..468790d1466cf 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -931,7 +931,7 @@ Merge The :ref:`Concat ` docs. The :ref:`Join ` docs. -`Append two dataframes with overlapping index (emulate R rbind) +`Concatenate two dataframes with overlapping index (emulate R rbind) `__ .. ipython:: python @@ -944,7 +944,7 @@ Depending on df construction, ``ignore_index`` may be needed .. 
ipython:: python - df = df1.append(df2, ignore_index=True) + df = pd.concat([df1, df2], ignore_index=True) df `Self Join of a DataFrame diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index cee12c6939b25..83c68cfc1c29f 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -237,59 +237,6 @@ Similarly, we could index before the concatenation: p.plot([df1, df4], result, labels=["df1", "df4"], vertical=False); plt.close("all"); -.. _merging.concatenation: - -Concatenating using ``append`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A useful shortcut to :func:`~pandas.concat` are the :meth:`~DataFrame.append` -instance methods on ``Series`` and ``DataFrame``. These methods actually predated -``concat``. They concatenate along ``axis=0``, namely the index: - -.. ipython:: python - - result = df1.append(df2) - -.. ipython:: python - :suppress: - - @savefig merging_append1.png - p.plot([df1, df2], result, labels=["df1", "df2"], vertical=True); - plt.close("all"); - -In the case of ``DataFrame``, the indexes must be disjoint but the columns do not -need to be: - -.. ipython:: python - - result = df1.append(df4, sort=False) - -.. ipython:: python - :suppress: - - @savefig merging_append2.png - p.plot([df1, df4], result, labels=["df1", "df4"], vertical=True); - plt.close("all"); - -``append`` may take multiple objects to concatenate: - -.. ipython:: python - - result = df1.append([df2, df3]) - -.. ipython:: python - :suppress: - - @savefig merging_append3.png - p.plot([df1, df2, df3], result, labels=["df1", "df2", "df3"], vertical=True); - plt.close("all"); - -.. note:: - - Unlike the :py:meth:`~list.append` method, which appends to the original list - and returns ``None``, :meth:`~DataFrame.append` here **does not** modify - ``df1`` and returns its copy with ``df2`` appended. - .. 
_merging.ignore_index: Ignoring indexes on the concatenation axis @@ -309,19 +256,6 @@ do this, use the ``ignore_index`` argument: p.plot([df1, df4], result, labels=["df1", "df4"], vertical=True); plt.close("all"); -This is also a valid argument to :meth:`DataFrame.append`: - -.. ipython:: python - - result = df1.append(df4, ignore_index=True, sort=False) - -.. ipython:: python - :suppress: - - @savefig merging_append_ignore_index.png - p.plot([df1, df4], result, labels=["df1", "df4"], vertical=True); - plt.close("all"); - .. _merging.mixed_ndims: Concatenating with mixed ndims @@ -468,45 +402,6 @@ do so using the ``levels`` argument: This is fairly esoteric, but it is actually necessary for implementing things like GroupBy where the order of a categorical variable is meaningful. -.. _merging.append.row: - -Appending rows to a DataFrame -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -While not especially efficient (since a new object must be created), you can -append a single row to a ``DataFrame`` by passing a ``Series`` or dict to -``append``, which returns a new ``DataFrame`` as above. - -.. ipython:: python - - s2 = pd.Series(["X0", "X1", "X2", "X3"], index=["A", "B", "C", "D"]) - result = df1.append(s2, ignore_index=True) - -.. ipython:: python - :suppress: - - @savefig merging_append_series_as_row.png - p.plot([df1, s2], result, labels=["df1", "s2"], vertical=True); - plt.close("all"); - -You should use ``ignore_index`` with this method to instruct DataFrame to -discard its index. If you wish to preserve the index, you should construct an -appropriately-indexed DataFrame and append or concatenate those objects. - -You can also pass a list of dicts or Series: - -.. ipython:: python - - dicts = [{"A": 1, "B": 2, "C": 3, "X": 4}, {"A": 5, "B": 6, "C": 7, "Y": 8}] - result = df1.append(dicts, ignore_index=True, sort=False) - -.. 
ipython:: python - :suppress: - - @savefig merging_append_dits.png - p.plot([df1, pd.DataFrame(dicts)], result, labels=["df1", "dicts"], vertical=True); - plt.close("all"); - .. _merging.join: Database-style DataFrame or named Series joining/merging diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 35e8de774c6f3..f4d78c3ae11d3 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -252,7 +252,7 @@ ignored when finding the concatenated dtype. These are now consistently _not_ i df1 = pd.DataFrame({"bar": [pd.Timestamp("2013-01-01")]}, index=range(1)) df2 = pd.DataFrame({"bar": np.nan}, index=range(1, 2)) - res = df1.append(df2) + res = pd.concat([df1, df2]) Previously, the float-dtype in ``df2`` would be ignored so the result dtype would be ``datetime64[ns]``. As a result, the ``np.nan`` would be cast to ``NaT``. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8954ef4c47a3f..cb4419b3351ff 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3372,9 +3372,16 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: index=self.columns, ) if index: - result = self._constructor_sliced( - self.index.memory_usage(deep=deep), index=["Index"] - ).append(result) + from pandas.core.reshape.concat import concat + + result = concat( + [ + self._constructor_sliced( + self.index.memory_usage(deep=deep), index=["Index"] + ), + result, + ] + ) return result def transpose(self, *args, copy: bool = False) -> DataFrame: @@ -9824,7 +9831,11 @@ def c(x): idx_diff = result_index.difference(correl.index) if len(idx_diff) > 0: - correl = correl.append(Series([np.nan] * len(idx_diff), index=idx_diff)) + from pandas.core.reshape.concat import concat + + correl = concat( + [correl, Series([np.nan] * len(idx_diff), index=idx_diff)] + ) return correl diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b2e2bb0642c41..ca22c50b27144 100644 --- a/pandas/core/indexing.py +++ 
b/pandas/core/indexing.py @@ -1997,7 +1997,14 @@ def _setitem_with_indexer_missing(self, indexer, value): df = df.infer_objects() self.obj._mgr = df._mgr else: - self.obj._mgr = self.obj.append(value)._mgr + from pandas.core.reshape.concat import concat + + df = value.to_frame().T + index_names = self.obj.index.names + df_preserve_dtype_and_index = df.infer_objects().rename_axis( + index_names + ) + self.obj._mgr = concat([self.obj, df_preserve_dtype_and_index])._mgr self.obj._maybe_update_cacher(clear=True) def _ensure_iterable_column_indexer(self, column_indexer): diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index edd3599aabe35..4fc5e05234fdd 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -327,7 +327,7 @@ def _add_margins( margin_dummy[cols] = margin_dummy[cols].apply( maybe_downcast_to_dtype, args=(dtype,) ) - result = result.append(margin_dummy) + result = concat([result, margin_dummy]) result.index.names = row_names return result @@ -740,7 +740,7 @@ def _normalize(table, normalize, margins: bool, margins_name="All"): elif normalize == "index": index_margin = index_margin / index_margin.sum() - table = table.append(index_margin) + table = concat([table, index_margin.to_frame().T]) table = table.fillna(0) table.index = table_index @@ -749,7 +749,7 @@ def _normalize(table, normalize, margins: bool, margins_name="All"): index_margin = index_margin / index_margin.sum() index_margin.loc[margins_name] = 1 table = concat([table, column_margin], axis=1) - table = table.append(index_margin) + table = concat([table, index_margin.to_frame().T]) table = table.fillna(0) table.index = table_index From 02620b8c5269d445585d404c5513c6de077a6fc4 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Tue, 30 Nov 2021 08:18:17 +0000 Subject: [PATCH 09/29] Ignore FutureWarning in test_value_counts_null --- pandas/tests/base/test_value_counts.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index ddb21408a1a04..9a728f9fdd62e 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -38,6 +38,7 @@ def test_value_counts(index_or_series_obj): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize("null_obj", [np.nan, None]) def test_value_counts_null(null_obj, index_or_series_obj): orig = index_or_series_obj From 7b9fdf3c7107e195eceb4a0c056d5fb2e5a93af2 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Tue, 30 Nov 2021 11:52:00 +0000 Subject: [PATCH 10/29] Filter FutureWarning in remaining affected tests --- pandas/tests/frame/methods/test_drop_duplicates.py | 1 + pandas/tests/generic/test_duplicate_labels.py | 1 + pandas/tests/generic/test_finalize.py | 1 + pandas/tests/indexes/period/test_indexing.py | 1 + pandas/tests/indexing/test_partial.py | 2 ++ pandas/tests/io/formats/test_format.py | 1 + pandas/tests/resample/test_time_grouper.py | 2 ++ pandas/tests/reshape/concat/test_append.py | 1 + pandas/tests/reshape/concat/test_append_common.py | 1 + pandas/tests/reshape/test_pivot.py | 1 + 10 files changed, 12 insertions(+) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 8cbf7bbfe0368..53f1acabde45a 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -21,6 +21,7 @@ def test_drop_duplicates_with_misspelled_column_name(subset): df.drop_duplicates(subset) +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_drop_duplicates(): df = DataFrame( { diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 1b32675ec2d35..082786f377a2d 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ 
b/pandas/tests/generic/test_duplicate_labels.py @@ -294,6 +294,7 @@ def test_setting_allows_duplicate_labels_raises(self, data): assert data.flags.allows_duplicate_labels is True + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize( "func", [operator.methodcaller("append", pd.Series(0, index=["a", "b"]))] ) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 135e8cc7b7aba..343aa61056f3e 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -511,6 +511,7 @@ def ndframe_method(request): return request.param +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_finalize_called(ndframe_method): cls, init_args, method = ndframe_method ndframe = cls(*init_args) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index df2f114e73df2..10af4d2c0083c 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -115,6 +115,7 @@ def test_getitem_index(self): ) tm.assert_index_equal(result, exp) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_getitem_partial(self): rng = period_range("2007-01", periods=50, freq="M") ts = Series(np.random.randn(len(rng)), rng) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 95a9fd227c685..3b19c8a1eac4d 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -353,6 +353,7 @@ def test_partial_setting2(self): df.at[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_partial_setting_mixed_dtype(self): # in a mixed dtype environment, try to preserve dtypes @@ -528,6 +529,7 @@ def 
test_setitem_with_expansion_numeric_into_datetimeindex(self, key): tm.assert_frame_equal(df, expected) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_partial_set_invalid(self): # GH 4940 diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index ab0199dca3f24..9847e35257643 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2418,6 +2418,7 @@ def test_float_trim_zeros(self): else: assert "+10" in line + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_datetimeindex(self): index = date_range("20130102", periods=6) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 4f69a7f590319..b80268a392a2c 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -177,6 +177,7 @@ def test_resample_entirely_nat_window(method, method_args, unit): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize( "func, fill_value", [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)], @@ -215,6 +216,7 @@ def test_aggregate_with_nat(func, fill_value): assert dt_result.index.name == "key" +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_aggregate_with_nat_size(): # GH 9925 n = 20 diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 1df598f0830e4..84c642e893e71 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -17,6 +17,7 @@ import pandas._testing as tm +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestAppend: def test_append(self, sort, float_frame): mixed_frame = float_frame.copy() diff --git 
a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index b8b254e786194..a43ed51ec8903 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -11,6 +11,7 @@ import pandas._testing as tm +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestConcatAppendCommon: """ Test common dtype coercion rules between concat and append. diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 88607f4b036a0..835ae131d23e7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1127,6 +1127,7 @@ def test_pivot_complex_aggfunc(self): tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_margins_no_values_no_cols(self): # Regression test on pivot table: no values or cols passed. result = self.data[["A", "B"]].pivot_table( From bf2f30c2fb478cb719cb9b1a1e05ff2f6b805e89 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Tue, 30 Nov 2021 13:01:23 +0000 Subject: [PATCH 11/29] Ignore FutureWarning in even more tests --- pandas/tests/reshape/concat/test_categorical.py | 2 ++ pandas/tests/reshape/concat/test_concat.py | 1 + pandas/tests/reshape/concat/test_index.py | 1 + pandas/tests/reshape/merge/test_merge.py | 2 ++ pandas/tests/series/accessors/test_dt_accessor.py | 1 + pandas/tests/series/indexing/test_indexing.py | 2 ++ 6 files changed, 9 insertions(+) diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py index aba14fd2fcd77..be9331b352230 100644 --- a/pandas/tests/reshape/concat/test_categorical.py +++ b/pandas/tests/reshape/concat/test_categorical.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas.core.dtypes.dtypes import CategoricalDtype @@ -184,6 +185,7 @@ def test_concat_categorical_unchanged(self): ) 
tm.assert_equal(result, expected) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_categorical_concat_gh7864(self): # GH 7864 # make sure ordering is preserved diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 676571e419a1a..ef0961b53d32d 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -219,6 +219,7 @@ def test_concat_keys_levels_no_overlap(self): with pytest.raises(ValueError, match=msg): concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]]) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_crossed_dtypes_weird_corner(self): columns = ["A", "B", "C", "D"] df1 = DataFrame( diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index f8ad9d1084c53..8ff87817786db 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -141,6 +141,7 @@ def test_default_index(self): exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]]) tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_dups_index(self): # GH 4771 diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 371a7fed543e4..45c59382796d2 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -682,6 +682,7 @@ def _constructor(self): assert isinstance(result, NotADataFrame) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_join_append_timedeltas(self): # timedelta64 issues with join/merge # GH 5695 @@ -1098,6 +1099,7 @@ def test_merge_indicator_multiple_columns(self): test5 = df3.merge(df4, on=["col1", "col2"], how="outer", indicator=True) 
tm.assert_frame_equal(test5, hand_coded_result) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_validation(self): left = DataFrame( {"a": ["a", "b", "c", "d"], "b": ["cat", "dog", "weasel", "horse"]}, diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 48a3ebd25c239..4342042b9599f 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -426,6 +426,7 @@ def test_dt_accessor_no_new_attributes(self): with pytest.raises(AttributeError, match="You cannot add any new attribute"): ser.dt.xlabel = "a" + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize( "time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales() ) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 8a34882b1e5d4..9f81e26f19196 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -80,6 +80,7 @@ def test_getitem_setitem_ellipsis(): assert (result == 5).all() +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize( "result_1, duplicate_item, expected_1", [ @@ -153,6 +154,7 @@ def test_getitem_dups_with_missing(indexer_sl): indexer_sl(ser)[["foo", "bar", "bah", "bam"]] +@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_setitem_ambiguous_keyerror(indexer_sl): s = Series(range(10), index=list(range(0, 20, 2))) From 982250d5bd6b38ef7d8061f8d6c116cae6a00ae5 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Tue, 30 Nov 2021 14:07:38 +0000 Subject: [PATCH 12/29] Ignore FutureWarning in one last test --- pandas/tests/series/indexing/test_setitem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/series/indexing/test_setitem.py 
b/pandas/tests/series/indexing/test_setitem.py index 5f96078ba70b1..0eb3c2c38fd19 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -433,6 +433,7 @@ def test_setitem_with_expansion_type_promotion(self): expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) tm.assert_series_equal(ser, expected) + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_setitem_not_contained(self, string_series): # set item that's not contained ser = string_series.copy() From fbdbc24f1c1b03e1c588b43b84eb6d22acf2ac53 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Tue, 30 Nov 2021 14:21:55 +0000 Subject: [PATCH 13/29] Delete refs to merging.concatenation and merging.append.row (documenting append) --- doc/source/user_guide/10min.rst | 1 - doc/source/user_guide/cookbook.rst | 2 +- doc/source/whatsnew/v0.6.1.rst | 2 +- doc/source/whatsnew/v0.7.0.rst | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 4aca107b7c106..08488a33936f0 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -478,7 +478,6 @@ Concatenating pandas objects together with :func:`concat`: a row requires a copy, and may be expensive. We recommend passing a pre-built list of records to the :class:`DataFrame` constructor instead of building a :class:`DataFrame` by iteratively appending records to it. - See :ref:`Appending to dataframe ` for more. Join ~~~~ diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 468790d1466cf..8c2dd3ba60f13 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -929,7 +929,7 @@ Valid frequency arguments to Grouper :ref:`Timeseries ` docs. The :ref:`Join ` docs. +The :ref:`Join ` docs. 
`Concatenate two dataframes with overlapping index (emulate R rbind) `__ diff --git a/doc/source/whatsnew/v0.6.1.rst b/doc/source/whatsnew/v0.6.1.rst index 139c6e2d1cb0c..4e72a630ad9f1 100644 --- a/doc/source/whatsnew/v0.6.1.rst +++ b/doc/source/whatsnew/v0.6.1.rst @@ -6,7 +6,7 @@ Version 0.6.1 (December 13, 2011) New features ~~~~~~~~~~~~ -- Can :ref:`append single rows ` (as Series) to a DataFrame +- Can append single rows (as Series) to a DataFrame - Add Spearman and Kendall rank :ref:`correlation ` options to Series.corr and DataFrame.corr (:issue:`428`) - :ref:`Added ` ``get_value`` and ``set_value`` methods to diff --git a/doc/source/whatsnew/v0.7.0.rst b/doc/source/whatsnew/v0.7.0.rst index 52747f2992dc4..1b947030ab8ab 100644 --- a/doc/source/whatsnew/v0.7.0.rst +++ b/doc/source/whatsnew/v0.7.0.rst @@ -19,7 +19,7 @@ New features intersection of the other axes. Improves performance of ``Series.append`` and ``DataFrame.append`` (:issue:`468`, :issue:`479`, :issue:`273`) -- :ref:`Can ` pass multiple DataFrames to +- Can pass multiple DataFrames to ``DataFrame.append`` to concatenate (stack) and multiple Series to ``Series.append`` too From 506868b56779319eb0393bde6988191a8190a83b Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Wed, 1 Dec 2021 20:11:24 +0000 Subject: [PATCH 14/29] Replace append by concat in tests instead of ignoring warnings --- pandas/tests/base/test_value_counts.py | 3 +-- pandas/tests/frame/methods/test_drop_duplicates.py | 5 +++-- pandas/tests/generic/test_finalize.py | 9 +++++++-- pandas/tests/indexes/period/test_indexing.py | 5 +++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 9a728f9fdd62e..af76f26e4a91c 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -38,7 +38,6 @@ def test_value_counts(index_or_series_obj): tm.assert_series_equal(result, expected) 
-@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize("null_obj", [np.nan, None]) def test_value_counts_null(null_obj, index_or_series_obj): orig = index_or_series_obj @@ -75,7 +74,7 @@ def test_value_counts_null(null_obj, index_or_series_obj): # can't use expected[null_obj] = 3 as # IntervalIndex doesn't allow assignment new_entry = Series({np.nan: 3}, dtype=np.int64) - expected = expected.append(new_entry) + expected = pd.concat([expected, new_entry]) result = obj.value_counts(dropna=False) if obj.duplicated().any(): diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 53f1acabde45a..cd31590042522 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -21,7 +21,6 @@ def test_drop_duplicates_with_misspelled_column_name(subset): df.drop_duplicates(subset) -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_drop_duplicates(): df = DataFrame( { @@ -112,7 +111,9 @@ def test_drop_duplicates(): # GH 11864 df = DataFrame([i] * 9 for i in range(16)) - df = df.append([[1] + [0] * 8], ignore_index=True) + from pandas.core.reshape.concat import concat + + df = concat([df, DataFrame([[1] + [0] * 8])], ignore_index=True) for keep in ["first", "last", False]: assert df.duplicated(keep=keep).sum() == 0 diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 343aa61056f3e..c139ecc22fa49 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -180,7 +180,10 @@ pd.DataFrame, frame_data, operator.methodcaller("append", pd.DataFrame({"A": [1]})), - ) + ), + marks=pytest.mark.filterwarnings( + "ignore:.*append method is deprecated.*:FutureWarning" + ), ), pytest.param( ( @@ -188,6 +191,9 @@ frame_data, operator.methodcaller("append", pd.DataFrame({"B": [1]})), ), + 
marks=pytest.mark.filterwarnings( + "ignore:.*append method is deprecated.*:FutureWarning" + ), ), pytest.param( ( @@ -511,7 +517,6 @@ def ndframe_method(request): return request.param -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_finalize_called(ndframe_method): cls, init_args, method = ndframe_method ndframe = cls(*init_args) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 10af4d2c0083c..9dc66e3602a55 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -115,7 +115,6 @@ def test_getitem_index(self): ) tm.assert_index_equal(result, exp) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_getitem_partial(self): rng = period_range("2007-01", periods=50, freq="M") ts = Series(np.random.randn(len(rng)), rng) @@ -145,7 +144,9 @@ def test_getitem_partial(self): result = ts[24:] tm.assert_series_equal(exp, result) - ts = ts[10:].append(ts[10:]) + from pandas.core.reshape.concat import concat + + ts = concat([ts[10:], ts[10:]]) msg = "left slice bound for non-unique label: '2008'" with pytest.raises(KeyError, match=msg): ts[slice("2008", "2009")] From 525297b599b7499044bd9fd3807b5effb35eaf01 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Wed, 1 Dec 2021 20:11:45 +0000 Subject: [PATCH 15/29] Introduce intermediate variables --- pandas/core/frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 75035c0b7bd0c..fbc3cd6cfc36b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3267,11 +3267,12 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: if index: from pandas.core.reshape.concat import concat + index_memory_usage = self._constructor_sliced( + self.index.memory_usage(deep=deep), index=["Index"] + ) result = concat( [ - 
self._constructor_sliced( - self.index.memory_usage(deep=deep), index=["Index"] - ), + index_memory_usage, result, ] ) @@ -9726,9 +9727,8 @@ def c(x): if len(idx_diff) > 0: from pandas.core.reshape.concat import concat - correl = concat( - [correl, Series([np.nan] * len(idx_diff), index=idx_diff)] - ) + nan_correl = Series([np.nan] * len(idx_diff), index=idx_diff) + correl = concat([correl, nan_correl]) return correl From e19b2e1939a6be0487270dad998455224170f234 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Wed, 1 Dec 2021 20:12:03 +0000 Subject: [PATCH 16/29] Use ipython instead of code block --- doc/source/whatsnew/v1.4.0.rst | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1c6b856c8f150..828ba13a33a2b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -524,25 +524,13 @@ Use :func:`pandas.concat` instead (:issue:`35407`). *Recommended syntax* -.. code-block:: ipython +.. ipython:: python - In [1]: pd.concat([pd.Series([1, 2]), pd.Series([3, 4])]) - Out [1]: - 0 1 - 1 2 - 0 3 - 1 4 - dtype: int64 + pd.concat([pd.Series([1, 2]), pd.Series([3, 4])]) - In [2]: df1 = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB')) - In [3]: df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) - In [4]: pd.concat([df1, df2]) - Out [4]: - A B - 0 1 2 - 1 3 4 - 0 5 6 - 1 7 8 + df1 = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB')) + df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) + pd.concat([df1, df2]) .. 
_whatsnew_140.deprecations.other: From ca851c46f30d95882561742b8a3d60ef82eebea9 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Thu, 2 Dec 2021 21:24:04 +0000 Subject: [PATCH 17/29] Extract _append and replace append by _append/concat in tests with ignored warning --- pandas/core/frame.py | 9 +++++++++ pandas/core/indexing.py | 9 +-------- pandas/tests/indexing/test_partial.py | 6 ++---- pandas/tests/io/formats/test_format.py | 3 +-- pandas/tests/resample/test_time_grouper.py | 6 ++---- pandas/tests/reshape/merge/test_merge.py | 7 +++---- pandas/tests/reshape/test_crosstab.py | 13 ++++++------- pandas/tests/reshape/test_pivot.py | 1 - pandas/tests/series/accessors/test_dt_accessor.py | 5 ++--- pandas/tests/series/indexing/test_indexing.py | 4 ++-- pandas/tests/series/indexing/test_setitem.py | 4 ++-- 11 files changed, 30 insertions(+), 37 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 33576c96f8d14..2b592d0723ef8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9026,6 +9026,15 @@ def append( stacklevel=find_stack_level(), ) + return self._append(other, ignore_index, verify_integrity, sort) + + def _append( + self, + other, + ignore_index: bool = False, + verify_integrity: bool = False, + sort: bool = False, + ) -> DataFrame: combined_columns = None if isinstance(other, (Series, dict)): if isinstance(other, dict): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 13f75e6ee239c..96240385b80a7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1993,14 +1993,7 @@ def _setitem_with_indexer_missing(self, indexer, value): df = df.infer_objects() self.obj._mgr = df._mgr else: - from pandas.core.reshape.concat import concat - - df = value.to_frame().T - index_names = self.obj.index.names - df_preserve_dtype_and_index = df.infer_objects().rename_axis( - index_names - ) - self.obj._mgr = concat([self.obj, df_preserve_dtype_and_index])._mgr + self.obj._mgr = self.obj._append(value)._mgr 
self.obj._maybe_update_cacher(clear=True) def _ensure_iterable_column_indexer(self, column_indexer): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 3b19c8a1eac4d..0054cd2d4e2ec 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -353,7 +353,6 @@ def test_partial_setting2(self): df.at[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_partial_setting_mixed_dtype(self): # in a mixed dtype environment, try to preserve dtypes @@ -362,7 +361,7 @@ def test_partial_setting_mixed_dtype(self): s = df.loc[1].copy() s.name = 2 - expected = df.append(s) + expected = df._append(s) df.loc[2] = df.loc[1] tm.assert_frame_equal(df, expected) @@ -529,7 +528,6 @@ def test_setitem_with_expansion_numeric_into_datetimeindex(self, key): tm.assert_frame_equal(df, expected) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_partial_set_invalid(self): # GH 4940 @@ -540,7 +538,7 @@ def test_partial_set_invalid(self): # allow object conversion here df = orig.copy() df.loc["a", :] = df.iloc[0] - exp = orig.append(Series(df.iloc[0], name="a")) + exp = orig._append(Series(df.iloc[0], name="a")) tm.assert_frame_equal(df, exp) tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) assert df.index.dtype == "object" diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 9847e35257643..fe6914e6ef4f5 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2418,7 +2418,6 @@ def test_float_trim_zeros(self): else: assert "+10" in line - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_datetimeindex(self): index = date_range("20130102", periods=6) @@ -2428,7 +2427,7 @@ def test_datetimeindex(self): # nat in index s2 
= Series(2, index=[Timestamp("20130111"), NaT]) - s = s2.append(s) + s = pd.concat([s2, s]) result = s.to_string() assert "NaT" in result diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index b80268a392a2c..2e512d74076d3 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -177,7 +177,6 @@ def test_resample_entirely_nat_window(method, method_args, unit): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize( "func, fill_value", [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)], @@ -208,7 +207,7 @@ def test_aggregate_with_nat(func, fill_value): dt_result = getattr(dt_grouped, func)() pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"]) - expected = normal_result.append(pad) + expected = pd.concat([normal_result, pad]) expected = expected.sort_index() dti = date_range(start="2013-01-01", freq="D", periods=5, name="key") expected.index = dti._with_freq(None) # TODO: is this desired? 
@@ -216,7 +215,6 @@ def test_aggregate_with_nat(func, fill_value): assert dt_result.index.name == "key" -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_aggregate_with_nat_size(): # GH 9925 n = 20 @@ -240,7 +238,7 @@ def test_aggregate_with_nat_size(): dt_result = dt_grouped.size() pad = Series([0], index=[3]) - expected = normal_result.append(pad) + expected = pd.concat([normal_result, pad]) expected = expected.sort_index() expected.index = date_range( start="2013-01-01", freq="D", periods=5, name="key" diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 45c59382796d2..b6b3a7d629e16 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1099,7 +1099,6 @@ def test_merge_indicator_multiple_columns(self): test5 = df3.merge(df4, on=["col1", "col2"], how="outer", indicator=True) tm.assert_frame_equal(test5, hand_coded_result) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_validation(self): left = DataFrame( {"a": ["a", "b", "c", "d"], "b": ["cat", "dog", "weasel", "horse"]}, @@ -1178,7 +1177,7 @@ def test_validation(self): tm.assert_frame_equal(result, expected_3) # Dups on right - right_w_dups = right.append(DataFrame({"a": ["e"], "c": ["moo"]}, index=[4])) + right_w_dups = concat([right, DataFrame({"a": ["e"], "c": ["moo"]}, index=[4])]) merge( left, right_w_dups, @@ -1201,8 +1200,8 @@ def test_validation(self): merge(left, right_w_dups, on="a", validate="one_to_one") # Dups on left - left_w_dups = left.append( - DataFrame({"a": ["a"], "c": ["cow"]}, index=[3]), sort=True + left_w_dups = concat( + [left, DataFrame({"a": ["a"], "c": ["cow"]}, index=[3])], sort=True ) merge( left_w_dups, diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 3b5422269da51..cc6eec671ac3a 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ 
b/pandas/tests/reshape/test_crosstab.py @@ -3,6 +3,7 @@ from pandas.core.dtypes.common import is_categorical_dtype +import pandas as pd from pandas import ( CategoricalIndex, DataFrame, @@ -14,7 +15,6 @@ import pandas._testing as tm -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestCrosstab: def setup_method(self, method): df = DataFrame( @@ -64,7 +64,7 @@ def setup_method(self, method): } ) - self.df = df.append(df, ignore_index=True) + self.df = pd.concat([df, df], ignore_index=True) def test_crosstab_single(self): df = self.df @@ -143,14 +143,14 @@ def test_crosstab_margins(self): exp_cols = df.groupby(["a"]).size().astype("i8") # to keep index.name exp_margin = Series([len(df)], index=Index(["All"], name="a")) - exp_cols = exp_cols.append(exp_margin) + exp_cols = pd.concat([exp_cols, exp_margin]) exp_cols.name = ("All", "") tm.assert_series_equal(all_cols, exp_cols) all_rows = result.loc["All"] exp_rows = df.groupby(["b", "c"]).size().astype("i8") - exp_rows = exp_rows.append(Series([len(df)], index=[("All", "")])) + exp_rows = pd.concat([exp_rows, Series([len(df)], index=[("All", "")])]) exp_rows.name = "All" exp_rows = exp_rows.reindex(all_rows.index) @@ -181,14 +181,14 @@ def test_crosstab_margins_set_margin_name(self): exp_cols = df.groupby(["a"]).size().astype("i8") # to keep index.name exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a")) - exp_cols = exp_cols.append(exp_margin) + exp_cols = pd.concat([exp_cols, exp_margin]) exp_cols.name = ("TOTAL", "") tm.assert_series_equal(all_cols, exp_cols) all_rows = result.loc["TOTAL"] exp_rows = df.groupby(["b", "c"]).size().astype("i8") - exp_rows = exp_rows.append(Series([len(df)], index=[("TOTAL", "")])) + exp_rows = pd.concat([exp_rows, Series([len(df)], index=[("TOTAL", "")])]) exp_rows.name = "TOTAL" exp_rows = exp_rows.reindex(all_rows.index) @@ -797,7 +797,6 @@ def test_margin_normalize_multiple_columns(self): tm.assert_frame_equal(result, expected) 
-@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize("a_dtype", ["category", "int64"]) @pytest.mark.parametrize("b_dtype", ["category", "int64"]) def test_categoricals(a_dtype, b_dtype): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 835ae131d23e7..88607f4b036a0 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1127,7 +1127,6 @@ def test_pivot_complex_aggfunc(self): tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_margins_no_values_no_cols(self): # Regression test on pivot table: no values or cols passed. result = self.data[["A", "B"]].pivot_table( diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 4342042b9599f..ce4121c37b246 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -426,7 +426,6 @@ def test_dt_accessor_no_new_attributes(self): with pytest.raises(AttributeError, match="You cannot add any new attribute"): ser.dt.xlabel = "a" - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") @pytest.mark.parametrize( "time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales() ) @@ -477,7 +476,7 @@ def test_dt_accessor_datetime_name_accessors(self, time_locale): name = name.capitalize() assert ser.dt.day_name(locale=time_locale)[day] == name assert ser.dt.day_name(locale=None)[day] == eng_name - ser = ser.append(Series([pd.NaT])) + ser = pd.concat([ser, Series([pd.NaT])]) assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1]) ser = Series(date_range(freq="M", start="2012", end="2013")) @@ -499,7 +498,7 @@ def test_dt_accessor_datetime_name_accessors(self, time_locale): assert result == expected - ser = ser.append(Series([pd.NaT])) + ser = 
pd.concat([ser, Series([pd.NaT])]) assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1]) def test_strftime(self): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 9f81e26f19196..4da6bb451fb4c 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -5,6 +5,7 @@ import numpy as np import pytest +import pandas as pd from pandas import ( DataFrame, IndexSlice, @@ -154,14 +155,13 @@ def test_getitem_dups_with_missing(indexer_sl): indexer_sl(ser)[["foo", "bar", "bah", "bam"]] -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_setitem_ambiguous_keyerror(indexer_sl): s = Series(range(10), index=list(range(0, 20, 2))) # equivalent of an append s2 = s.copy() indexer_sl(s2)[1] = 5 - expected = s.append(Series([5], index=[1])) + expected = pd.concat([s, Series([5], index=[1])]) tm.assert_series_equal(s2, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 0eb3c2c38fd19..706b02e2ef925 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -8,6 +8,7 @@ from pandas.core.dtypes.common import is_list_like +import pandas as pd from pandas import ( Categorical, DataFrame, @@ -433,7 +434,6 @@ def test_setitem_with_expansion_type_promotion(self): expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) tm.assert_series_equal(ser, expected) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_setitem_not_contained(self, string_series): # set item that's not contained ser = string_series.copy() @@ -441,7 +441,7 @@ def test_setitem_not_contained(self, string_series): ser["foobar"] = 1 app = Series([1], index=["foobar"], name="series") - expected = string_series.append(app) + expected = pd.concat([string_series, app]) 
tm.assert_series_equal(ser, expected) From 2be90106b584b0259a1193407b23586e712d167b Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Fri, 3 Dec 2021 15:56:16 +0000 Subject: [PATCH 18/29] Reinsert modified instructions for appending one row to df (suggestion) --- doc/source/user_guide/merging.rst | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 83c68cfc1c29f..2caec8569aa00 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -402,6 +402,37 @@ do so using the ``levels`` argument: This is fairly esoteric, but it is actually necessary for implementing things like GroupBy where the order of a categorical variable is meaningful. +.. _merging.append.row: + +Appending rows to a DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can append a single row in the form of a ``Series`` to a ``DataFrame`` in-place +using ``loc``. + +.. ipython:: python + + s2 = pd.Series(["X0", "X1", "X2", "X3"], index=["A", "B", "C", "D"]) + df1.loc[len(df1)] = s2 + +Alternatively, you can convert the row into a DataFrame and use ``concat``, +which doesn't have a side effect on ``df1``. + +.. ipython:: python + + result = pd.concat([df1, s2.to_frame().T], ignore_index=True) + +.. ipython:: python + :suppress: + + @savefig merging_append_series_as_row.png + p.plot([df1, s2], result, labels=["df1", "s2"], vertical=True); + plt.close("all"); + +You should use ``ignore_index`` with this method to instruct DataFrame to +discard its index. If you wish to preserve the index, you should construct an +appropriately-indexed DataFrame and append or concatenate those objects. + ..
_merging.join: Database-style DataFrame or named Series joining/merging From 028b01294e5958c3aac7487abef607dfde0bc0d0 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 4 Dec 2021 11:23:10 +0000 Subject: [PATCH 19/29] Import concat from pandas in tests --- pandas/tests/series/indexing/test_indexing.py | 4 ++-- pandas/tests/series/indexing/test_setitem.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 09aea2a285a79..35f31224aa681 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -5,13 +5,13 @@ import numpy as np import pytest -import pandas as pd from pandas import ( DataFrame, IndexSlice, Series, Timedelta, Timestamp, + concat, date_range, period_range, timedelta_range, @@ -160,7 +160,7 @@ def test_setitem_ambiguous_keyerror(indexer_sl): # equivalent of an append s2 = s.copy() indexer_sl(s2)[1] = 5 - expected = pd.concat([s, Series([5], index=[1])]) + expected = concat([s, Series([5], index=[1])]) tm.assert_series_equal(s2, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 706b02e2ef925..47f6257fd885e 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.common import is_list_like -import pandas as pd from pandas import ( Categorical, DataFrame, @@ -21,6 +20,7 @@ Series, Timedelta, Timestamp, + concat, date_range, period_range, ) @@ -441,7 +441,7 @@ def test_setitem_not_contained(self, string_series): ser["foobar"] = 1 app = Series([1], index=["foobar"], name="series") - expected = pd.concat([string_series, app]) + expected = concat([string_series, app]) tm.assert_series_equal(ser, expected) From 1d8f3c51b83ebef35b4b8f732a96976012f306d4 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 4 Dec 2021 
11:33:48 +0000 Subject: [PATCH 20/29] Fit call to concat on one line --- pandas/core/frame.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6e2ca8d83417d..c2f35af08963f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3272,12 +3272,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: index_memory_usage = self._constructor_sliced( self.index.memory_usage(deep=deep), index=["Index"] ) - result = concat( - [ - index_memory_usage, - result, - ] - ) + result = concat([index_memory_usage, result]) return result def transpose(self, *args, copy: bool = False) -> DataFrame: From d319b0fcdc386c1725cc10d61142e5ec236ce10b Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 4 Dec 2021 14:23:43 +0000 Subject: [PATCH 21/29] Replace append by concat in _add_margins --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 4fc5e05234fdd..09353bb5db206 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -289,7 +289,7 @@ def _add_margins( if not values and isinstance(table, ABCSeries): # If there are no values and the table is a series, then there is only # one column in the data. Compute grand margin and return it. 
- return table.append(Series({key: grand_margin[margins_name]})) + return concat([table, Series({key: grand_margin[margins_name]})]) elif values: marginal_result_set = _generate_marginal_results( From 46170d48e5bc2da0733b0ad8a2a4a05266e5d58e Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sun, 5 Dec 2021 14:49:28 +0000 Subject: [PATCH 22/29] Replace append and _append by concat in tests --- pandas/tests/generic/test_duplicate_labels.py | 8 ++------ pandas/tests/indexing/test_partial.py | 5 +++-- pandas/tests/reshape/concat/test_categorical.py | 5 ----- pandas/tests/reshape/concat/test_concat.py | 3 +-- pandas/tests/reshape/concat/test_index.py | 9 ++------- pandas/tests/reshape/merge/test_merge.py | 9 +++++---- 6 files changed, 13 insertions(+), 26 deletions(-) diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 082786f377a2d..dddcc8efe3627 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -294,15 +294,11 @@ def test_setting_allows_duplicate_labels_raises(self, data): assert data.flags.allows_duplicate_labels is True - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") - @pytest.mark.parametrize( - "func", [operator.methodcaller("append", pd.Series(0, index=["a", "b"]))] - ) - def test_series_raises(self, func): + def test_series_raises(self): s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) msg = "Index has duplicates." 
with pytest.raises(pd.errors.DuplicateLabelError, match=msg): - func(s) + pd.concat([pd.Series(0, index=["a", "b"]), s]) @pytest.mark.parametrize( "getter, target", diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 0054cd2d4e2ec..f226ce1001aad 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -361,7 +361,7 @@ def test_partial_setting_mixed_dtype(self): s = df.loc[1].copy() s.name = 2 - expected = df._append(s) + expected = pd.concat([df, DataFrame(s).T.infer_objects()]) df.loc[2] = df.loc[1] tm.assert_frame_equal(df, expected) @@ -538,7 +538,8 @@ def test_partial_set_invalid(self): # allow object conversion here df = orig.copy() df.loc["a", :] = df.iloc[0] - exp = orig._append(Series(df.iloc[0], name="a")) + s = Series(df.iloc[0], name="a") + exp = pd.concat([orig, DataFrame(s).T.infer_objects()]) tm.assert_frame_equal(df, exp) tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) assert df.index.dtype == "object" diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py index be9331b352230..9beef8838eee9 100644 --- a/pandas/tests/reshape/concat/test_categorical.py +++ b/pandas/tests/reshape/concat/test_categorical.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from pandas.core.dtypes.dtypes import CategoricalDtype @@ -185,7 +184,6 @@ def test_concat_categorical_unchanged(self): ) tm.assert_equal(result, expected) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_categorical_concat_gh7864(self): # GH 7864 # make sure ordering is preserved @@ -202,9 +200,6 @@ def test_categorical_concat_gh7864(self): dfx = pd.concat([df1, df2]) tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories) - dfa = df1.append(df2) - tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories) - def test_categorical_index_upcast(self): # GH 
17629 # test upcasting to object when concatinating on categorical indexes diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index ef0961b53d32d..d30e7c6874964 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -219,7 +219,6 @@ def test_concat_keys_levels_no_overlap(self): with pytest.raises(ValueError, match=msg): concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]]) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_crossed_dtypes_weird_corner(self): columns = ["A", "B", "C", "D"] df1 = DataFrame( @@ -242,7 +241,7 @@ def test_crossed_dtypes_weird_corner(self): columns=columns, ) - appended = df1.append(df2, ignore_index=True) + appended = concat([df1, df2], ignore_index=True) expected = DataFrame( np.concatenate([df1.values, df2.values], axis=0), columns=columns ) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 5f27427cdfdb0..004951c8e4811 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -141,7 +141,6 @@ def test_default_index(self): exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]]) tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_dups_index(self): # GH 4771 @@ -179,16 +178,12 @@ def test_dups_index(self): tm.assert_frame_equal(result.iloc[10:], df) # append - result = df.iloc[0:8, :].append(df.iloc[8:]) + result = concat([df.iloc[0:8, :], df.iloc[8:]]) tm.assert_frame_equal(result, df) - result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10]) + result = concat([df.iloc[0:8, :], df.iloc[8:9], df.iloc[9:10]]) tm.assert_frame_equal(result, df) - expected = concat([df, df], axis=0) - result = df.append(df) - tm.assert_frame_equal(result, expected) - class 
TestMultiIndexConcat: def test_concat_multiindex_with_keys(self, multiindex_dataframe_random_data): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index b6b3a7d629e16..f8b73088b5247 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -682,15 +682,16 @@ def _constructor(self): assert isinstance(result, NotADataFrame) - @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_join_append_timedeltas(self): # timedelta64 issues with join/merge # GH 5695 - d = {"d": datetime(2013, 11, 5, 5, 56), "t": timedelta(0, 22500)} + d = DataFrame.from_dict( + {"d": [datetime(2013, 11, 5, 5, 56)], "t": [timedelta(0, 22500)]} + ) df = DataFrame(columns=list("dt")) - df = df.append(d, ignore_index=True) - result = df.append(d, ignore_index=True) + df = concat([df, d], ignore_index=True) + result = concat([df, d], ignore_index=True) expected = DataFrame( { "d": [datetime(2013, 11, 5, 5, 56), datetime(2013, 11, 5, 5, 56)], From caf1983addb52737baf6a465a15e7fa67a266ebf Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sun, 5 Dec 2021 18:03:59 +0000 Subject: [PATCH 23/29] Import concat from pandas instead of locally from submodule --- pandas/tests/frame/methods/test_drop_duplicates.py | 3 +-- pandas/tests/indexes/period/test_indexing.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index cd31590042522..cd61f59a85d1e 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -7,6 +7,7 @@ from pandas import ( DataFrame, NaT, + concat, ) import pandas._testing as tm @@ -111,8 +112,6 @@ def test_drop_duplicates(): # GH 11864 df = DataFrame([i] * 9 for i in range(16)) - from pandas.core.reshape.concat import concat - df = concat([df, DataFrame([[1] + [0] * 8])], 
ignore_index=True) for keep in ["first", "last", False]: diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 9dc66e3602a55..dc6ef42bc4d63 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -144,9 +144,7 @@ def test_getitem_partial(self): result = ts[24:] tm.assert_series_equal(exp, result) - from pandas.core.reshape.concat import concat - - ts = concat([ts[10:], ts[10:]]) + ts = pd.concat([ts[10:], ts[10:]]) msg = "left slice bound for non-unique label: '2008'" with pytest.raises(KeyError, match=msg): ts[slice("2008", "2009")] From aa90ca6a1d67ed58472a90c5944da133ee169789 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Wed, 8 Dec 2021 12:28:16 +0000 Subject: [PATCH 24/29] Test DataFrame._append where DataFrame.append is subject under test --- pandas/tests/frame/methods/test_append.py | 72 +++++++++++----------- pandas/tests/reshape/concat/test_append.py | 63 +++++++++---------- pandas/tests/reshape/concat/test_index.py | 8 ++- 3 files changed, 72 insertions(+), 71 deletions(-) diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index 6117c02c3f653..4ff1f0ffbf453 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -12,8 +12,8 @@ import pandas._testing as tm -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestDataFrameAppend: + @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series): obj = multiindex_dataframe_random_data if frame_or_series is Series: @@ -28,16 +28,16 @@ def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_seri def test_append_empty_list(self): # GH 28769 df = DataFrame() - result = df.append([]) + result = df._append([]) expected = df 
tm.assert_frame_equal(result, expected) assert result is not df df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - result = df.append([]) + result = df._append([]) expected = df tm.assert_frame_equal(result, expected) - assert result is not df # .append() should return a new object + assert result is not df # ._append() should return a new object def test_append_series_dict(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) @@ -45,38 +45,38 @@ def test_append_series_dict(self): series = df.loc[4] msg = "Indexes have overlapping values" with pytest.raises(ValueError, match=msg): - df.append(series, verify_integrity=True) + df._append(series, verify_integrity=True) series.name = None msg = "Can only append a Series if ignore_index=True" with pytest.raises(TypeError, match=msg): - df.append(series, verify_integrity=True) + df._append(series, verify_integrity=True) - result = df.append(series[::-1], ignore_index=True) - expected = df.append( + result = df._append(series[::-1], ignore_index=True) + expected = df._append( DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True ) tm.assert_frame_equal(result, expected) # dict - result = df.append(series.to_dict(), ignore_index=True) + result = df._append(series.to_dict(), ignore_index=True) tm.assert_frame_equal(result, expected) - result = df.append(series[::-1][:3], ignore_index=True) - expected = df.append( + result = df._append(series[::-1][:3], ignore_index=True) + expected = df._append( DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True ) tm.assert_frame_equal(result, expected.loc[:, result.columns]) msg = "Can only append a dict if ignore_index=True" with pytest.raises(TypeError, match=msg): - df.append(series.to_dict()) + df._append(series.to_dict()) # can append when name set row = df.loc[4] row.name = 5 - result = df.append(row) - expected = df.append(df[-1:], ignore_index=True) + result = df._append(row) + expected = 
df._append(df[-1:], ignore_index=True) tm.assert_frame_equal(result, expected) def test_append_list_of_series_dicts(self): @@ -84,8 +84,8 @@ def test_append_list_of_series_dicts(self): dicts = [x.to_dict() for idx, x in df.iterrows()] - result = df.append(dicts, ignore_index=True) - expected = df.append(df, ignore_index=True) + result = df._append(dicts, ignore_index=True) + expected = df._append(df, ignore_index=True) tm.assert_frame_equal(result, expected) # different columns @@ -93,8 +93,8 @@ def test_append_list_of_series_dicts(self): {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, ] - result = df.append(dicts, ignore_index=True, sort=True) - expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + result = df._append(dicts, ignore_index=True, sort=True) + expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_list_retain_index_name(self): @@ -110,11 +110,11 @@ def test_append_list_retain_index_name(self): ) # append series - result = df.append(serc) + result = df._append(serc) tm.assert_frame_equal(result, expected) # append list of series - result = df.append([serc]) + result = df._append([serc]) tm.assert_frame_equal(result, expected) def test_append_missing_cols(self): @@ -125,9 +125,9 @@ def test_append_missing_cols(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) dicts = [{"foo": 9}, {"bar": 10}] - result = df.append(dicts, ignore_index=True, sort=True) + result = df._append(dicts, ignore_index=True, sort=True) - expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_empty_dataframe(self): @@ -135,28 +135,28 @@ def test_append_empty_dataframe(self): # Empty df append empty df df1 = DataFrame() df2 = DataFrame() - result = df1.append(df2) + result = 
df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Non-empty df append empty df df1 = DataFrame(np.random.randn(5, 2)) df2 = DataFrame() - result = df1.append(df2) + result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Empty df with columns append empty df df1 = DataFrame(columns=["bar", "foo"]) df2 = DataFrame() - result = df1.append(df2) + result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) # Non-Empty df with columns append empty df df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) df2 = DataFrame() - result = df1.append(df2) + result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) @@ -168,19 +168,19 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) df2 = DataFrame() - result = df1.append(df2) + result = df1._append(df2) expected = df1.copy() tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) - result = df1.append(df2) + result = df1._append(df2) expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) tm.assert_frame_equal(result, expected) df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) - result = df1.append(df2) + result = df1._append(df2) expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) @@ -189,7 +189,7 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) - result = df1.append(df2) + result = df1._append(df2) expected = DataFrame( {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} ) @@ -198,7 +198,7 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": np.nan}, index=range(1)) df2 = DataFrame({"bar": Timestamp("20130101")}, 
index=range(1, 2)) - result = df1.append(df2) + result = df1._append(df2) expected = DataFrame( {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} ) @@ -207,7 +207,7 @@ def test_append_dtypes(self): df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) - result = df1.append(df2) + result = df1._append(df2) expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) tm.assert_frame_equal(result, expected) @@ -218,7 +218,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): # GH 30238 tz = tz_naive_fixture df = DataFrame([Timestamp(timestamp, tz=tz)]) - result = df.append(df.iloc[0]).iloc[-1] + result = df._append(df.iloc[0]).iloc[-1] expected = Series(Timestamp(timestamp, tz=tz), name=0) tm.assert_series_equal(result, expected) @@ -234,7 +234,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): ) def test_other_dtypes(self, data, dtype): df = DataFrame(data, dtype=dtype) - result = df.append(df.iloc[0]).iloc[-1] + result = df._append(df.iloc[0]).iloc[-1] expected = Series(data, name=0, dtype=dtype) tm.assert_series_equal(result, expected) @@ -249,7 +249,7 @@ def test_append_numpy_bug_1681(self, dtype): df = DataFrame() other = DataFrame({"A": "foo", "B": index}, index=index) - result = df.append(other) + result = df._append(other) assert (result["B"] == index).all() @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning") @@ -264,7 +264,7 @@ def test_multiindex_column_append_multiple(self): df2 = df.copy() for i in range(1, 10): df[i, "colA"] = 10 - df = df.append(df2, ignore_index=True) + df = df._append(df2, ignore_index=True) result = df["multi"] expected = DataFrame( {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)} diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 84c642e893e71..0b1d1c4a3d346 100644 --- 
a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -17,7 +17,6 @@ import pandas._testing as tm -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestAppend: def test_append(self, sort, float_frame): mixed_frame = float_frame.copy() @@ -29,23 +28,23 @@ def test_append(self, sort, float_frame): begin_frame = float_frame.reindex(begin_index) end_frame = float_frame.reindex(end_index) - appended = begin_frame.append(end_frame) + appended = begin_frame._append(end_frame) tm.assert_almost_equal(appended["A"], float_frame["A"]) del end_frame["A"] - partial_appended = begin_frame.append(end_frame, sort=sort) + partial_appended = begin_frame._append(end_frame, sort=sort) assert "A" in partial_appended - partial_appended = end_frame.append(begin_frame, sort=sort) + partial_appended = end_frame._append(begin_frame, sort=sort) assert "A" in partial_appended # mixed type handling - appended = mixed_frame[:5].append(mixed_frame[5:]) + appended = mixed_frame[:5]._append(mixed_frame[5:]) tm.assert_frame_equal(appended, mixed_frame) # what to test here - mixed_appended = mixed_frame[:5].append(float_frame[5:], sort=sort) - mixed_appended2 = float_frame[:5].append(mixed_frame[5:], sort=sort) + mixed_appended = mixed_frame[:5]._append(float_frame[5:], sort=sort) + mixed_appended2 = float_frame[:5]._append(mixed_frame[5:], sort=sort) # all equal except 'foo' column tm.assert_frame_equal( @@ -56,18 +55,18 @@ def test_append(self, sort, float_frame): def test_append_empty(self, float_frame): empty = DataFrame() - appended = float_frame.append(empty) + appended = float_frame._append(empty) tm.assert_frame_equal(float_frame, appended) assert appended is not float_frame - appended = empty.append(float_frame) + appended = empty._append(float_frame) tm.assert_frame_equal(float_frame, appended) assert appended is not float_frame def test_append_overlap_raises(self, float_frame): msg = "Indexes have 
overlapping values" with pytest.raises(ValueError, match=msg): - float_frame.append(float_frame, verify_integrity=True) + float_frame._append(float_frame, verify_integrity=True) def test_append_new_columns(self): # see gh-6129: new columns @@ -80,13 +79,13 @@ def test_append_new_columns(self): "c": {"z": 7}, } ) - result = df.append(row) + result = df._append(row) tm.assert_frame_equal(result, expected) def test_append_length0_frame(self, sort): df = DataFrame(columns=["A", "B", "C"]) df3 = DataFrame(index=[0, 1], columns=["A", "B"]) - df5 = df.append(df3, sort=sort) + df5 = df._append(df3, sort=sort) expected = DataFrame(index=[0, 1], columns=["A", "B", "C"]) tm.assert_frame_equal(df5, expected) @@ -101,7 +100,7 @@ def test_append_records(self): df1 = DataFrame(arr1) df2 = DataFrame(arr2) - result = df1.append(df2, ignore_index=True) + result = df1._append(df2, ignore_index=True) expected = DataFrame(np.concatenate((arr1, arr2))) tm.assert_frame_equal(result, expected) @@ -110,9 +109,7 @@ def test_append_sorts(self, sort): df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3]) - # GH#35407 - with tm.assert_produces_warning(FutureWarning): - result = df1.append(df2, sort=sort) + result = df1._append(df2, sort=sort) # for None / True expected = DataFrame( @@ -136,7 +133,7 @@ def test_append_different_columns(self, sort): a = df[:5].loc[:, ["bools", "ints", "floats"]] b = df[5:].loc[:, ["strings", "ints", "floats"]] - appended = a.append(b, sort=sort) + appended = a._append(b, sort=sort) assert isna(appended["strings"][0:4]).all() assert isna(appended["bools"][5:]).all() @@ -148,12 +145,12 @@ def test_append_many(self, sort, float_frame): float_frame[15:], ] - result = chunks[0].append(chunks[1:]) + result = chunks[0]._append(chunks[1:]) tm.assert_frame_equal(result, float_frame) chunks[-1] = chunks[-1].copy() chunks[-1]["foo"] = "bar" - result = chunks[0].append(chunks[1:], sort=sort) + result = 
chunks[0]._append(chunks[1:], sort=sort) tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame) assert (result["foo"][15:] == "bar").all() assert result["foo"][:15].isna().all() @@ -165,7 +162,7 @@ def test_append_preserve_index_name(self): df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"]) df2 = df2.set_index(["A"]) - result = df1.append(df2) + result = df1._append(df2) assert result.index.name == "A" indexes_can_append = [ @@ -196,7 +193,7 @@ def test_append_same_columns_type(self, index): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index) ser_index = index[:2] ser = Series([7, 8], index=ser_index, name=2) - result = df.append(ser) + result = df._append(ser) expected = DataFrame( [[1, 2, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index ) @@ -211,7 +208,7 @@ def test_append_same_columns_type(self, index): index = index[:2] df = DataFrame([[1, 2], [4, 5]], columns=index) ser = Series([7, 8, 9], index=ser_index, name=2) - result = df.append(ser) + result = df._append(ser) expected = DataFrame( [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]], index=[0, 1, 2], @@ -232,7 +229,7 @@ def test_append_different_columns_types(self, df_columns, series_index): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) ser = Series([7, 8, 9], index=series_index, name=2) - result = df.append(ser) + result = df._append(ser) idx_diff = ser.index.difference(df_columns) combined_columns = Index(df_columns.tolist()).append(idx_diff) expected = DataFrame( @@ -289,7 +286,7 @@ def test_append_dtype_coerce(self, sort): axis=1, sort=sort, ) - result = df1.append(df2, ignore_index=True, sort=sort) + result = df1._append(df2, ignore_index=True, sort=sort) if sort: expected = expected[["end_time", "start_time"]] else: @@ -301,7 +298,7 @@ def test_append_missing_column_proper_upcast(self, sort): df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")}) df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)}) - 
appended = df1.append(df2, ignore_index=True, sort=sort) + appended = df1._append(df2, ignore_index=True, sort=sort) assert appended["A"].dtype == "f8" assert appended["B"].dtype == "O" @@ -310,7 +307,7 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc()) ser = Series({"a": 1.0, "b": 2.0, "date": date}) df = DataFrame(columns=["c", "d"]) - result_a = df.append(ser, ignore_index=True) + result_a = df._append(ser, ignore_index=True) expected = DataFrame( [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"] ) @@ -324,10 +321,10 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): ) expected["c"] = expected["c"].astype(object) expected["d"] = expected["d"].astype(object) - result_b = result_a.append(ser, ignore_index=True) + result_b = result_a._append(ser, ignore_index=True) tm.assert_frame_equal(result_b, expected) - result = df.append([ser, ser], ignore_index=True) + result = df._append([ser, ser], ignore_index=True) tm.assert_frame_equal(result, expected) def test_append_empty_tz_frame_with_datetime64ns(self): @@ -335,20 +332,20 @@ def test_append_empty_tz_frame_with_datetime64ns(self): df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") # pd.NaT gets inferred as tz-naive, so append result is tz-naive - result = df.append({"a": pd.NaT}, ignore_index=True) + result = df._append({"a": pd.NaT}, ignore_index=True) expected = DataFrame({"a": [pd.NaT]}).astype(object) tm.assert_frame_equal(result, expected) # also test with typed value to append df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") other = Series({"a": pd.NaT}, dtype="datetime64[ns]") - result = df.append(other, ignore_index=True) + result = df._append(other, ignore_index=True) expected = DataFrame({"a": [pd.NaT]}).astype(object) tm.assert_frame_equal(result, expected) # mismatched tz other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]") - result = df.append(other, 
ignore_index=True) + result = df._append(other, ignore_index=True) expected = DataFrame({"a": [pd.NaT]}).astype(object) tm.assert_frame_equal(result, expected) @@ -361,7 +358,7 @@ def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str, val): df = DataFrame(columns=["a"]).astype(dtype_str) other = DataFrame({"a": [np.timedelta64(val, "ns")]}) - result = df.append(other, ignore_index=True) + result = df._append(other, ignore_index=True) expected = other.astype(object) tm.assert_frame_equal(result, expected) @@ -375,7 +372,7 @@ def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val): df = DataFrame({"a": pd.array([1], dtype=dtype_str)}) other = DataFrame({"a": [np.timedelta64(val, "ns")]}) - result = df.append(other, ignore_index=True) + result = df._append(other, ignore_index=True) expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, dtype=object) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 004951c8e4811..22c1e603af08e 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -178,12 +178,16 @@ def test_dups_index(self): tm.assert_frame_equal(result.iloc[10:], df) # append - result = concat([df.iloc[0:8, :], df.iloc[8:]]) + result = df.iloc[0:8, :]._append(df.iloc[8:]) tm.assert_frame_equal(result, df) - result = concat([df.iloc[0:8, :], df.iloc[8:9], df.iloc[9:10]]) + result = df.iloc[0:8, :]._append(df.iloc[8:9])._append(df.iloc[9:10]) tm.assert_frame_equal(result, df) + expected = concat([df, df], axis=0) + result = df._append(df) + tm.assert_frame_equal(result, expected) + class TestMultiIndexConcat: def test_concat_multiindex_with_keys(self, multiindex_dataframe_random_data): From b673e4bfcc7dfbdb0452d64e920f42a609f1b1e1 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Wed, 8 Dec 2021 12:28:55 +0000 Subject: [PATCH 25/29] Extract variable and improve variable name --- 
pandas/tests/generic/test_duplicate_labels.py | 5 +++-- pandas/tests/indexing/test_partial.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index dddcc8efe3627..1c0ae46aa5500 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -295,10 +295,11 @@ def test_setting_allows_duplicate_labels_raises(self, data): assert data.flags.allows_duplicate_labels is True def test_series_raises(self): - s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + a = pd.Series(0, index=["a", "b"]) + b = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) msg = "Index has duplicates." with pytest.raises(pd.errors.DuplicateLabelError, match=msg): - pd.concat([pd.Series(0, index=["a", "b"]), s]) + pd.concat([a, b]) @pytest.mark.parametrize( "getter, target", diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index f226ce1001aad..8251f09b97062 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -538,8 +538,8 @@ def test_partial_set_invalid(self): # allow object conversion here df = orig.copy() df.loc["a", :] = df.iloc[0] - s = Series(df.iloc[0], name="a") - exp = pd.concat([orig, DataFrame(s).T.infer_objects()]) + ser = Series(df.iloc[0], name="a") + exp = pd.concat([orig, DataFrame(ser).T.infer_objects()]) tm.assert_frame_equal(df, exp) tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) assert df.index.dtype == "object" From e0492bc2a9fe65dc7c1aa26fa70876155c67bc7b Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Wed, 8 Dec 2021 21:43:36 +0000 Subject: [PATCH 26/29] Introduce Series._append and use in test_append and frame.py --- pandas/core/frame.py | 4 +- pandas/core/series.py | 5 +++ pandas/tests/series/methods/test_append.py | 52 +++++++++++----------- 
3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7ba0c57d523ce..42fb912e0b39e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3267,12 +3267,10 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: index=self.columns, ) if index: - from pandas.core.reshape.concat import concat - index_memory_usage = self._constructor_sliced( self.index.memory_usage(deep=deep), index=["Index"] ) - result = concat([index_memory_usage, result]) + result = index_memory_usage._append(result) return result def transpose(self, *args, copy: bool = False) -> DataFrame: diff --git a/pandas/core/series.py b/pandas/core/series.py index b414c3c5ca428..0eabd7c8f47c4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2902,6 +2902,11 @@ def append( stacklevel=find_stack_level(), ) + return self._append(to_append, ignore_index, verify_integrity) + + def _append( + self, to_append, ignore_index: bool = False, verify_integrity: bool = False + ): from pandas.core.reshape.concat import concat if isinstance(to_append, (list, tuple)): diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py index 7cee9ed9518e8..6f8852ade6408 100644 --- a/pandas/tests/series/methods/test_append.py +++ b/pandas/tests/series/methods/test_append.py @@ -13,14 +13,13 @@ import pandas._testing as tm -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestSeriesAppend: def test_append_preserve_name(self, datetime_series): - result = datetime_series[:5].append(datetime_series[5:]) + result = datetime_series[:5]._append(datetime_series[5:]) assert result.name == datetime_series.name def test_append(self, datetime_series, string_series, object_series): - appended_series = string_series.append(object_series) + appended_series = string_series._append(object_series) for idx, value in appended_series.items(): if idx in 
string_series.index: assert value == string_series[idx] @@ -31,12 +30,12 @@ def test_append(self, datetime_series, string_series, object_series): msg = "Indexes have overlapping values:" with pytest.raises(ValueError, match=msg): - datetime_series.append(datetime_series, verify_integrity=True) + datetime_series._append(datetime_series, verify_integrity=True) def test_append_many(self, datetime_series): pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] - result = pieces[0].append(pieces[1:]) + result = pieces[0]._append(pieces[1:]) tm.assert_series_equal(result, datetime_series) def test_append_duplicates(self): @@ -44,13 +43,13 @@ def test_append_duplicates(self): s1 = Series([1, 2, 3]) s2 = Series([4, 5, 6]) exp = Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) - tm.assert_series_equal(s1.append(s2), exp) + tm.assert_series_equal(s1._append(s2), exp) tm.assert_series_equal(pd.concat([s1, s2]), exp) # the result must have RangeIndex exp = Series([1, 2, 3, 4, 5, 6]) tm.assert_series_equal( - s1.append(s2, ignore_index=True), exp, check_index_type=True + s1._append(s2, ignore_index=True), exp, check_index_type=True ) tm.assert_series_equal( pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True @@ -58,7 +57,7 @@ def test_append_duplicates(self): msg = "Indexes have overlapping values:" with pytest.raises(ValueError, match=msg): - s1.append(s2, verify_integrity=True) + s1._append(s2, verify_integrity=True) with pytest.raises(ValueError, match=msg): pd.concat([s1, s2], verify_integrity=True) @@ -68,8 +67,8 @@ def test_append_tuples(self): list_input = [s, s] tuple_input = (s, s) - expected = s.append(list_input) - result = s.append(tuple_input) + expected = s._append(list_input) + result = s._append(tuple_input) tm.assert_series_equal(expected, result) @@ -79,9 +78,9 @@ def test_append_dataframe_raises(self): msg = "to_append should be a Series or list/tuple of Series, got DataFrame" with pytest.raises(TypeError, match=msg): - 
df.A.append(df) + df.A._append(df) with pytest.raises(TypeError, match=msg): - df.A.append([df]) + df.A._append([df]) def test_append_raises_future_warning(self): # GH#35407 @@ -89,15 +88,14 @@ def test_append_raises_future_warning(self): Series([1, 2]).append(Series([3, 4])) -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestSeriesAppendWithDatetimeIndex: def test_append(self): rng = date_range("5/8/2012 1:45", periods=10, freq="5T") ts = Series(np.random.randn(len(rng)), rng) df = DataFrame(np.random.randn(len(rng), 4), index=rng) - result = ts.append(ts) - result_df = df.append(df) + result = ts._append(ts) + result_df = df._append(df) ex_index = DatetimeIndex(np.tile(rng.values, 2)) tm.assert_index_equal(result.index, ex_index) tm.assert_index_equal(result_df.index, ex_index) @@ -128,8 +126,8 @@ def test_append_tz(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - result = ts.append(ts2) - result_df = df.append(df2) + result = ts._append(ts2) + result_df = df._append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -154,8 +152,8 @@ def test_append_tz_explicit_pytz(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - result = ts.append(ts2) - result_df = df.append(df2) + result = ts._append(ts2) + result_df = df._append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -178,8 +176,8 @@ def test_append_tz_dateutil(self): ts2 = Series(np.random.randn(len(rng2)), rng2) df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - result = ts.append(ts2) - result_df = df.append(df2) + result = ts._append(ts2) + result_df = df._append(df2) tm.assert_index_equal(result.index, rng3) tm.assert_index_equal(result_df.index, rng3) @@ -191,7 +189,7 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 
02:00", periods=1, freq="H", tz="US/Eastern") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) + ts_result = ser1._append(ser2) exp_index = DatetimeIndex( ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern", freq="H" @@ -204,7 +202,7 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) + ts_result = ser1._append(ser2) exp_index = DatetimeIndex( ["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC", freq="H" @@ -220,7 +218,7 @@ def test_series_append_aware(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") ser1 = Series([1], index=rng1) ser2 = Series([2], index=rng2) - ts_result = ser1.append(ser2) + ts_result = ser1._append(ser2) exp_index = Index( [ Timestamp("1/1/2011 01:00", tz="US/Eastern"), @@ -235,7 +233,7 @@ def test_series_append_aware_naive(self): rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") ser1 = Series(np.random.randn(len(rng1)), index=rng1) ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1.append(ser2) + ts_result = ser1._append(ser2) expected = ser1.index.astype(object).append(ser2.index.astype(object)) assert ts_result.index.equals(expected) @@ -245,7 +243,7 @@ def test_series_append_aware_naive(self): rng2 = range(100) ser1 = Series(np.random.randn(len(rng1)), index=rng1) ser2 = Series(np.random.randn(len(rng2)), index=rng2) - ts_result = ser1.append(ser2) + ts_result = ser1._append(ser2) expected = ser1.index.astype(object).append(ser2.index) assert ts_result.index.equals(expected) @@ -255,7 +253,7 @@ def test_series_append_dst(self): rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") ser1 = Series([1, 2, 3], index=rng1) ser2 = Series([10, 11, 12], index=rng2) - ts_result = ser1.append(ser2) + ts_result = ser1._append(ser2) exp_index = DatetimeIndex( [ 
From 569abb1c6b4a34f5464dc2826b02ad2d85d48e15 Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Wed, 8 Dec 2021 22:01:02 +0000 Subject: [PATCH 27/29] Catch two more tests that should test _append --- .../reshape/concat/test_append_common.py | 111 +++++++++--------- .../tests/reshape/concat/test_categorical.py | 3 + 2 files changed, 58 insertions(+), 56 deletions(-) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index a43ed51ec8903..9c69f6fd3a7e4 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -11,7 +11,6 @@ import pandas._testing as tm -@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning") class TestConcatAppendCommon: """ Test common dtype coercion rules between concat and append. @@ -128,7 +127,7 @@ def test_concatlike_same_dtypes(self): # ----- Series ----- # # series.append - res = Series(vals1).append(Series(vals2), ignore_index=True) + res = Series(vals1)._append(Series(vals2), ignore_index=True) exp = Series(exp_data) tm.assert_series_equal(res, exp, check_index_type=True) @@ -137,7 +136,7 @@ def test_concatlike_same_dtypes(self): tm.assert_series_equal(res, exp, check_index_type=True) # 3 elements - res = Series(vals1).append( + res = Series(vals1)._append( [Series(vals2), Series(vals3)], ignore_index=True ) exp = Series(exp_data3) @@ -152,7 +151,7 @@ def test_concatlike_same_dtypes(self): # name mismatch s1 = Series(vals1, name="x") s2 = Series(vals2, name="y") - res = s1.append(s2, ignore_index=True) + res = s1._append(s2, ignore_index=True) exp = Series(exp_data) tm.assert_series_equal(res, exp, check_index_type=True) @@ -162,7 +161,7 @@ def test_concatlike_same_dtypes(self): # name match s1 = Series(vals1, name="x") s2 = Series(vals2, name="x") - res = s1.append(s2, ignore_index=True) + res = s1._append(s2, ignore_index=True) exp = Series(exp_data, name="x") 
tm.assert_series_equal(res, exp, check_index_type=True) @@ -175,10 +174,10 @@ def test_concatlike_same_dtypes(self): "only Series and DataFrame objs are valid" ) with pytest.raises(TypeError, match=msg): - Series(vals1).append(vals2) + Series(vals1)._append(vals2) with pytest.raises(TypeError, match=msg): - Series(vals1).append([Series(vals2), vals3]) + Series(vals1)._append([Series(vals2), vals3]) with pytest.raises(TypeError, match=msg): pd.concat([Series(vals1), vals2]) @@ -237,8 +236,8 @@ def test_concatlike_dtypes_coercion(self): # ----- Series ----- # - # series.append - res = Series(vals1).append(Series(vals2), ignore_index=True) + # series._append + res = Series(vals1)._append(Series(vals2), ignore_index=True) exp = Series(exp_data, dtype=exp_series_dtype) tm.assert_series_equal(res, exp, check_index_type=True) @@ -247,7 +246,7 @@ def test_concatlike_dtypes_coercion(self): tm.assert_series_equal(res, exp, check_index_type=True) # 3 elements - res = Series(vals1).append( + res = Series(vals1)._append( [Series(vals2), Series(vals3)], ignore_index=True ) exp = Series(exp_data3, dtype=exp_series_dtype) @@ -281,7 +280,7 @@ def test_concatlike_common_coerce_to_pandas_object(self): dts = Series(dti) tds = Series(tdi) - res = dts.append(tds) + res = dts._append(tds) tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) assert isinstance(res.iloc[0], pd.Timestamp) assert isinstance(res.iloc[-1], pd.Timedelta) @@ -306,7 +305,7 @@ def test_concatlike_datetimetz(self, tz_aware_fixture): dts1 = Series(dti1) dts2 = Series(dti2) - res = dts1.append(dts2) + res = dts1._append(dts2) tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) res = pd.concat([dts1, dts2]) @@ -326,7 +325,7 @@ def test_concatlike_datetimetz_short(self, tz): ) exp = DataFrame(0, index=exp_idx, columns=["A", "B"]) - tm.assert_frame_equal(df1.append(df2), exp) + tm.assert_frame_equal(df1._append(df2), exp) tm.assert_frame_equal(pd.concat([df1, df2]), exp) def 
test_concatlike_datetimetz_to_object(self, tz_aware_fixture): @@ -352,7 +351,7 @@ def test_concatlike_datetimetz_to_object(self, tz_aware_fixture): dts1 = Series(dti1) dts2 = Series(dti2) - res = dts1.append(dts2) + res = dts1._append(dts2) tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) res = pd.concat([dts1, dts2]) @@ -376,7 +375,7 @@ def test_concatlike_datetimetz_to_object(self, tz_aware_fixture): dts1 = Series(dti1) dts3 = Series(dti3) - res = dts1.append(dts3) + res = dts1._append(dts3) tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) res = pd.concat([dts1, dts3]) @@ -394,7 +393,7 @@ def test_concatlike_common_period(self): ps1 = Series(pi1) ps2 = Series(pi2) - res = ps1.append(ps2) + res = ps1._append(ps2) tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) res = pd.concat([ps1, ps2]) @@ -420,7 +419,7 @@ def test_concatlike_common_period_diff_freq_to_object(self): ps1 = Series(pi1) ps2 = Series(pi2) - res = ps1.append(ps2) + res = ps1._append(ps2) tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) res = pd.concat([ps1, ps2]) @@ -446,7 +445,7 @@ def test_concatlike_common_period_mixed_dt_to_object(self): ps1 = Series(pi1) tds = Series(tdi) - res = ps1.append(tds) + res = ps1._append(tds) tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) res = pd.concat([ps1, tds]) @@ -468,7 +467,7 @@ def test_concatlike_common_period_mixed_dt_to_object(self): ps1 = Series(pi1) tds = Series(tdi) - res = tds.append(ps1) + res = tds._append(ps1) tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1])) res = pd.concat([tds, ps1]) @@ -483,7 +482,7 @@ def test_concat_categorical(self): exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category") tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) # partially different categories => not-category s1 = Series([3, 2], dtype="category") @@ 
-491,7 +490,7 @@ def test_concat_categorical(self): exp = Series([3, 2, 2, 1]) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) # completely different categories (same dtype) => not-category s1 = Series([10, 11, np.nan], dtype="category") @@ -499,7 +498,7 @@ def test_concat_categorical(self): exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object") tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) def test_union_categorical_same_categories_different_order(self): # https://github.com/pandas-dev/pandas/issues/19096 @@ -520,12 +519,12 @@ def test_concat_categorical_coercion(self): exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object") tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) # result shouldn't be affected by 1st elem dtype exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object") tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) # all values are not in category => not-category s1 = Series([3, 2], dtype="category") @@ -533,11 +532,11 @@ def test_concat_categorical_coercion(self): exp = Series([3, 2, 2, 1]) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) exp = Series([2, 1, 3, 2]) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + 
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) # completely different categories => not-category s1 = Series([10, 11, np.nan], dtype="category") @@ -545,11 +544,11 @@ def test_concat_categorical_coercion(self): exp = Series([10, 11, np.nan, 1, 3, 2], dtype="object") tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) exp = Series([1, 3, 2, 10, 11, np.nan], dtype="object") tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) # different dtype => not-category s1 = Series([10, 11, np.nan], dtype="category") @@ -557,11 +556,11 @@ def test_concat_categorical_coercion(self): exp = Series([10, 11, np.nan, "a", "b", "c"]) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) exp = Series(["a", "b", "c", 10, 11, np.nan]) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) # if normal series only contains NaN-likes => not-category s1 = Series([10, 11], dtype="category") @@ -569,11 +568,11 @@ def test_concat_categorical_coercion(self): exp = Series([10, 11, np.nan, np.nan, np.nan]) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) exp = Series([np.nan, np.nan, np.nan, 10, 11]) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, 
ignore_index=True), exp) def test_concat_categorical_3elem_coercion(self): # GH 13524 @@ -585,11 +584,11 @@ def test_concat_categorical_3elem_coercion(self): exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float") tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) - tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float") tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) # values are all in either category => not-category s1 = Series([4, 5, 6], dtype="category") @@ -598,11 +597,11 @@ def test_concat_categorical_3elem_coercion(self): exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4]) tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) - tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3]) tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) # values are all in either category => not-category s1 = Series([4, 5, 6], dtype="category") @@ -611,11 +610,11 @@ def test_concat_categorical_3elem_coercion(self): exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12]) tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) - tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp) exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3]) tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s3.append([s1, s2], 
ignore_index=True), exp) + tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp) def test_concat_categorical_multi_coercion(self): # GH 13524 @@ -631,13 +630,13 @@ def test_concat_categorical_multi_coercion(self): exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2]) res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True) tm.assert_series_equal(res, exp) - res = s1.append([s2, s3, s4, s5, s6], ignore_index=True) + res = s1._append([s2, s3, s4, s5, s6], ignore_index=True) tm.assert_series_equal(res, exp) exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3]) res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True) tm.assert_series_equal(res, exp) - res = s6.append([s5, s4, s3, s2, s1], ignore_index=True) + res = s6._append([s5, s4, s3, s2, s1], ignore_index=True) tm.assert_series_equal(res, exp) def test_concat_categorical_ordered(self): @@ -648,11 +647,11 @@ def test_concat_categorical_ordered(self): exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True)) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True)) tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s1._append([s2, s1], ignore_index=True), exp) def test_concat_categorical_coercion_nan(self): # GH 13524 @@ -664,14 +663,14 @@ def test_concat_categorical_coercion_nan(self): exp = Series([np.nan, np.nan, np.nan, 1]) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) s1 = Series([1, np.nan], dtype="category") s2 = Series([np.nan, np.nan]) exp = Series([1, np.nan, np.nan, np.nan], dtype="float") 
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) # mixed dtype, all nan-likes => not-category s1 = Series([np.nan, np.nan], dtype="category") @@ -679,9 +678,9 @@ def test_concat_categorical_coercion_nan(self): exp = Series([np.nan, np.nan, np.nan, np.nan]) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) # all category nan-likes => category s1 = Series([np.nan, np.nan], dtype="category") @@ -690,7 +689,7 @@ def test_concat_categorical_coercion_nan(self): exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category") tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) def test_concat_categorical_empty(self): # GH 13524 @@ -699,25 +698,25 @@ def test_concat_categorical_empty(self): s2 = Series([1, 2], dtype="category") tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) - tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) + tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) - tm.assert_series_equal(s2.append(s1, ignore_index=True), s2) + tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) s1 = Series([], dtype="category") s2 = Series([], dtype="category") tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) - tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) + tm.assert_series_equal(s1._append(s2, 
ignore_index=True), s2) s1 = Series([], dtype="category") s2 = Series([], dtype="object") # different dtype => not-category tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) - tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) + tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) - tm.assert_series_equal(s2.append(s1, ignore_index=True), s2) + tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) s1 = Series([], dtype="category") s2 = Series([np.nan, np.nan]) @@ -725,10 +724,10 @@ def test_concat_categorical_empty(self): # empty Series is ignored exp = Series([np.nan, np.nan]) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) def test_categorical_concat_append(self): cat = Categorical(["a", "b"], categories=["a", "b"]) @@ -739,7 +738,7 @@ def test_categorical_concat_append(self): exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1])) tm.assert_frame_equal(pd.concat([df, df]), exp) - tm.assert_frame_equal(df.append(df), exp) + tm.assert_frame_equal(df._append(df), exp) # GH 13524 can concat different categories cat3 = Categorical(["a", "b"], categories=["a", "b", "c"]) @@ -750,5 +749,5 @@ def test_categorical_concat_append(self): exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]}) tm.assert_frame_equal(res, exp) - res = df.append(df_different_categories, ignore_index=True) + res = df._append(df_different_categories, ignore_index=True) tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py index 
9beef8838eee9..93197a1814077 100644 --- a/pandas/tests/reshape/concat/test_categorical.py +++ b/pandas/tests/reshape/concat/test_categorical.py @@ -200,6 +200,9 @@ def test_categorical_concat_gh7864(self): dfx = pd.concat([df1, df2]) tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories) + dfa = df1._append(df2) + tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories) + def test_categorical_index_upcast(self): # GH 17629 # test upcasting to object when concatinating on categorical indexes From c43ea638c6dee844f816d473547111c9868560cf Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Wed, 8 Dec 2021 23:32:01 +0000 Subject: [PATCH 28/29] Revert append -> concat and use _append internally instead --- pandas/core/frame.py | 7 +++---- pandas/core/reshape/pivot.py | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 42fb912e0b39e..5b7d36719bd6f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9744,10 +9744,9 @@ def c(x): idx_diff = result_index.difference(correl.index) if len(idx_diff) > 0: - from pandas.core.reshape.concat import concat - - nan_correl = Series([np.nan] * len(idx_diff), index=idx_diff) - correl = concat([correl, nan_correl]) + correl = correl._append( + Series([np.nan] * len(idx_diff), index=idx_diff) + ) return correl diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 09353bb5db206..7ac953248df9f 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -289,7 +289,7 @@ def _add_margins( if not values and isinstance(table, ABCSeries): # If there are no values and the table is a series, then there is only # one column in the data. Compute grand margin and return it. 
- return concat([table, Series({key: grand_margin[margins_name]})]) + return table._append(Series({key: grand_margin[margins_name]})) elif values: marginal_result_set = _generate_marginal_results( @@ -327,7 +327,7 @@ def _add_margins( margin_dummy[cols] = margin_dummy[cols].apply( maybe_downcast_to_dtype, args=(dtype,) ) - result = concat([result, margin_dummy]) + result = result._append(margin_dummy) result.index.names = row_names return result @@ -740,7 +740,7 @@ def _normalize(table, normalize, margins: bool, margins_name="All"): elif normalize == "index": index_margin = index_margin / index_margin.sum() - table = concat([table, index_margin.to_frame().T]) + table = table._append(index_margin) table = table.fillna(0) table.index = table_index @@ -749,7 +749,7 @@ def _normalize(table, normalize, margins: bool, margins_name="All"): index_margin = index_margin / index_margin.sum() index_margin.loc[margins_name] = 1 table = concat([table, column_margin], axis=1) - table = concat([table, index_margin.to_frame().T]) + table = table._append(index_margin) table = table.fillna(0) table.index = table_index From f6586c8f42fc4230d4339eeb92a3dff0134b1def Mon Sep 17 00:00:00 2001 From: Gesa Stupperich Date: Sat, 25 Dec 2021 16:12:13 +0000 Subject: [PATCH 29/29] Remove in-place append of single rows from the docs --- doc/source/user_guide/merging.rst | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 2caec8569aa00..bbca5773afdfe 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -407,19 +407,12 @@ like GroupBy where the order of a categorical variable is meaningful. Appending rows to a DataFrame ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -You can append a single row in form of a series to a ``DataFrame`` in-place -using ``loc``. 
+If you have a series that you want to append as a single row to a ``DataFrame``, you can convert the row into a +``DataFrame`` and use ``concat`` .. ipython:: python s2 = pd.Series(["X0", "X1", "X2", "X3"], index=["A", "B", "C", "D"]) - df1.loc[len(df1)] = s2 - -Alternatively, you can convert the row into a DataFrame and use ``concat``, -which doesn't have a side-effect on df1. - -.. ipython:: python - result = pd.concat([df1, s2.to_frame().T], ignore_index=True) .. ipython:: python