From c28529051ea07800792889a84cd7ffb34d9f5538 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 12 Mar 2019 15:35:05 +0000 Subject: [PATCH 1/2] TST/CLN: empty DataFrames and some 'empty' Series --- pandas/tests/frame/test_arithmetic.py | 2 +- pandas/tests/frame/test_combine_concat.py | 10 ++--- pandas/tests/frame/test_constructors.py | 39 ++++++++++++++----- pandas/tests/frame/test_reshape.py | 2 +- pandas/tests/groupby/test_function.py | 4 +- pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/groupby/test_grouping.py | 2 +- pandas/tests/indexing/common.py | 4 +- .../tests/io/json/test_json_table_schema.py | 4 +- pandas/tests/io/json/test_pandas.py | 2 +- pandas/tests/io/parser/test_common.py | 8 ++-- pandas/tests/io/parser/test_index_col.py | 4 +- pandas/tests/resample/test_period_index.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 10 ++--- pandas/tests/reshape/test_concat.py | 4 +- pandas/tests/series/test_constructors.py | 26 ++++++++++++- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_strings.py | 4 +- 18 files changed, 89 insertions(+), 44 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f14ecae448723..a4c05bd5b2f80 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -437,7 +437,7 @@ def test_arith_flex_zero_len_raises(self): # GH 19522 passing fill_value to frame flex arith methods should # raise even in the zero-length special cases ser_len0 = pd.Series([]) - df_len0 = pd.DataFrame([], columns=['A', 'B']) + df_len0 = pd.DataFrame(columns=['A', 'B']) df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) with pytest.raises(NotImplementedError, match='fill_value'): diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index c803d15a690c4..ed7403093580a 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -172,8 +172,8 @@ def test_append_list_of_series_dicts(self): def test_append_empty_dataframe(self): # Empty df append empty df - df1 = DataFrame([]) - df2 = DataFrame([]) + df1 = DataFrame() + df2 = DataFrame() result = df1.append(df2) expected = df1.copy() assert_frame_equal(result, expected) @@ -576,10 +576,10 @@ def test_combine_first(self, float_frame): assert_series_equal(combined['A'].reindex(g.index), g['A']) # corner cases - comb = float_frame.combine_first(DataFrame({})) + comb = float_frame.combine_first(DataFrame()) assert_frame_equal(comb, float_frame) - comb = DataFrame({}).combine_first(float_frame) + comb = DataFrame().combine_first(float_frame) assert_frame_equal(comb, float_frame) comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) @@ -587,7 +587,7 @@ def test_combine_first(self, float_frame): # #2525 df = DataFrame({'a': [1]}, index=[datetime(2012, 1, 1)]) - df2 = DataFrame({}, columns=['b']) + df2 = DataFrame(columns=['b']) result = df.combine_first(df2) assert 'b' in result diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1d5cbfec8de52..4499965f8a3eb 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -32,12 +32,33 @@ class TestDataFrameConstructors(TestData): - def test_constructor(self): - df = DataFrame() - assert len(df.index) == 0 - - df = DataFrame(data={}) - assert len(df.index) == 0 + @pytest.mark.parametrize('constructor', [ + lambda: DataFrame(), + lambda: DataFrame(None), + lambda: DataFrame({}), + lambda: DataFrame(()), + lambda: DataFrame([]), + lambda: DataFrame((x for x in [])), + lambda: DataFrame(data=None), + lambda: DataFrame(data={}), + lambda: DataFrame(data=()), + lambda: DataFrame(data=[]), + lambda: DataFrame(data=(x for x in [])), + # these are NOT empty DataFrames + pytest.param(lambda: DataFrame([[]]), marks=pytest.mark.xfail( + reason='creates a non-zero length RangeIndex')), + pytest.param(lambda: DataFrame([[], []]), marks=pytest.mark.xfail( + reason='creates a non-zero length RangeIndex')), + pytest.param(lambda: DataFrame([(x for x in [])]), + marks=pytest.mark.xfail( + reason='creates a non-zero length RangeIndex')) + ]) + def test_empty_constructor(self, constructor): + expected = DataFrame() + result = constructor() + assert len(result.index) == 0 + assert len(result.columns) == 0 + tm.assert_frame_equal(result, expected) def test_constructor_mixed(self): index, data = tm.getMixedTypeDict() @@ -95,7 +116,7 @@ def test_constructor_dtype_list_data(self): def test_constructor_list_frames(self): # see gh-3243 - result = DataFrame([DataFrame([])]) + result = DataFrame([DataFrame()]) assert result.shape == (1, 0) result = DataFrame([DataFrame(dict(A=lrange(5)))]) @@ -265,7 +286,7 @@ def test_constructor_dict(self): frame = DataFrame({}, index=idx) assert frame.index is idx - # empty with index and columns + # empty dict with index and columns idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx, columns=idx) assert frame.index is idx @@ -1122,7 +1143,7 @@ def test_constructor_list_of_series(self): result2 = DataFrame(data, index=np.arange(6)) tm.assert_frame_equal(result, result2) - result = DataFrame([Series({})]) + result = DataFrame([Series()]) expected = DataFrame(index=[0]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 8abf3a6706886..f9a0916ccdfb7 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -56,7 +56,7 @@ def test_pivot_duplicates(self): data.pivot('a', 'b', 'c') def test_pivot_empty(self): - df = DataFrame({}, columns=['a', 'b', 'c']) + df = DataFrame(columns=['a', 'b', 'c']) result = df.pivot('a', 'b', 'c') expected = DataFrame() tm.assert_frame_equal(result, expected, check_names=False) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index b5e328ef64424..dc57989094e52 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1064,8 +1064,8 @@ def test_size(df): tm.assert_series_equal(left, right, check_names=False) # GH11699 - df = DataFrame([], columns=['A', 'B']) - out = Series([], dtype='int64', index=Index([], name='A')) + df = DataFrame(columns=['A', 'B']) + out = Series(dtype='int64', index=Index([], name='A')) tm.assert_series_equal(df.groupby('A').size(), out) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c062fb90ca43b..7ab5b6b4b3b02 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -298,7 +298,7 @@ def f1(x): if y.empty: multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2, names=['b', 'c']) - res = DataFrame(None, columns=['a'], index=multiindex) + res = DataFrame(columns=['a'], index=multiindex) return res else: y = y.set_index(['b', 'c']) @@ -317,7 +317,7 @@ def f3(x): if y.empty: multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2, names=['foo', 'bar']) - res = DataFrame(None, columns=['a', 'b'], index=multiindex) + res = DataFrame(columns=['a', 'b'], index=multiindex) return res else: return y diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 44b5bd5f13992..787ea30abc2a2 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -643,7 +643,7 @@ def test_groupby_with_single_column(self): df = pd.DataFrame({'a': list('abssbab')}) tm.assert_frame_equal(df.groupby('a').get_group('a'), df.iloc[[0, 5]]) # GH 13530 - exp = pd.DataFrame([], index=pd.Index(['a', 'b', 's'], name='a')) + exp = pd.DataFrame(index=pd.Index(['a', 'b', 's'], name='a')) tm.assert_frame_equal(df.groupby('a').count(), exp) tm.assert_frame_equal(df.groupby('a').sum(), exp) tm.assert_frame_equal(df.groupby('a').nth(1), exp) diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 1b74eeea1a8c3..59415de57f95e 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -85,8 +85,8 @@ def setup_method(self, method): self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev) - self.frame_empty = DataFrame({}) - self.series_empty = Series({}) + self.frame_empty = DataFrame() + self.series_empty = Series() # form agglomerates for o in self._objs: diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 351b495e5d8fc..941ac1943375d 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -459,7 +459,7 @@ def test_set_names_unset(self, idx, nm, prop): ]) def test_warns_non_roundtrippable_names(self, idx): # GH 19130 - df = pd.DataFrame([[]], index=idx) + df = pd.DataFrame(index=idx) df.index.name = 'index' with tm.assert_produces_warning(): set_default_names(df) @@ -566,7 +566,7 @@ def test_multiindex(self, index_names): def test_empty_frame_roundtrip(self): # GH 21287 - df = pd.DataFrame([], columns=['a', 'b', 'c']) + df = pd.DataFrame(columns=['a', 'b', 'c']) expected = df.copy() out = df.to_json(orient='table') result = pd.read_json(out, orient='table') diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ed598b730d960..0ef7fa1ee6306 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -54,7 +54,7 @@ def setup(self, datapath): self.objSeries.name = 'objects' self.empty_series = Series([], index=[]) - self.empty_frame = DataFrame({}) + self.empty_frame = DataFrame() self.frame = _frame.copy() self.frame2 = _frame2.copy() diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 05da171d7dc31..607c7db122405 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -1151,7 +1151,7 @@ def test_empty_with_index(all_parsers): parser = all_parsers result = parser.read_csv(StringIO(data), index_col=0) - expected = DataFrame([], columns=["y"], index=Index([], name="x")) + expected = DataFrame(columns=["y"], index=Index([], name="x")) tm.assert_frame_equal(result, expected) @@ -1161,7 +1161,7 @@ def test_empty_with_multi_index(all_parsers): parser = all_parsers result = parser.read_csv(StringIO(data), index_col=["x", "y"]) - expected = DataFrame([], columns=["z"], + expected = DataFrame(columns=["z"], index=MultiIndex.from_arrays( [[]] * 2, names=["x", "y"])) tm.assert_frame_equal(result, expected) @@ -1172,7 +1172,7 @@ def test_empty_with_reversed_multi_index(all_parsers): parser = all_parsers result = parser.read_csv(StringIO(data), index_col=[1, 0]) - expected = DataFrame([], columns=["z"], + expected = DataFrame(columns=["z"], index=MultiIndex.from_arrays( [[]] * 2, names=["y", "x"])) tm.assert_frame_equal(result, expected) @@ -1284,7 +1284,7 @@ def test_numeric_range_too_wide(all_parsers, exp_data): def test_empty_with_nrows_chunksize(all_parsers, iterator): # see gh-9535 parser = all_parsers - expected = DataFrame([], columns=["foo", "bar"]) + expected = DataFrame(columns=["foo", "bar"]) nrows = 10 data = StringIO("foo,bar\n") diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 6421afba18f94..b1e02df693932 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -105,7 +105,7 @@ def test_index_col_empty_data(all_parsers, index_col, kwargs): parser = all_parsers result = parser.read_csv(StringIO(data), index_col=index_col) - expected = DataFrame([], **kwargs) + expected = DataFrame(**kwargs) tm.assert_frame_equal(result, expected) @@ -115,7 +115,7 @@ def test_empty_with_index_col_false(all_parsers): parser = all_parsers result = parser.read_csv(StringIO(data), index_col=False) - expected = DataFrame([], columns=["x", "y"]) + expected = DataFrame(columns=["x", "y"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 8abdf9034527b..8f2923f17acce 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -715,7 +715,7 @@ def test_resample_with_only_nat(self): pi = PeriodIndex([pd.NaT] * 3, freq='S') frame = DataFrame([2, 3, 5], index=pi) expected_index = PeriodIndex(data=[], freq=pi.freq) - expected = DataFrame([], index=expected_index) + expected = DataFrame(index=expected_index) result = frame.resample('1s').mean() assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 7a97368504fd6..2a3ed25ea9346 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -386,10 +386,10 @@ def test_left_merge_empty_dataframe(self): dict(left_on='a', right_on='x')]) def test_merge_left_empty_right_empty(self, join_type, kwarg): # GH 10824 - left = pd.DataFrame([], columns=['a', 'b', 'c']) - right = pd.DataFrame([], columns=['x', 'y', 'z']) + left = pd.DataFrame(columns=['a', 'b', 'c']) + right = pd.DataFrame(columns=['x', 'y', 'z']) - exp_in = pd.DataFrame([], columns=['a', 'b', 'c', 'x', 'y', 'z'], + exp_in = pd.DataFrame(columns=['a', 'b', 'c', 'x', 'y', 'z'], index=pd.Index([], dtype=object), dtype=object) @@ -398,7 +398,7 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg): def test_merge_left_empty_right_notempty(self): # GH 10824 - left = pd.DataFrame([], columns=['a', 'b', 'c']) + left = pd.DataFrame(columns=['a', 'b', 'c']) right = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=['x', 'y', 'z']) @@ -444,7 +444,7 @@ def test_merge_left_notempty_right_empty(self): # GH 10824 left = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=['a', 'b', 'c']) - right = pd.DataFrame([], columns=['x', 'y', 'z']) + right = pd.DataFrame(columns=['x', 'y', 'z']) exp_out = pd.DataFrame({'a': [1, 4, 7], 'b': [2, 5, 8], diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index a186d32ed8800..fdcc98e25510f 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -766,7 +766,7 @@ def test_append(self, sort): mixed_appended2.reindex(columns=['A', 'B', 'C', 'D'])) # append empty - empty = DataFrame({}) + empty = DataFrame() appended = self.frame.append(empty) tm.assert_frame_equal(self.frame, appended) @@ -868,7 +868,7 @@ def test_append_many(self, sort): def test_append_preserve_index_name(self): # #980 - df1 = DataFrame(data=None, columns=['A', 'B', 'C']) + df1 = DataFrame(columns=['A', 'B', 'C']) df1 = df1.set_index(['A']) df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=['A', 'B', 'C']) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 8525b877618c9..f7b473a5791b4 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -28,6 +28,30 @@ class TestSeriesConstructors(): + @pytest.mark.parametrize('constructor,check_index_type', [ + # NOTE: some overlap with test_constructor_empty but that test does not + # test for None or an empty generator. + # test_constructor_pass_none tests None but only with the index also + # passed. + (lambda: Series(), True), + (lambda: Series(None), True), + (lambda: Series({}), True), + (lambda: Series(()), False), # creates a RangeIndex + (lambda: Series([]), False), # creates a RangeIndex + (lambda: Series((x for x in [])), False), # creates a RangeIndex + (lambda: Series(data=None), True), + (lambda: Series(data={}), True), + (lambda: Series(data=()), False), # creates a RangeIndex + (lambda: Series(data=[]), False), # creates a RangeIndex + (lambda: Series(data=(x for x in [])), False), # creates a RangeIndex + ]) + def test_empty_constructor(self, constructor, check_index_type): + expected = Series() + result = constructor() + assert len(result.index) == 0 + tm.assert_series_equal(result, expected, + check_index_type=check_index_type) + def test_invalid_dtype(self): # GH15520 msg = 'not understood' @@ -66,7 +90,7 @@ def test_constructor(self, datetime_series): assert mixed[1] is np.NaN assert not empty_series.index.is_all_dates - assert not Series({}).index.is_all_dates + assert not Series().index.is_all_dates # exception raised is of type Exception with pytest.raises(Exception, match="Data must be 1-dimensional"): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a9a59c6d95373..0c4ec2483a4b4 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -314,7 +314,7 @@ def test_count_level_corner(self): df = self.frame[:0] result = df.count(level=0) - expected = DataFrame({}, index=s.index.levels[0], + expected = DataFrame(index=s.index.levels[0], columns=df.columns).fillna(0).astype(np.int64) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 40a83f90c8dfd..0be6ec076837b 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1901,7 +1901,7 @@ def test_empty_str_methods(self): def test_empty_str_methods_to_frame(self): empty = Series(dtype=str) - empty_df = DataFrame([]) + empty_df = DataFrame() tm.assert_frame_equal(empty_df, empty.str.partition('a')) tm.assert_frame_equal(empty_df, empty.str.rpartition('a')) @@ -2551,7 +2551,7 @@ def test_split_blank_string(self): # expand blank split GH 20067 values = Series([''], name='test') result = values.str.split(expand=True) - exp = DataFrame([[]]) + exp = DataFrame([[]]) # NOTE: this is NOT an empty DataFrame tm.assert_frame_equal(result, exp) values = Series(['a b c', 'a b', '', ' '], name='test') From 05ab6704189c6044e41fd12599245c11d9c48229 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 27 Mar 2019 11:22:50 +0000 Subject: [PATCH 2/2] separate test for non zero length indexes --- pandas/tests/frame/test_constructors.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 3ddacdf166edd..3adbfc829b4c6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -18,8 +18,8 @@ import pandas as pd from pandas import ( - Categorical, DataFrame, Index, MultiIndex, Series, Timedelta, Timestamp, - compat, date_range, isna) + Categorical, DataFrame, Index, MultiIndex, RangeIndex, Series, Timedelta, + Timestamp, compat, date_range, isna) from pandas.tests.frame.common import TestData import pandas.util.testing as tm @@ -41,15 +41,7 @@ class TestDataFrameConstructors(TestData): lambda: DataFrame(data={}), lambda: DataFrame(data=()), lambda: DataFrame(data=[]), - lambda: DataFrame(data=(x for x in [])), - # these are NOT empty DataFrames - pytest.param(lambda: DataFrame([[]]), marks=pytest.mark.xfail( - reason='creates a non-zero length RangeIndex')), - pytest.param(lambda: DataFrame([[], []]), marks=pytest.mark.xfail( - reason='creates a non-zero length RangeIndex')), - pytest.param(lambda: DataFrame([(x for x in [])]), - marks=pytest.mark.xfail( - reason='creates a non-zero length RangeIndex')) + lambda: DataFrame(data=(x for x in [])) ]) def test_empty_constructor(self, constructor): expected = DataFrame() @@ -58,6 +50,17 @@ def test_empty_constructor(self, constructor): assert len(result.columns) == 0 tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize('emptylike,expected_index,expected_columns', [ + ([[]], RangeIndex(1), RangeIndex(0)), + ([[], []], RangeIndex(2), RangeIndex(0)), + ([(x for x in [])], RangeIndex(1), RangeIndex(0)) + ]) + def test_emptylike_constructor( + self, emptylike, expected_index, expected_columns): + expected = DataFrame(index=expected_index, columns=expected_columns) + result = DataFrame(emptylike) + tm.assert_frame_equal(result, expected) + def test_constructor_mixed(self): index, data = tm.getMixedTypeDict()