diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 41db72612a66b..ee4ca6a06a634 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -402,6 +402,7 @@ Conversion - Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`) - Bug in :meth:`Series.view` and :meth:`Index.view` when converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`) - Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`) +- Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`) - Strings diff --git a/pandas/core/construction.py b/pandas/core/construction.py index db9239d03dd13..9ba9a5bd38164 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -518,9 +518,9 @@ def sanitize_array( elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0: # TODO: deque, array.array - if isinstance(data, set): + if isinstance(data, (set, frozenset)): # Raise only for unordered sets, e.g., not for dict_keys - raise TypeError("Set type is unordered") + raise TypeError(f"'{type(data).__name__}' type is unordered") data = list(data) if dtype is not None: diff --git a/pandas/core/series.py b/pandas/core/series.py index b2e620c9b8047..24c356e7a8269 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -375,10 +375,8 @@ def __init__( "`index` argument. `copy` must be False." ) - elif is_extension_array_dtype(data): + elif isinstance(data, ExtensionArray): pass - elif isinstance(data, (set, frozenset)): - raise TypeError(f"'{type(data).__name__}' type is unordered") else: data = com.maybe_iterable_to_list(data) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 3bbe5f9e46efa..493763cf9c4e0 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -377,15 +377,18 @@ def test_constructor_dict(self): with pytest.raises(ValueError, match=msg): DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + def test_constructor_dict_length1(self): # Length-one dict micro-optimization frame = DataFrame({"A": {"1": 1, "2": 2}}) tm.assert_index_equal(frame.index, Index(["1", "2"])) + def test_constructor_dict_with_index(self): # empty dict plus index idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx) assert frame.index is idx + def test_constructor_dict_with_index_and_columns(self): # empty dict with index and columns idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx, columns=idx) @@ -393,10 +396,12 @@ def test_constructor_dict(self): assert frame.columns is idx assert len(frame._series) == 3 + def test_constructor_dict_of_empty_lists(self): # with dict of empty list and Series frame = DataFrame({"A": [], "B": []}, columns=["A", "B"]) tm.assert_index_equal(frame.index, RangeIndex(0), exact=True) + def test_constructor_dict_with_none(self): # GH 14381 # Dict with None value frame_none = DataFrame({"a": None}, index=[0]) @@ -405,6 +410,7 @@ def test_constructor_dict(self): assert frame_none_list._get_value(0, "a") is None tm.assert_frame_equal(frame_none, frame_none_list) + def test_constructor_dict_errors(self): # GH10856 # dict with scalar values should raise error, even if columns passed msg = "If using all scalar values, you must pass an index" @@ -560,7 +566,7 @@ def test_constructor_error_msgs(self): with pytest.raises(ValueError, match=msg): DataFrame({"a": False, "b": True}) - def test_constructor_subclass_dict(self, float_frame, dict_subclass): + def test_constructor_subclass_dict(self, dict_subclass): # Test for passing dict subclass to constructor data = { "col1": dict_subclass((x, 10.0 * x) for x in range(10)), @@ -574,6 +580,7 @@ def test_constructor_subclass_dict(self, float_frame, dict_subclass): df = DataFrame(data) tm.assert_frame_equal(refdf, df) + def test_constructor_defaultdict(self, float_frame): # try with defaultdict from collections import defaultdict @@ -608,6 +615,7 @@ def test_constructor_dict_cast(self): assert frame["B"].dtype == np.object_ assert frame["A"].dtype == np.float64 + def test_constructor_dict_cast2(self): # can't cast to float test_data = { "A": dict(zip(range(20), tm.makeStringIndex(20))), @@ -623,6 +631,7 @@ def test_constructor_dict_dont_upcast(self): df = DataFrame(d) assert isinstance(df["Col1"]["Row2"], float) + def test_constructor_dict_dont_upcast2(self): dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2]) assert isinstance(dm[1][1], int) @@ -1195,6 +1204,7 @@ def __len__(self, n): expected = DataFrame([[1, "a"], [2, "b"]], columns=columns) tm.assert_frame_equal(result, expected, check_dtype=False) + def test_constructor_stdlib_array(self): # GH 4297 # support Array import array @@ -2427,11 +2437,16 @@ def test_from_2d_ndarray_with_dtype(self): expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]") tm.assert_frame_equal(df, expected) - def test_construction_from_set_raises(self): + @pytest.mark.parametrize("typ", [set, frozenset]) + def test_construction_from_set_raises(self, typ): # https://github.com/pandas-dev/pandas/issues/32582 - msg = "Set type is unordered" + values = typ({1, 2, 3}) + msg = f"'{typ.__name__}' type is unordered" with pytest.raises(TypeError, match=msg): - DataFrame({"a": {1, 2, 3}}) + DataFrame({"a": values}) + + with pytest.raises(TypeError, match=msg): + Series(values) def get1(obj): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index aec1e65cbb4c0..a69a693bb6203 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -69,6 +69,7 @@ class TestSeriesConstructors: ], ) def test_empty_constructor(self, constructor, check_index_type): + # TODO: share with frame test of the same name with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): expected = Series() result = constructor() @@ -310,6 +311,7 @@ def test_constructor_generator(self): exp = Series(range(10)) tm.assert_series_equal(result, exp) + # same but with non-default index gen = (i for i in range(10)) result = Series(gen, index=range(10, 20)) exp.index = range(10, 20) @@ -323,6 +325,7 @@ def test_constructor_map(self): exp = Series(range(10)) tm.assert_series_equal(result, exp) + # same but with non-default index m = map(lambda x: x, range(10)) result = Series(m, index=range(10, 20)) exp.index = range(10, 20) @@ -386,6 +389,7 @@ def test_constructor_categorical_with_coercion(self): str(df.values) str(df) + def test_constructor_categorical_with_coercion2(self): # GH8623 x = DataFrame( [[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. Doe"]], @@ -747,6 +751,7 @@ def test_constructor_datelike_coercion(self): assert s.iloc[1] == "NOV" assert s.dtype == object + def test_constructor_datelike_coercion2(self): # the dtype was being reset on the slicing and re-inferred to datetime # even thought the blocks are mixed belly = "216 3T19".split() @@ -798,6 +803,7 @@ def test_constructor_dtype_datetime64(self): assert isna(s[1]) assert s.dtype == "M8[ns]" + def test_constructor_dtype_datetime64_10(self): # GH3416 dates = [ np.datetime64(datetime(2013, 1, 1)), @@ -850,6 +856,7 @@ def test_constructor_dtype_datetime64(self): expected = Series(dts.astype(np.int64)) tm.assert_series_equal(result, expected) + def test_constructor_dtype_datetime64_9(self): # invalid dates can be help as object result = Series([datetime(2, 1, 1)]) assert result[0] == datetime(2, 1, 1, 0, 0) @@ -857,11 +864,13 @@ def test_constructor_dtype_datetime64(self): result = Series([datetime(3000, 1, 1)]) assert result[0] == datetime(3000, 1, 1, 0, 0) + def test_constructor_dtype_datetime64_8(self): # don't mix types result = Series([Timestamp("20130101"), 1], index=["a", "b"]) assert result["a"] == Timestamp("20130101") assert result["b"] == 1 + def test_constructor_dtype_datetime64_7(self): # GH6529 # coerce datetime64 non-ns properly dates = date_range("01-Jan-2015", "01-Dec-2015", freq="M") @@ -887,6 +896,7 @@ def test_constructor_dtype_datetime64(self): tm.assert_numpy_array_equal(series1.values, dates2) assert series1.dtype == object + def test_constructor_dtype_datetime64_6(self): # these will correctly infer a datetime s = Series([None, NaT, "2013-08-05 15:30:00.000001"]) assert s.dtype == "datetime64[ns]" @@ -897,6 +907,7 @@ def test_constructor_dtype_datetime64(self): s = Series([NaT, np.nan, "2013-08-05 15:30:00.000001"]) assert s.dtype == "datetime64[ns]" + def test_constructor_dtype_datetime64_5(self): # tz-aware (UTC and other tz's) # GH 8411 dr = date_range("20130101", periods=3) @@ -906,18 +917,21 @@ def test_constructor_dtype_datetime64(self): dr = date_range("20130101", periods=3, tz="US/Eastern") assert str(Series(dr).iloc[0].tz) == "US/Eastern" + def test_constructor_dtype_datetime64_4(self): # non-convertible s = Series([1479596223000, -1479590, NaT]) assert s.dtype == "object" assert s[2] is NaT assert "NaT" in str(s) + def test_constructor_dtype_datetime64_3(self): # if we passed a NaT it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), NaT]) assert s.dtype == "object" assert s[2] is NaT assert "NaT" in str(s) + def test_constructor_dtype_datetime64_2(self): # if we passed a nan it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) assert s.dtype == "object" @@ -980,6 +994,7 @@ def test_constructor_with_datetime_tz(self): result = DatetimeIndex(s, freq="infer") tm.assert_index_equal(result, dr) + def test_constructor_with_datetime_tz4(self): # inference s = Series( [ @@ -990,6 +1005,7 @@ def test_constructor_with_datetime_tz(self): assert s.dtype == "datetime64[ns, US/Pacific]" assert lib.infer_dtype(s, skipna=True) == "datetime64" + def test_constructor_with_datetime_tz3(self): s = Series( [ Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), @@ -999,6 +1015,7 @@ def test_constructor_with_datetime_tz(self): assert s.dtype == "object" assert lib.infer_dtype(s, skipna=True) == "datetime" + def test_constructor_with_datetime_tz2(self): # with all NaT s = Series(NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") expected = Series(DatetimeIndex(["NaT", "NaT"], tz="US/Eastern")) @@ -1231,14 +1248,6 @@ def test_constructor_dict_of_tuples(self): expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) tm.assert_series_equal(result, expected) - def test_constructor_set(self): - values = {1, 2, 3, 4, 5} - with pytest.raises(TypeError, match="'set' type is unordered"): - Series(values) - values = frozenset(values) - with pytest.raises(TypeError, match="'frozenset' type is unordered"): - Series(values) - # https://github.com/pandas-dev/pandas/issues/22698 @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning") def test_fromDict(self):