diff --git a/doc/source/user_guide/dsintro.rst b/doc/source/user_guide/dsintro.rst index 200d567a62732..d7f7690f8c3d0 100644 --- a/doc/source/user_guide/dsintro.rst +++ b/doc/source/user_guide/dsintro.rst @@ -397,6 +397,28 @@ The result will be a DataFrame with the same index as the input Series, and with one column whose name is the original name of the Series (only if no other column name provided). +.. _basics.dataframe.from_list_dataclasses: + +From a list of dataclasses +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1.0 + +Data Classes as introduced in `PEP557 <https://www.python.org/dev/peps/pep-0557>`__, +can be passed into the DataFrame constructor. +Passing a list of dataclasses is equivalent to passing a list of dictionaries. + +Please be aware that all values in the list should be dataclasses; mixing +types in the list would result in a TypeError. + +.. ipython:: python + + from dataclasses import make_dataclass + + Point = make_dataclass("Point", [("x", int), ("y", int)]) + + pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)]) + **Missing data** Much more will be said on this topic in the :ref:`Missing data ` diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 1afe7edf2641b..f5997a13e785d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -24,6 +24,7 @@ is_array_like, is_bool, is_complex, + is_dataclass, is_decimal, is_dict_like, is_file_like, diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 56b880dca1241..d1607b5ede6c3 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -386,3 +386,39 @@ def is_sequence(obj) -> bool: return not isinstance(obj, (str, bytes)) except (TypeError, AttributeError): return False + + +def is_dataclass(item): + """ + Checks if the object is a data-class instance + + Parameters + ---------- + item : object + + Returns + ------- + is_dataclass : bool + True if the item is an instance of a data-class, + will return false if you pass
the data class itself + + Examples + -------- + >>> from dataclasses import dataclass + >>> @dataclass + ... class Point: + ... x: int + ... y: int + + >>> is_dataclass(Point) + False + >>> is_dataclass(Point(0,2)) + True + + """ + try: + from dataclasses import is_dataclass + + return is_dataclass(item) and not isinstance(item, type) + except ImportError: + return False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 72d9ef7d0d35f..9b140238a9389 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,6 +77,7 @@ ensure_platform_int, infer_dtype_from_object, is_bool_dtype, + is_dataclass, is_datetime64_any_dtype, is_dict_like, is_dtype_equal, @@ -117,6 +118,7 @@ from pandas.core.internals import BlockManager from pandas.core.internals.construction import ( arrays_to_mgr, + dataclasses_to_dicts, get_names_from_index, init_dict, init_ndarray, @@ -474,6 +476,8 @@ def __init__( if not isinstance(data, (abc.Sequence, ExtensionArray)): data = list(data) if len(data) > 0: + if is_dataclass(data[0]): + data = dataclasses_to_dicts(data) if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: if is_named_tuple(data[0]) and columns is None: columns = data[0]._fields diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index ab363e10eb098..c4416472d451c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -429,6 +429,33 @@ def _get_axes(N, K, index, columns): return index, columns +def dataclasses_to_dicts(data): + """ Converts a list of dataclass instances to a list of dictionaries + + Parameters + ---------- + data : List[Type[dataclass]] + + Returns + ------- + list_dict : List[dict] + + Examples + -------- + >>> @dataclass + ... class Point: + ... x: int + ...
y: int + + >>> dataclasses_to_dicts([Point(1,2), Point(2,3)]) + [{'x': 1, 'y': 2}, {'x': 2, 'y': 3}] + + """ + from dataclasses import asdict + + return list(map(asdict, data)) + + # --------------------------------------------------------------------- # Conversion of Inputs to Arrays diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d938c0f6f1066..058b706cfe3aa 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -9,7 +9,7 @@ import pytest import pytz -from pandas.compat import is_platform_little_endian +from pandas.compat import PY37, is_platform_little_endian from pandas.compat.numpy import _is_numpy_dev from pandas.core.dtypes.common import is_integer_dtype @@ -1364,6 +1364,46 @@ def test_constructor_list_of_namedtuples(self): result = DataFrame(tuples, columns=["y", "z"]) tm.assert_frame_equal(result, expected) + @pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7") + def test_constructor_list_of_dataclasses(self): + # GH21910 + from dataclasses import make_dataclass + + Point = make_dataclass("Point", [("x", int), ("y", int)]) + + datas = [Point(0, 3), Point(1, 3)] + expected = DataFrame({"x": [0, 1], "y": [3, 3]}) + result = DataFrame(datas) + tm.assert_frame_equal(result, expected) + + @pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7") + def test_constructor_list_of_dataclasses_with_varying_types(self): + # GH21910 + from dataclasses import make_dataclass + + # varying types + Point = make_dataclass("Point", [("x", int), ("y", int)]) + HLine = make_dataclass("HLine", [("x0", int), ("x1", int), ("y", int)]) + + datas = [Point(0, 3), HLine(1, 3, 3)] + + expected = DataFrame( + {"x": [0, np.nan], "y": [3, 3], "x0": [np.nan, 1], "x1": [np.nan, 3]} + ) + result = DataFrame(datas) + tm.assert_frame_equal(result, expected) + + @pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7") + def test_constructor_list_of_dataclasses_error_thrown(self): +
# GH21910 + from dataclasses import make_dataclass + + Point = make_dataclass("Point", [("x", int), ("y", int)]) + + # expect TypeError + with pytest.raises(TypeError): + DataFrame([Point(0, 0), {"x": 1, "y": 0}]) + def test_constructor_list_of_dict_order(self): # GH10056 data = [