From 0cc09c148a839647b0ce357cbbda61c1a33446f0 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Thu, 24 Nov 2022 09:15:10 -0500 Subject: [PATCH 1/7] nullable tolist to return python scalars --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/arrays/masked.py | 6 ++-- pandas/tests/series/methods/test_tolist.py | 34 ++++++++++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 pandas/tests/series/methods/test_tolist.py diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index fd7cc0598f850..0d76b73bcbbcd 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -759,6 +759,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) +- Bug in :meth:`Series.tolist` for nullable dtypes returning numpy scalars instead of python scalars (:issue:`#####`) - Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) - diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 434d6198210f7..1fd6482f650da 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -436,10 +436,8 @@ def to_numpy( def tolist(self): if self.ndim > 1: return [x.tolist() for x in self] - if not self._hasna: - # faster than list(self) - return list(self._data) - return list(self) + dtype = None if self._hasna else self._data.dtype + return self.to_numpy(dtype=dtype).tolist() @overload def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: diff --git a/pandas/tests/series/methods/test_tolist.py b/pandas/tests/series/methods/test_tolist.py new file mode 100644 index 0000000000000..d95c0bce59a6b --- /dev/null +++ b/pandas/tests/series/methods/test_tolist.py @@ -0,0 +1,34 @@ +import pytest + +from pandas import ( + Interval, + Period, + Series, + Timedelta, + Timestamp, +) + + +@pytest.mark.parametrize( + "values, dtype, expected_dtype", + ( + ([1], "int64", int), + ([1], "Int64", int), + ([1], "int64[pyarrow]", int), + ([1.0], "float64", float), + ([1.0], "Float64", float), + ([1.0], "float64[pyarrow]", float), + (["abc"], "object", str), + (["abc"], "string", str), + (["abc"], "string[pyarrow]", str), + ([Interval(1, 3)], "interval", Interval), + ([Period("2000-01-01", "D")], "period[D]", Period), + ([Timedelta(days=1)], "timedelta64[ns]", Timedelta), + ([Timestamp("2000-01-01")], "datetime64[ns]", Timestamp), + ), +) +def test_tolist_scalar_dtype(values, dtype, expected_dtype): + # GH ##### + ser = Series(values, dtype=dtype) + result_dtype = type(ser.tolist()[0]) + assert result_dtype == expected_dtype From 5df58158ec3e4d464207d9642f031c7c26a1aa70 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Thu, 24 Nov 2022 09:19:35 -0500 Subject: [PATCH 2/7] gh refs --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/tests/series/methods/test_tolist.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0d76b73bcbbcd..3dac2c9b5aaa2 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -759,7 +759,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) -- Bug in :meth:`Series.tolist` for nullable dtypes returning numpy scalars instead of python scalars (:issue:`#####`) +- Bug in :meth:`Series.tolist` for nullable dtypes returning numpy scalars instead of python scalars (:issue:`49890`) - Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) - diff --git a/pandas/tests/series/methods/test_tolist.py b/pandas/tests/series/methods/test_tolist.py index d95c0bce59a6b..4c2f7c360050f 100644 --- a/pandas/tests/series/methods/test_tolist.py +++ b/pandas/tests/series/methods/test_tolist.py @@ -28,7 +28,7 @@ ), ) def test_tolist_scalar_dtype(values, dtype, expected_dtype): - # GH ##### + # GH49890 ser = Series(values, dtype=dtype) result_dtype = type(ser.tolist()[0]) assert result_dtype == expected_dtype From 1fd54e76d1c0d9b607a69bf7f20ec644424e71a0 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Thu, 24 Nov 2022 12:21:35 -0500 Subject: [PATCH 3/7] fix test --- pandas/tests/series/methods/test_tolist.py | 23 +++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/methods/test_tolist.py b/pandas/tests/series/methods/test_tolist.py index 4c2f7c360050f..36c4892393d60 100644 --- a/pandas/tests/series/methods/test_tolist.py +++ b/pandas/tests/series/methods/test_tolist.py @@ -1,5 +1,7 @@ import pytest +import pandas.util._test_decorators as td + from pandas import ( Interval, Period, @@ -14,17 +16,32 @@ ( ([1], "int64", int), ([1], "Int64", int), - ([1], "int64[pyarrow]", int), ([1.0], "float64", float), ([1.0], "Float64", float), - ([1.0], "float64[pyarrow]", float), (["abc"], "object", str), (["abc"], "string", str), - (["abc"], "string[pyarrow]", str), ([Interval(1, 3)], "interval", Interval), ([Period("2000-01-01", "D")], "period[D]", Period), ([Timedelta(days=1)], "timedelta64[ns]", Timedelta), ([Timestamp("2000-01-01")], "datetime64[ns]", Timestamp), + pytest.param( + [1], + "int64[pyarrow]", + int, + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), + pytest.param( + [1], + "float64[pyarrow]", + float, + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), + pytest.param( + [1], + "string[pyarrow]", + str, + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), ), ) def test_tolist_scalar_dtype(values, dtype, expected_dtype): From 2e99f2ebc7a3828a2f2b0c9722c0b0fd0f599712 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Thu, 24 Nov 2022 12:32:57 -0500 Subject: [PATCH 4/7] fix test --- pandas/tests/series/methods/test_tolist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/methods/test_tolist.py b/pandas/tests/series/methods/test_tolist.py index 36c4892393d60..608c9b5edb91b 100644 --- a/pandas/tests/series/methods/test_tolist.py +++ b/pandas/tests/series/methods/test_tolist.py @@ -31,13 +31,13 @@ marks=td.skip_if_no("pyarrow", min_version="1.0.0"), ), pytest.param( - [1], + [1.0], "float64[pyarrow]", float, marks=td.skip_if_no("pyarrow", min_version="1.0.0"), ), pytest.param( - [1], + ["abc"], "string[pyarrow]", str, marks=td.skip_if_no("pyarrow", min_version="1.0.0"), From d0e4f543a23d19da43ec225e9f50f9b5d55072ec Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Fri, 25 Nov 2022 21:07:24 -0500 Subject: [PATCH 5/7] update min version --- pandas/tests/series/methods/test_tolist.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/methods/test_tolist.py b/pandas/tests/series/methods/test_tolist.py index 608c9b5edb91b..ba19487c78c34 100644 --- a/pandas/tests/series/methods/test_tolist.py +++ b/pandas/tests/series/methods/test_tolist.py @@ -28,19 +28,19 @@ [1], "int64[pyarrow]", int, - marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + marks=td.skip_if_no("pyarrow", min_version="6.0.0"), ), pytest.param( [1.0], "float64[pyarrow]", float, - marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + marks=td.skip_if_no("pyarrow", min_version="6.0.0"), ), pytest.param( ["abc"], "string[pyarrow]", str, - marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + marks=td.skip_if_no("pyarrow", min_version="6.0.0"), ), ), ) From e3105e2be2e9e5d5425a0573c1995d11895f9cd0 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Mon, 28 Nov 2022 18:07:31 -0500 Subject: [PATCH 6/7] remove min_version --- pandas/tests/series/methods/test_tolist.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/methods/test_tolist.py b/pandas/tests/series/methods/test_tolist.py index ba19487c78c34..cfb39cd53c199 100644 --- a/pandas/tests/series/methods/test_tolist.py +++ b/pandas/tests/series/methods/test_tolist.py @@ -28,19 +28,19 @@ [1], "int64[pyarrow]", int, - marks=td.skip_if_no("pyarrow", min_version="6.0.0"), + marks=td.skip_if_no("pyarrow"), ), pytest.param( [1.0], "float64[pyarrow]", float, - marks=td.skip_if_no("pyarrow", min_version="6.0.0"), + marks=td.skip_if_no("pyarrow"), ), pytest.param( ["abc"], "string[pyarrow]", str, - marks=td.skip_if_no("pyarrow", min_version="6.0.0"), + marks=td.skip_if_no("pyarrow"), ), ), ) From af9095a0ab71e031fd9d42dfb6dec314d329cb1b Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Mon, 28 Nov 2022 18:11:11 -0500 Subject: [PATCH 7/7] formatting --- pandas/tests/series/methods/test_tolist.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/pandas/tests/series/methods/test_tolist.py b/pandas/tests/series/methods/test_tolist.py index cfb39cd53c199..4af473528e238 100644 --- a/pandas/tests/series/methods/test_tolist.py +++ b/pandas/tests/series/methods/test_tolist.py @@ -24,24 +24,9 @@ ([Period("2000-01-01", "D")], "period[D]", Period), ([Timedelta(days=1)], "timedelta64[ns]", Timedelta), ([Timestamp("2000-01-01")], "datetime64[ns]", Timestamp), - pytest.param( - [1], - "int64[pyarrow]", - int, - marks=td.skip_if_no("pyarrow"), - ), - pytest.param( - [1.0], - "float64[pyarrow]", - float, - marks=td.skip_if_no("pyarrow"), - ), - pytest.param( - ["abc"], - "string[pyarrow]", - str, - marks=td.skip_if_no("pyarrow"), - ), + pytest.param([1], "int64[pyarrow]", int, marks=td.skip_if_no("pyarrow")), + pytest.param([1.0], "float64[pyarrow]", float, marks=td.skip_if_no("pyarrow")), + pytest.param(["abc"], "string[pyarrow]", str, marks=td.skip_if_no("pyarrow")), ), ) def test_tolist_scalar_dtype(values, dtype, expected_dtype):