From c89377cf9a753f5879cbed3ac04afc5e6dab7f1b Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 28 Jul 2023 17:52:46 +0200 Subject: [PATCH 1/8] TST: add a test for interchanging from non-pandas tz-aware --- pandas/tests/interchange/test_impl.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index bfb0eceaa0ca1..1c4ea75e276ef 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -295,3 +295,20 @@ def test_datetimetzdtype(tz, unit): ) df = pd.DataFrame({"ts_tz": tz_data}) tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) + + +def test_interchange_from_non_pandas_tz_aware(): + # GH 54239 + import pyarrow as pa + import pyarrow.compute as pc + + arr = pa.array([datetime(2020, 1, 1), None, datetime(2020, 1, 2)]) + arr = pc.assume_timezone(arr, "Asia/Kathmandu") + result = pa.table({"arr": arr}).to_pandas() + + df = pd.DataFrame( + ["2020-01-01 00:00:00+05:45", "NaT", "2020-01-02 00:00:00+05:45"], + columns=["arr"], + ) + expected = df.astype("datetime64[ns, Asia/Kathmandu]") + tm.assert_frame_equal(expected, result) From ddcf97878c73d7a2e41b4df9276d8dd9e7fce965 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 28 Jul 2023 18:14:59 +0200 Subject: [PATCH 2/8] TST: correct the test for interchanging from non-pandas tz-aware --- pandas/tests/interchange/test_impl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 1c4ea75e276ef..a8d15836d661f 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -304,8 +304,7 @@ def test_interchange_from_non_pandas_tz_aware(): arr = pa.array([datetime(2020, 1, 1), None, datetime(2020, 1, 2)]) arr = pc.assume_timezone(arr, "Asia/Kathmandu") - result = pa.table({"arr": arr}).to_pandas() - + result = from_dataframe(pa.table({"arr": arr}).to_pandas()) df = pd.DataFrame( ["2020-01-01 00:00:00+05:45", "NaT", "2020-01-02 00:00:00+05:45"], columns=["arr"], From 42aeee051e262ca9956436733bb8a2d1d6bf4cc4 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 28 Jul 2023 20:08:51 +0200 Subject: [PATCH 3/8] check an error in the test --- pandas/tests/interchange/test_impl.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index a8d15836d661f..a7040c511227a 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -5,6 +5,7 @@ import pytest from pandas._libs.tslibs import iNaT +from pandas.errors import SettingWithCopyError import pandas.util._test_decorators as td import pandas as pd @@ -297,17 +298,14 @@ def test_datetimetzdtype(tz, unit): tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) -def test_interchange_from_non_pandas_tz_aware(): +def test_interchange_from_non_pandas_tz_aware_raise_error(): # GH 54239 - import pyarrow as pa + pa = pytest.importorskip("pyarrow") import pyarrow.compute as pc arr = pa.array([datetime(2020, 1, 1), None, datetime(2020, 1, 2)]) arr = pc.assume_timezone(arr, "Asia/Kathmandu") - result = from_dataframe(pa.table({"arr": arr}).to_pandas()) - df = pd.DataFrame( - ["2020-01-01 00:00:00+05:45", "NaT", "2020-01-02 00:00:00+05:45"], - columns=["arr"], - ) - expected = df.astype("datetime64[ns, Asia/Kathmandu]") - tm.assert_frame_equal(expected, result) + + msg = "modifications to a method of a datetimelike object are not supported." + with pytest.raises(SettingWithCopyError, match=msg): + from_dataframe(pa.table({"arr": arr})) From 944824251d5f11906fa95ea4b73517d448237b3b Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 31 Jul 2023 10:07:52 +0200 Subject: [PATCH 4/8] correct test --- pandas/tests/interchange/test_impl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index a7040c511227a..2065584285713 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -305,7 +305,9 @@ def test_interchange_from_non_pandas_tz_aware_raise_error(): arr = pa.array([datetime(2020, 1, 1), None, datetime(2020, 1, 2)]) arr = pc.assume_timezone(arr, "Asia/Kathmandu") + table = pa.table({"arr": arr}) + exchange_df = table.__dataframe__() msg = "modifications to a method of a datetimelike object are not supported." with pytest.raises(SettingWithCopyError, match=msg): - from_dataframe(pa.table({"arr": arr})) + from_dataframe(exchange_df) From da6fe090cf501c00ed30dfa531448e65ee1b05d8 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 31 Jul 2023 10:59:06 +0200 Subject: [PATCH 5/8] add extra except in set_nulls --- pandas/core/interchange/from_dataframe.py | 4 ++++ pandas/tests/interchange/test_impl.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index d3aece6e63798..2337fcfae4e80 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -5,6 +5,7 @@ from typing import Any import numpy as np +from pandas.errors import SettingWithCopyError from pandas.compat._optional import import_optional_dependency @@ -513,5 +514,8 @@ def set_nulls( # cast the `data` to nullable float dtype. data = data.astype(float) data[null_pos] = None + except SettingWithCopyError: + data = data.copy() + data[null_pos] = None return data diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 2065584285713..0a6ebb0598bb1 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -307,7 +307,12 @@ def test_interchange_from_non_pandas_tz_aware_raise_error(): arr = pc.assume_timezone(arr, "Asia/Kathmandu") table = pa.table({"arr": arr}) exchange_df = table.__dataframe__() + result = from_dataframe(exchange_df) + + df = pd.DataFrame( + ["2020-01-01 00:00:00+05:45", "NaT", "2020-01-02 00:00:00+05:45"], + columns=["arr"], + ) + expected = df.astype("datetime64[ns, Asia/Kathmandu]") + tm.assert_frame_equal(expected, result) - msg = "modifications to a method of a datetimelike object are not supported." - with pytest.raises(SettingWithCopyError, match=msg): - from_dataframe(exchange_df) From 15a1fa684dd8efff813deafa9462b2126a94d7ef Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 31 Jul 2023 15:29:46 +0200 Subject: [PATCH 6/8] fix bug during interchanging from non-pandas tz-aware --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/interchange/from_dataframe.py | 4 +++- pandas/tests/interchange/test_impl.py | 10 ++++------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 91efcfd590c01..4d3a427fd1e76 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -682,6 +682,7 @@ Other - Bug in :class:`DataFrame` and :class:`Series` raising for data of complex dtype when ``NaN`` values are present (:issue:`53627`) - Bug in :class:`DatetimeIndex` where ``repr`` of index passed with time does not print time is midnight and non-day based freq(:issue:`53470`) - Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`) +- Bug in :func:`api.interchange.from_dataframe` was raising during interchanging from non-pandas tz-aware data containing ``NaN`` values (:issue:`54287`) - Bug in :func:`api.interchange.from_dataframe` when converting an empty DataFrame object (:issue:`53155`) - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`) - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`) diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 2337fcfae4e80..a027c93bce8ff 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -5,9 +5,9 @@ from typing import Any import numpy as np -from pandas.errors import SettingWithCopyError from pandas.compat._optional import import_optional_dependency +from pandas.errors import SettingWithCopyError import pandas as pd from pandas.core.interchange.dataframe_protocol import ( @@ -515,6 +515,8 @@ def set_nulls( data = data.astype(float) data[null_pos] = None except SettingWithCopyError: + # SettingWithCopyError happens if the `data` appears + # to have 'NaT'. If this happens, copy the `data`. data = data.copy() data[null_pos] = None diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 0a6ebb0598bb1..cea983a348b8a 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -5,7 +5,6 @@ import pytest from pandas._libs.tslibs import iNaT -from pandas.errors import SettingWithCopyError import pandas.util._test_decorators as td import pandas as pd @@ -298,8 +297,8 @@ def test_datetimetzdtype(tz, unit): tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) -def test_interchange_from_non_pandas_tz_aware_raise_error(): - # GH 54239 +def test_interchange_from_non_pandas_tz_aware(): + # GH 54239, 54287 pa = pytest.importorskip("pyarrow") import pyarrow.compute as pc @@ -309,10 +308,9 @@ def test_interchange_from_non_pandas_tz_aware_raise_error(): exchange_df = table.__dataframe__() result = from_dataframe(exchange_df) - df = pd.DataFrame( + expected = pd.DataFrame( ["2020-01-01 00:00:00+05:45", "NaT", "2020-01-02 00:00:00+05:45"], columns=["arr"], + dtype="datetime64[us, Asia/Kathmandu]", ) - expected = df.astype("datetime64[ns, Asia/Kathmandu]") tm.assert_frame_equal(expected, result) - From dd2c3ebd31feb426e19005ee2aebda68c601f539 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 31 Jul 2023 18:07:53 +0200 Subject: [PATCH 7/8] correct test --- pandas/tests/interchange/test_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index cea983a348b8a..9a10536f41875 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -299,7 +299,7 @@ def test_datetimetzdtype(tz, unit): def test_interchange_from_non_pandas_tz_aware(): # GH 54239, 54287 - pa = pytest.importorskip("pyarrow") + pa = pytest.importorskip("pyarrow", "11.0.0") import pyarrow.compute as pc arr = pa.array([datetime(2020, 1, 1), None, datetime(2020, 1, 2)]) From c5751570ecd14afb5d3d0ff9e830279fe7036014 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 31 Jul 2023 19:58:21 +0200 Subject: [PATCH 8/8] correct wording --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/interchange/from_dataframe.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 4d3a427fd1e76..ee5ceb8acc42c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -682,7 +682,7 @@ Other - Bug in :class:`DataFrame` and :class:`Series` raising for data of complex dtype when ``NaN`` values are present (:issue:`53627`) - Bug in :class:`DatetimeIndex` where ``repr`` of index passed with time does not print time is midnight and non-day based freq(:issue:`53470`) - Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`) -- Bug in :func:`api.interchange.from_dataframe` was raising during interchanging from non-pandas tz-aware data containing ``NaN`` values (:issue:`54287`) +- Bug in :func:`api.interchange.from_dataframe` was raising during interchanging from non-pandas tz-aware data containing null values (:issue:`54287`) - Bug in :func:`api.interchange.from_dataframe` when converting an empty DataFrame object (:issue:`53155`) - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`) - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`) diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index a027c93bce8ff..1a94fdb545d57 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -515,8 +515,7 @@ def set_nulls( data = data.astype(float) data[null_pos] = None except SettingWithCopyError: - # SettingWithCopyError happens if the `data` appears - # to have 'NaT'. If this happens, copy the `data`. + # `SettingWithCopyError` may happen for datetime-like with missing values. data = data.copy() data[null_pos] = None