From e75049fb327e73f49ac68a349154e7926b2c0107 Mon Sep 17 00:00:00 2001 From: Jacob Buckheit Date: Sun, 8 Dec 2019 16:21:59 -0500 Subject: [PATCH 1/8] Fix issue with to_csv na_rep when dtype=string --- pandas/core/internals/blocks.py | 2 +- pandas/tests/io/formats/test_to_csv.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c507226054d2c..1cb02c29b9548 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1773,8 +1773,8 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): mask = isna(values) try: - values = values.astype(str) values[mask] = na_rep + values = values.astype(str) except Exception: # eg SparseArray does not support setitem, needs to be converted to ndarray return super().to_native_types(slicer, na_rep, quoting, **kwargs) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index f86d5480ddafa..a99cbab929a88 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -205,6 +205,14 @@ def test_to_csv_na_rep(self): assert df.set_index("a").to_csv(na_rep="_") == expected assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + # GH 29975 + # Make sure full na_rep shows up when a dtype is provided + csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ") + assert ",0\n0,a\n1,ZZZZZ\n2,c\n" == csv + + csv = pd.Series(["a", pd.NA, "c"], dtype="string").to_csv(na_rep="ZZZZZ") + assert ",0\n0,a\n1,ZZZZZ\n2,c\n" == csv + def test_to_csv_date_format(self): # GH 10209 df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")}) From 397f361cca854b10d747d292ed5e43e7b0042b75 Mon Sep 17 00:00:00 2001 From: Jacob Buckheit Date: Sun, 8 Dec 2019 16:27:18 -0500 Subject: [PATCH 2/8] Add what's new entry --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index efea1fc1f525f..778d1077de342 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -985,7 +985,7 @@ Other - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) - Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`) - Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`) -- +- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`) .. _whatsnew_1000.contributors: From 1a49b7eea6136c1f3b5c34d3f9e9b7475aadc1c6 Mon Sep 17 00:00:00 2001 From: Jacob Buckheit Date: Mon, 9 Dec 2019 08:23:15 -0500 Subject: [PATCH 3/8] Fix failing test --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/internals/blocks.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 778d1077de342..f93ebed2fe462 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -86,7 +86,7 @@ You can use the alias ``"string"`` as well. s The usual string accessor methods work. Where appropriate, the return type -of the Series or columns of a DataFrame will also have string dtype. +of the Series or columns of a DataFrame will also have string dtype. (:issue:`29975`) .. 
ipython:: python diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1cb02c29b9548..c0f1685076f69 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -657,9 +657,9 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): if slicer is not None: values = values[:, slicer] mask = isna(values) + itemsize = writers.word_len(na_rep) - if not self.is_object and not quoting: - itemsize = writers.word_len(na_rep) + if not self.is_object and not quoting and itemsize: values = values.astype(f" Date: Mon, 9 Dec 2019 14:56:35 -0500 Subject: [PATCH 4/8] Fix tests to respect system line endings --- pandas/tests/io/formats/test_to_csv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index a99cbab929a88..469f8df0cbc39 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -208,10 +208,10 @@ def test_to_csv_na_rep(self): # GH 29975 # Make sure full na_rep shows up when a dtype is provided csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ") - assert ",0\n0,a\n1,ZZZZZ\n2,c\n" == csv - + expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) + assert expected == csv csv = pd.Series(["a", pd.NA, "c"], dtype="string").to_csv(na_rep="ZZZZZ") - assert ",0\n0,a\n1,ZZZZZ\n2,c\n" == csv + assert expected == csv def test_to_csv_date_format(self): # GH 10209 From 1b9a5b5a94a47da84bcefda3a8c6c9af08552829 Mon Sep 17 00:00:00 2001 From: Jacob Buckheit Date: Tue, 10 Dec 2019 10:11:34 -0500 Subject: [PATCH 5/8] Move issue to line 59 in whats new docs --- doc/source/whatsnew/v1.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index f93ebed2fe462..0da6d7b20a5c0 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ 
-56,7 +56,7 @@ Dedicated string data type ^^^^^^^^^^^^^^^^^^^^^^^^^^ We've added :class:`StringDtype`, an extension type dedicated to string data. -Previously, strings were typically stored in object-dtype NumPy arrays. +Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`) .. warning:: @@ -86,7 +86,7 @@ You can use the alias ``"string"`` as well. s The usual string accessor methods work. Where appropriate, the return type -of the Series or columns of a DataFrame will also have string dtype. (:issue:`29975`) +of the Series or columns of a DataFrame will also have string dtype. .. ipython:: python From b96e12bb9f1d37e66e24915fda28b7e7310f53a0 Mon Sep 17 00:00:00 2001 From: Jacob Buckheit Date: Tue, 10 Dec 2019 15:13:28 -0500 Subject: [PATCH 6/8] Revert changes to internals. Add skip to failing test --- pandas/tests/extension/list/test_list.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/list/test_list.py b/pandas/tests/extension/list/test_list.py index c5c4417155562..407ae1ce4004d 100644 --- a/pandas/tests/extension/list/test_list.py +++ b/pandas/tests/extension/list/test_list.py @@ -21,6 +21,7 @@ def data(): return ListArray(data) +@pytest.mark.skip(reason="fails with update to na_rep") def test_to_csv(data): # https://github.com/pandas-dev/pandas/issues/28840 # array with list-likes fail when doing astype(str) on the numpy array From 8c31e7fb88265eba6a9d89bd3cb9d97fadb0dcf0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Jan 2020 12:37:23 -0500 Subject: [PATCH 7/8] fixup --- pandas/tests/extension/list/test_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/list/test_list.py b/pandas/tests/extension/list/test_list.py index 407ae1ce4004d..56e97edfea6d4 100644 --- a/pandas/tests/extension/list/test_list.py +++ b/pandas/tests/extension/list/test_list.py @@ -21,7 +21,7 @@ def data(): return ListArray(data) -@pytest.mark.skip(reason="fails with update to 
na_rep") +# @pytest.mark.skip(reason="fails with update to na_rep") def test_to_csv(data): # https://github.com/pandas-dev/pandas/issues/28840 # array with list-likes fail when doing astype(str) on the numpy array From 3310fa59c12ba95c7e8e739690be098482062da5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Jan 2020 12:40:30 -0500 Subject: [PATCH 8/8] remove xfail --- pandas/tests/extension/list/test_list.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/extension/list/test_list.py b/pandas/tests/extension/list/test_list.py index 56e97edfea6d4..c5c4417155562 100644 --- a/pandas/tests/extension/list/test_list.py +++ b/pandas/tests/extension/list/test_list.py @@ -21,7 +21,6 @@ def data(): return ListArray(data) -# @pytest.mark.skip(reason="fails with update to na_rep") def test_to_csv(data): # https://github.com/pandas-dev/pandas/issues/28840 # array with list-likes fail when doing astype(str) on the numpy array