From 28e8bd250d3113eee32e54f0c89aaf917daa450b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 7 Sep 2021 12:41:04 -0400 Subject: [PATCH 1/3] REGR: SpooledTemporaryFile support in read_csv --- pandas/io/common.py | 33 ++++++++++++++----------- pandas/tests/io/parser/test_encoding.py | 12 +++++++++ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 4e97eaf8b953c..dfd2c728fbed1 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -723,20 +723,25 @@ def get_handle( # since get_handle would have opened it in binary mode is_wrapped = True elif is_text and (compression or _is_binary_mode(handle, ioargs.mode)): - handle = TextIOWrapper( - # error: Argument 1 to "TextIOWrapper" has incompatible type - # "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]"; - # expected "IO[bytes]" - handle, # type: ignore[arg-type] - encoding=ioargs.encoding, - errors=errors, - newline="", - ) - handles.append(handle) - # only marked as wrapped when the caller provided a handle - is_wrapped = not ( - isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close - ) + try: + # GH43439: tempfile.SpooledTemporaryFile has no attribute 'readable' + handle = TextIOWrapper( + # error: Argument 1 to "TextIOWrapper" has incompatible type + # "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]"; + # expected "IO[bytes]" + handle, # type: ignore[arg-type] + encoding=ioargs.encoding, + errors=errors, + newline="", + ) + handles.append(handle) + # only marked as wrapped when the caller provided a handle + is_wrapped = not ( + isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close + ) + except AttributeError: + # read_csv(engine="c") might still be able to deal with binary handles + pass handles.reverse() # close the most recently added buffer first if ioargs.should_close: diff --git a/pandas/tests/io/parser/test_encoding.py 
b/pandas/tests/io/parser/test_encoding.py index b902a99cc4ea2..78334db5805c3 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -250,3 +250,15 @@ def test_encoding_memory_map(all_parsers, encoding): expected.to_csv(file, index=False, encoding=encoding) df = parser.read_csv(file, encoding=encoding, memory_map=True) tm.assert_frame_equal(df, expected) + + +def test_non_readable(all_parsers): + parser = all_parsers + if parser.engine == "python": + pytest.skip("SpooledTemporaryFile does not work with Python engine") + with tempfile.SpooledTemporaryFile() as handle: + handle.write(b"abcd") + handle.seek(0) + df = parser.read_csv(handle) + expected = DataFrame([], columns=["abcd"]) + tm.assert_frame_equal(df, expected) From b646e15656906d51930822cfb4140e9282798029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 9 Sep 2021 17:30:48 -0400 Subject: [PATCH 2/3] add tempfile.SpooledTemporaryFile to the text-only list --- doc/source/whatsnew/v1.3.3.rst | 1 + pandas/io/common.py | 45 +++++++++++++------------ pandas/tests/io/parser/test_encoding.py | 3 +- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index 6bcce36bef93d..4bf5d7bead0a6 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -29,6 +29,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`) - Fixed regression in :meth:`DataFrame.groupby` where aggregation on columns with object types dropped results on those columns (:issue:`42395`, :issue:`43108`) - Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float`` ``Series`` with list-like fill value having a dtype which couldn't cast lostlessly (like ``float32`` filled with ``float64``) (:issue:`43424`) +- Fixed regression in 
:func:`read_csv` throwing an ``AttributeError`` when the file handle is a ``tempfile.SpooledTemporaryFile`` object (:issue:`43439`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/common.py b/pandas/io/common.py index dfd2c728fbed1..1058e98445284 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -17,6 +17,7 @@ ) import mmap import os +import tempfile from typing import ( IO, Any, @@ -723,25 +724,20 @@ def get_handle( # since get_handle would have opened it in binary mode is_wrapped = True elif is_text and (compression or _is_binary_mode(handle, ioargs.mode)): - try: - # GH43439: tempfile.SpooledTemporaryFile has no attribute 'readable' - handle = TextIOWrapper( - # error: Argument 1 to "TextIOWrapper" has incompatible type - # "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]"; - # expected "IO[bytes]" - handle, # type: ignore[arg-type] - encoding=ioargs.encoding, - errors=errors, - newline="", - ) - handles.append(handle) - # only marked as wrapped when the caller provided a handle - is_wrapped = not ( - isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close - ) - except AttributeError: - # read_csv(engine="c") might still be able to deal with binary handles - pass + handle = TextIOWrapper( + # error: Argument 1 to "TextIOWrapper" has incompatible type + # "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]"; + # expected "IO[bytes]" + handle, # type: ignore[arg-type] + encoding=ioargs.encoding, + errors=errors, + newline="", + ) + handles.append(handle) + # only marked as wrapped when the caller provided a handle + is_wrapped = not ( + isinstance(ioargs.filepath_or_buffer, str) or ioargs.should_close + ) handles.reverse() # close the most recently added buffer first if ioargs.should_close: @@ -998,8 +994,15 @@ def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool: if "t" in mode or "b" in mode: return "b" in mode - # classes that 
expect string but have 'b' in mode - text_classes = (codecs.StreamWriter, codecs.StreamReader, codecs.StreamReaderWriter) + # exceptions + text_classes = ( + # classes that expect string but have 'b' in mode + codecs.StreamWriter, + codecs.StreamReader, + codecs.StreamReaderWriter, + # cannot be wrapped in TextIOWrapper GH43439 + tempfile.SpooledTemporaryFile, + ) if issubclass(type(handle), text_classes): return False diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 78334db5805c3..cce612724e96e 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -252,7 +252,8 @@ def test_encoding_memory_map(all_parsers, encoding): tm.assert_frame_equal(df, expected) -def test_non_readable(all_parsers): +def test_not_readable(all_parsers): + # GH43439 parser = all_parsers if parser.engine == "python": pytest.skip("SpooledTemporaryFile does not work with Python engine") From eca6967838152f60ecb375e807f3d1db1892cfc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 9 Sep 2021 17:59:00 -0400 Subject: [PATCH 3/3] skip pyarrow, only works with the c-engine --- pandas/tests/io/parser/test_encoding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index cce612724e96e..680c437f7087e 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -255,8 +255,8 @@ def test_encoding_memory_map(all_parsers, encoding): def test_not_readable(all_parsers): # GH43439 parser = all_parsers - if parser.engine == "python": - pytest.skip("SpooledTemporaryFile does not work with Python engine") + if parser.engine in ("python", "pyarrow"): + pytest.skip("SpooledTemporaryFile does only work with the c-engine") with tempfile.SpooledTemporaryFile() as handle: handle.write(b"abcd") handle.seek(0)