From ed1f3726e9d25702ccd5b2f8e901f99dfff77164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 29 Nov 2020 18:45:43 -0500 Subject: [PATCH] BUG/CLN: Minimize number of ResourceWarnings --- pandas/io/excel/_base.py | 3 ++ pandas/io/excel/_xlwt.py | 4 +- pandas/io/parsers.py | 6 ++- pandas/io/sas/sasreader.py | 7 +++- pandas/tests/io/excel/test_openpyxl.py | 13 +++--- pandas/tests/io/excel/test_style.py | 17 ++++---- pandas/tests/io/excel/test_writers.py | 44 ++++++++++----------- pandas/tests/io/excel/test_xlsxwriter.py | 19 +++++---- pandas/tests/io/parser/test_multi_thread.py | 19 +++++---- 9 files changed, 71 insertions(+), 61 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c519baa4c21da..fa4b86216b1e1 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -553,6 +553,9 @@ class ExcelWriter(metaclass=abc.ABCMeta): Default is to use xlwt for xls, openpyxl for xlsx, odf for ods. See DataFrame.to_excel for typical usage. + The writer should be used as a context manager. Otherwise, call `close()` to save + and close any opened file handles. + Parameters ---------- path : str or typing.BinaryIO diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py index 9ede7cd0c2b95..9a725c15de61e 100644 --- a/pandas/io/excel/_xlwt.py +++ b/pandas/io/excel/_xlwt.py @@ -45,7 +45,9 @@ def save(self): """ Save workbook to disk. """ - self.book.save(self.handles.handle) + if self.sheets: + # fails when the ExcelWriter is just opened and then closed + self.book.save(self.handles.handle) def write_cells( self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 25e8d9acf4690..9cdc9871e1e07 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1881,7 +1881,11 @@ def __init__(self, src: FilePathOrBuffer, **kwds): # no attribute "mmap" [union-attr] self.handles.handle = self.handles.handle.mmap # type: ignore[union-attr] - self._reader = parsers.TextReader(self.handles.handle, **kwds) + try: + self._reader = parsers.TextReader(self.handles.handle, **kwds) + except Exception: + self.handles.close() + raise self.unnamed_cols = self._reader.unnamed_cols passed_names = self.names is None diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 3f0370209e9a8..d9cb9902b930a 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -6,7 +6,7 @@ from pandas._typing import FilePathOrBuffer, Label -from pandas.io.common import stringify_path +from pandas.io.common import IOHandles, stringify_path if TYPE_CHECKING: from pandas import DataFrame @@ -18,6 +18,8 @@ class ReaderBase(metaclass=ABCMeta): Protocol for XportReader and SAS7BDATReader classes. """ + handles: IOHandles + @abstractmethod def read(self, nrows=None): pass @@ -134,4 +136,5 @@ def read_sas( if iterator or chunksize: return reader - return reader.read() + with reader.handles: + return reader.read() diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 1349808277d81..3155e22d3ff5d 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -68,11 +68,11 @@ def test_write_cells_merge_styled(ext): ] with tm.ensure_clean(ext) as path: - writer = _OpenpyxlWriter(path) - writer.write_cells(initial_cells, sheet_name=sheet_name) - writer.write_cells(merge_cells, sheet_name=sheet_name) + with _OpenpyxlWriter(path) as writer: + writer.write_cells(initial_cells, sheet_name=sheet_name) + writer.write_cells(merge_cells, sheet_name=sheet_name) - wks = writer.sheets[sheet_name] + wks = writer.sheets[sheet_name] xcell_b1 = wks["B1"] xcell_a2 = wks["A2"] assert xcell_b1.font == openpyxl_sty_merged @@ -93,9 +93,8 @@ def test_write_append_mode(ext, mode, expected): wb.worksheets[1]["A1"].value = "bar" wb.save(f) - writer = ExcelWriter(f, engine="openpyxl", mode=mode) - df.to_excel(writer, sheet_name="baz", index=False) - writer.save() + with ExcelWriter(f, engine="openpyxl", mode=mode) as writer: + df.to_excel(writer, sheet_name="baz", index=False) wb2 = openpyxl.load_workbook(f) result = [sheet.title for sheet in wb2.worksheets] diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index 936fc175a493b..6b1abebe0506a 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -68,15 +68,14 @@ def custom_converter(css): df = DataFrame(np.random.randn(11, 3)) with tm.ensure_clean(".xlsx" if engine != "xlwt" else ".xls") as path: - writer = ExcelWriter(path, engine=engine) - df.to_excel(writer, sheet_name="frame") - df.style.to_excel(writer, sheet_name="unstyled") - styled = df.style.apply(style, axis=None) - styled.to_excel(writer, sheet_name="styled") - ExcelFormatter(styled, style_converter=custom_converter).write( - writer, sheet_name="custom" - ) - writer.save() + with ExcelWriter(path, engine=engine) as writer: + df.to_excel(writer, sheet_name="frame") + df.style.to_excel(writer, sheet_name="unstyled") + styled = df.style.apply(style, axis=None) + styled.to_excel(writer, sheet_name="styled") + ExcelFormatter(styled, style_converter=custom_converter).write( + writer, sheet_name="custom" + ) if engine not in ("openpyxl", "xlsxwriter"): # For other engines, we only smoke test diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 8da9c79160e91..e64297c3c8d1a 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -522,10 +522,9 @@ def test_sheets(self, frame, tsframe, path): frame.to_excel(path, "test1", index=False) # Test writing to separate sheets - writer = ExcelWriter(path) - frame.to_excel(writer, "test1") - tsframe.to_excel(writer, "test2") - writer.close() + with ExcelWriter(path) as writer: + frame.to_excel(writer, "test1") + tsframe.to_excel(writer, "test2") reader = ExcelFile(path) recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(frame, recons) @@ -1199,17 +1198,16 @@ def test_datetimes(self, path): def test_bytes_io(self, engine): # see gh-7074 - bio = BytesIO() - df = DataFrame(np.random.randn(10, 2)) + with BytesIO() as bio: + df = DataFrame(np.random.randn(10, 2)) - # Pass engine explicitly, as there is no file path to infer from. - writer = ExcelWriter(bio, engine=engine) - df.to_excel(writer) - writer.save() + # Pass engine explicitly, as there is no file path to infer from. + with ExcelWriter(bio, engine=engine) as writer: + df.to_excel(writer) - bio.seek(0) - reread_df = pd.read_excel(bio, index_col=0) - tm.assert_frame_equal(df, reread_df) + bio.seek(0) + reread_df = pd.read_excel(bio, index_col=0) + tm.assert_frame_equal(df, reread_df) def test_write_lists_dict(self, path): # see gh-8188. @@ -1317,12 +1315,12 @@ class TestExcelWriterEngineTests: ) def test_ExcelWriter_dispatch(self, klass, ext): with tm.ensure_clean(ext) as path: - writer = ExcelWriter(path) - if ext == ".xlsx" and td.safe_import("xlsxwriter"): - # xlsxwriter has preference over openpyxl if both installed - assert isinstance(writer, _XlsxWriter) - else: - assert isinstance(writer, klass) + with ExcelWriter(path) as writer: + if ext == ".xlsx" and td.safe_import("xlsxwriter"): + # xlsxwriter has preference over openpyxl if both installed + assert isinstance(writer, _XlsxWriter) + else: + assert isinstance(writer, klass) def test_ExcelWriter_dispatch_raises(self): with pytest.raises(ValueError, match="No engine"): @@ -1356,8 +1354,8 @@ def check_called(func): path = "something.xlsx" with tm.ensure_clean(path) as filepath: register_writer(DummyClass) - writer = ExcelWriter(filepath) - assert isinstance(writer, DummyClass) + with ExcelWriter(filepath) as writer: + assert isinstance(writer, DummyClass) df = tm.makeCustomDataframe(1, 1) check_called(lambda: df.to_excel(filepath)) with tm.ensure_clean("something.xls") as filepath: @@ -1377,5 +1375,5 @@ def test_excelfile_fspath(self): def test_excelwriter_fspath(self): with tm.ensure_clean("foo.xlsx") as path: - writer = ExcelWriter(path) - assert os.fspath(writer) == str(path) + with ExcelWriter(path) as writer: + assert os.fspath(writer) == str(path) diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index b6f791434a92b..6de378f6a3d3e 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -23,16 +23,15 @@ def test_column_format(ext): with tm.ensure_clean(ext) as path: frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]}) - writer = ExcelWriter(path) - frame.to_excel(writer) - - # Add a number format to col B and ensure it is applied to cells. - num_format = "#,##0" - write_workbook = writer.book - write_worksheet = write_workbook.worksheets()[0] - col_format = write_workbook.add_format({"num_format": num_format}) - write_worksheet.set_column("B:B", None, col_format) - writer.save() + with ExcelWriter(path) as writer: + frame.to_excel(writer) + + # Add a number format to col B and ensure it is applied to cells. + num_format = "#,##0" + write_workbook = writer.book + write_worksheet = write_workbook.worksheets()[0] + col_format = write_workbook.add_format({"num_format": num_format}) + write_worksheet.set_column("B:B", None, col_format) read_workbook = openpyxl.load_workbook(path) try: diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index d50560c684084..123dce2048a44 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -2,6 +2,7 @@ Tests multithreading behaviour for reading and parsing files for each parser defined in parsers.py """ +from contextlib import ExitStack from io import BytesIO from multiprocessing.pool import ThreadPool @@ -46,16 +47,18 @@ def test_multi_thread_string_io_read_csv(all_parsers): "\n".join([f"{i:d},{i:d},{i:d}" for i in range(max_row_range)]).encode() for _ in range(num_files) ] - files = [BytesIO(b) for b in bytes_to_df] # Read all files in many threads. - pool = ThreadPool(8) + with ExitStack() as stack: + files = [stack.enter_context(BytesIO(b)) for b in bytes_to_df] - results = pool.map(parser.read_csv, files) - first_result = results[0] + pool = stack.enter_context(ThreadPool(8)) - for result in results: - tm.assert_frame_equal(first_result, result) + results = pool.map(parser.read_csv, files) + first_result = results[0] + + for result in results: + tm.assert_frame_equal(first_result, result) def _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks): @@ -116,8 +119,8 @@ def reader(arg): (num_rows * i // num_tasks, num_rows // num_tasks) for i in range(num_tasks) ] - pool = ThreadPool(processes=num_tasks) - results = pool.map(reader, tasks) + with ThreadPool(processes=num_tasks) as pool: + results = pool.map(reader, tasks) header = results[0].columns