From 72dfe7bb96fcebe15ffe10140b10280515d7d20a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 21 Jan 2022 19:50:04 -0800 Subject: [PATCH 1/7] Use more context managers --- pandas/io/excel/_base.py | 11 ++++++----- pandas/tests/io/parser/test_python_parser_only.py | 5 ++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f159228b94545..2ce8be07dee48 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1266,11 +1266,12 @@ def inspect_excel_format( elif not peek.startswith(ZIP_SIGNATURE): return None - zf = zipfile.ZipFile(stream) - - # Workaround for some third party files that use forward slashes and - # lower case names. - component_names = [name.replace("\\", "/").lower() for name in zf.namelist()] + with zipfile.ZipFile(stream) as zf: + # Workaround for some third party files that use forward slashes and + # lower case names. + component_names = [ + name.replace("\\", "/").lower() for name in zf.namelist() + ] if "xl/workbook.xml" in component_names: return "xlsx" diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index 73a6c8226b554..999a6217efb68 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -167,9 +167,8 @@ def test_decompression_regex_sep(python_parser_only, csv1, compression, klass): klass = getattr(module, klass) with tm.ensure_clean() as path: - tmp = klass(path, mode="wb") - tmp.write(data) - tmp.close() + with klass(path, mode="wb") as tmp: + tmp.write(data) result = parser.read_csv(path, sep="::", compression=compression) tm.assert_frame_equal(result, expected) From 2152401786265dc097e6111b9e175c1336705882 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 13:38:19 -0800 Subject: [PATCH 2/7] Add more context managers --- doc/source/user_guide/io.rst | 15 ++++++--------- pandas/tests/io/test_common.py | 4 ++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index c51e91336b023..9e7de4bb2934e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -839,9 +839,8 @@ The simplest case is to just pass in ``parse_dates=True``: .. ipython:: python :suppress: - f = open("foo.csv", "w") - f.write("date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5") - f.close() + with open("foo.csv", "w") as f: + f.write("date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5") .. ipython:: python @@ -1452,7 +1451,6 @@ a different usage of the ``delimiter`` parameter: .. ipython:: python :suppress: - f = open("bar.csv", "w") data1 = ( "id8141 360.242940 149.910199 11950.7\n" "id1594 444.953632 166.985655 11788.4\n" @@ -1460,8 +1458,8 @@ a different usage of the ``delimiter`` parameter: "id1230 413.836124 184.375703 11916.8\n" "id1948 502.953953 173.237159 12468.3" ) - f.write(data1) - f.close() + with open("bar.csv", "w") as f: + f.write(data1) Consider a typical fixed-width data file: @@ -1604,9 +1602,8 @@ of multi-columns indices. :suppress: data = ",a,a,a,b,c,c\n,q,r,s,t,u,v\none,1,2,3,4,5,6\ntwo,7,8,9,10,11,12" - fh = open("mi2.csv", "w") - fh.write(data) - fh.close() + with open("mi2.csv", "w") as fh: + fh.write(data) .. ipython:: python diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b458f3351c860..7b7918a323c99 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -415,8 +415,8 @@ def test_constructor_bad_file(self, mmap_file): with pytest.raises(err, match=msg): icom._MMapWrapper(non_file) - target = open(mmap_file) - target.close() + with open(mmap_file) as target: + pass msg = "I/O operation on closed file" with pytest.raises(ValueError, match=msg): From c1ad1cc0b7c9f567f5ef76314d3fdcd41ceacf7d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 16:47:02 -0800 Subject: [PATCH 3/7] Context closing for openpyxl workbooks --- pandas/tests/io/excel/test_openpyxl.py | 56 +++++++++++++----------- pandas/tests/io/excel/test_style.py | 36 ++++++++------- pandas/tests/io/excel/test_xlsxwriter.py | 13 +++--- 3 files changed, 56 insertions(+), 49 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index e0d4a0c12ecdf..9f6e1ed9c08d9 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -1,3 +1,4 @@ +import contextlib from pathlib import Path import re @@ -159,12 +160,12 @@ def test_write_append_mode(ext, mode, expected): with ExcelWriter(f, engine="openpyxl", mode=mode) as writer: df.to_excel(writer, sheet_name="baz", index=False) - wb2 = openpyxl.load_workbook(f) - result = [sheet.title for sheet in wb2.worksheets] - assert result == expected + with contextlib.closing(openpyxl.load_workbook(f)) as wb2: + result = [sheet.title for sheet in wb2.worksheets] + assert result == expected - for index, cell_value in enumerate(expected): - assert wb2.worksheets[index]["A1"].value == cell_value + for index, cell_value in enumerate(expected): + assert wb2.worksheets[index]["A1"].value == cell_value @pytest.mark.parametrize( @@ -187,15 +188,14 @@ def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected ) as writer: df2.to_excel(writer, sheet_name="foo", index=False) - wb = openpyxl.load_workbook(f) - assert len(wb.sheetnames) == num_sheets - assert wb.sheetnames[0] == "foo" - result = pd.read_excel(wb, "foo", engine="openpyxl") - assert list(result["fruit"]) == expected - if len(wb.sheetnames) == 2: - result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl") - tm.assert_frame_equal(result, df2) - wb.close() + with contextlib.closing(openpyxl.load_workbook(f)) as wb: + assert len(wb.sheetnames) == num_sheets + assert wb.sheetnames[0] == "foo" + result = pd.read_excel(wb, "foo", engine="openpyxl") + assert list(result["fruit"]) == expected + if len(wb.sheetnames) == 2: + result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl") + tm.assert_frame_equal(result, df2) @pytest.mark.parametrize( @@ -279,9 +279,10 @@ def test_to_excel_with_openpyxl_engine(ext): def test_read_workbook(datapath, ext, read_only): # GH 39528 filename = datapath("io", "data", "excel", "test1" + ext) - wb = openpyxl.load_workbook(filename, read_only=read_only) - result = pd.read_excel(wb, engine="openpyxl") - wb.close() + with contextlib.closing( + openpyxl.load_workbook(filename, read_only=read_only) + ) as wb: + result = pd.read_excel(wb, engine="openpyxl") expected = pd.read_excel(filename) tm.assert_frame_equal(result, expected) @@ -313,9 +314,10 @@ def test_read_with_bad_dimension( if read_only is None: result = pd.read_excel(path, header=header) else: - wb = openpyxl.load_workbook(path, read_only=read_only) - result = pd.read_excel(wb, engine="openpyxl", header=header) - wb.close() + with contextlib.closing( + openpyxl.load_workbook(path, read_only=read_only) + ) as wb: + result = pd.read_excel(wb, engine="openpyxl", header=header) expected = DataFrame(expected_data) tm.assert_frame_equal(result, expected) @@ -349,9 +351,10 @@ def test_read_with_empty_trailing_rows(datapath, ext, read_only, request): if read_only is None: result = pd.read_excel(path) else: - wb = openpyxl.load_workbook(path, read_only=read_only) - result = pd.read_excel(wb, engine="openpyxl") - wb.close() + with contextlib.closing( + openpyxl.load_workbook(path, read_only=read_only) + ) as wb: + result = pd.read_excel(wb, engine="openpyxl") expected = DataFrame( { "Title": [np.nan, "A", 1, 2, 3], @@ -370,8 +373,9 @@ def test_read_empty_with_blank_row(datapath, ext, read_only): if read_only is None: result = pd.read_excel(path) else: - wb = openpyxl.load_workbook(path, read_only=read_only) - result = pd.read_excel(wb, engine="openpyxl") - wb.close() + with contextlib.closing( + openpyxl.load_workbook(path, read_only=read_only) + ) as wb: + result = pd.read_excel(wb, engine="openpyxl") expected = DataFrame() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index 8a142aebd719d..1a92cc9672bfa 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -1,3 +1,5 @@ +import contextlib + import numpy as np import pytest @@ -37,13 +39,13 @@ def test_styler_to_excel_unstyled(engine): df.style.to_excel(writer, sheet_name="unstyled") openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl - wb = openpyxl.load_workbook(path) + with contextlib.closing(openpyxl.load_workbook(path)) as wb: - for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns): - assert len(col1) == len(col2) - for cell1, cell2 in zip(col1, col2): - assert cell1.value == cell2.value - assert_equal_cell_styles(cell1, cell2) + for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + assert cell1.value == cell2.value + assert_equal_cell_styles(cell1, cell2) shared_style_params = [ @@ -87,11 +89,11 @@ def test_styler_to_excel_basic(engine, css, attrs, expected): styler.to_excel(writer, sheet_name="styled") openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl - wb = openpyxl.load_workbook(path) + with contextlib.closing(openpyxl.load_workbook(path)) as wb: - # test unstyled data cell does not have expected styles - # test styled cell has expected styles - u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2) + # test unstyled data cell does not have expected styles + # test styled cell has expected styles + u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2) for attr in attrs: u_cell, s_cell = getattr(u_cell, attr), getattr(s_cell, attr) @@ -127,12 +129,12 @@ def test_styler_to_excel_basic_indexes(engine, css, attrs, expected): styler.to_excel(writer, sheet_name="styled") openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl - wb = openpyxl.load_workbook(path) + with contextlib.closing(openpyxl.load_workbook(path)) as wb: - # test null styled index cells does not have expected styles - # test styled cell has expected styles - ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1) - uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2) + # test null styled index cells does not have expected styles + # test styled cell has expected styles + ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1) + uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2) for attr in attrs: ui_cell, si_cell = getattr(ui_cell, attr), getattr(si_cell, attr) uc_cell, sc_cell = getattr(uc_cell, attr), getattr(sc_cell, attr) @@ -163,5 +165,5 @@ def custom_converter(css): writer, sheet_name="custom" ) - wb = openpyxl.load_workbook(path) - assert wb["custom"].cell(2, 2).font.color.value == "00111222" + with contextlib.closing(openpyxl.load_workbook(path)) as wb: + assert wb["custom"].cell(2, 2).font.color.value == "00111222" diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index 79d2f55a9b8ff..b5c1b47775089 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -1,3 +1,4 @@ +import contextlib import re import warnings @@ -34,12 +35,12 @@ def test_column_format(ext): col_format = write_workbook.add_format({"num_format": num_format}) write_worksheet.set_column("B:B", None, col_format) - read_workbook = openpyxl.load_workbook(path) - try: - read_worksheet = read_workbook["Sheet1"] - except TypeError: - # compat - read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1") + with contextlib.closing(openpyxl.load_workbook(path)) as read_workbook: + try: + read_worksheet = read_workbook["Sheet1"] + except TypeError: + # compat + read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1") # Get the number format from the cell. try: From 99c868db9a56621d7a378aa1fd544255a37a05ad Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 17:05:45 -0800 Subject: [PATCH 4/7] Context close xlrd objects --- pandas/io/excel/_base.py | 15 ++++++++++----- pandas/tests/io/excel/test_xlrd.py | 15 ++++++++------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 2ce8be07dee48..c9db2d93e7ec6 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -535,11 +535,16 @@ def load_workbook(self, filepath_or_buffer): pass def close(self) -> None: - if hasattr(self, "book") and hasattr(self.book, "close"): - # pyxlsb: opens a TemporaryFile - # openpyxl: https://stackoverflow.com/questions/31416842/ - # openpyxl-does-not-close-excel-workbook-in-read-only-mode - self.book.close() + if hasattr(self, "book"): + if hasattr(self.book, "close"): + # pyxlsb: opens a TemporaryFile + # openpyxl: https://stackoverflow.com/questions/31416842/ + # openpyxl-does-not-close-excel-workbook-in-read-only-mode + self.book.close() + elif hasattr(self.book, "release_resource"): + # xlrd + # https://github.com/python-excel/xlrd/blob/2.0.1/xlrd/book.py#L548 + self.book.release_resource() self.handles.close() @property diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index 2309187b8e9af..f2ae668dee5a3 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -45,13 +45,14 @@ def test_read_xlrd_book(read_ext_xlrd, frame): with tm.ensure_clean(read_ext_xlrd) as pth: df.to_excel(pth, sheet_name) - book = xlrd.open_workbook(pth) - - with ExcelFile(book, engine=engine) as xl: - result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) - tm.assert_frame_equal(df, result) - - result = pd.read_excel(book, sheet_name=sheet_name, engine=engine, index_col=0) + with xlrd.open_workbook(pth) as book: + with ExcelFile(book, engine=engine) as xl: + result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) + tm.assert_frame_equal(df, result) + + result = pd.read_excel( + book, sheet_name=sheet_name, engine=engine, index_col=0 + ) tm.assert_frame_equal(df, result) From 2d416a37ad3e09cb7e2a7e2c9de51d96ec63ef83 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 17:23:03 -0800 Subject: [PATCH 5/7] Fix method name --- pandas/io/excel/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c9db2d93e7ec6..3e1df9325713b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -541,10 +541,10 @@ def close(self) -> None: # openpyxl: https://stackoverflow.com/questions/31416842/ # openpyxl-does-not-close-excel-workbook-in-read-only-mode self.book.close() - elif hasattr(self.book, "release_resource"): + elif hasattr(self.book, "release_resources"): # xlrd # https://github.com/python-excel/xlrd/blob/2.0.1/xlrd/book.py#L548 - self.book.release_resource() + self.book.release_resources() self.handles.close() @property From 9041ab7de373b8e18a01af9fa0e7a0b7e47aaba6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 18:57:52 -0800 Subject: [PATCH 6/7] more closing --- pandas/tests/io/parser/test_c_parser_only.py | 8 +++----- pandas/tests/io/sas/test_sas7bdat.py | 6 +++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 5df4470635af5..83cccdb37b343 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -592,11 +592,9 @@ def test_file_handles_mmap(c_parser_only, csv1): parser = c_parser_only with open(csv1) as f: - m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) - parser.read_csv(m) - - assert not m.closed - m.close() + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: + parser.read_csv(m) + assert not m.closed def test_file_binary_mode(c_parser_only): diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 5477559262cb8..1847d3634a550 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -1,3 +1,4 @@ +import contextlib from datetime import datetime import io import os @@ -135,9 +136,8 @@ def test_encoding_options(datapath): from pandas.io.sas.sas7bdat import SAS7BDATReader - rdr = SAS7BDATReader(fname, convert_header_text=False) - df3 = rdr.read() - rdr.close() + with contextlib.closing(SAS7BDATReader(fname, convert_header_text=False)) as rdr: + df3 = rdr.read() for x, y in zip(df1.columns, df3.columns): assert x == y.decode() From 6d16160a37266c706fa8d9603a9300a2e157d6b6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 18:59:19 -0800 Subject: [PATCH 7/7] Use mode --- doc/source/user_guide/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 9e7de4bb2934e..34f10c1b3ec28 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -839,7 +839,7 @@ The simplest case is to just pass in ``parse_dates=True``: .. ipython:: python :suppress: - with open("foo.csv", "w") as f: + with open("foo.csv", mode="w") as f: f.write("date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5") .. ipython:: python