Skip to content

REF: Use more context managers to close files #45579

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 24, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -839,9 +839,8 @@ The simplest case is to just pass in ``parse_dates=True``:
.. ipython:: python
:suppress:

f = open("foo.csv", "w")
f.write("date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5")
f.close()
with open("foo.csv", "w") as f:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using `mode="w"` might be slightly clearer.

f.write("date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5")

.. ipython:: python

Expand Down Expand Up @@ -1452,16 +1451,15 @@ a different usage of the ``delimiter`` parameter:
.. ipython:: python
:suppress:

f = open("bar.csv", "w")
data1 = (
"id8141 360.242940 149.910199 11950.7\n"
"id1594 444.953632 166.985655 11788.4\n"
"id1849 364.136849 183.628767 11806.2\n"
"id1230 413.836124 184.375703 11916.8\n"
"id1948 502.953953 173.237159 12468.3"
)
f.write(data1)
f.close()
with open("bar.csv", "w") as f:
f.write(data1)

Consider a typical fixed-width data file:

Expand Down Expand Up @@ -1604,9 +1602,8 @@ of multi-columns indices.
:suppress:

data = ",a,a,a,b,c,c\n,q,r,s,t,u,v\none,1,2,3,4,5,6\ntwo,7,8,9,10,11,12"
fh = open("mi2.csv", "w")
fh.write(data)
fh.close()
with open("mi2.csv", "w") as fh:
fh.write(data)

.. ipython:: python

Expand Down
26 changes: 16 additions & 10 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,11 +535,16 @@ def load_workbook(self, filepath_or_buffer):
pass

def close(self) -> None:
if hasattr(self, "book") and hasattr(self.book, "close"):
# pyxlsb: opens a TemporaryFile
# openpyxl: https://stackoverflow.com/questions/31416842/
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
self.book.close()
if hasattr(self, "book"):
if hasattr(self.book, "close"):
# pyxlsb: opens a TemporaryFile
# openpyxl: https://stackoverflow.com/questions/31416842/
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
self.book.close()
elif hasattr(self.book, "release_resources"):
# xlrd
# https://github.com/python-excel/xlrd/blob/2.0.1/xlrd/book.py#L548
self.book.release_resources()
self.handles.close()

@property
Expand Down Expand Up @@ -1266,11 +1271,12 @@ def inspect_excel_format(
elif not peek.startswith(ZIP_SIGNATURE):
return None

zf = zipfile.ZipFile(stream)

# Workaround for some third party files that use forward slashes and
# lower case names.
component_names = [name.replace("\\", "/").lower() for name in zf.namelist()]
with zipfile.ZipFile(stream) as zf:
# Workaround for some third party files that use forward slashes and
# lower case names.
component_names = [
name.replace("\\", "/").lower() for name in zf.namelist()
]

if "xl/workbook.xml" in component_names:
return "xlsx"
Expand Down
56 changes: 30 additions & 26 deletions pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
from pathlib import Path
import re

Expand Down Expand Up @@ -159,12 +160,12 @@ def test_write_append_mode(ext, mode, expected):
with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
df.to_excel(writer, sheet_name="baz", index=False)

wb2 = openpyxl.load_workbook(f)
result = [sheet.title for sheet in wb2.worksheets]
assert result == expected
with contextlib.closing(openpyxl.load_workbook(f)) as wb2:
result = [sheet.title for sheet in wb2.worksheets]
assert result == expected

for index, cell_value in enumerate(expected):
assert wb2.worksheets[index]["A1"].value == cell_value
for index, cell_value in enumerate(expected):
assert wb2.worksheets[index]["A1"].value == cell_value


@pytest.mark.parametrize(
Expand All @@ -187,15 +188,14 @@ def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected
) as writer:
df2.to_excel(writer, sheet_name="foo", index=False)

wb = openpyxl.load_workbook(f)
assert len(wb.sheetnames) == num_sheets
assert wb.sheetnames[0] == "foo"
result = pd.read_excel(wb, "foo", engine="openpyxl")
assert list(result["fruit"]) == expected
if len(wb.sheetnames) == 2:
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
tm.assert_frame_equal(result, df2)
wb.close()
with contextlib.closing(openpyxl.load_workbook(f)) as wb:
assert len(wb.sheetnames) == num_sheets
assert wb.sheetnames[0] == "foo"
result = pd.read_excel(wb, "foo", engine="openpyxl")
assert list(result["fruit"]) == expected
if len(wb.sheetnames) == 2:
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
tm.assert_frame_equal(result, df2)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -279,9 +279,10 @@ def test_to_excel_with_openpyxl_engine(ext):
def test_read_workbook(datapath, ext, read_only):
# GH 39528
filename = datapath("io", "data", "excel", "test1" + ext)
wb = openpyxl.load_workbook(filename, read_only=read_only)
result = pd.read_excel(wb, engine="openpyxl")
wb.close()
with contextlib.closing(
openpyxl.load_workbook(filename, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = pd.read_excel(filename)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -313,9 +314,10 @@ def test_read_with_bad_dimension(
if read_only is None:
result = pd.read_excel(path, header=header)
else:
wb = openpyxl.load_workbook(path, read_only=read_only)
result = pd.read_excel(wb, engine="openpyxl", header=header)
wb.close()
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl", header=header)
expected = DataFrame(expected_data)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -349,9 +351,10 @@ def test_read_with_empty_trailing_rows(datapath, ext, read_only, request):
if read_only is None:
result = pd.read_excel(path)
else:
wb = openpyxl.load_workbook(path, read_only=read_only)
result = pd.read_excel(wb, engine="openpyxl")
wb.close()
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = DataFrame(
{
"Title": [np.nan, "A", 1, 2, 3],
Expand All @@ -370,8 +373,9 @@ def test_read_empty_with_blank_row(datapath, ext, read_only):
if read_only is None:
result = pd.read_excel(path)
else:
wb = openpyxl.load_workbook(path, read_only=read_only)
result = pd.read_excel(wb, engine="openpyxl")
wb.close()
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = DataFrame()
tm.assert_frame_equal(result, expected)
36 changes: 19 additions & 17 deletions pandas/tests/io/excel/test_style.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import contextlib

import numpy as np
import pytest

Expand Down Expand Up @@ -37,13 +39,13 @@ def test_styler_to_excel_unstyled(engine):
df.style.to_excel(writer, sheet_name="unstyled")

openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
wb = openpyxl.load_workbook(path)
with contextlib.closing(openpyxl.load_workbook(path)) as wb:

for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns):
assert len(col1) == len(col2)
for cell1, cell2 in zip(col1, col2):
assert cell1.value == cell2.value
assert_equal_cell_styles(cell1, cell2)
for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns):
assert len(col1) == len(col2)
for cell1, cell2 in zip(col1, col2):
assert cell1.value == cell2.value
assert_equal_cell_styles(cell1, cell2)


shared_style_params = [
Expand Down Expand Up @@ -87,11 +89,11 @@ def test_styler_to_excel_basic(engine, css, attrs, expected):
styler.to_excel(writer, sheet_name="styled")

openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
wb = openpyxl.load_workbook(path)
with contextlib.closing(openpyxl.load_workbook(path)) as wb:

# test unstyled data cell does not have expected styles
# test styled cell has expected styles
u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
# test unstyled data cell does not have expected styles
# test styled cell has expected styles
u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
for attr in attrs:
u_cell, s_cell = getattr(u_cell, attr), getattr(s_cell, attr)

Expand Down Expand Up @@ -127,12 +129,12 @@ def test_styler_to_excel_basic_indexes(engine, css, attrs, expected):
styler.to_excel(writer, sheet_name="styled")

openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
wb = openpyxl.load_workbook(path)
with contextlib.closing(openpyxl.load_workbook(path)) as wb:

# test null styled index cells does not have expected styles
# test styled cell has expected styles
ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1)
uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2)
# test null styled index cells does not have expected styles
# test styled cell has expected styles
ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1)
uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2)
for attr in attrs:
ui_cell, si_cell = getattr(ui_cell, attr), getattr(si_cell, attr)
uc_cell, sc_cell = getattr(uc_cell, attr), getattr(sc_cell, attr)
Expand Down Expand Up @@ -163,5 +165,5 @@ def custom_converter(css):
writer, sheet_name="custom"
)

wb = openpyxl.load_workbook(path)
assert wb["custom"].cell(2, 2).font.color.value == "00111222"
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
assert wb["custom"].cell(2, 2).font.color.value == "00111222"
15 changes: 8 additions & 7 deletions pandas/tests/io/excel/test_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ def test_read_xlrd_book(read_ext_xlrd, frame):

with tm.ensure_clean(read_ext_xlrd) as pth:
df.to_excel(pth, sheet_name)
book = xlrd.open_workbook(pth)

with ExcelFile(book, engine=engine) as xl:
result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)
tm.assert_frame_equal(df, result)

result = pd.read_excel(book, sheet_name=sheet_name, engine=engine, index_col=0)
with xlrd.open_workbook(pth) as book:
with ExcelFile(book, engine=engine) as xl:
result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)
tm.assert_frame_equal(df, result)

result = pd.read_excel(
book, sheet_name=sheet_name, engine=engine, index_col=0
)
tm.assert_frame_equal(df, result)


Expand Down
13 changes: 7 additions & 6 deletions pandas/tests/io/excel/test_xlsxwriter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import re
import warnings

Expand Down Expand Up @@ -34,12 +35,12 @@ def test_column_format(ext):
col_format = write_workbook.add_format({"num_format": num_format})
write_worksheet.set_column("B:B", None, col_format)

read_workbook = openpyxl.load_workbook(path)
try:
read_worksheet = read_workbook["Sheet1"]
except TypeError:
# compat
read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")
with contextlib.closing(openpyxl.load_workbook(path)) as read_workbook:
try:
read_worksheet = read_workbook["Sheet1"]
except TypeError:
# compat
read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")

# Get the number format from the cell.
try:
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/io/parser/test_python_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,8 @@ def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
klass = getattr(module, klass)

with tm.ensure_clean() as path:
tmp = klass(path, mode="wb")
tmp.write(data)
tmp.close()
with klass(path, mode="wb") as tmp:
tmp.write(data)

result = parser.read_csv(path, sep="::", compression=compression)
tm.assert_frame_equal(result, expected)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,8 +415,8 @@ def test_constructor_bad_file(self, mmap_file):
with pytest.raises(err, match=msg):
icom._MMapWrapper(non_file)

target = open(mmap_file)
target.close()
with open(mmap_file) as target:
pass

msg = "I/O operation on closed file"
with pytest.raises(ValueError, match=msg):
Expand Down