Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,7 @@ I/O
- Bug in the conversion from PyArrow to pandas (e.g. for reading Parquet) with nullable dtypes and a PyArrow array whose data buffer size is not a multiple of the dtype size (:issue:`40896`)
- Bug in :func:`read_excel` would raise an error when pandas could not determine the file type, even when user specified the ``engine`` argument (:issue:`41225`)
- Bug in :func:`read_clipboard` where copying from an Excel file shifted values into the wrong column if there were null values in the first column (:issue:`41108`)
- Bug in :func:`read_excel` where it attempted to read chart sheets from ``.xlsx`` files (:issue:`41448`)

Period
^^^^^^
Expand Down
7 changes: 4 additions & 3 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@
or ``StringIO``.
sheet_name : str, int, list, or None, default 0
Strings are used for sheet names. Integers are used in zero-indexed
sheet positions (chart sheets do not count as a sheet position).
Lists of strings/integers are used to request multiple sheets.
Specify None to get all worksheets.

Available cases:

Expand All @@ -92,7 +93,7 @@
* ``"Sheet1"``: Load sheet with name "Sheet1"
* ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5"
as a dict of `DataFrame`
* None: All worksheets.

header : int, list of int, default 0
Row (0-indexed) to use for the column labels of the parsed
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):

@property
def sheet_names(self) -> list[str]:
    """
    Return the titles of the worksheets in the loaded workbook.

    Only the entries of ``self.book.worksheets`` are reported, so sheets
    that the workbook does not list as worksheets (e.g. chart sheets,
    GH 41448) are left out of the result.

    Returns
    -------
    list of str
        Worksheet titles, in the order ``self.book.worksheets`` yields them.
    """
    # ``self.book.sheetnames`` would also include non-worksheet entries,
    # which cannot be parsed into a DataFrame; build the list from the
    # worksheet objects instead.
    return [sheet.title for sheet in self.book.worksheets]

def get_sheet_by_name(self, name: str):
self.raise_if_bad_sheet_by_name(name)
Expand Down
Binary file added pandas/tests/io/data/excel/chartsheet.xls
Binary file not shown.
Binary file added pandas/tests/io/data/excel/chartsheet.xlsb
Binary file not shown.
Binary file added pandas/tests/io/data/excel/chartsheet.xlsm
Binary file not shown.
Binary file added pandas/tests/io/data/excel/chartsheet.xlsx
Binary file not shown.
41 changes: 41 additions & 0 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,34 @@ def test_trailing_blanks(self, read_ext):
result = pd.read_excel(file_name)
assert result.shape == (3, 3)

def test_ignore_chartsheets_by_str(self, request, read_ext):
    """
    Requesting the sheet named "Chart1" from the chartsheet fixture
    workbook must raise the "worksheet not found" ValueError.
    """
    # GH 41448
    active_engine = pd.read_excel.keywords["engine"]
    if active_engine == "odf":
        pytest.skip("chartsheets do not exist in the ODF format")
    elif active_engine == "pyxlsb":
        # Expected to fail for this engine; mark instead of skipping so a
        # future fix shows up as an unexpected pass.
        expected_failure = pytest.mark.xfail(
            reason="pyxlsb can't distinguish chartsheets from worksheets"
        )
        request.node.add_marker(expected_failure)

    workbook_path = "chartsheet" + read_ext
    with pytest.raises(ValueError, match="Worksheet named 'Chart1' not found"):
        pd.read_excel(workbook_path, sheet_name="Chart1")

def test_ignore_chartsheets_by_int(self, request, read_ext):
    """
    Requesting sheet position 1 from the chartsheet fixture workbook must
    raise the "index is invalid" ValueError reporting a single worksheet.
    """
    # GH 41448
    active_engine = pd.read_excel.keywords["engine"]
    if active_engine == "odf":
        pytest.skip("chartsheets do not exist in the ODF format")
    elif active_engine == "pyxlsb":
        # Expected to fail for this engine; register the xfail at runtime
        # because the engine is only known inside the test.
        expected_failure = pytest.mark.xfail(
            reason="pyxlsb can't distinguish chartsheets from worksheets"
        )
        request.node.add_marker(expected_failure)

    expected_message = "Worksheet index 1 is invalid, 1 worksheets found"
    workbook_path = "chartsheet" + read_ext
    with pytest.raises(ValueError, match=expected_message):
        pd.read_excel(workbook_path, sheet_name=1)


class TestExcelFileRead:
@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -1501,6 +1529,19 @@ def test_engine_invalid_option(self, read_ext):
with pd.option_context(f"io.excel{read_ext}.reader", "abc"):
pass

def test_ignore_chartsheets(self, request, engine, read_ext):
    """
    ``ExcelFile.sheet_names`` for the chartsheet fixture workbook must
    list only "Sheet1".
    """
    # GH 41448
    if engine == "odf":
        pytest.skip("chartsheets do not exist in the ODF format")
    elif engine == "pyxlsb":
        # Expected to fail for this engine; flagged as xfail rather than
        # skipped so the test still runs.
        expected_failure = pytest.mark.xfail(
            reason="pyxlsb can't distinguish chartsheets from worksheets"
        )
        request.node.add_marker(expected_failure)

    workbook_path = "chartsheet" + read_ext
    with pd.ExcelFile(workbook_path) as excel:
        reported_names = excel.sheet_names
        assert reported_names == ["Sheet1"]

def test_corrupt_files_closed(self, request, engine, read_ext):
# GH41778
errors = (BadZipFile,)
Expand Down