Skip to content

Commit ddc75ab

Browse files
committed
TST: read_excel: test multiindex headers with skip rows between.
The read_excel function has several bugs in how it handles combinations of the header, skiprows and index_col arguments. The tests added here demonstrate some of them.
1 parent b0c4194 commit ddc75ab

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed
12.8 KB
Binary file not shown.

pandas/tests/io/excel/test_readers.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,3 +1729,53 @@ def test_corrupt_files_closed(self, engine, tmp_excel):
17291729
pd.ExcelFile(tmp_excel, engine=engine)
17301730
except errors:
17311731
pass
1732+
1733+
def test_mi_header_skiprows1(self, engine, read_ext):
1734+
if engine is None and read_ext == ".xlsx":
1735+
with open("test_mi_holes.xlsx", "rb") as f:
1736+
expected = pd.read_excel(f,
1737+
sheet_name='expected',
1738+
header=[0, 1],
1739+
index_col=[0, 1])
1740+
1741+
with open("test_mi_holes.xlsx", "rb") as f:
1742+
actual = pd.read_excel(f,
1743+
sheet_name='skiprows1',
1744+
skiprows=1,
1745+
header=[0, 1],
1746+
index_col=[0, 1])
1747+
1748+
tm.assert_frame_equal(expected, actual)
1749+
1750+
def test_mi_header_hole(self, engine, read_ext):
1751+
if engine is None and read_ext == ".xlsx":
1752+
with open("test_mi_holes.xlsx", "rb") as f:
1753+
expected = pd.read_excel(f,
1754+
sheet_name='expected',
1755+
header=[0, 1],
1756+
index_col=[0, 1])
1757+
1758+
with open("test_mi_holes.xlsx", "rb") as f:
1759+
actual = pd.read_excel(f,
1760+
sheet_name='header_hole',
1761+
skiprows=[1],
1762+
header=[0, 1],
1763+
index_col=[0, 1])
1764+
1765+
tm.assert_frame_equal(expected, actual)
1766+
1767+
def test_mi_header_and_index_holes(self, engine, read_ext):
1768+
if engine is None and read_ext == ".xlsx":
1769+
with open("test_mi_holes.xlsx", "rb") as f:
1770+
expected = pd.read_excel(f,
1771+
sheet_name='expected_index_hole',
1772+
header=[0, 1],
1773+
index_col=[0, 1])
1774+
1775+
with open("test_mi_holes.xlsx", "rb") as f:
1776+
actual = pd.read_excel(f,
1777+
sheet_name='index_hole',
1778+
header=[0, 2],
1779+
index_col=[0, 1])
1780+
1781+
tm.assert_frame_equal(expected, actual)

0 commit comments

Comments
 (0)