Skip to content

Commit e2cf329

Browse files
committed
read_excel: call skiprows callback just once per row.
For callable skiprows arguments, the callback was being called several times for every row. Now it is called just once per row and the result is cached.
1 parent 0e47b18 commit e2cf329

File tree

2 files changed

+13
-6
lines changed

2 files changed

+13
-6
lines changed

pandas/io/excel/_base.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -817,20 +817,27 @@ def _parse_sheet(
817817
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
818818
**kwds,
819819
):
820-
try:
821820

821+
if callable(skiprows):
822+
# In order to avoid calling skiprows multiple times on
823+
# every row, we just do it here and keep the resulting
824+
# list for passing it down to the parser.
825+
skiprows = [ix for ix in range(len(data)) if skiprows(ix)]
826+
if len(skiprows) == 0:
827+
skiprows = None
828+
829+
try:
822830
# header indexes reference rows after removing skiprows, so we
823831
# create an index map from the without-skiprows to the
824832
# original indexes.
833+
ixmap: Union(range, list[int])
825834
if skiprows is None:
826-
ixmap = list(range(len(data)))
835+
ixmap = range(len(data))
827836
elif is_integer(skiprows):
828-
ixmap = list(range(skiprows, len(data)))
837+
ixmap = range(skiprows, len(data))
829838
elif is_list_like(skiprows):
830839
skiprows_set = set(cast(Sequence[int], skiprows))
831-
ixmap = [ix for ix, _ in enumerate(data) if ix not in skiprows_set]
832-
elif callable(skiprows):
833-
ixmap = [ix for ix, _ in enumerate(data) if not skiprows(ix)]
840+
ixmap = [ix for ix in range(len(data)) if ix not in skiprows_set]
834841
else:
835842
raise ValueError(
836843
"skiprows must be an integer or a list of integers"
Binary file not shown.

0 commit comments

Comments
 (0)