Skip to content

Commit b185fac

Browse files
committed
read_excel: call skiprows callback just once per row.
For callable skiprows arguments, the callback was being called several times for every row. Now it is called just once per row and the result is cached.
1 parent 173d27a commit b185fac

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

pandas/io/excel/_base.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -817,8 +817,16 @@ def _parse_sheet(
817817
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
818818
**kwds,
819819
):
820-
try:
821820

821+
if callable(skiprows):
822+
# In order to avoid calling skiprows multiple times on
823+
# every row, we just do it here and keep the resulting
824+
# list for passing it down to the parser.
825+
skiprows = [ix for ix in range(len(data)) if skiprows(ix)]
826+
if len(skiprows) == 0:
827+
skiprows = None
828+
829+
try:
822830
# header indexes reference rows after removing skiprows, so we
823831
# create an index map from the without-skiprows to the
824832
# original indexes.
@@ -828,9 +836,7 @@ def _parse_sheet(
828836
ixmap = list(range(skiprows, len(data)))
829837
elif is_list_like(skiprows):
830838
skiprows_set = set(cast(Sequence[int], skiprows))
831-
ixmap = [ix for ix, _ in enumerate(data) if ix not in skiprows_set]
832-
elif callable(skiprows):
833-
ixmap = [ix for ix, _ in enumerate(data) if not skiprows(ix)]
839+
ixmap = [ix for ix in range(len(data)) if ix not in skiprows_set]
834840
else:
835841
raise ValueError(
836842
"skiprows must be an integer or a list of integers"

0 commit comments

Comments
 (0)