Skip to content

gh-128641: Fix ConfigParser.read Performance Regression #129596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 24 additions & 29 deletions Lib/configparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@
import os
import re
import sys
import types

__all__ = ("NoSectionError", "DuplicateOptionError", "DuplicateSectionError",
"NoOptionError", "InterpolationError", "InterpolationDepthError",
Expand Down Expand Up @@ -562,35 +561,36 @@ def __init__(self):


class _Line(str):
__slots__ = 'clean', 'has_comments'

def __new__(cls, val, *args, **kwargs):
return super().__new__(cls, val)

def __init__(self, val, prefixes):
self.prefixes = prefixes
def __init__(self, val, comments):
trimmed = val.strip()
self.clean = comments.strip(trimmed)
self.has_comments = trimmed != self.clean

@functools.cached_property
def clean(self):
return self._strip_full() and self._strip_inline()

@property
def has_comments(self):
return self.strip() != self.clean

def _strip_inline(self):
"""
Search for the earliest prefix at the beginning of the line or following a space.
"""
matcher = re.compile(
'|'.join(fr'(^|\s)({re.escape(prefix)})' for prefix in self.prefixes.inline)
# match nothing if no prefixes
or '(?!)'
class _CommentSpec:
def __init__(self, full_prefixes, inline_prefixes):
full_patterns = (
# prefix at the beginning of a line
fr'^({re.escape(prefix)}).*'
for prefix in full_prefixes
)
match = matcher.search(self)
return self[:match.start() if match else None].strip()
inline_patterns = (
# prefix at the beginning of the line or following a space
fr'(^|\s)({re.escape(prefix)}.*)'
for prefix in inline_prefixes
)
self.pattern = re.compile('|'.join(itertools.chain(full_patterns, inline_patterns)))

def strip(self, text):
return self.pattern.sub('', text).rstrip()

def _strip_full(self):
return '' if any(map(self.strip().startswith, self.prefixes.full)) else True
def wrap(self, text):
return _Line(text, self)


class RawConfigParser(MutableMapping):
Expand Down Expand Up @@ -659,10 +659,7 @@ def __init__(self, defaults=None, dict_type=_default_dict,
else:
self._optcre = re.compile(self._OPT_TMPL.format(delim=d),
re.VERBOSE)
self._prefixes = types.SimpleNamespace(
full=tuple(comment_prefixes or ()),
inline=tuple(inline_comment_prefixes or ()),
)
self._comments = _CommentSpec(comment_prefixes or (), inline_comment_prefixes or ())
self._strict = strict
self._allow_no_value = allow_no_value
self._empty_lines_in_values = empty_lines_in_values
Expand Down Expand Up @@ -1057,7 +1054,6 @@ def _read(self, fp, fpname):
in an otherwise empty line or may be entered in lines holding values or
section names. Please note that comments get stripped off when reading configuration files.
"""

try:
ParsingError._raise_all(self._read_inner(fp, fpname))
finally:
Expand All @@ -1066,8 +1062,7 @@ def _read(self, fp, fpname):
def _read_inner(self, fp, fpname):
st = _ReadState()

Line = functools.partial(_Line, prefixes=self._prefixes)
for st.lineno, line in enumerate(map(Line, fp), start=1):
for st.lineno, line in enumerate(map(self._comments.wrap, fp), start=1):
if not line.clean:
if self._empty_lines_in_values:
# add empty line to the value, but only if there was no
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Restore :meth:`configparser.ConfigParser.read` performance.
Loading