Skip to content

[3.12] gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612) #108003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions Lib/re/_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,6 @@ def _compile(code, pattern, flags):
emit(ANY_ALL)
else:
emit(ANY)
elif op is POSSESSIVE_REPEAT:
# gh-106052: Possessive quantifiers do not work when the
# subpattern contains backtracking, i.e. "(?:ab?c)*+".
# Implement it as equivalent greedy qualifier in atomic group.
p = [(MAX_REPEAT, av)]
p = [(ATOMIC_GROUP, p)]
_compile(code, p, flags)
elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error("internal: unsupported template operator %r" % (op,))
Expand Down
12 changes: 10 additions & 2 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -2366,6 +2366,16 @@ def test_template_function_and_flag_is_deprecated(self):
self.assertFalse(template_re1.match('nope'))

def test_bug_gh106052(self):
# gh-100061
self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0))
self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0))
self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2))
self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2))
# gh-106052
self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
Expand Down Expand Up @@ -2471,7 +2481,6 @@ def test_atomic_group(self):
17: SUCCESS
''')

@unittest.expectedFailure # gh-106052
def test_possesive_repeat_one(self):
self.assertEqual(get_debug_out(r'a?+'), '''\
POSSESSIVE_REPEAT 0 1
Expand All @@ -2484,7 +2493,6 @@ def test_possesive_repeat_one(self):
12: SUCCESS
''')

@unittest.expectedFailure # gh-106052
def test_possesive_repeat(self):
self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
POSSESSIVE_REPEAT 0 1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix a bug that causes wrong matches for regular expressions with possessive
quantifiers.
4 changes: 4 additions & 0 deletions Modules/_sre/sre_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -1334,6 +1334,10 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();

/* Restore the global Input Stream pointer
since it can change after jumps. */
state->ptr = ptr;

/* We have sufficient matches, so exit loop. */
break;
}
Expand Down