Skip to content

Commit abd9cc5

Browse files
uyw4687 and Ma Lin authored
gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612)
Restore the global Input Stream pointer after trying to match a sub-pattern. Co-authored-by: Ma Lin <[email protected]>
1 parent a86df29 commit abd9cc5

File tree

4 files changed

+17
-10
lines changed

4 files changed

+17
-10
lines changed

Lib/re/_compiler.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,6 @@ def _compile(code, pattern, flags):
100100
emit(ANY_ALL)
101101
else:
102102
emit(ANY)
103-
elif op is POSSESSIVE_REPEAT:
104-
# gh-106052: Possessive quantifiers do not work when the
105-
# subpattern contains backtracking, i.e. "(?:ab?c)*+".
106-
# Implement it as an equivalent greedy quantifier in an atomic group.
107-
p = [(MAX_REPEAT, av)]
108-
p = [(ATOMIC_GROUP, p)]
109-
_compile(code, p, flags)
110103
elif op in REPEATING_CODES:
111104
if _simple(av[2]):
112105
emit(REPEATING_CODES[op][2])

Lib/test/test_re.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2342,7 +2342,17 @@ def test_bug_gh91616(self):
23422342
self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer
23432343
self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))
23442344

2345-
def test_bug_gh106052(self):
2345+
def test_bug_gh100061(self):
2346+
# gh-100061
2347+
self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2))
2348+
self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2))
2349+
self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2))
2350+
self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2))
2351+
self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0))
2352+
self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0))
2353+
self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2))
2354+
self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2))
2355+
# gh-106052
23462356
self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
23472357
self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
23482358
self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
@@ -2451,7 +2461,6 @@ def test_atomic_group(self):
24512461
17: SUCCESS
24522462
''')
24532463

2454-
@unittest.expectedFailure # gh-106052
24552464
def test_possesive_repeat_one(self):
24562465
self.assertEqual(get_debug_out(r'a?+'), '''\
24572466
POSSESSIVE_REPEAT 0 1
@@ -2464,7 +2473,6 @@ def test_possesive_repeat_one(self):
24642473
12: SUCCESS
24652474
''')
24662475

2467-
@unittest.expectedFailure # gh-106052
24682476
def test_possesive_repeat(self):
24692477
self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
24702478
POSSESSIVE_REPEAT 0 1
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix a bug that causes wrong matches for regular expressions with possessive
2+
quantifiers.

Modules/_sre/sre_lib.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,6 +1336,10 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
13361336
MARK_POP(ctx->lastmark);
13371337
LASTMARK_RESTORE();
13381338

1339+
/* Restore the global Input Stream pointer
1340+
since it can change after jumps. */
1341+
state->ptr = ptr;
1342+
13391343
/* We have sufficient matches, so exit loop. */
13401344
break;
13411345
}

0 commit comments

Comments
 (0)