Skip to content

Commit bd2ef82

Browse files
[3.12] gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612) (#108003)
Restore the global Input Stream pointer after trying to match a sub-pattern. (cherry picked from commit abd9cc5) Co-authored-by: SKO <[email protected]>
1 parent 00bfed7 commit bd2ef82

File tree

4 files changed

+16
-9
lines changed

4 files changed

+16
-9
lines changed

Lib/re/_compiler.py

-7
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,6 @@ def _compile(code, pattern, flags):
100100
emit(ANY_ALL)
101101
else:
102102
emit(ANY)
103-
elif op is POSSESSIVE_REPEAT:
104-
# gh-106052: Possessive quantifiers do not work when the
105-
# subpattern contains backtracking, i.e. "(?:ab?c)*+".
106-
# Implement it as equivalent greedy qualifier in atomic group.
107-
p = [(MAX_REPEAT, av)]
108-
p = [(ATOMIC_GROUP, p)]
109-
_compile(code, p, flags)
110103
elif op in REPEATING_CODES:
111104
if flags & SRE_FLAG_TEMPLATE:
112105
raise error("internal: unsupported template operator %r" % (op,))

Lib/test/test_re.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -2366,6 +2366,16 @@ def test_template_function_and_flag_is_deprecated(self):
23662366
self.assertFalse(template_re1.match('nope'))
23672367

23682368
def test_bug_gh106052(self):
2369+
# gh-100061
2370+
self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2))
2371+
self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2))
2372+
self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2))
2373+
self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2))
2374+
self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0))
2375+
self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0))
2376+
self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2))
2377+
self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2))
2378+
# gh-106052
23692379
self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
23702380
self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
23712381
self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
@@ -2471,7 +2481,6 @@ def test_atomic_group(self):
24712481
17: SUCCESS
24722482
''')
24732483

2474-
@unittest.expectedFailure # gh-106052
24752484
def test_possesive_repeat_one(self):
24762485
self.assertEqual(get_debug_out(r'a?+'), '''\
24772486
POSSESSIVE_REPEAT 0 1
@@ -2484,7 +2493,6 @@ def test_possesive_repeat_one(self):
24842493
12: SUCCESS
24852494
''')
24862495

2487-
@unittest.expectedFailure # gh-106052
24882496
def test_possesive_repeat(self):
24892497
self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
24902498
POSSESSIVE_REPEAT 0 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix a bug that causes wrong matches for regular expressions with possessive
2+
quantifiers.

Modules/_sre/sre_lib.h

+4
Original file line numberDiff line numberDiff line change
@@ -1334,6 +1334,10 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
13341334
MARK_POP(ctx->lastmark);
13351335
LASTMARK_RESTORE();
13361336

1337+
/* Restore the global Input Stream pointer
1338+
since it can change after jumps. */
1339+
state->ptr = ptr;
1340+
13371341
/* We have sufficient matches, so exit loop. */
13381342
break;
13391343
}

0 commit comments

Comments
 (0)