Skip to content

Commit 305ccbe

Browse files
bpo-30298: Weaken the condition of deprecation warnings for inline modifiers. (#1490)
Several consecutive inline modifiers are now allowed at the start of the pattern (e.g. '(?i)(?s)...'). In verbose mode, whitespace and comments are now allowed before and between inline modifiers (e.g. '(?x) (?i) (?s)...').
1 parent 211a392 commit 305ccbe

File tree

3 files changed

+56
-21
lines changed

3 files changed

+56
-21
lines changed

Lib/sre_parse.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True):
412412
sourcematch = source.match
413413
start = source.tell()
414414
while True:
415-
itemsappend(_parse(source, state, verbose))
415+
itemsappend(_parse(source, state, verbose, not nested and not items))
416416
if not sourcematch("|"):
417417
break
418418

@@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
466466
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
467467
return subpattern
468468

469-
def _parse(source, state, verbose):
469+
def _parse(source, state, verbose, first=False):
470470
# parse a simple pattern
471471
subpattern = SubPattern(state)
472472

@@ -730,10 +730,9 @@ def _parse(source, state, verbose):
730730
state.checklookbehindgroup(condgroup, source)
731731
elif char in FLAGS or char == "-":
732732
# flags
733-
pos = source.pos
734733
flags = _parse_flags(source, state, char)
735734
if flags is None: # global flags
736-
if pos != 3: # "(?x"
735+
if not first or subpattern:
737736
import warnings
738737
warnings.warn(
739738
'Flags not at the start of the expression %s%s' % (
@@ -742,6 +741,8 @@ def _parse(source, state, verbose):
742741
),
743742
DeprecationWarning, stacklevel=7
744743
)
744+
if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
745+
raise Verbose
745746
continue
746747
add_flags, del_flags = flags
747748
group = None
@@ -795,9 +796,6 @@ def _parse_flags(source, state, char):
795796
msg = "unknown flag" if char.isalpha() else "missing -, : or )"
796797
raise source.error(msg, len(char))
797798
if char == ")":
798-
if ((add_flags & SRE_FLAG_VERBOSE) and
799-
not (state.flags & SRE_FLAG_VERBOSE)):
800-
raise Verbose
801799
state.flags |= add_flags
802800
return None
803801
if add_flags & GLOBAL_FLAGS:

Lib/test/test_re.py

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,32 +1325,43 @@ def test_inline_flags(self):
13251325
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
13261326
lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
13271327

1328-
p = re.compile(upper_char, re.I | re.U)
1329-
q = p.match(lower_char)
1328+
p = re.compile('.' + upper_char, re.I | re.S)
1329+
q = p.match('\n' + lower_char)
13301330
self.assertTrue(q)
13311331

1332-
p = re.compile(lower_char, re.I | re.U)
1333-
q = p.match(upper_char)
1332+
p = re.compile('.' + lower_char, re.I | re.S)
1333+
q = p.match('\n' + upper_char)
13341334
self.assertTrue(q)
13351335

1336-
p = re.compile('(?i)' + upper_char, re.U)
1337-
q = p.match(lower_char)
1336+
p = re.compile('(?i).' + upper_char, re.S)
1337+
q = p.match('\n' + lower_char)
13381338
self.assertTrue(q)
13391339

1340-
p = re.compile('(?i)' + lower_char, re.U)
1341-
q = p.match(upper_char)
1340+
p = re.compile('(?i).' + lower_char, re.S)
1341+
q = p.match('\n' + upper_char)
13421342
self.assertTrue(q)
13431343

1344-
p = re.compile('(?iu)' + upper_char)
1345-
q = p.match(lower_char)
1344+
p = re.compile('(?is).' + upper_char)
1345+
q = p.match('\n' + lower_char)
13461346
self.assertTrue(q)
13471347

1348-
p = re.compile('(?iu)' + lower_char)
1349-
q = p.match(upper_char)
1348+
p = re.compile('(?is).' + lower_char)
1349+
q = p.match('\n' + upper_char)
13501350
self.assertTrue(q)
13511351

1352-
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
1353-
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
1352+
p = re.compile('(?s)(?i).' + upper_char)
1353+
q = p.match('\n' + lower_char)
1354+
self.assertTrue(q)
1355+
1356+
p = re.compile('(?s)(?i).' + lower_char)
1357+
q = p.match('\n' + upper_char)
1358+
self.assertTrue(q)
1359+
1360+
self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
1361+
self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
1362+
self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
1363+
self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
1364+
self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))
13541365

13551366
p = upper_char + '(?i)'
13561367
with self.assertWarns(DeprecationWarning) as warns:
@@ -1368,6 +1379,26 @@ def test_inline_flags(self):
13681379
'Flags not at the start of the expression %s (truncated)' % p[:20]
13691380
)
13701381

1382+
with self.assertWarns(DeprecationWarning):
1383+
self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
1384+
with self.assertWarns(DeprecationWarning):
1385+
self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char))
1386+
with self.assertWarns(DeprecationWarning):
1387+
self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char))
1388+
with self.assertWarns(DeprecationWarning):
1389+
self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
1390+
with self.assertWarns(DeprecationWarning):
1391+
self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
1392+
with self.assertWarns(DeprecationWarning):
1393+
self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
1394+
with self.assertWarns(DeprecationWarning):
1395+
self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
1396+
lower_char))
1397+
with self.assertWarns(DeprecationWarning):
1398+
self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
1399+
lower_char))
1400+
1401+
13711402
def test_dollar_matches_twice(self):
13721403
"$ matches the end of string, and just before the terminating \n"
13731404
pattern = re.compile('$')

Misc/NEWS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,12 @@ Extension Modules
320320
Library
321321
-------
322322

323+
- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers.
324+
Several consecutive inline modifiers are now allowed at the start of the
325+
pattern (e.g. ``'(?i)(?s)...'``). In verbose mode, whitespace and comments
326+
are now allowed before and between inline modifiers (e.g.
327+
``'(?x) (?i) (?s)...'``).
328+
323329
- bpo-30285: Optimized case-insensitive matching and searching of regular
324330
expressions.
325331

0 commit comments

Comments
 (0)