Skip to content

Commit e7303fe

Browse files
authored
[Python] Use raw string literals for regexes (#120401)
Previously, these backslashes were not followed by a valid escape-sequence character, so they were treated as literal backslashes, which was the intended behaviour of the code. However, as of Python 3.12 such invalid escape sequences produce a SyntaxWarning, so we should use raw string literals for regexes so that backslashes are always interpreted literally. I've done this for every regex in this file for consistency, including the ones that do not contain backslashes.
1 parent 6da676a commit e7303fe

File tree

1 file changed

+19
-19
lines changed

1 file changed

+19
-19
lines changed

llvm/utils/extract_symbols.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,12 @@ def nm_get_symbols(tool, lib):
5353
# The -P flag displays the size field for symbols only when applicable,
5454
# so the last field is optional. There's no space after the value field,
5555
# but \s+ also matches a newline, so \s+\S* will match the optional size field.
56-
match = re.match("^(\S+)\s+[BDGRSTuVW]\s+\S+\s+\S*$", line)
56+
match = re.match(r"^(\S+)\s+[BDGRSTuVW]\s+\S+\s+\S*$", line)
5757
if match:
5858
yield (match.group(1), True)
5959
# Look for undefined symbols, which have type U and may or may not
6060
# (depending on which nm is being used) have value and size.
61-
match = re.match("^(\S+)\s+U\s+(\S+\s+\S*)?$", line)
61+
match = re.match(r"^(\S+)\s+U\s+(\S+\s+\S*)?$", line)
6262
if match:
6363
yield (match.group(1), False)
6464
process.wait()
@@ -71,7 +71,7 @@ def readobj_is_32bit_windows(tool, lib):
7171
[tool, "--file-header", lib], universal_newlines=True
7272
)
7373
for line in output.splitlines():
74-
match = re.match("Format: (\S+)", line)
74+
match = re.match(r"Format: (\S+)", line)
7575
if match:
7676
return match.group(1) == "COFF-i386"
7777
return False
@@ -85,7 +85,7 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
8585
if not "?" in symbol:
8686
if calling_convention_decoration:
8787
# Remove calling convention decoration from names
88-
match = re.match("[_@]([^@]+)", symbol)
88+
match = re.match(r"[_@]([^@]+)", symbol)
8989
if match:
9090
symbol = match.group(1)
9191
# Discard floating point/SIMD constants.
@@ -100,10 +100,10 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
100100
# An anonymous namespace is mangled as ?A(maybe hex number)@. Any symbol
101101
# that mentions an anonymous namespace can be discarded, as the anonymous
102102
# namespace doesn't exist outside of that translation unit.
103-
elif re.search("\?A(0x\w+)?@", symbol):
103+
elif re.search(r"\?A(0x\w+)?@", symbol):
104104
return None
105105
# Skip X86GenMnemonicTables functions, they are not exposed from llvm/include/.
106-
elif re.match("\?is[A-Z0-9]*@X86@llvm", symbol):
106+
elif re.match(r"\?is[A-Z0-9]*@X86@llvm", symbol):
107107
return None
108108
# Keep mangled llvm:: and clang:: function symbols. How we detect these is a
109109
# bit of a mess and imprecise, but that avoids having to completely demangle
@@ -123,7 +123,7 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
123123
# ::= .+@ (list of types)
124124
# ::= .*Z (list of types, varargs)
125125
# <throw-spec> ::= exceptions are not allowed
126-
elif re.search("(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$", symbol):
126+
elif re.search(r"(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$", symbol):
127127
return symbol
128128
return None
129129

@@ -140,7 +140,7 @@ def should_keep_itanium_symbol(symbol, calling_convention_decoration):
140140
if not symbol.startswith("_") and not symbol.startswith("."):
141141
return symbol
142142
# Discard manglings that aren't nested names
143-
match = re.match("\.?_Z(T[VTIS])?(N.+)", symbol)
143+
match = re.match(r"\.?_Z(T[VTIS])?(N.+)", symbol)
144144
if not match:
145145
return None
146146
# Demangle the name. If the name is too complex then we don't need to keep
@@ -169,19 +169,19 @@ class TooComplexName(Exception):
169169
# (name, rest of string) pair.
170170
def parse_itanium_name(arg):
171171
# Check for a normal name
172-
match = re.match("(\d+)(.+)", arg)
172+
match = re.match(r"(\d+)(.+)", arg)
173173
if match:
174174
n = int(match.group(1))
175175
name = match.group(1) + match.group(2)[:n]
176176
rest = match.group(2)[n:]
177177
return name, rest
178178
# Check for constructor/destructor names
179-
match = re.match("([CD][123])(.+)", arg)
179+
match = re.match(r"([CD][123])(.+)", arg)
180180
if match:
181181
return match.group(1), match.group(2)
182182
# Assume that a sequence of characters that doesn't end a nesting is an
183183
# operator (this is very imprecise, but appears to be good enough)
184-
match = re.match("([^E]+)(.+)", arg)
184+
match = re.match(r"([^E]+)(.+)", arg)
185185
if match:
186186
return match.group(1), match.group(2)
187187
# Anything else: we can't handle it
@@ -196,13 +196,13 @@ def skip_itanium_template(arg):
196196
tmp = arg[1:]
197197
while tmp:
198198
# Check for names
199-
match = re.match("(\d+)(.+)", tmp)
199+
match = re.match(r"(\d+)(.+)", tmp)
200200
if match:
201201
n = int(match.group(1))
202202
tmp = match.group(2)[n:]
203203
continue
204204
# Check for substitutions
205-
match = re.match("S[A-Z0-9]*_(.+)", tmp)
205+
match = re.match(r"S[A-Z0-9]*_(.+)", tmp)
206206
if match:
207207
tmp = match.group(1)
208208
# Start of a template
@@ -231,14 +231,14 @@ def parse_itanium_nested_name(arg):
231231
ret = []
232232

233233
# Skip past the N, and possibly a substitution
234-
match = re.match("NS[A-Z0-9]*_(.+)", arg)
234+
match = re.match(r"NS[A-Z0-9]*_(.+)", arg)
235235
if match:
236236
tmp = match.group(1)
237237
else:
238238
tmp = arg[1:]
239239

240240
# Skip past CV-qualifiers and ref qualifiers
241-
match = re.match("[rVKRO]*(.+)", tmp)
241+
match = re.match(r"[rVKRO]*(.+)", tmp)
242242
if match:
243243
tmp = match.group(1)
244244

@@ -280,19 +280,19 @@ def parse_microsoft_mangling(arg):
280280
if arg.startswith("@"):
281281
return components
282282
# Check for a simple name
283-
match = re.match("(\w+)@(.+)", arg)
283+
match = re.match(r"(\w+)@(.+)", arg)
284284
if match:
285285
components.append((match.group(1), False))
286286
arg = match.group(2)
287287
continue
288288
# Check for a special function name
289-
match = re.match("(\?_?\w)(.+)", arg)
289+
match = re.match(r"(\?_?\w)(.+)", arg)
290290
if match:
291291
components.append((match.group(1), False))
292292
arg = match.group(2)
293293
continue
294294
# Check for a template name
295-
match = re.match("\?\$(\w+)@[^@]+@(.+)", arg)
295+
match = re.match(r"\?\$(\w+)@[^@]+@(.+)", arg)
296296
if match:
297297
components.append((match.group(1), True))
298298
arg = match.group(2)
@@ -323,7 +323,7 @@ def get_template_name(sym, mangling):
323323
if mangling == "microsoft":
324324
names = parse_microsoft_mangling(sym)
325325
else:
326-
match = re.match("\.?_Z(T[VTIS])?(N.+)", sym)
326+
match = re.match(r"\.?_Z(T[VTIS])?(N.+)", sym)
327327
if match:
328328
names, _ = parse_itanium_nested_name(match.group(2))
329329
else:

0 commit comments

Comments
 (0)