-
Notifications
You must be signed in to change notification settings - Fork 13.6k
fix(llvm/**.py): fix invalid escape sequences #94035
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository; in that case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR with a comment. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not a huge deal, but LLVM uses a different commit naming style than whatever you're using here:
When the changes are restricted to a specific part of the code (e.g. a back-end or optimization pass), it is customary to add a tag to the beginning of the line in square brackets. For example, “[SCEV] …” or “[OpenMP] …”. This helps email filters and searches for post-commit reviews.
(From the developer policy)
@llvm/pr-subscribers-backend-nvptx Author: Eisuke Kawashima (e-kwsm) Changes: Full diff: https://github.com/llvm/llvm-project/pull/94035.diff 12 Files Affected:
diff --git a/llvm/test/CodeGen/NVPTX/wmma.py b/llvm/test/CodeGen/NVPTX/wmma.py
index e1e46f0b8cab3..8f6378e3ce3cc 100644
--- a/llvm/test/CodeGen/NVPTX/wmma.py
+++ b/llvm/test/CodeGen/NVPTX/wmma.py
@@ -847,7 +847,7 @@ def gen_check_unsupported_ops(items):
)
print("; INTRINSICS: {{^; INTRINSICS_LIST_BEGIN}}")
print(
- """
+ r"""
; NOEXTGEOM-NOT: {{m8n32|m32n8}}
; NOINT-NOT: .{{s32|s8}}
diff --git a/llvm/tools/opt-viewer/opt-viewer.py b/llvm/tools/opt-viewer/opt-viewer.py
index c9c7db726f765..2d32d66bbe3ae 100755
--- a/llvm/tools/opt-viewer/opt-viewer.py
+++ b/llvm/tools/opt-viewer/opt-viewer.py
@@ -135,7 +135,7 @@ def render_inline_remarks(self, r, line):
# Column is the number of characters *including* tabs, keep those and
# replace everything else with spaces.
indent = line[: max(r.Column, 1) - 1]
- indent = re.sub("\S", " ", indent)
+ indent = re.sub(r"\S", " ", indent)
# Create expanded message and link if we have a multiline message.
lines = r.message.split("\n")
diff --git a/llvm/utils/DSAclean.py b/llvm/utils/DSAclean.py
index 1b833ff893248..6937ca9e614f5 100755
--- a/llvm/utils/DSAclean.py
+++ b/llvm/utils/DSAclean.py
@@ -24,7 +24,7 @@
# it would kill old computers
buffer = input.readline()
while buffer != "":
- if re.compile('label(\s*)=(\s*)"\s%tmp(.\w*)*(\s*)"').search(buffer):
+ if re.compile(r'label(\s*)=(\s*)"\s%tmp(.\w*)*(\s*)"').search(buffer):
# skip next line, write neither this line nor the next
buffer = input.readline()
else:
diff --git a/llvm/utils/DSAextract.py b/llvm/utils/DSAextract.py
index 96f818bd2a831..9787922e5f674 100755
--- a/llvm/utils/DSAextract.py
+++ b/llvm/utils/DSAextract.py
@@ -69,7 +69,7 @@
# if this name is for the current node, add the dot variable name
# for the node (it will be Node(hex number)) to our set of nodes
if regexp.search(buffer):
- node_set |= set([re.split("\s+", buffer, 2)[1]])
+ node_set |= set([re.split(r"\s+", buffer, 2)[1]])
break
buffer = input.readline()
@@ -105,7 +105,7 @@
if nodes[0][:13] in node_set and nodes[1][:13] in node_set:
output.write(buffer)
elif nodeexp.search(buffer): # this is a node line
- node = re.split("\s+", buffer, 2)[1]
+ node = re.split(r"\s+", buffer, 2)[1]
if node in node_set:
output.write(buffer)
else: # this is a support line
diff --git a/llvm/utils/add_argument_names.py b/llvm/utils/add_argument_names.py
index 2860dd4b90bcb..af2f1297d9ee7 100755
--- a/llvm/utils/add_argument_names.py
+++ b/llvm/utils/add_argument_names.py
@@ -4,7 +4,7 @@
def fix_string(s):
TYPE = re.compile(
- '\s*(i[0-9]+|float|double|x86_fp80|fp128|ppc_fp128|\[\[.*?\]\]|\[2 x \[\[[A-Z_0-9]+\]\]\]|<.*?>|{.*?}|\[[0-9]+ x .*?\]|%["a-z:A-Z0-9._]+({{.*?}})?|%{{.*?}}|{{.*?}}|\[\[.*?\]\])(\s*(\*|addrspace\(.*?\)|dereferenceable\(.*?\)|byval\(.*?\)|sret|zeroext|inreg|returned|signext|nocapture|align \d+|swiftself|swifterror|readonly|noalias|inalloca|nocapture))*\s*'
+ r'\s*(i[0-9]+|float|double|x86_fp80|fp128|ppc_fp128|\[\[.*?\]\]|\[2 x \[\[[A-Z_0-9]+\]\]\]|<.*?>|{.*?}|\[[0-9]+ x .*?\]|%["a-z:A-Z0-9._]+({{.*?}})?|%{{.*?}}|{{.*?}}|\[\[.*?\]\])(\s*(\*|addrspace\(.*?\)|dereferenceable\(.*?\)|byval\(.*?\)|sret|zeroext|inreg|returned|signext|nocapture|align \d+|swiftself|swifterror|readonly|noalias|inalloca|nocapture))*\s*'
)
counter = 0
diff --git a/llvm/utils/convert-constraint-log-to-z3.py b/llvm/utils/convert-constraint-log-to-z3.py
index a3c33f2ef4599..f96ac3106f68b 100755
--- a/llvm/utils/convert-constraint-log-to-z3.py
+++ b/llvm/utils/convert-constraint-log-to-z3.py
@@ -46,7 +46,7 @@ def main():
content = f.read()
groups = content.split("---")
- var_re = re.compile("x\d+")
+ var_re = re.compile(r"x\d+")
print("from z3 import *")
for group in groups:
diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py
index 10fdf14acd158..726196c28ce1c 100755
--- a/llvm/utils/extract_symbols.py
+++ b/llvm/utils/extract_symbols.py
@@ -53,12 +53,12 @@ def nm_get_symbols(tool, lib):
# The -P flag displays the size field for symbols only when applicable,
# so the last field is optional. There's no space after the value field,
# but \s+ match newline also, so \s+\S* will match the optional size field.
- match = re.match("^(\S+)\s+[BDGRSTuVW]\s+\S+\s+\S*$", line)
+ match = re.match(r"^(\S+)\s+[BDGRSTuVW]\s+\S+\s+\S*$", line)
if match:
yield (match.group(1), True)
# Look for undefined symbols, which have type U and may or may not
# (depending on which nm is being used) have value and size.
- match = re.match("^(\S+)\s+U\s+(\S+\s+\S*)?$", line)
+ match = re.match(r"^(\S+)\s+U\s+(\S+\s+\S*)?$", line)
if match:
yield (match.group(1), False)
process.wait()
@@ -71,7 +71,7 @@ def readobj_is_32bit_windows(tool, lib):
[tool, "--file-header", lib], universal_newlines=True
)
for line in output.splitlines():
- match = re.match("Format: (\S+)", line)
+ match = re.match(r"Format: (\S+)", line)
if match:
return match.group(1) == "COFF-i386"
return False
@@ -100,10 +100,10 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
# An anonymous namespace is mangled as ?A(maybe hex number)@. Any symbol
# that mentions an anonymous namespace can be discarded, as the anonymous
# namespace doesn't exist outside of that translation unit.
- elif re.search("\?A(0x\w+)?@", symbol):
+ elif re.search(r"\?A(0x\w+)?@", symbol):
return None
# Skip X86GenMnemonicTables functions, they are not exposed from llvm/include/.
- elif re.match("\?is[A-Z0-9]*@X86@llvm", symbol):
+ elif re.match(r"\?is[A-Z0-9]*@X86@llvm", symbol):
return None
# Keep mangled llvm:: and clang:: function symbols. How we detect these is a
# bit of a mess and imprecise, but that avoids having to completely demangle
@@ -169,7 +169,7 @@ class TooComplexName(Exception):
# (name, rest of string) pair.
def parse_itanium_name(arg):
# Check for a normal name
- match = re.match("(\d+)(.+)", arg)
+ match = re.match(r"(\d+)(.+)", arg)
if match:
n = int(match.group(1))
name = match.group(1) + match.group(2)[:n]
@@ -196,7 +196,7 @@ def skip_itanium_template(arg):
tmp = arg[1:]
while tmp:
# Check for names
- match = re.match("(\d+)(.+)", tmp)
+ match = re.match(r"(\d+)(.+)", tmp)
if match:
n = int(match.group(1))
tmp = match.group(2)[n:]
@@ -280,19 +280,19 @@ def parse_microsoft_mangling(arg):
if arg.startswith("@"):
return components
# Check for a simple name
- match = re.match("(\w+)@(.+)", arg)
+ match = re.match(r"(\w+)@(.+)", arg)
if match:
components.append((match.group(1), False))
arg = match.group(2)
continue
# Check for a special function name
- match = re.match("(\?_?\w)(.+)", arg)
+ match = re.match(r"(\?_?\w)(.+)", arg)
if match:
components.append((match.group(1), False))
arg = match.group(2)
continue
# Check for a template name
- match = re.match("\?\$(\w+)@[^@]+@(.+)", arg)
+ match = re.match(r"\?\$(\w+)@[^@]+@(.+)", arg)
if match:
components.append((match.group(1), True))
arg = match.group(2)
diff --git a/llvm/utils/extract_vplan.py b/llvm/utils/extract_vplan.py
index cff6f5074d771..96b9fd1b9a936 100755
--- a/llvm/utils/extract_vplan.py
+++ b/llvm/utils/extract_vplan.py
@@ -24,7 +24,7 @@
matches = re.findall(pattern, sys.stdin.read())
for vplan in matches:
- m = re.search("graph \[.+(VF=.+,UF.+)", vplan)
+ m = re.search(r"graph \[.+(VF=.+,UF.+)", vplan)
if not m:
raise ValueError("Can't get the right VPlan name")
name = re.sub("[^a-zA-Z0-9]", "", m.group(1))
diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py
index 1766ccb38ba25..be9e3ed4b611f 100755
--- a/llvm/utils/git/github-automation.py
+++ b/llvm/utils/git/github-automation.py
@@ -52,9 +52,9 @@ def escape_description(str):
# https://github.com/github/markup/issues/1168#issuecomment-494946168
str = html.escape(str, False)
# '@' followed by alphanum is a user name
- str = re.sub("@(?=\w)", "@<!-- -->", str)
+ str = re.sub(r"@(?=\w)", "@<!-- -->", str)
# '#' followed by digits is considered an issue number
- str = re.sub("#(?=\d)", "#<!-- -->", str)
+ str = re.sub(r"#(?=\d)", "#<!-- -->", str)
return str
@@ -483,7 +483,7 @@ def issue_remove_cherry_pick_failed_label(self):
def get_main_commit(self, cherry_pick_sha: str) -> github.Commit.Commit:
commit = self.repo.get_commit(cherry_pick_sha)
message = commit.commit.message
- m = re.search("\(cherry picked from commit ([0-9a-f]+)\)", message)
+ m = re.search(r"\(cherry picked from commit ([0-9a-f]+)\)", message)
if not m:
return None
return self.repo.get_commit(m.group(1))
diff --git a/llvm/utils/indirect_calls.py b/llvm/utils/indirect_calls.py
index 2bdabc8c4d74f..c96d780d9faa1 100755
--- a/llvm/utils/indirect_calls.py
+++ b/llvm/utils/indirect_calls.py
@@ -33,7 +33,7 @@ def look_for_indirect(file):
for line in stdout.splitlines():
if line.startswith(" ") == False:
function = line
- result = re.search("(call|jmp).*\*", line)
+ result = re.search(r"(call|jmp).*\*", line)
if result != None:
# TODO: Perhaps use cxxfilt to demangle functions?
print(function)
diff --git a/llvm/utils/relative_lines.py b/llvm/utils/relative_lines.py
index 86b190d5ea182..d4c2d9d0429b9 100755
--- a/llvm/utils/relative_lines.py
+++ b/llvm/utils/relative_lines.py
@@ -17,8 +17,8 @@
USAGE = """Example usage:
find -type f clang/test/CodeCompletion | grep -v /Inputs/ | \\
xargs relative_lines.py --dry-run --verbose --near=100 \\
- --pattern='-code-completion-at[ =]%s:(\d+)' \\
- --pattern='requires fix-it: {(\d+):\d+-(\d+):\d+}'
+ --pattern='-code-completion-at[ =]%s:(\\d+)' \\
+ --pattern='requires fix-it: {(\\d+):\\d+-(\\d+):\\d+}'
"""
import argparse
diff --git a/llvm/utils/update_test_prefix.py b/llvm/utils/update_test_prefix.py
index 434dc84fa4fc0..d11f2ae43abc4 100755
--- a/llvm/utils/update_test_prefix.py
+++ b/llvm/utils/update_test_prefix.py
@@ -17,13 +17,13 @@ def remove_prefix(i, d=0):
p = r.group(1)
s = re.sub("=" + p + ",", "=", s)
s = re.sub("," + p + "([, \n])", "\\1", s)
- s = re.sub("\s+-?-check-prefix=" + p + "([ \n])", "\\1", s)
+ s = re.sub("\\s+-?-check-prefix=" + p + "([ \n])", "\\1", s)
else:
s = re.sub(
- "-?-check-prefixes=([\w-]+)(\Z|[ \t\n])", "--check-prefix=\\1\\2", s
+ "-?-check-prefixes=([\\w-]+)(\\Z|[ \t\n])", "--check-prefix=\\1\\2", s
)
t = re.search(
- "-?-check-(?:prefix|prefixes)=([^ ]+)\s+-?-check-(?:prefix|prefixes)=([^ ]+)",
+ r"-?-check-(?:prefix|prefixes)=([^ ]+)\s+-?-check-(?:prefix|prefixes)=([^ ]+)",
s,
)
while t:
@@ -31,10 +31,10 @@ def remove_prefix(i, d=0):
t.group(), "--check-prefixes=" + t.group(1) + "," + t.group(2), s
)
t = re.search(
- "-?-check-(?:prefix|prefixes)=([^ ]+)\s+-?-check-(?:prefix|prefixes)=([^ ]+)",
+ r"-?-check-(?:prefix|prefixes)=([^ ]+)\s+-?-check-(?:prefix|prefixes)=([^ ]+)",
s,
)
- s = re.sub("\s+-?-check-prefix=CHECK[ \t]*\n", "\n", s)
+ s = re.sub("\\s+-?-check-prefix=CHECK[ \t]*\n", "\n", s)
f.truncate(0)
f.seek(0)
f.write(s)
|
I guess the context here is the second bullet point in https://docs.python.org/3/whatsnew/3.12.html#other-language-changes. |
@@ -17,24 +17,24 @@ def remove_prefix(i, d=0): | |||
p = r.group(1) | |||
s = re.sub("=" + p + ",", "=", s) | |||
s = re.sub("," + p + "([, \n])", "\\1", s) | |||
s = re.sub("\s+-?-check-prefix=" + p + "([ \n])", "\\1", s) | |||
s = re.sub("\\s+-?-check-prefix=" + p + "([ \n])", "\\1", s) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer to use raw strings here as well. `\n` is both a Python and a regex escape sequence, and there's no need to use the Python one here if it makes things less similar to other places.
The wmma.py change in the NVPTX backend looks good to me. @Artem-B, please let us know what you think. |
--pattern='-code-completion-at[ =]%s:(\d+)' \\ | ||
--pattern='requires fix-it: {(\d+):\d+-(\d+):\d+}' | ||
--pattern='-code-completion-at[ =]%s:(\\d+)' \\ | ||
--pattern='requires fix-it: {(\\d+):\\d+-(\\d+):\\d+}' |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can use a raw string here as well if you replace the existing `\\` with `\`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 to using `r""` everywhere. LGTM otherwise.
I think it's OK to include the change in the NVPTX test -- it fits with the rest of the string fixes and has nothing to do with the test functionality itself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still think we should avoid this weird mix of python and regex escapes.
@@ -17,24 +17,24 @@ def remove_prefix(i, d=0): | |||
p = r.group(1) | |||
s = re.sub("=" + p + ",", "=", s) | |||
s = re.sub("," + p + "([, \n])", "\\1", s) | |||
s = re.sub("\s+-?-check-prefix=" + p + "([ \n])", "\\1", s) | |||
s = re.sub("\\s+-?-check-prefix=" + p + "([ \n])", "\\1", s) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s = re.sub("\\s+-?-check-prefix=" + p + "([ \n])", "\\1", s) | |
s = re.sub(r"\s+-?-check-prefix=" + p + "([ \n])", "\\1", s) |
else: | ||
s = re.sub( | ||
"-?-check-prefixes=([\w-]+)(\Z|[ \t\n])", "--check-prefix=\\1\\2", s | ||
"-?-check-prefixes=([\\w-]+)(\\Z|[ \t\n])", "--check-prefix=\\1\\2", s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"-?-check-prefixes=([\\w-]+)(\\Z|[ \t\n])", "--check-prefix=\\1\\2", s | |
r"-?-check-prefixes=([\w-]+)(\Z|[ \t\n])", "--check-prefix=\\1\\2", s |
s, | ||
) | ||
s = re.sub("\s+-?-check-prefix=CHECK[ \t]*\n", "\n", s) | ||
s = re.sub("\\s+-?-check-prefix=CHECK[ \t]*\n", "\n", s) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s = re.sub("\\s+-?-check-prefix=CHECK[ \t]*\n", "\n", s) | |
s = re.sub(r"\s+-?-check-prefix=CHECK[ \t]*\n", "\n", s) |
Use of invalid escape sequences is deprecated and generates a SyntaxWarning in Python 3.12. Use raw strings to avoid this.