Skip to content

gh-98401: Reject invalid escape sequences in strings #98404

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ a literal backslash, one might have to write ``'\\\\'`` as the pattern
string, because the regular expression must be ``\\``, and each
backslash must be expressed as ``\\`` inside a regular Python string
literal. Also, please note that any invalid escape sequences in Python's
usage of the backslash in string literals now generate a :exc:`DeprecationWarning`
and in the future this will become a :exc:`SyntaxError`. This behaviour
will happen even if it is a valid escape sequence for a regular expression.
usage of the backslash in string literals now generate a :exc:`SyntaxError`.
This happens even if the sequence is a valid escape sequence for a regular
expression.

The solution is to use Python's raw string notation for regular expression
patterns; backslashes are not handled in any special way in a string literal
Expand Down
7 changes: 4 additions & 3 deletions Doc/reference/lexical_analysis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -646,9 +646,10 @@ escape sequences only recognized in string literals fall into the category of
unrecognized escapes for bytes literals.

.. versionchanged:: 3.6
Unrecognized escape sequences produce a :exc:`DeprecationWarning`. In
a future Python version they will be a :exc:`SyntaxWarning` and
eventually a :exc:`SyntaxError`.
Unrecognized escape sequences produce a :exc:`DeprecationWarning`.

.. versionchanged:: 3.12
Unrecognized escape sequences produce a :exc:`SyntaxError`.

Even in a raw literal, quotes can be escaped with a backslash, but the
backslash remains in the result; for example, ``r"\""`` is a valid string
Expand Down
4 changes: 4 additions & 0 deletions Doc/whatsnew/3.12.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ New Features
Other Language Changes
======================

* A backslash-character pair that is not a valid escape sequence now generates
a :exc:`SyntaxError`.
(Contributed by Victor Stinner in :gh:`98401`.)

* :class:`types.MappingProxyType` instances are now hashable if the underlying
mapping is hashable.
(Contributed by Serhiy Storchaka in :gh:`87995`.)
Expand Down
20 changes: 10 additions & 10 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,15 +1197,15 @@ def test_escape(self):
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtvx':
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(b"\\" + b, b"\\" + b)
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(b"\\" + b.upper(), b"\\" + b.upper())
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(br"\8", b"\\8")
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(br"\9", b"\\9")
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(b"\\\xfa", b"\\\xfa")
for i in range(0o400, 0o1000):
with self.assertWarns(DeprecationWarning):
Expand Down Expand Up @@ -2425,16 +2425,16 @@ def test_escape_decode(self):
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtuvx':
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(b"\\" + b, "\\" + chr(i))
if b.upper() not in b'UN':
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(b"\\" + b.upper(), "\\" + chr(i-32))
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(br"\8", "\\8")
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(br"\9", "\\9")
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
check(b"\\\xfa", "\\\xfa")
for i in range(0o400, 0o1000):
with self.assertWarns(DeprecationWarning):
Expand Down
10 changes: 5 additions & 5 deletions Lib/test/test_codeop.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def test_warning(self):
(".*literal", SyntaxWarning),
(".*invalid", DeprecationWarning),
) as w:
compile_command(r"'\e' is 0")
compile_command(r"'\777' is 0")
self.assertEqual(len(w.warnings), 2)

# bpo-41520: check SyntaxWarning treated as a SyntaxError
Expand All @@ -324,21 +324,21 @@ def test_warning(self):
# Check DeprecationWarning treated as a SyntaxError
with warnings.catch_warnings(), self.assertRaises(SyntaxError):
warnings.simplefilter('error', DeprecationWarning)
compile_command(r"'\e'", symbol='exec')
compile_command(r"'\777'", symbol='exec')

def test_incomplete_warning(self):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
self.assertIncomplete("'\\e' + (")
self.assertIncomplete("'\\777' + (")
self.assertEqual(w, [])

def test_invalid_warning(self):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
self.assertInvalid("'\\e' 1")
self.assertInvalid("'\\777' 1")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].category, DeprecationWarning)
self.assertRegex(str(w[0].message), 'invalid escape sequence')
self.assertRegex(str(w[0].message), 'invalid octal escape sequence')
self.assertEqual(w[0].filename, '<input>')


Expand Down
6 changes: 3 additions & 3 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,9 +776,9 @@ def test_backslashes_in_string_part(self):
self.assertEqual(f'2\x203', '2 3')
self.assertEqual(f'\x203', ' 3')

with self.assertWarns(DeprecationWarning): # invalid escape sequence
value = eval(r"f'\{6*7}'")
self.assertEqual(value, '\\42')
with self.assertRaisesRegex(SyntaxError, 'invalid escape sequence'):
eval(r"f'\{6*7}'")

self.assertEqual(f'\\{6*7}', '\\42')
self.assertEqual(fr'\{6*7}', '\\42')

Expand Down
30 changes: 9 additions & 21 deletions Lib/test/test_string_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,23 +109,12 @@ def test_eval_str_invalid_escape(self):
for b in range(1, 128):
if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
continue
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always', category=DeprecationWarning)
with self.assertRaises(SyntaxError) as cm:
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 1)

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=DeprecationWarning)
with self.assertRaises(SyntaxError) as cm:
eval("'''\n\\z'''")
exc = cm.exception
self.assertEqual(w, [])
exc = cm.exception
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 1)
Expand Down Expand Up @@ -186,16 +175,15 @@ def test_eval_bytes_invalid_escape(self):
for b in range(1, 128):
if b in b"""\n\r"'01234567\\abfnrtvx""":
continue
with self.assertWarns(DeprecationWarning):
with self.assertRaises(SyntaxError):
self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always', category=DeprecationWarning)
with self.assertRaises(SyntaxError) as cm:
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 1)
exc = cm.exception
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 1)

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=DeprecationWarning)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
A backslash-character pair that is not a valid escape sequence now generates
a :exc:`SyntaxError`. Patch by Victor Stinner.
11 changes: 4 additions & 7 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1192,13 +1192,10 @@ PyObject *PyBytes_DecodeEscape(const char *s,
}
}
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
c) < 0)
{
Py_DECREF(result);
return NULL;
}
PyErr_Format(PyExc_SyntaxError,
"invalid escape sequence '\\%c'", c);
Py_DECREF(result);
return NULL;
}
}
return result;
Expand Down
11 changes: 4 additions & 7 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5967,13 +5967,10 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
}
}
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
c) < 0)
{
Py_DECREF(result);
return NULL;
}
PyErr_Format(PyExc_SyntaxError,
"invalid escape sequence '\\%c'", c);
Py_DECREF(result);
return NULL;
}
}
return result;
Expand Down
54 changes: 29 additions & 25 deletions Parser/string_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,38 +13,42 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
{
unsigned char c = *first_invalid_escape;
int octal = ('4' <= c && c <= '7');
PyObject *msg =
octal
? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
first_invalid_escape)
: PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
if (msg == NULL) {
return -1;
}
if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
t->lineno, NULL, NULL) < 0) {
if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
/* Replace the DeprecationWarning exception with a SyntaxError
to get a more accurate error report */
PyErr_Clear();

/* This is needed, in order for the SyntaxError to point to the token t,
since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
error location, if p->known_err_token is not set. */
p->known_err_token = t;
if (octal) {

if (octal) {
PyObject *msg = PyUnicode_FromFormat(
"invalid octal escape sequence '\\%.3s'",
first_invalid_escape);
if (msg == NULL) {
return -1;
}
if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
t->lineno, NULL, NULL) < 0) {
if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
/* Replace the DeprecationWarning exception with a SyntaxError
to get a more accurate error report */
PyErr_Clear();

/* This is needed, in order for the SyntaxError to point to the token t,
since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
error location, if p->known_err_token is not set. */
p->known_err_token = t;
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
first_invalid_escape);
}
else {
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
}
Py_DECREF(msg);
return -1;
}
Py_DECREF(msg);
return 0;
}
else {
/* This is needed, in order for the SyntaxError to point to the token t,
since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
error location, if p->known_err_token is not set. */
p->known_err_token = t;
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
return -1;
}
Py_DECREF(msg);
return 0;
}

static PyObject *
Expand Down