1212// file (like "_PyPegen_raise_syntax_error").
1313
1414static int
15- warn_invalid_escape_sequence (Parser * p , unsigned char first_invalid_escape_char )
15+ warn_invalid_escape_sequence (Parser * p , unsigned char first_invalid_escape_char , Token * t )
1616{
1717 PyObject * msg =
1818 PyUnicode_FromFormat ("invalid escape sequence \\%c" , first_invalid_escape_char );
1919 if (msg == NULL ) {
2020 return -1 ;
2121 }
2222 if (PyErr_WarnExplicitObject (PyExc_DeprecationWarning , msg , p -> tok -> filename ,
23- p -> tok -> lineno , NULL , NULL ) < 0 ) {
23+ t -> lineno , NULL , NULL ) < 0 ) {
2424 if (PyErr_ExceptionMatches (PyExc_DeprecationWarning )) {
2525 /* Replace the DeprecationWarning exception with a SyntaxError
2626 to get a more accurate error report */
2727 PyErr_Clear ();
28+
29+ /* Set p->known_err_token so that the SyntaxError points to the token t.
30+ If it is not set, _PyPegen_raise_error uses p->tokens[p->fill - 1] for
31+ the error location instead. */
32+ p -> known_err_token = t ;
2833 RAISE_SYNTAX_ERROR ("invalid escape sequence \\%c" , first_invalid_escape_char );
2934 }
3035 Py_DECREF (msg );
@@ -47,7 +52,7 @@ decode_utf8(const char **sPtr, const char *end)
4752}
4853
4954static PyObject *
50- decode_unicode_with_escapes (Parser * parser , const char * s , size_t len )
55+ decode_unicode_with_escapes (Parser * parser , const char * s , size_t len , Token * t )
5156{
5257 PyObject * v , * u ;
5358 char * buf ;
@@ -110,7 +115,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len)
110115 v = _PyUnicode_DecodeUnicodeEscape (s , len , NULL , & first_invalid_escape );
111116
112117 if (v != NULL && first_invalid_escape != NULL ) {
113- if (warn_invalid_escape_sequence (parser , * first_invalid_escape ) < 0 ) {
118+ if (warn_invalid_escape_sequence (parser , * first_invalid_escape , t ) < 0 ) {
114119 /* u has not been decref'ed before this point because
115120 first_invalid_escape points inside u. */
116121 Py_XDECREF (u );
@@ -123,7 +128,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len)
123128}
124129
125130static PyObject *
126- decode_bytes_with_escapes (Parser * p , const char * s , Py_ssize_t len )
131+ decode_bytes_with_escapes (Parser * p , const char * s , Py_ssize_t len , Token * t )
127132{
128133 const char * first_invalid_escape ;
129134 PyObject * result = _PyBytes_DecodeEscape (s , len , NULL , & first_invalid_escape );
@@ -132,7 +137,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len)
132137 }
133138
134139 if (first_invalid_escape != NULL ) {
135- if (warn_invalid_escape_sequence (p , * first_invalid_escape ) < 0 ) {
140+ if (warn_invalid_escape_sequence (p , * first_invalid_escape , t ) < 0 ) {
136141 Py_DECREF (result );
137142 return NULL ;
138143 }
@@ -146,9 +151,14 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len)
146151 If the string is an f-string, set *fstr and *fstrlen to the unparsed
147152 string object. Return 0 if no errors occurred. */
148153int
149- _PyPegen_parsestr (Parser * p , const char * s , int * bytesmode , int * rawmode , PyObject * * result ,
150- const char * * fstr , Py_ssize_t * fstrlen )
154+ _PyPegen_parsestr (Parser * p , int * bytesmode , int * rawmode , PyObject * * result ,
155+ const char * * fstr , Py_ssize_t * fstrlen , Token * t )
151156{
157+ const char * s = PyBytes_AsString (t -> bytes );
158+ if (s == NULL ) {
159+ return -1 ;
160+ }
161+
152162 size_t len ;
153163 int quote = Py_CHARMASK (* s );
154164 int fmode = 0 ;
@@ -245,15 +255,15 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje
245255 * result = PyBytes_FromStringAndSize (s , len );
246256 }
247257 else {
248- * result = decode_bytes_with_escapes (p , s , len );
258+ * result = decode_bytes_with_escapes (p , s , len , t );
249259 }
250260 }
251261 else {
252262 if (* rawmode ) {
253263 * result = PyUnicode_DecodeUTF8Stateful (s , len , NULL , NULL );
254264 }
255265 else {
256- * result = decode_unicode_with_escapes (p , s , len );
266+ * result = decode_unicode_with_escapes (p , s , len , t );
257267 }
258268 }
259269 return * result == NULL ? -1 : 0 ;
@@ -637,7 +647,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
637647*/
638648static int
639649fstring_find_literal (Parser * p , const char * * str , const char * end , int raw ,
640- PyObject * * literal , int recurse_lvl )
650+ PyObject * * literal , int recurse_lvl , Token * t )
641651{
642652 /* Get any literal string. It ends when we hit an un-doubled left
643653 brace (which isn't part of a unicode name escape such as
@@ -660,7 +670,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
660670 }
661671 break ;
662672 }
663- if (ch == '{' && warn_invalid_escape_sequence (p , ch ) < 0 ) {
673+ if (ch == '{' && warn_invalid_escape_sequence (p , ch , t ) < 0 ) {
664674 return -1 ;
665675 }
666676 }
@@ -704,7 +714,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
704714 NULL , NULL );
705715 else
706716 * literal = decode_unicode_with_escapes (p , literal_start ,
707- s - literal_start );
717+ s - literal_start , t );
708718 if (!* literal )
709719 return -1 ;
710720 }
@@ -1041,7 +1051,7 @@ fstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int
10411051 assert (* literal == NULL && * expression == NULL );
10421052
10431053 /* Get any literal string. */
1044- result = fstring_find_literal (p , str , end , raw , literal , recurse_lvl );
1054+ result = fstring_find_literal (p , str , end , raw , literal , recurse_lvl , t );
10451055 if (result < 0 )
10461056 goto error ;
10471057
0 commit comments