gh-103656: Transfer f-string buffers to parser to avoid use-after-free #103896

Merged
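
For context: as the title and the removal of the decode_fstring_buffer helper below suggest, the parser used to re-read the tokenizer's last-expression buffer when building the debug text for `=` expressions, and that buffer could already be gone by the time the parser needed it. A minimal illustrative check on a patched interpreter, using one of the inputs added to the tests (assumption: run under Python 3.12+):

    # Previously this class of input could make the parser touch a stale
    # tokenizer buffer while reporting the error; with this change it fails
    # with a clean SyntaxError instead.
    try:
        compile("f'{1=}{;'", "<test>", "eval")
    except SyntaxError as exc:
        print(exc)  # f-string: expecting a valid expression after '{'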
11 changes: 5 additions & 6 deletions Grammar/python.gram
@@ -881,14 +881,13 @@ fstring_middle[expr_ty]:
| fstring_replacement_field
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
fstring_replacement_field[expr_ty]:
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] '}' {
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
}
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
| invalid_replacement_field
fstring_conversion[expr_ty]:
fstring_conversion[ResultTokenWithMetadata*]:
| conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
fstring_full_format_spec[expr_ty]:
| ':' spec=fstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
fstring_full_format_spec[ResultTokenWithMetadata*]:
| colon=':' spec=fstring_format_spec* { _PyPegen_setup_full_format_spec(p, colon, (asdl_expr_seq *) spec, EXTRA) }
fstring_format_spec[expr_ty]:
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
| fstring_replacement_field
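The reworked replacement-field rule now also passes the closing-brace token (`rbrace`) to `_PyPegen_formatted_value`, and the conversion/format sub-rules return `ResultTokenWithMetadata*` instead of plain expressions so their raw text travels along with the parse result. For orientation, the syntax pieces the rule covers, as an illustrative Python sketch (not part of the diff):

    value = 42
    f'{value}'      # bare replacement field
    f'{value!r}'    # with a conversion character
    f'{value:>6}'   # with a format spec
    f'{value=}'     # debug form: 'value=42', keeps the expression text verbatim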
14 changes: 14 additions & 0 deletions Lib/test/test_fstring.py
@@ -1535,5 +1535,19 @@ def test_not_closing_quotes(self):
self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal",
['f"""', "f'''"])

def test_syntax_error_after_debug(self):
self.assertAllRaise(SyntaxError, "f-string: expecting a valid expression after '{'",
[
"f'{1=}{;'",
"f'{1=}{+;'",
"f'{1=}{2}{;'",
"f'{1=}{3}{;'",
])
self.assertAllRaise(SyntaxError, "f-string: expecting '=', or '!', or ':', or '}'",
[
"f'{1=}{1;'",
"f'{1=}{1;}'",
])

if __name__ == '__main__':
unittest.main()
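
The feature these tests guard keeps the raw expression text of a `=` debug field verbatim (now carried as token metadata instead of being re-read from the tokenizer's buffer). A quick illustrative sketch of the user-visible behaviour (assumption: standard debug f-string semantics, Python 3.8+):

    x = 42
    print(f'{x=}')     # x=42
    print(f'{x = }')   # x = 42   -- whitespace around '=' comes from the source text
    print(f'{x=:>6}')  # x=    42 -- a format spec formats the value; the text before it stays literal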
92 changes: 51 additions & 41 deletions Parser/action_helpers.c
@@ -965,17 +965,43 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
return 0;
}

expr_ty
_PyPegen_check_fstring_conversion(Parser *p, Token* symbol, expr_ty conv) {
if (symbol->lineno != conv->lineno || symbol->end_col_offset != conv->col_offset) {
static ResultTokenWithMetadata *
result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
{
ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));
if (res == NULL) {
return NULL;
}
res->metadata = metadata;
res->result = result;
return res;
}

ResultTokenWithMetadata *
_PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
{
if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
symbol, conv,
conv_token, conv,
"f-string: conversion type must come right after the exclamanation mark"
);
}
return conv;
return result_token_with_metadata(p, conv, conv_token->metadata);
}

ResultTokenWithMetadata *
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
int end_lineno, int end_col_offset, PyArena *arena)
{
if (!spec) {
return NULL;
}
expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, end_col_offset, p->arena);
if (!res) {
return NULL;
}
return result_token_with_metadata(p, res, colon->metadata);
}

const char *
_PyPegen_get_expr_name(expr_ty e)
@@ -1197,27 +1223,6 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq

// Fstring stuff

static expr_ty
decode_fstring_buffer(Parser *p, int lineno, int col_offset, int end_lineno,
int end_col_offset)
{
tokenizer_mode *tok_mode = &(p->tok->tok_mode_stack[p->tok->tok_mode_stack_index]);
assert(tok_mode->last_expr_buffer != NULL);
assert(tok_mode->last_expr_size >= 0 && tok_mode->last_expr_end >= 0);

PyObject *res = PyUnicode_DecodeUTF8(
tok_mode->last_expr_buffer,
tok_mode->last_expr_size - tok_mode->last_expr_end,
NULL
);
if (!res || _PyArena_AddPyObject(p->arena, res) < 0) {
Py_XDECREF(res);
return NULL;
}

return _PyAST_Constant(res, NULL, lineno, col_offset, end_lineno, end_col_offset, p->arena);
}

static expr_ty
_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant) {
assert(PyUnicode_CheckExact(constant->v.Constant.value));
@@ -1386,19 +1391,20 @@ expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
}

expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, expr_ty conversion,
expr_ty format, int lineno, int col_offset, int end_lineno, int end_col_offset,
PyArena *arena) {
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
int end_lineno, int end_col_offset, PyArena *arena) {
int conversion_val = -1;
if (conversion != NULL) {
assert(conversion->kind == Name_kind);
Py_UCS4 first = PyUnicode_READ_CHAR(conversion->v.Name.id, 0);
expr_ty conversion_expr = (expr_ty) conversion->result;
assert(conversion_expr->kind == Name_kind);
Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);

if (PyUnicode_GET_LENGTH(conversion->v.Name.id) > 1 ||
if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 ||
!(first == 's' || first == 'r' || first == 'a')) {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion,
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr,
"f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
conversion->v.Name.id);
conversion_expr->v.Name.id);
return NULL;
}

@@ -1410,30 +1416,34 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ex
}

expr_ty formatted_value = _PyAST_FormattedValue(
expression, conversion_val, format,
expression, conversion_val, format ? (expr_ty) format->result : NULL,
lineno, col_offset, end_lineno,
end_col_offset, arena
);

if (debug) {
/* Find the non whitespace token after the "=" */
int debug_end_line, debug_end_offset;
PyObject *debug_metadata;

if (conversion) {
debug_end_line = conversion->lineno;
debug_end_offset = conversion->col_offset;
debug_end_line = ((expr_ty) conversion->result)->lineno;
debug_end_offset = ((expr_ty) conversion->result)->col_offset;
debug_metadata = conversion->metadata;
}
else if (format) {
debug_end_line = format->lineno;
debug_end_offset = format->col_offset + 1; // HACK: ??
debug_end_line = ((expr_ty) format->result)->lineno;
debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
debug_metadata = format->metadata;
}
else {
debug_end_line = end_lineno;
debug_end_offset = end_col_offset;
debug_metadata = closing_brace->metadata;
}

expr_ty debug_text = decode_fstring_buffer(p, lineno, col_offset + 1,
debug_end_line, debug_end_offset - 1);
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
debug_end_offset - 1, p->arena);
if (!debug_text) {
return NULL;
}
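`_PyPegen_formatted_value` keeps its conversion-character check, now reading the NAME node out of the `ResultTokenWithMetadata` wrapper; only 's', 'r' and 'a' are accepted. An illustrative Python view of that rule (not part of the diff):

    x = 'py'
    print(f'{x!r}')  # 'py'  -- repr()
    print(f'{x!s}')  # py    -- str()
    print(f'{x!a}')  # 'py'  -- ascii()
    # f'{x!z}' is rejected at compile time with
    # "f-string: invalid conversion character 'z': expected 's', 'r', or 'a'"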
24 changes: 12 additions & 12 deletions Parser/parser.c

The diff for this generated file (Parser/parser.c) is not rendered by default.

20 changes: 17 additions & 3 deletions Parser/pegen.c
@@ -155,6 +155,16 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
return -1;
}

parser_token->metadata = NULL;
if (new_token->metadata != NULL) {
if (_PyArena_AddPyObject(p->arena, new_token->metadata) < 0) {
Py_DECREF(new_token->metadata);
return -1;
}
parser_token->metadata = new_token->metadata;
new_token->metadata = NULL;
}

parser_token->level = new_token->level;
parser_token->lineno = new_token->lineno;
parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
@@ -198,6 +208,7 @@ int
_PyPegen_fill_token(Parser *p)
{
struct token new_token;
new_token.metadata = NULL;
int type = _PyTokenizer_Get(p->tok, &new_token);

// Record and skip '# type: ignore' comments
Expand All @@ -206,14 +217,14 @@ _PyPegen_fill_token(Parser *p)
char *tag = PyMem_Malloc(len + 1);
if (tag == NULL) {
PyErr_NoMemory();
return -1;
goto error;
}
strncpy(tag, new_token.start, len);
tag[len] = '\0';
// Ownership of tag passes to the growable array
if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
PyErr_NoMemory();
return -1;
goto error;
}
type = _PyTokenizer_Get(p->tok, &new_token);
}
@@ -234,11 +245,14 @@ _PyPegen_fill_token(Parser *p)

// Check if we are at the limit of the token array capacity and resize if needed
if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
return -1;
goto error;
}

Token *t = p->tokens[p->fill];
return initialize_token(p, t, &new_token, type);
error:
Py_XDECREF(new_token.metadata);
return -1;
}

#if defined(Py_DEBUG)
13 changes: 11 additions & 2 deletions Parser/pegen.h
@@ -39,6 +39,7 @@ typedef struct {
int level;
int lineno, col_offset, end_lineno, end_col_offset;
Memo *memo;
PyObject *metadata;
} Token;

typedef struct {
@@ -118,6 +119,11 @@ typedef struct {
int is_keyword;
} KeywordOrStarred;

typedef struct {
void *result;
PyObject *metadata;
} ResultTokenWithMetadata;

// Internal parser functions
#if defined(Py_DEBUG)
void _PyPegen_clear_memo_statistics(void);
@@ -310,7 +316,8 @@ StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
asdl_arg_seq *, asdl_seq *, StarEtc *);
arguments_ty _PyPegen_empty_arguments(Parser *);
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, expr_ty, expr_ty, int, int, int, int, PyArena *);
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, ResultTokenWithMetadata *, ResultTokenWithMetadata *, Token *,
int, int, int, int, PyArena *);
AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
@@ -329,7 +336,9 @@ expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
int _PyPegen_check_barry_as_flufl(Parser *, Token *);
int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
expr_ty _PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
int, int, PyArena *);
mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);