Skip to content

Commit fbb26f0

Browse files
authored
gh-121404: enforce that codegen doesn't access compiler, and compiler doesn't use codegen macros (#123575)
1 parent 57c471a commit fbb26f0

File tree

1 file changed

+129
-85
lines changed

1 file changed

+129
-85
lines changed

Python/compile.c

+129-85
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ static PySTEntryObject *compiler_symtable_entry(struct compiler *c);
9595
#define SCOPE_TYPE(C) compiler_scope_type(C)
9696
#define QUALNAME(C) compiler_qualname(C)
9797
#define METADATA(C) compiler_unit_metadata(C)
98+
#define ARENA(C) compiler_arena(C)
9899

99100
typedef _Py_SourceLocation location;
100101
typedef struct _PyCfgBuilder cfg_builder;
@@ -131,6 +132,7 @@ static void compiler_exit_scope(struct compiler *c);
131132
static Py_ssize_t compiler_add_const(struct compiler *c, PyObject *o);
132133
static int compiler_maybe_add_static_attribute_to_class(struct compiler *c, expr_ty e);
133134
static _PyCompile_CodeUnitMetadata *compiler_unit_metadata(struct compiler *c);
135+
static PyArena *compiler_arena(struct compiler *c);
134136

135137
#define LOCATION(LNO, END_LNO, COL, END_COL) \
136138
((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
@@ -250,60 +252,6 @@ _PyCompile_EnsureArrayLargeEnough(int idx, void **array, int *alloc,
250252
}
251253

252254

253-
/* The following items change on entry and exit of code blocks.
254-
They must be saved and restored when returning to a block.
255-
*/
256-
struct compiler_unit {
257-
PySTEntryObject *u_ste; /* symtable entry for this unit (presumably; assigned outside this view) */
258-
259-
int u_scope_type; /* scope kind; compared against COMPILER_SCOPE_* constants */
260-
261-
PyObject *u_private; /* for private name mangling */
262-
PyObject *u_static_attributes; /* for class: attributes accessed via self.X */
263-
PyObject *u_deferred_annotations; /* AnnAssign nodes deferred to the end of compilation */
264-
265-
instr_sequence *u_instr_sequence; /* codegen output */
266-
267-
int u_nfblocks; /* presumably the number of u_fblock entries in use -- TODO confirm */
268-
int u_in_inlined_comp; /* counter; decremented when an inlined comprehension's state is popped */
269-
270-
struct fblockinfo u_fblock[CO_MAXBLOCKS]; /* fixed-capacity array of fblockinfo records */
271-
272-
_PyCompile_CodeUnitMetadata u_metadata; /* per-unit metadata; its address is returned by compiler_unit_metadata() */
273-
};
274-
275-
/* This struct captures the global state of a compilation.
276-
277-
The u pointer points to the current compilation unit, while units
278-
for enclosing blocks are stored in c_stack. The u and c_stack are
279-
managed by compiler_enter_scope() and compiler_exit_scope().
280-
281-
Note that we don't track recursion levels during compilation - the
282-
task of detecting and rejecting excessive levels of nesting is
283-
handled by the symbol analysis pass.
284-
285-
*/
286-
287-
struct compiler {
288-
PyObject *c_filename; /* presumably the filename passed to compiler_setup() -- TODO confirm */
289-
struct symtable *c_st; /* symbol table (struct symtable); populated outside this view */
290-
_PyFutureFeatures c_future; /* module's __future__ */
291-
PyCompilerFlags c_flags; /* compiler flags; presumably derived from compiler_setup()'s flags -- TODO confirm */
292-
293-
int c_optimize; /* optimization level */
294-
int c_interactive; /* true if in interactive mode */
295-
PyObject *c_const_cache; /* Python dict holding all constants,
296-
including names tuple */
297-
struct compiler_unit *u; /* compiler state for current block */
298-
PyObject *c_stack; /* Python list holding compiler_unit ptrs */
299-
PyArena *c_arena; /* pointer to memory allocation arena */
300-
301-
bool c_save_nested_seqs; /* if true, construct recursive instruction sequences
302-
* (including instructions for nested code objects)
303-
*/
304-
};
305-
306-
307255
typedef struct {
308256
// A list of strings corresponding to name captures. It is used to track:
309257
// - Repeated name assignments in the same pattern.
@@ -917,6 +865,21 @@ codegen_unwind_fblock_stack(struct compiler *c, location *ploc,
917865
return SUCCESS;
918866
}
919867

868+
/* Enter a new scope on behalf of codegen and emit its initial RESUME.
 *
 * All arguments are forwarded unchanged to compiler_enter_scope(); a failure
 * there is propagated through RETURN_IF_ERROR.  Afterwards a RESUME
 * instruction with oparg RESUME_AT_FUNC_START is added at location
 * (lineno, lineno, 0, 0), except that module scopes use line number 0.
 *
 * Returns SUCCESS once the instruction has been added.
 * NOTE(review): ADDOP_I presumably returns early on failure, as other
 * codegen macros do -- its definition is outside this view.
 */
static int
869+
codegen_enter_scope(struct compiler *c, identifier name, int scope_type,
870+
void *key, int lineno, PyObject *private,
871+
_PyCompile_CodeUnitMetadata *umd)
872+
{
873+
RETURN_IF_ERROR(
874+
compiler_enter_scope(c, name, scope_type, key, lineno, private, umd));
875+
location loc = LOCATION(lineno, lineno, 0, 0);
876+
if (scope_type == COMPILER_SCOPE_MODULE) {
877+
loc.lineno = 0; /* a module's RESUME is placed on line 0 rather than `lineno` */
878+
}
879+
ADDOP_I(c, loc, RESUME, RESUME_AT_FUNC_START); /* first instruction of the new unit */
880+
return SUCCESS;
881+
}
882+
920883
static int
921884
codegen_setup_annotations_scope(struct compiler *c, location loc,
922885
void *key, PyObject *name)
@@ -925,8 +888,8 @@ codegen_setup_annotations_scope(struct compiler *c, location loc,
925888
.u_posonlyargcount = 1,
926889
};
927890
RETURN_IF_ERROR(
928-
compiler_enter_scope(c, name, COMPILER_SCOPE_ANNOTATIONS,
929-
key, loc.lineno, NULL, &umd));
891+
codegen_enter_scope(c, name, COMPILER_SCOPE_ANNOTATIONS,
892+
key, loc.lineno, NULL, &umd));
930893

931894
// if .format != 1: raise NotImplementedError
932895
_Py_DECLARE_STR(format, ".format");
@@ -1006,6 +969,14 @@ codegen_process_deferred_annotations(struct compiler *c, location loc)
1006969
return SUCCESS;
1007970
}
1008971

972+
/* Compile an expression.
 *
 * Visits `e` as an `expr` node via the VISIT macro and returns SUCCESS.
 * This function gives code after the codegen section a way to compile an
 * expression without using VISIT, which is #undef'd at the end of that
 * section.
 * NOTE(review): VISIT presumably returns early on failure -- its
 * definition is outside this view.
 */
973+
static int
974+
codegen_expression(struct compiler *c, expr_ty e)
975+
{
976+
VISIT(c, expr, e);
977+
return SUCCESS;
978+
}
979+
1009980
/* Compile a sequence of statements, checking for a docstring
1010981
and for annotations. */
1011982

@@ -1073,8 +1044,8 @@ codegen_enter_anonymous_scope(struct compiler* c, mod_ty mod)
10731044
{
10741045
_Py_DECLARE_STR(anon_module, "<module>");
10751046
RETURN_IF_ERROR(
1076-
compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
1077-
mod, 1, NULL, NULL));
1047+
codegen_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
1048+
mod, 1, NULL, NULL));
10781049
return SUCCESS;
10791050
}
10801051

@@ -1504,7 +1475,7 @@ codegen_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t fu
15041475
.u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs),
15051476
};
15061477
RETURN_IF_ERROR(
1507-
compiler_enter_scope(c, name, scope_type, (void *)s, firstlineno, NULL, &umd));
1478+
codegen_enter_scope(c, name, scope_type, (void *)s, firstlineno, NULL, &umd));
15081479

15091480
Py_ssize_t first_instr = 0;
15101481
PyObject *docstring = _PyAST_GetDocString(body);
@@ -1617,8 +1588,8 @@ codegen_function(struct compiler *c, stmt_ty s, int is_async)
16171588
_PyCompile_CodeUnitMetadata umd = {
16181589
.u_argcount = num_typeparam_args,
16191590
};
1620-
int ret = compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS,
1621-
(void *)type_params, firstlineno, NULL, &umd);
1591+
int ret = codegen_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS,
1592+
(void *)type_params, firstlineno, NULL, &umd);
16221593
Py_DECREF(type_params_name);
16231594
RETURN_IF_ERROR(ret);
16241595
RETURN_IF_ERROR_IN_SCOPE(c, codegen_type_params(c, type_params));
@@ -1696,8 +1667,8 @@ codegen_class_body(struct compiler *c, stmt_ty s, int firstlineno)
16961667

16971668
/* 1. compile the class body into a code object */
16981669
RETURN_IF_ERROR(
1699-
compiler_enter_scope(c, s->v.ClassDef.name, COMPILER_SCOPE_CLASS,
1700-
(void *)s, firstlineno, s->v.ClassDef.name, NULL));
1670+
codegen_enter_scope(c, s->v.ClassDef.name, COMPILER_SCOPE_CLASS,
1671+
(void *)s, firstlineno, s->v.ClassDef.name, NULL));
17011672

17021673
location loc = LOCATION(firstlineno, firstlineno, 0, 0);
17031674
/* load (global) __name__ ... */
@@ -1805,8 +1776,8 @@ codegen_class(struct compiler *c, stmt_ty s)
18051776
if (!type_params_name) {
18061777
return ERROR;
18071778
}
1808-
int ret = compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS,
1809-
(void *)type_params, firstlineno, s->v.ClassDef.name, NULL);
1779+
int ret = codegen_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS,
1780+
(void *)type_params, firstlineno, s->v.ClassDef.name, NULL);
18101781
Py_DECREF(type_params_name);
18111782
RETURN_IF_ERROR(ret);
18121783
RETURN_IF_ERROR_IN_SCOPE(c, codegen_type_params(c, type_params));
@@ -1833,7 +1804,7 @@ codegen_class(struct compiler *c, stmt_ty s)
18331804

18341805
Py_ssize_t original_len = asdl_seq_LEN(s->v.ClassDef.bases);
18351806
asdl_expr_seq *bases = _Py_asdl_expr_seq_new(
1836-
original_len + 1, c->c_arena);
1807+
original_len + 1, ARENA(c));
18371808
if (bases == NULL) {
18381809
compiler_exit_scope(c);
18391810
return ERROR;
@@ -1843,7 +1814,7 @@ codegen_class(struct compiler *c, stmt_ty s)
18431814
}
18441815
expr_ty name_node = _PyAST_Name(
18451816
&_Py_STR(generic_base), Load,
1846-
loc.lineno, loc.col_offset, loc.end_lineno, loc.end_col_offset, c->c_arena
1817+
loc.lineno, loc.col_offset, loc.end_lineno, loc.end_col_offset, ARENA(c)
18471818
);
18481819
if (name_node == NULL) {
18491820
compiler_exit_scope(c);
@@ -1920,8 +1891,8 @@ codegen_typealias(struct compiler *c, stmt_ty s)
19201891
if (!type_params_name) {
19211892
return ERROR;
19221893
}
1923-
int ret = compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS,
1924-
(void *)type_params, loc.lineno, NULL, NULL);
1894+
int ret = codegen_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS,
1895+
(void *)type_params, loc.lineno, NULL, NULL);
19251896
Py_DECREF(type_params_name);
19261897
RETURN_IF_ERROR(ret);
19271898
ADDOP_LOAD_CONST_IN_SCOPE(c, loc, name);
@@ -2183,8 +2154,8 @@ codegen_lambda(struct compiler *c, expr_ty e)
21832154
};
21842155
_Py_DECLARE_STR(anon_lambda, "<lambda>");
21852156
RETURN_IF_ERROR(
2186-
compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA,
2187-
(void *)e, e->lineno, NULL, &umd));
2157+
codegen_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA,
2158+
(void *)e, e->lineno, NULL, &umd));
21882159

21892160
/* Make None the first constant, so the lambda can't have a
21902161
docstring. */
@@ -4765,7 +4736,6 @@ static int
47654736
pop_inlined_comprehension_state(struct compiler *c, location loc,
47664737
inlined_comprehension_state *state)
47674738
{
4768-
c->u->u_in_inlined_comp--;
47694739
RETURN_IF_ERROR(codegen_pop_inlined_comprehension_locals(c, loc, state));
47704740
RETURN_IF_ERROR(compiler_revert_inlined_comprehension_scopes(c, loc, state));
47714741
return SUCCESS;
@@ -4819,8 +4789,8 @@ codegen_comprehension(struct compiler *c, expr_ty e, int type,
48194789
_PyCompile_CodeUnitMetadata umd = {
48204790
.u_argcount = 1,
48214791
};
4822-
if (compiler_enter_scope(c, name, COMPILER_SCOPE_COMPREHENSION,
4823-
(void *)e, e->lineno, NULL, &umd) < 0) {
4792+
if (codegen_enter_scope(c, name, COMPILER_SCOPE_COMPREHENSION,
4793+
(void *)e, e->lineno, NULL, &umd) < 0) {
48244794
goto error;
48254795
}
48264796
}
@@ -6442,8 +6412,84 @@ codegen_add_return_at_end(struct compiler *c, int addNone)
64426412
return SUCCESS;
64436413
}
64446414

6415+
/* Undefine the codegen helper macros so that the compiler implementation
 * that follows cannot use them; any accidental use below this point
 * becomes a build error instead of silently coupling the two halves.
 */
#undef ADDOP_I
6416+
#undef ADDOP_I_IN_SCOPE
6417+
#undef ADDOP
6418+
#undef ADDOP_IN_SCOPE
6419+
#undef ADDOP_LOAD_CONST
6420+
#undef ADDOP_LOAD_CONST_IN_SCOPE
6421+
#undef ADDOP_LOAD_CONST_NEW
6422+
#undef ADDOP_N
6423+
#undef ADDOP_N_IN_SCOPE
6424+
#undef ADDOP_NAME
6425+
#undef ADDOP_JUMP
6426+
#undef ADDOP_COMPARE
6427+
#undef ADDOP_BINARY
6428+
#undef ADDOP_INPLACE
6429+
#undef ADD_YIELD_FROM
6430+
#undef POP_EXCEPT_AND_RERAISE
6431+
#undef ADDOP_YIELD
6432+
#undef VISIT
6433+
#undef VISIT_IN_SCOPE
6434+
#undef VISIT_SEQ
6435+
#undef VISIT_SEQ_IN_SCOPE
6436+
64456437
/*** end of CODEGEN, start of compiler implementation ***/
64466438

6439+
/* The following items change on entry and exit of code blocks.
6440+
They must be saved and restored when returning to a block.
6441+
*/
6442+
struct compiler_unit {
6443+
PySTEntryObject *u_ste; /* symtable entry for this unit (presumably; assigned outside this view) */
6444+
6445+
int u_scope_type; /* scope kind; holds one of the COMPILER_SCOPE_* constants (presumably) */
6446+
6447+
PyObject *u_private; /* for private name mangling */
6448+
PyObject *u_static_attributes; /* for class: attributes accessed via self.X */
6449+
PyObject *u_deferred_annotations; /* AnnAssign nodes deferred to the end of compilation */
6450+
6451+
instr_sequence *u_instr_sequence; /* codegen output */
6452+
6453+
int u_nfblocks; /* presumably the number of u_fblock entries in use -- TODO confirm */
6454+
int u_in_inlined_comp; /* counter; decremented in compiler_revert_inlined_comprehension_scopes() */
6455+
6456+
struct fblockinfo u_fblock[CO_MAXBLOCKS]; /* fixed-capacity array of fblockinfo records */
6457+
6458+
_PyCompile_CodeUnitMetadata u_metadata; /* per-unit metadata; its address is returned by compiler_unit_metadata() */
6459+
};
6460+
6461+
/* This struct captures the global state of a compilation.
6462+
6463+
The u pointer points to the current compilation unit, while units
6464+
for enclosing blocks are stored in c_stack. The u and c_stack are
6465+
managed by compiler_enter_scope() and compiler_exit_scope().
6466+
6467+
Note that we don't track recursion levels during compilation - the
6468+
task of detecting and rejecting excessive levels of nesting is
6469+
handled by the symbol analysis pass.
6470+
6471+
*/
6472+
6473+
struct compiler {
6474+
PyObject *c_filename; /* presumably the filename passed to compiler_setup() -- TODO confirm */
6475+
struct symtable *c_st; /* symbol table (struct symtable); populated outside this view */
6476+
_PyFutureFeatures c_future; /* module's __future__ */
6477+
PyCompilerFlags c_flags; /* compiler flags; presumably derived from compiler_setup()'s flags -- TODO confirm */
6478+
6479+
int c_optimize; /* optimization level */
6480+
int c_interactive; /* true if in interactive mode */
6481+
PyObject *c_const_cache; /* Python dict holding all constants,
6482+
including names tuple */
6483+
struct compiler_unit *u; /* compiler state for current block */
6484+
PyObject *c_stack; /* Python list holding compiler_unit ptrs */
6485+
PyArena *c_arena; /* pointer to memory allocation arena; returned by compiler_arena() */
6486+
6487+
bool c_save_nested_seqs; /* if true, construct recursive instruction sequences
6488+
* (including instructions for nested code objects)
6489+
*/
6490+
};
6491+
6492+
64476493
static int
64486494
compiler_setup(struct compiler *c, mod_ty mod, PyObject *filename,
64496495
PyCompilerFlags *flags, int optimize, PyArena *arena)
@@ -6801,10 +6847,7 @@ compiler_enter_scope(struct compiler *c, identifier name, int scope_type,
68016847
void *key, int lineno, PyObject *private,
68026848
_PyCompile_CodeUnitMetadata *umd)
68036849
{
6804-
location loc = LOCATION(lineno, lineno, 0, 0);
6805-
68066850
struct compiler_unit *u;
6807-
68086851
u = (struct compiler_unit *)PyMem_Calloc(1, sizeof(struct compiler_unit));
68096852
if (!u) {
68106853
PyErr_NoMemory();
@@ -6918,15 +6961,9 @@ compiler_enter_scope(struct compiler *c, identifier name, int scope_type,
69186961
u->u_private = Py_XNewRef(private);
69196962

69206963
c->u = u;
6921-
6922-
if (u->u_scope_type == COMPILER_SCOPE_MODULE) {
6923-
loc.lineno = 0;
6924-
}
6925-
else {
6964+
if (scope_type != COMPILER_SCOPE_MODULE) {
69266965
RETURN_IF_ERROR(compiler_set_qualname(c));
69276966
}
6928-
ADDOP_I(c, loc, RESUME, RESUME_AT_FUNC_START);
6929-
69306967
return SUCCESS;
69316968
}
69326969

@@ -7032,7 +7069,7 @@ compiler_codegen(struct compiler *c, mod_ty mod)
70327069
break;
70337070
}
70347071
case Expression_kind: {
7035-
VISIT(c, expr, mod->v.Expression.body);
7072+
RETURN_IF_ERROR(codegen_expression(c, mod->v.Expression.body));
70367073
break;
70377074
}
70387075
default: {
@@ -7283,6 +7320,7 @@ static int
72837320
compiler_revert_inlined_comprehension_scopes(struct compiler *c, location loc,
72847321
inlined_comprehension_state *state)
72857322
{
7323+
c->u->u_in_inlined_comp--;
72867324
if (state->temp_symbols) {
72877325
PyObject *k, *v;
72887326
Py_ssize_t pos = 0;
@@ -7478,6 +7516,12 @@ compiler_unit_metadata(struct compiler *c)
74787516
return &c->u->u_metadata;
74797517
}
74807518

7519+
/* Return the memory arena stored in the compiler state (c->c_arena).
 *
 * This accessor backs the ARENA(C) macro, so code that needs the arena can
 * obtain it without reading the struct compiler field directly.
 */
static PyArena *
7520+
compiler_arena(struct compiler *c)
7521+
{
7522+
return c->c_arena;
7523+
}
7524+
74817525
#ifndef NDEBUG
74827526
static int
74837527
compiler_is_top_level_await(struct compiler *c)

0 commit comments

Comments
 (0)