From 48f024b6d72daa9e9481f67b4c50f86795333f4c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 2 Sep 2024 02:08:45 +0800 Subject: [PATCH 01/18] skeleton --- Include/internal/pycore_optimizer.h | 1 + Makefile.pre.in | 11 +- Python/optimizer_analysis.c | 100 + Python/optimizer_symbols.c | 3 + Python/partial_evaluator_bytecodes.c | 283 +++ Python/partial_evaluator_cases.c.h | 1895 +++++++++++++++++ .../partial_evaluator_generator.py | 234 ++ 7 files changed, 2526 insertions(+), 1 deletion(-) create mode 100644 Python/partial_evaluator_bytecodes.c create mode 100644 Python/partial_evaluator_cases.c.h create mode 100644 Tools/cases_generator/partial_evaluator_generator.py diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 19e54bf122a8bb..2d648d79655954 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -152,6 +152,7 @@ struct _Py_UopsSymbol { PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!) 
unsigned int type_version; // currently stores type version + bool is_static; // used for binding-time analysis }; #define UOP_FORMAT_TARGET 0 diff --git a/Makefile.pre.in b/Makefile.pre.in index 46733d0cb44f72..4f214eed60059e 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1951,7 +1951,7 @@ Objects/mimalloc/page.o: $(srcdir)/Objects/mimalloc/page-queue.c regen-cases: \ regen-opcode-ids regen-opcode-targets regen-uop-ids regen-opcode-metadata-py \ regen-generated-cases regen-executor-cases regen-optimizer-cases \ - regen-opcode-metadata regen-uop-metadata + regen-partial-evaluator-cases regen-opcode-metadata regen-uop-metadata .PHONY: regen-opcode-ids regen-opcode-ids: @@ -1997,6 +1997,15 @@ regen-optimizer-cases: $(srcdir)/Python/bytecodes.c $(UPDATE_FILE) $(srcdir)/Python/optimizer_cases.c.h $(srcdir)/Python/optimizer_cases.c.h.new +.PHONY: regen-partial-evaluator-cases +regen-partial-evaluator-cases: + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/partial_evaluator_generator.py \ + -o $(srcdir)/Python/partial_evaluator_cases.c.h.new \ + $(srcdir)/Python/partial_evaluator_bytecodes.c \ + $(srcdir)/Python/bytecodes.c + $(UPDATE_FILE) $(srcdir)/Python/partial_evaluator_cases.c.h $(srcdir)/Python/partial_evaluator_cases.c.h.new + + .PHONY: regen-opcode-metadata regen-opcode-metadata: $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/opcode_metadata_generator.py \ diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index f7adb44c9e09ef..caca5c397143c6 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -486,6 +486,106 @@ optimize_uops( } +/* Returns trace_len on success, 0 if not ready (hit bottom; retry later), -1 on error. 
*/ +static int +partial_evaluate_uops( + PyCodeObject *co, + _PyUOpInstruction *trace, + int trace_len, + int curr_stacklen, + _PyBloomFilter *dependencies +) +{ + + _Py_UOpsContext context; + _Py_UOpsContext *ctx = &context; + uint32_t opcode = UINT16_MAX; + int curr_space = 0; + int max_space = 0; + _PyUOpInstruction *first_valid_check_stack = NULL; + _PyUOpInstruction *corresponding_check_stack = NULL; + + _Py_uop_abstractcontext_init(ctx); + _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0); + if (frame == NULL) { + return -1; + } + ctx->curr_frame_depth++; + ctx->frame = frame; + ctx->done = false; + ctx->out_of_space = false; + ctx->contradiction = false; + + _PyUOpInstruction *this_instr = NULL; + for (int i = 0; !ctx->done; i++) { + assert(i < trace_len); + this_instr = &trace[i]; + + int oparg = this_instr->oparg; + opcode = this_instr->opcode; + _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; + +#ifdef Py_DEBUG + if (get_lltrace() >= 3) { + printf("%4d pe: ", (int)(this_instr - trace)); + _PyUOpPrint(this_instr); + printf(" "); + } +#endif + + switch (opcode) { + +#include "partial_evaluator_cases.c.h" + + default: + DPRINTF(1, "\nUnknown opcode in pe's abstract interpreter\n"); + Py_UNREACHABLE(); + } + assert(ctx->frame != NULL); + DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); + ctx->frame->stack_pointer = stack_pointer; + assert(STACK_LEVEL() >= 0); + } + if (ctx->out_of_space) { + DPRINTF(3, "\n"); + DPRINTF(1, "Out of space in pe's abstract interpreter\n"); + } + if (ctx->contradiction) { + // Attempted to push a "bottom" (contradiction) symbol onto the stack. + // This means that the abstract interpreter has hit unreachable code. + // We *could* generate an _EXIT_TRACE or _FATAL_ERROR here, but hitting + // bottom indicates type instability, so we are probably better off + // retrying later. 
+ DPRINTF(3, "\n"); + DPRINTF(1, "Hit bottom in pe's abstract interpreter\n"); + _Py_uop_abstractcontext_fini(ctx); + return 0; + } + + /* Either reached the end or cannot optimize further, but there + * would be no benefit in retrying later */ + _Py_uop_abstractcontext_fini(ctx); + if (first_valid_check_stack != NULL) { + assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE); + assert(max_space > 0); + assert(max_space <= INT_MAX); + assert(max_space <= INT32_MAX); + first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND; + first_valid_check_stack->operand = max_space; + } + return trace_len; + + error: + DPRINTF(3, "\n"); + DPRINTF(1, "Encountered error in pe's abstract interpreter\n"); + if (opcode <= MAX_UOP_ID) { + OPT_ERROR_IN_OPCODE(opcode); + } + _Py_uop_abstractcontext_fini(ctx); + return -1; + +} + static int remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 40cbf95e3d6d39..9b0be091c5666d 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -77,6 +77,7 @@ sym_new(_Py_UOpsContext *ctx) self->typ = NULL; self->const_val = NULL; self->type_version = 0; + self->is_static = false; return self; } @@ -187,6 +188,7 @@ _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const sym->typ = typ; sym->const_val = Py_NewRef(const_val); } + sym->is_static = true; } void @@ -196,6 +198,7 @@ _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym) sym_set_bottom(ctx, sym); } sym_set_flag(sym, IS_NULL); + sym->is_static = true; } void diff --git a/Python/partial_evaluator_bytecodes.c b/Python/partial_evaluator_bytecodes.c new file mode 100644 index 00000000000000..30c20537afcdce --- /dev/null +++ b/Python/partial_evaluator_bytecodes.c @@ -0,0 +1,283 @@ +#include "Python.h" +#include "pycore_optimizer.h" +#include "pycore_uops.h" +#include "pycore_uop_ids.h" +#include "internal/pycore_moduleobject.h" + +#define 
op(name, ...) /* NAME is ignored */ + +typedef struct _Py_UopsSymbol _Py_UopsSymbol; +typedef struct _Py_UOpsContext _Py_UOpsContext; +typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; + +/* Shortened forms for convenience */ +#define sym_is_not_null _Py_uop_sym_is_not_null +#define sym_is_const _Py_uop_sym_is_const +#define sym_get_const _Py_uop_sym_get_const +#define sym_new_unknown _Py_uop_sym_new_unknown +#define sym_new_not_null _Py_uop_sym_new_not_null +#define sym_new_type _Py_uop_sym_new_type +#define sym_is_null _Py_uop_sym_is_null +#define sym_new_const _Py_uop_sym_new_const +#define sym_new_null _Py_uop_sym_new_null +#define sym_matches_type _Py_uop_sym_matches_type +#define sym_matches_type_version _Py_uop_sym_matches_type_version +#define sym_get_type _Py_uop_sym_get_type +#define sym_has_type _Py_uop_sym_has_type +#define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) +#define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) +#define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) +#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) +#define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) +#define sym_is_bottom _Py_uop_sym_is_bottom +#define frame_new _Py_uop_frame_new +#define frame_pop _Py_uop_frame_pop + +extern int +optimize_to_bool( + _PyUOpInstruction *this_instr, + _Py_UOpsContext *ctx, + _Py_UopsSymbol *value, + _Py_UopsSymbol **result_ptr); + +extern void +eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit); + +extern PyCodeObject *get_code(_PyUOpInstruction *op); + +static int +dummy_func(void) { + + PyCodeObject *co; + int oparg; + _Py_UopsSymbol *flag; + _Py_UopsSymbol *left; + _Py_UopsSymbol *right; + _Py_UopsSymbol *value; + _Py_UopsSymbol *res; + _Py_UopsSymbol *iter; + _Py_UopsSymbol *top; + _Py_UopsSymbol *bottom; + _Py_UOpsAbstractFrame *frame; + _Py_UOpsAbstractFrame *new_frame; + _Py_UOpsContext *ctx; + _PyUOpInstruction *this_instr; + 
+// BEGIN BYTECODES // + + op(_LOAD_FAST_CHECK, (-- value)) { + value = GETLOCAL(oparg); + // We guarantee this will error - just bail and don't optimize it. + if (sym_is_null(value)) { + ctx->done = true; + } + } + + op(_LOAD_FAST, (-- value)) { + value = GETLOCAL(oparg); + } + + op(_LOAD_FAST_AND_CLEAR, (-- value)) { + value = GETLOCAL(oparg); + _Py_UopsSymbol *temp = sym_new_null(ctx); + GETLOCAL(oparg) = temp; + } + + op(_STORE_FAST, (value --)) { + GETLOCAL(oparg) = value; + } + + op(_PUSH_NULL, (-- res)) { + res = sym_new_null(ctx); + } + + op(_LOAD_CONST, (-- value)) { + // Should never happen. This should be run after the specializer pass. + Py_UNREACHABLE(); + } + + op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + value = sym_new_const(ctx, ptr); + } + + op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + value = sym_new_const(ctx, ptr); + } + + op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + value = sym_new_const(ctx, ptr); + null = sym_new_null(ctx); + } + + op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { + value = sym_new_const(ctx, ptr); + null = sym_new_null(ctx); + } + + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { + assert(oparg > 0); + top = bottom; + } + + op(_SWAP, (bottom, unused[oparg-2], top -- + top, unused[oparg-2], bottom)) { + } + + op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + int argcount = oparg; + + (void)callable; + + PyCodeObject *co = NULL; + assert((this_instr + 2)->opcode == _PUSH_FRAME); + uint64_t push_operand = (this_instr + 2)->operand; + if (push_operand & 1) { + co = (PyCodeObject *)(push_operand & ~1); + DPRINTF(3, "code=%p ", co); + assert(PyCode_Check(co)); + } + else { + PyFunctionObject *func = (PyFunctionObject *)push_operand; + DPRINTF(3, "func=%p ", func); + if (func == NULL) { + DPRINTF(3, "\n"); + DPRINTF(1, "Missing function\n"); + ctx->done = true; + break; + } + co = (PyCodeObject 
*)func->func_code; + DPRINTF(3, "code=%p ", co); + } + + assert(self_or_null != NULL); + assert(args != NULL); + new_frame = frame_new(ctx, co, 0, NULL, 0); + } + + op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + /* The _Py_UOpsAbstractFrame design assumes that we can copy arguments across directly */ + (void)callable; + (void)self_or_null; + (void)args; + new_frame = NULL; + ctx->done = true; + } + + op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _Py_UOpsAbstractFrame *)) { + (void)callable; + (void)self_or_null; + (void)args; + (void)kwnames; + new_frame = NULL; + ctx->done = true; + } + + op(_CREATE_INIT_FRAME, (self, init, args[oparg] -- init_frame: _Py_UOpsAbstractFrame *)) { + (void)self; + (void)init; + (void)args; + init_frame = NULL; + ctx->done = true; + } + + op(_RETURN_VALUE, (retval -- res)) { + SYNC_SP(); + ctx->frame->stack_pointer = stack_pointer; + frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + res = retval; + + /* Stack space handling */ + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + + co = get_code(this_instr); + if (co == NULL) { + // might be impossible, but bailing is still safe + ctx->done = true; + } + } + + op(_RETURN_GENERATOR, ( -- res)) { + SYNC_SP(); + ctx->frame->stack_pointer = stack_pointer; + frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + res = sym_new_unknown(ctx); + + /* Stack space handling */ + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + + co = get_code(this_instr); + if (co == NULL) { + // might be impossible, but bailing is still safe + ctx->done = true; + } + } + + op(_YIELD_VALUE, (unused -- res)) { + res = 
sym_new_unknown(ctx); + } + + op(_FOR_ITER_GEN_FRAME, ( -- )) { + /* We are about to hit the end of the trace */ + ctx->done = true; + } + + op(_SEND_GEN_FRAME, ( -- )) { + // We are about to hit the end of the trace: + ctx->done = true; + } + + op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) { + SYNC_SP(); + ctx->frame->stack_pointer = stack_pointer; + ctx->frame = new_frame; + ctx->curr_frame_depth++; + stack_pointer = new_frame->stack_pointer; + co = get_code(this_instr); + if (co == NULL) { + // should be about to _EXIT_TRACE anyway + ctx->done = true; + break; + } + } + + op(_UNPACK_SEQUENCE, (seq -- values[oparg])) { + /* This has to be done manually */ + (void)seq; + for (int i = 0; i < oparg; i++) { + values[i] = sym_new_unknown(ctx); + } + } + + op(_UNPACK_EX, (seq -- values[oparg & 0xFF], unused, unused[oparg >> 8])) { + /* This has to be done manually */ + (void)seq; + int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1; + for (int i = 0; i < totalargs; i++) { + values[i] = sym_new_unknown(ctx); + } + } + + op(_JUMP_TO_TOP, (--)) { + ctx->done = true; + } + + op(_EXIT_TRACE, (exit_p/4 --)) { + (void)exit_p; + ctx->done = true; + } + +// END BYTECODES // + +} diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h new file mode 100644 index 00000000000000..3b05eadd290111 --- /dev/null +++ b/Python/partial_evaluator_cases.c.h @@ -0,0 +1,1895 @@ +// This file is generated by Tools/cases_generator/partial_evaluator_generator.py +// from: +// Python/partial_evaluator_bytecodes.c +// Do not edit! 
+ + case _NOP: { + break; + } + + case _CHECK_PERIODIC: { + break; + } + + case _CHECK_PERIODIC_IF_NOT_YIELD_FROM: { + break; + } + + /* _QUICKEN_RESUME is not a viable micro-op for tier 2 */ + + case _RESUME_CHECK: { + break; + } + + /* _MONITOR_RESUME is not a viable micro-op for tier 2 */ + + case _LOAD_FAST_CHECK: { + _Py_UopsSymbol *value; + value = GETLOCAL(oparg); + // We guarantee this will error - just bail and don't optimize it. + if (sym_is_null(value)) { + ctx->done = true; + } + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST: { + _Py_UopsSymbol *value; + value = GETLOCAL(oparg); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_AND_CLEAR: { + _Py_UopsSymbol *value; + value = GETLOCAL(oparg); + _Py_UopsSymbol *temp = sym_new_null(ctx); + GETLOCAL(oparg) = temp; + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST: { + _Py_UopsSymbol *value; + // Should never happen. This should be run after the specializer pass. 
+ Py_UNREACHABLE(); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_FAST: { + _Py_UopsSymbol *value; + value = stack_pointer[-1]; + GETLOCAL(oparg) = value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _PUSH_NULL: { + _Py_UopsSymbol *res; + res = sym_new_null(ctx); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _END_SEND: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[-2] = value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _UNARY_NEGATIVE: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _UNARY_NOT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_BOOL: { + break; + } + + case _TO_BOOL_INT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_LIST: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_NONE: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _TO_BOOL_STR: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _REPLACE_WITH_TRUE: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _UNARY_INVERT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _GUARD_BOTH_INT: { + break; + } + + case _GUARD_NOS_INT: { + break; + } + + case _GUARD_TOS_INT: { + break; + } + + case _BINARY_OP_MULTIPLY_INT: { + 
_Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_ADD_INT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_SUBTRACT_INT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_BOTH_FLOAT: { + break; + } + + case _GUARD_NOS_FLOAT: { + break; + } + + case _GUARD_TOS_FLOAT: { + break; + } + + case _BINARY_OP_MULTIPLY_FLOAT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_ADD_FLOAT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_SUBTRACT_FLOAT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_BOTH_UNICODE: { + break; + } + + case _BINARY_OP_ADD_UNICODE: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP_INPLACE_ADD_UNICODE: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_SUBSCR: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_SLICE: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-3] = res; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_SLICE: { + stack_pointer += -4; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case 
_BINARY_SUBSCR_LIST_INT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_SUBSCR_STR_INT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_SUBSCR_TUPLE_INT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_SUBSCR_DICT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_SUBSCR_CHECK_FUNC: { + break; + } + + case _BINARY_SUBSCR_INIT_CALL: { + _PyInterpreterFrame *new_frame; + new_frame = sym_new_not_null(ctx); + stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LIST_APPEND: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _SET_ADD: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_SUBSCR: { + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_SUBSCR_LIST_INT: { + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_SUBSCR_DICT: { + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _DELETE_SUBSCR: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_INTRINSIC_1: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _CALL_INTRINSIC_2: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _RETURN_VALUE: { + _Py_UopsSymbol *retval; + _Py_UopsSymbol *res; + retval = stack_pointer[-1]; + 
stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + ctx->frame->stack_pointer = stack_pointer; + frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + res = retval; + /* Stack space handling */ + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + co = get_code(this_instr); + if (co == NULL) { + // might be impossible, but bailing is still safe + ctx->done = true; + } + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GET_AITER: { + _Py_UopsSymbol *iter; + iter = sym_new_not_null(ctx); + stack_pointer[-1] = iter; + break; + } + + case _GET_ANEXT: { + _Py_UopsSymbol *awaitable; + awaitable = sym_new_not_null(ctx); + stack_pointer[0] = awaitable; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GET_AWAITABLE: { + _Py_UopsSymbol *iter; + iter = sym_new_not_null(ctx); + stack_pointer[-1] = iter; + break; + } + + /* _SEND is not a viable micro-op for tier 2 */ + + case _SEND_GEN_FRAME: { + // We are about to hit the end of the trace: + ctx->done = true; + break; + } + + case _YIELD_VALUE: { + _Py_UopsSymbol *res; + res = sym_new_unknown(ctx); + stack_pointer[-1] = res; + break; + } + + case _POP_EXCEPT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_COMMON_CONSTANT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_BUILD_CLASS: { + _Py_UopsSymbol *bc; + bc = sym_new_not_null(ctx); + stack_pointer[0] = bc; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_NAME: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _DELETE_NAME: { + break; + } + + case _UNPACK_SEQUENCE: { + _Py_UopsSymbol *seq; + _Py_UopsSymbol **values; + seq 
= stack_pointer[-1]; + values = &stack_pointer[-1]; + /* This has to be done manually */ + (void)seq; + for (int i = 0; i < oparg; i++) { + values[i] = sym_new_unknown(ctx); + } + stack_pointer += -1 + oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _UNPACK_SEQUENCE_TWO_TUPLE: { + _Py_UopsSymbol *val1; + _Py_UopsSymbol *val0; + val1 = sym_new_not_null(ctx); + val0 = sym_new_not_null(ctx); + stack_pointer[-1] = val1; + stack_pointer[0] = val0; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _UNPACK_SEQUENCE_TUPLE: { + _Py_UopsSymbol **values; + values = &stack_pointer[-1]; + for (int _i = oparg; --_i >= 0;) { + values[_i] = sym_new_not_null(ctx); + } + stack_pointer += -1 + oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _UNPACK_SEQUENCE_LIST: { + _Py_UopsSymbol **values; + values = &stack_pointer[-1]; + for (int _i = oparg; --_i >= 0;) { + values[_i] = sym_new_not_null(ctx); + } + stack_pointer += -1 + oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _UNPACK_EX: { + _Py_UopsSymbol *seq; + _Py_UopsSymbol **values; + seq = stack_pointer[-1]; + values = &stack_pointer[-1]; + /* This has to be done manually */ + (void)seq; + int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1; + for (int i = 0; i < totalargs; i++) { + values[i] = sym_new_unknown(ctx); + } + stack_pointer += (oparg & 0xFF) + (oparg >> 8); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_ATTR: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _DELETE_ATTR: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_GLOBAL: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _DELETE_GLOBAL: { + break; + } + + case _LOAD_LOCALS: { + _Py_UopsSymbol *locals; + locals = sym_new_not_null(ctx); + stack_pointer[0] = locals; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + /* _LOAD_FROM_DICT_OR_GLOBALS is not a viable micro-op 
for tier 2 */ + + case _LOAD_NAME: { + _Py_UopsSymbol *v; + v = sym_new_not_null(ctx); + stack_pointer[0] = v; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_GLOBAL: { + _Py_UopsSymbol *res; + _Py_UopsSymbol *null = NULL; + res = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_GLOBALS_VERSION: { + break; + } + + case _GUARD_BUILTINS_VERSION: { + break; + } + + case _LOAD_GLOBAL_MODULE: { + _Py_UopsSymbol *res; + _Py_UopsSymbol *null = NULL; + res = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + _Py_UopsSymbol *res; + _Py_UopsSymbol *null = NULL; + res = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[0] = res; + if (oparg & 1) stack_pointer[1] = null; + stack_pointer += 1 + (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _DELETE_FAST: { + break; + } + + case _MAKE_CELL: { + break; + } + + case _DELETE_DEREF: { + break; + } + + case _LOAD_FROM_DICT_OR_DEREF: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[-1] = value; + break; + } + + case _LOAD_DEREF: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_DEREF: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COPY_FREE_VARS: { + break; + } + + case _BUILD_STRING: { + _Py_UopsSymbol *str; + str = sym_new_not_null(ctx); + stack_pointer[-oparg] = str; + stack_pointer += 1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BUILD_TUPLE: { + _Py_UopsSymbol *tup; + tup = sym_new_not_null(ctx); + 
stack_pointer[-oparg] = tup; + stack_pointer += 1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BUILD_LIST: { + _Py_UopsSymbol *list; + list = sym_new_not_null(ctx); + stack_pointer[-oparg] = list; + stack_pointer += 1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LIST_EXTEND: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _SET_UPDATE: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BUILD_SET: { + _Py_UopsSymbol *set; + set = sym_new_not_null(ctx); + stack_pointer[-oparg] = set; + stack_pointer += 1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BUILD_MAP: { + _Py_UopsSymbol *map; + map = sym_new_not_null(ctx); + stack_pointer[-oparg*2] = map; + stack_pointer += 1 - oparg*2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _SETUP_ANNOTATIONS: { + break; + } + + case _DICT_UPDATE: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _DICT_MERGE: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _MAP_ADD: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ + + case _LOAD_SUPER_ATTR_ATTR: { + _Py_UopsSymbol *attr_st; + attr_st = sym_new_not_null(ctx); + stack_pointer[-3] = attr_st; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_SUPER_ATTR_METHOD: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *self_or_null; + attr = sym_new_not_null(ctx); + self_or_null = sym_new_not_null(ctx); + stack_pointer[-3] = attr; + stack_pointer[-2] = self_or_null; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_ATTR: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *self_or_null = NULL; + attr = sym_new_not_null(ctx); + self_or_null = sym_new_not_null(ctx); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = self_or_null; + stack_pointer += 
(oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_TYPE_VERSION: { + break; + } + + case _CHECK_MANAGED_OBJECT_HAS_VALUES: { + break; + } + + case _LOAD_ATTR_INSTANCE_VALUE: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *null = NULL; + attr = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_ATTR_MODULE: { + break; + } + + case _LOAD_ATTR_MODULE: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *null = NULL; + attr = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_ATTR_WITH_HINT: { + break; + } + + case _LOAD_ATTR_WITH_HINT: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *null = NULL; + attr = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_ATTR_SLOT: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *null = NULL; + attr = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_ATTR_CLASS: { + break; + } + + case _LOAD_ATTR_CLASS: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *null = NULL; + attr = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_ATTR_PROPERTY_FRAME: { + _PyInterpreterFrame *new_frame; + new_frame = sym_new_not_null(ctx); + stack_pointer[-1] = (_Py_UopsSymbol *)new_frame; + break; + } + + /* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable 
micro-op for tier 2 */ + + case _GUARD_DORV_NO_DICT: { + break; + } + + case _STORE_ATTR_INSTANCE_VALUE: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_ATTR_WITH_HINT: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _STORE_ATTR_SLOT: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COMPARE_OP: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COMPARE_OP_FLOAT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COMPARE_OP_INT: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COMPARE_OP_STR: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _IS_OP: { + _Py_UopsSymbol *b; + b = sym_new_not_null(ctx); + stack_pointer[-2] = b; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CONTAINS_OP: { + _Py_UopsSymbol *b; + b = sym_new_not_null(ctx); + stack_pointer[-2] = b; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CONTAINS_OP_SET: { + _Py_UopsSymbol *b; + b = sym_new_not_null(ctx); + stack_pointer[-2] = b; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CONTAINS_OP_DICT: { + _Py_UopsSymbol *b; + b = sym_new_not_null(ctx); + stack_pointer[-2] = b; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_EG_MATCH: { + _Py_UopsSymbol *rest; + _Py_UopsSymbol *match; + rest = sym_new_not_null(ctx); + match = sym_new_not_null(ctx); + stack_pointer[-2] = rest; + stack_pointer[-1] = match; + break; + } + + case 
_CHECK_EXC_MATCH: { + _Py_UopsSymbol *b; + b = sym_new_not_null(ctx); + stack_pointer[-1] = b; + break; + } + + case _IMPORT_NAME: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _IMPORT_FROM: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + /* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + case _IS_NONE: { + _Py_UopsSymbol *b; + b = sym_new_not_null(ctx); + stack_pointer[-1] = b; + break; + } + + case _GET_LEN: { + _Py_UopsSymbol *len; + len = sym_new_not_null(ctx); + stack_pointer[0] = len; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _MATCH_CLASS: { + _Py_UopsSymbol *attrs; + attrs = sym_new_not_null(ctx); + stack_pointer[-3] = attrs; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _MATCH_MAPPING: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _MATCH_SEQUENCE: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _MATCH_KEYS: { + _Py_UopsSymbol *values_or_none; + values_or_none = sym_new_not_null(ctx); + stack_pointer[0] = values_or_none; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GET_ITER: { + _Py_UopsSymbol *iter; + iter = sym_new_not_null(ctx); + stack_pointer[-1] = iter; + break; + } + + case _GET_YIELD_FROM_ITER: { + _Py_UopsSymbol *iter; + iter = sym_new_not_null(ctx); + stack_pointer[-1] = iter; + break; + } + + /* _FOR_ITER is not a viable micro-op for tier 2 */ + + case _FOR_ITER_TIER_TWO: { + _Py_UopsSymbol *next; + next = sym_new_not_null(ctx); + 
stack_pointer[0] = next; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + /* _INSTRUMENTED_FOR_ITER is not a viable micro-op for tier 2 */ + + case _ITER_CHECK_LIST: { + break; + } + + /* _ITER_JUMP_LIST is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_LIST: { + break; + } + + case _ITER_NEXT_LIST: { + _Py_UopsSymbol *next; + next = sym_new_not_null(ctx); + stack_pointer[0] = next; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _ITER_CHECK_TUPLE: { + break; + } + + /* _ITER_JUMP_TUPLE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_TUPLE: { + break; + } + + case _ITER_NEXT_TUPLE: { + _Py_UopsSymbol *next; + next = sym_new_not_null(ctx); + stack_pointer[0] = next; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _ITER_CHECK_RANGE: { + break; + } + + /* _ITER_JUMP_RANGE is not a viable micro-op for tier 2 */ + + case _GUARD_NOT_EXHAUSTED_RANGE: { + break; + } + + case _ITER_NEXT_RANGE: { + _Py_UopsSymbol *next; + next = sym_new_not_null(ctx); + stack_pointer[0] = next; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _FOR_ITER_GEN_FRAME: { + /* We are about to hit the end of the trace */ + ctx->done = true; + break; + } + + case _LOAD_SPECIAL: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *self_or_null; + attr = sym_new_not_null(ctx); + self_or_null = sym_new_not_null(ctx); + stack_pointer[-1] = attr; + stack_pointer[0] = self_or_null; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _WITH_EXCEPT_START: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _PUSH_EXC_INFO: { + _Py_UopsSymbol *prev_exc; + _Py_UopsSymbol *new_exc; + prev_exc = sym_new_not_null(ctx); + new_exc = sym_new_not_null(ctx); + stack_pointer[-1] = prev_exc; + stack_pointer[0] = new_exc; + stack_pointer += 1; + 
assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: { + break; + } + + case _GUARD_KEYS_VERSION: { + break; + } + + case _LOAD_ATTR_METHOD_WITH_VALUES: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *self = NULL; + attr = sym_new_not_null(ctx); + self = sym_new_not_null(ctx); + stack_pointer[-1] = attr; + stack_pointer[0] = self; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_ATTR_METHOD_NO_DICT: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *self = NULL; + attr = sym_new_not_null(ctx); + self = sym_new_not_null(ctx); + stack_pointer[-1] = attr; + stack_pointer[0] = self; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + _Py_UopsSymbol *attr; + attr = sym_new_not_null(ctx); + stack_pointer[-1] = attr; + break; + } + + case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + _Py_UopsSymbol *attr; + attr = sym_new_not_null(ctx); + stack_pointer[-1] = attr; + break; + } + + case _CHECK_ATTR_METHOD_LAZY_DICT: { + break; + } + + case _LOAD_ATTR_METHOD_LAZY_DICT: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *self = NULL; + attr = sym_new_not_null(ctx); + self = sym_new_not_null(ctx); + stack_pointer[-1] = attr; + stack_pointer[0] = self; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _MAYBE_EXPAND_METHOD: { + _Py_UopsSymbol *func; + _Py_UopsSymbol *maybe_self; + _Py_UopsSymbol **args; + args = &stack_pointer[-oparg]; + func = sym_new_not_null(ctx); + maybe_self = sym_new_not_null(ctx); + for (int _i = oparg; --_i >= 0;) { + args[_i] = sym_new_not_null(ctx); + } + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = maybe_self; + break; + } + + /* _DO_CALL is not a viable micro-op for tier 2 */ + + /* _MONITOR_CALL is not a viable micro-op for tier 2 */ + + case _PY_FRAME_GENERAL: { + _Py_UopsSymbol **args; + _Py_UopsSymbol *self_or_null; + _Py_UopsSymbol *callable; + _Py_UOpsAbstractFrame *new_frame; + args = 
&stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + /* The _Py_UOpsAbstractFrame design assumes that we can copy arguments across directly */ + (void)callable; + (void)self_or_null; + (void)args; + new_frame = NULL; + ctx->done = true; + stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_FUNCTION_VERSION: { + break; + } + + case _CHECK_METHOD_VERSION: { + break; + } + + case _EXPAND_METHOD: { + _Py_UopsSymbol *method; + _Py_UopsSymbol *self; + method = sym_new_not_null(ctx); + self = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = method; + stack_pointer[-1 - oparg] = self; + break; + } + + case _CHECK_IS_NOT_PY_CALLABLE: { + break; + } + + case _CALL_NON_PY_GENERAL: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + break; + } + + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UopsSymbol *func; + _Py_UopsSymbol *self; + func = sym_new_not_null(ctx); + self = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + break; + } + + case _CHECK_PEP_523: { + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS: { + break; + } + + case _CHECK_STACK_SPACE: { + break; + } + + case _INIT_CALL_PY_EXACT_ARGS: { + _Py_UopsSymbol **args; + _Py_UopsSymbol *self_or_null; + _Py_UopsSymbol *callable; + _Py_UOpsAbstractFrame *new_frame; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + (void)callable; + PyCodeObject *co = NULL; + assert((this_instr + 2)->opcode == _PUSH_FRAME); + uint64_t push_operand = (this_instr + 2)->operand; + if (push_operand & 1) { + co = (PyCodeObject *)(push_operand & ~1); + DPRINTF(3, "code=%p ", co); + 
assert(PyCode_Check(co)); + } + else { + PyFunctionObject *func = (PyFunctionObject *)push_operand; + DPRINTF(3, "func=%p ", func); + if (func == NULL) { + DPRINTF(3, "\n"); + DPRINTF(1, "Missing function\n"); + ctx->done = true; + break; + } + co = (PyCodeObject *)func->func_code; + DPRINTF(3, "code=%p ", co); + } + assert(self_or_null != NULL); + assert(args != NULL); + new_frame = frame_new(ctx, co, 0, NULL, 0); + stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _PUSH_FRAME: { + _Py_UOpsAbstractFrame *new_frame; + new_frame = (_Py_UOpsAbstractFrame *)stack_pointer[-1]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + ctx->frame->stack_pointer = stack_pointer; + ctx->frame = new_frame; + ctx->curr_frame_depth++; + stack_pointer = new_frame->stack_pointer; + co = get_code(this_instr); + if (co == NULL) { + // should be about to _EXIT_TRACE anyway + ctx->done = true; + break; + } + break; + } + + case _CALL_TYPE_1: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-3] = res; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_STR_1: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-3] = res; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_TUPLE_1: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-3] = res; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_AND_ALLOCATE_OBJECT: { + _Py_UopsSymbol *self; + _Py_UopsSymbol *init; + _Py_UopsSymbol **args; + self = sym_new_not_null(ctx); + init = sym_new_not_null(ctx); + for (int _i = oparg; --_i >= 0;) { + args[_i] = sym_new_not_null(ctx); + } + stack_pointer[-2 - oparg] = self; + stack_pointer[-1 - oparg] = init; + break; + } + + case _CREATE_INIT_FRAME: { + _Py_UopsSymbol **args; + _Py_UopsSymbol *init; + _Py_UopsSymbol *self; + _Py_UOpsAbstractFrame 
*init_frame; + args = &stack_pointer[-oparg]; + init = stack_pointer[-1 - oparg]; + self = stack_pointer[-2 - oparg]; + (void)self; + (void)init; + (void)args; + init_frame = NULL; + ctx->done = true; + stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)init_frame; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _EXIT_INIT_CHECK: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_BUILTIN_CLASS: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_BUILTIN_O: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_BUILTIN_FAST: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_LEN: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_ISINSTANCE: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_LIST_APPEND: { + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_METHOD_DESCRIPTOR_O: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + _Py_UopsSymbol *res; 
+ res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_METHOD_DESCRIPTOR_NOARGS: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_METHOD_DESCRIPTOR_FAST: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + /* _INSTRUMENTED_CALL_KW is not a viable micro-op for tier 2 */ + + /* _DO_CALL_KW is not a viable micro-op for tier 2 */ + + case _PY_FRAME_KW: { + _Py_UopsSymbol *kwnames; + _Py_UopsSymbol **args; + _Py_UopsSymbol *self_or_null; + _Py_UopsSymbol *callable; + _Py_UOpsAbstractFrame *new_frame; + kwnames = stack_pointer[-1]; + args = &stack_pointer[-1 - oparg]; + self_or_null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + (void)callable; + (void)self_or_null; + (void)args; + (void)kwnames; + new_frame = NULL; + ctx->done = true; + stack_pointer[-3 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_FUNCTION_VERSION_KW: { + break; + } + + case _CHECK_METHOD_VERSION_KW: { + break; + } + + case _EXPAND_METHOD_KW: { + _Py_UopsSymbol *method; + _Py_UopsSymbol *self; + _Py_UopsSymbol *kwnames; + method = sym_new_not_null(ctx); + self = sym_new_not_null(ctx); + kwnames = sym_new_not_null(ctx); + stack_pointer[-3 - oparg] = method; + stack_pointer[-2 - oparg] = self; + stack_pointer[-1] = kwnames; + break; + } + + case _CHECK_IS_NOT_PY_CALLABLE_KW: { + break; + } + + case _CALL_KW_NON_PY: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-3 - oparg] = res; + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + /* _INSTRUMENTED_CALL_FUNCTION_EX is not a viable micro-op for 
tier 2 */ + + /* __DO_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ + + case _MAKE_FUNCTION: { + _Py_UopsSymbol *func; + func = sym_new_not_null(ctx); + stack_pointer[-1] = func; + break; + } + + case _SET_FUNCTION_ATTRIBUTE: { + _Py_UopsSymbol *func_st; + func_st = sym_new_not_null(ctx); + stack_pointer[-2] = func_st; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _RETURN_GENERATOR: { + _Py_UopsSymbol *res; + ctx->frame->stack_pointer = stack_pointer; + frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + res = sym_new_unknown(ctx); + /* Stack space handling */ + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + co = get_code(this_instr); + if (co == NULL) { + // might be impossible, but bailing is still safe + ctx->done = true; + } + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BUILD_SLICE: { + _Py_UopsSymbol *slice; + slice = sym_new_not_null(ctx); + stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; + stack_pointer += -1 - ((oparg == 3) ? 
1 : 0); + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CONVERT_VALUE: { + _Py_UopsSymbol *result; + result = sym_new_not_null(ctx); + stack_pointer[-1] = result; + break; + } + + case _FORMAT_SIMPLE: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-1] = res; + break; + } + + case _FORMAT_WITH_SPEC: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COPY: { + _Py_UopsSymbol *bottom; + _Py_UopsSymbol *top; + bottom = stack_pointer[-1 - (oparg-1)]; + assert(oparg > 0); + top = bottom; + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _BINARY_OP: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _SWAP: { + _Py_UopsSymbol *top; + _Py_UopsSymbol *bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-2 - (oparg-2)]; + stack_pointer[-2 - (oparg-2)] = top; + stack_pointer[-1] = bottom; + break; + } + + /* _INSTRUMENTED_LINE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_INSTRUCTION is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_JUMP_FORWARD is not a viable micro-op for tier 2 */ + + /* _MONITOR_JUMP_BACKWARD is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NONE is not a viable micro-op for tier 2 */ + + /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ + + case _GUARD_IS_TRUE_POP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_IS_FALSE_POP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_IS_NONE_POP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + 
break; + } + + case _GUARD_IS_NOT_NONE_POP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _JUMP_TO_TOP: { + ctx->done = true; + break; + } + + case _SET_IP: { + break; + } + + case _CHECK_STACK_SPACE_OPERAND: { + break; + } + + case _SAVE_RETURN_OFFSET: { + break; + } + + case _EXIT_TRACE: { + PyObject *exit_p = (PyObject *)this_instr->operand; + (void)exit_p; + ctx->done = true; + break; + } + + case _CHECK_VALIDITY: { + break; + } + + case _LOAD_CONST_INLINE: { + _Py_UopsSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_INLINE_BORROW: { + _Py_UopsSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[-1] = value; + break; + } + + case _LOAD_CONST_INLINE_WITH_NULL: { + _Py_UopsSymbol *value; + _Py_UopsSymbol *null; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + null = sym_new_null(ctx); + stack_pointer[0] = value; + stack_pointer[1] = null; + stack_pointer += 2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: { + _Py_UopsSymbol *value; + _Py_UopsSymbol *null; + PyObject *ptr = (PyObject *)this_instr->operand; + value = sym_new_const(ctx, ptr); + null = sym_new_null(ctx); + stack_pointer[0] = value; + stack_pointer[1] = null; + stack_pointer += 2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_FUNCTION: { + break; + } + + case _INTERNAL_INCREMENT_OPT_COUNTER: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _DYNAMIC_EXIT: { + break; + } + + case _START_EXECUTOR: { + break; + } + + 
case _FATAL_ERROR: { + break; + } + + case _CHECK_VALIDITY_AND_SET_IP: { + break; + } + + case _DEOPT: { + break; + } + + case _ERROR_POP_N: { + stack_pointer += -oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _TIER2_RESUME_CHECK: { + break; + } + diff --git a/Tools/cases_generator/partial_evaluator_generator.py b/Tools/cases_generator/partial_evaluator_generator.py new file mode 100644 index 00000000000000..67a96cc899fdf1 --- /dev/null +++ b/Tools/cases_generator/partial_evaluator_generator.py @@ -0,0 +1,234 @@ +"""Generate the cases for the tier 2 partial evaluator. +Reads the instruction definitions from bytecodes.c and partial_evaluator_bytecodes.c +Writes the cases to partial_evaluator_cases.c.h, which is #included in Python/optimizer_analysis.c. +""" + +import argparse + +from analyzer import ( + Analysis, + Instruction, + Uop, + analyze_files, + StackItem, + analysis_error, +) +from generators_common import ( + DEFAULT_INPUT, + ROOT, + write_header, + Emitter, +) +from cwriter import CWriter +from typing import TextIO, Iterator +from lexer import Token +from stack import Local, Stack, StackError + +DEFAULT_OUTPUT = ROOT / "Python/partial_evaluator_cases.c.h" +DEFAULT_ABSTRACT_INPUT = (ROOT / "Python/partial_evaluator_bytecodes.c").absolute().as_posix() + + +def validate_uop(override: Uop, uop: Uop) -> None: + # To do + pass + + +def type_name(var: StackItem) -> str: + if var.is_array(): + return f"_Py_UopsSymbol **" + if var.type: + return var.type + return f"_Py_UopsSymbol *" + + +def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: + variables = {"unused"} + if not skip_inputs: + for var in reversed(uop.stack.inputs): + if var.name not in variables: + variables.add(var.name) + if var.condition: + out.emit(f"{type_name(var)}{var.name} = NULL;\n") + else: + out.emit(f"{type_name(var)}{var.name};\n") + for var in uop.stack.outputs: + if var.peek: + continue + if var.name not in variables: + variables.add(var.name) + if var.condition: + 
out.emit(f"{type_name(var)}{var.name} = NULL;\n") + else: + out.emit(f"{type_name(var)}{var.name};\n") + + +def decref_inputs( + out: CWriter, + tkn: Token, + tkn_iter: Iterator[Token], + uop: Uop, + stack: Stack, + inst: Instruction | None, +) -> None: + next(tkn_iter) + next(tkn_iter) + next(tkn_iter) + out.emit_at("", tkn) + + +def emit_default(out: CWriter, uop: Uop) -> None: + for i, var in enumerate(uop.stack.outputs): + if var.name != "unused" and not var.peek: + if var.is_array(): + out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n") + out.emit(f"{var.name}[_i] = sym_new_not_null(ctx);\n") + out.emit("}\n") + elif var.name == "null": + out.emit(f"{var.name} = sym_new_null(ctx);\n") + else: + out.emit(f"{var.name} = sym_new_not_null(ctx);\n") + + +class OptimizerEmitter(Emitter): + pass + + +def write_uop( + override: Uop | None, + uop: Uop, + out: CWriter, + stack: Stack, + debug: bool, + skip_inputs: bool, +) -> None: + locals: dict[str, Local] = {} + try: + prototype = override if override else uop + is_override = override is not None + out.start_line() + for var in reversed(prototype.stack.inputs): + code, local = stack.pop(var, extract_bits=True) + if not skip_inputs: + out.emit(code) + if local.defined: + locals[local.name] = local + out.emit(stack.define_output_arrays(prototype.stack.outputs)) + if debug: + args = [] + for var in prototype.stack.inputs: + if not var.peek or is_override: + args.append(var.name) + out.emit(f'DEBUG_PRINTF({", ".join(args)});\n') + if override: + for cache in uop.caches: + if cache.name != "unused": + if cache.size == 4: + type = cast = "PyObject *" + else: + type = f"uint{cache.size*16}_t " + cast = f"uint{cache.size*16}_t" + out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n") + if override: + emitter = OptimizerEmitter(out) + emitter.emit_tokens(override, stack, None) + else: + emit_default(out, uop) + + for var in prototype.stack.outputs: + if var.name in locals: + local = locals[var.name] + else: + 
local = Local.local(var) + stack.push(local) + out.start_line() + stack.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=True) + except StackError as ex: + raise analysis_error(ex.args[0], uop.body[0]) + + +SKIPS = ("_EXTENDED_ARG",) + + +def generate_abstract_interpreter( + filenames: list[str], + abstract: Analysis, + base: Analysis, + outfile: TextIO, + debug: bool, +) -> None: + write_header(__file__, filenames, outfile) + out = CWriter(outfile, 2, False) + out.emit("\n") + base_uop_names = set([uop.name for uop in base.uops.values()]) + for abstract_uop_name in abstract.uops: + assert ( + abstract_uop_name in base_uop_names + ), f"All abstract uops should override base uops, but {abstract_uop_name} is not." + + for uop in base.uops.values(): + override: Uop | None = None + if uop.name in abstract.uops: + override = abstract.uops[uop.name] + validate_uop(override, uop) + if uop.properties.tier == 1: + continue + if uop.replicates: + continue + if uop.is_super(): + continue + if not uop.is_viable(): + out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n") + continue + out.emit(f"case {uop.name}: {{\n") + if override: + declare_variables(override, out, skip_inputs=False) + else: + declare_variables(uop, out, skip_inputs=True) + stack = Stack() + write_uop(override, uop, out, stack, debug, skip_inputs=(override is None)) + out.start_line() + out.emit("break;\n") + out.emit("}") + out.emit("\n\n") + + +def generate_tier2_abstract_from_files( + filenames: list[str], outfilename: str, debug: bool = False +) -> None: + assert len(filenames) == 2, "Need a base file and an abstract cases file." 
+ base = analyze_files([filenames[0]]) + abstract = analyze_files([filenames[1]]) + with open(outfilename, "w") as outfile: + generate_abstract_interpreter(filenames, abstract, base, outfile, debug) + + +arg_parser = argparse.ArgumentParser( + description="Generate the code for the tier 2 interpreter.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) + +arg_parser.add_argument( + "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT +) + + +arg_parser.add_argument("input", nargs="*", help="Abstract interpreter definition file") + +arg_parser.add_argument( + "base", nargs="*", help="The base instruction definition file(s)" +) + +arg_parser.add_argument("-d", "--debug", help="Insert debug calls", action="store_true") + +if __name__ == "__main__": + args = arg_parser.parse_args() + if not args.input: + args.base.append(DEFAULT_INPUT) + args.input.append(DEFAULT_ABSTRACT_INPUT) + else: + args.base.append(args.input[-1]) + args.input.pop() + abstract = analyze_files(args.input) + base = analyze_files(args.base) + with open(args.output, "w") as outfile: + generate_abstract_interpreter(args.input, abstract, base, outfile, args.debug) From dd5cfe79f145907f64559ec1640491ebfc8b86e6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 3 Sep 2024 02:23:10 +0800 Subject: [PATCH 02/18] generate the pe from the base optimizer --- Include/internal/pycore_optimizer.h | 2 + Makefile.pre.in | 1 + Python/optimizer_analysis.c | 19 +- Python/optimizer_symbols.c | 11 + Python/partial_evaluator_bytecodes.c | 196 +----- Python/partial_evaluator_cases.c.h | 589 ++++++++++++++++-- .../partial_evaluator_generator.py | 5 +- 7 files changed, 580 insertions(+), 243 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 2d648d79655954..54a08de252b4d0 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -153,6 +153,7 @@ struct 
_Py_UopsSymbol { PyObject *const_val; // Owned reference (!) unsigned int type_version; // currently stores type version bool is_static; // used for binding-time analysis + int locals_idx; }; #define UOP_FORMAT_TARGET 0 @@ -242,6 +243,7 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version); +extern void _Py_uop_sym_set_locals_idx(_Py_UopsSymbol *sym, int locals_idx); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); diff --git a/Makefile.pre.in b/Makefile.pre.in index 4f214eed60059e..4c72011a07b358 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2001,6 +2001,7 @@ regen-optimizer-cases: regen-partial-evaluator-cases: $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/partial_evaluator_generator.py \ -o $(srcdir)/Python/partial_evaluator_cases.c.h.new \ + $(srcdir)/Python/optimizer_bytecodes.c \ $(srcdir)/Python/partial_evaluator_bytecodes.c \ $(srcdir)/Python/bytecodes.c $(UPDATE_FILE) $(srcdir)/Python/partial_evaluator_cases.c.h $(srcdir)/Python/partial_evaluator_cases.c.h.new diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index caca5c397143c6..b871010ee6a141 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -324,6 +324,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) #define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) +#define sym_set_locals_idx _Py_uop_sym_set_locals_idx 
#define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness #define frame_new _Py_uop_frame_new @@ -565,17 +566,9 @@ partial_evaluate_uops( /* Either reached the end or cannot optimize further, but there * would be no benefit in retrying later */ _Py_uop_abstractcontext_fini(ctx); - if (first_valid_check_stack != NULL) { - assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE); - assert(max_space > 0); - assert(max_space <= INT_MAX); - assert(max_space <= INT32_MAX); - first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND; - first_valid_check_stack->operand = max_space; - } return trace_len; - error: +error: DPRINTF(3, "\n"); DPRINTF(1, "Encountered error in pe's abstract interpreter\n"); if (opcode <= MAX_UOP_ID) { @@ -698,6 +691,14 @@ _Py_uop_analyze_and_optimize( return length; } + length = partial_evaluate_uops( + _PyFrame_GetCode(frame), buffer, + length, curr_stacklen, dependencies); + + if (length <= 0) { + return length; + } + length = remove_unneeded_uops(buffer, length); assert(length > 0); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 9b0be091c5666d..3962ced2dbecd2 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -78,6 +78,7 @@ sym_new(_Py_UOpsContext *ctx) self->const_val = NULL; self->type_version = 0; self->is_static = false; + self->locals_idx = -1; return self; } @@ -300,6 +301,12 @@ _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version) return _Py_uop_sym_get_type_version(sym) == version; } +void +_Py_uop_sym_set_locals_idx(_Py_UopsSymbol *sym, int locals_idx) +{ + assert(locals_idx >= 0); + sym->locals_idx = locals_idx; +} int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym) @@ -370,6 +377,10 @@ _Py_uop_frame_new( frame->locals[i] = local; } + for (int i = 0; i < co->co_nlocalsplus; i++) { + frame->locals[i]->locals_idx = i; + } + // Initialize the stack as well for (int i = 0; i < curr_stackentries; i++) { diff --git 
a/Python/partial_evaluator_bytecodes.c b/Python/partial_evaluator_bytecodes.c index 30c20537afcdce..2364db07f342fa 100644 --- a/Python/partial_evaluator_bytecodes.c +++ b/Python/partial_evaluator_bytecodes.c @@ -65,7 +65,7 @@ dummy_func(void) { // BEGIN BYTECODES // - op(_LOAD_FAST_CHECK, (-- value)) { + override op(_LOAD_FAST_CHECK, (-- value)) { value = GETLOCAL(oparg); // We guarantee this will error - just bail and don't optimize it. if (sym_is_null(value)) { @@ -73,209 +73,31 @@ dummy_func(void) { } } - op(_LOAD_FAST, (-- value)) { + override op(_LOAD_FAST, (-- value)) { value = GETLOCAL(oparg); } - op(_LOAD_FAST_AND_CLEAR, (-- value)) { + override op(_LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); _Py_UopsSymbol *temp = sym_new_null(ctx); GETLOCAL(oparg) = temp; } - op(_STORE_FAST, (value --)) { + override op(_STORE_FAST, (value --)) { GETLOCAL(oparg) = value; + sym_set_locals_idx(value, oparg); } - op(_PUSH_NULL, (-- res)) { + override op(_PUSH_NULL, (-- res)) { res = sym_new_null(ctx); } - op(_LOAD_CONST, (-- value)) { - // Should never happen. This should be run after the specializer pass. + override op(_LOAD_CONST, (-- value)) { + // Should've all been converted by specializer. 
Py_UNREACHABLE(); } - op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { - value = sym_new_const(ctx, ptr); - } - - op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { - value = sym_new_const(ctx, ptr); - } - - op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { - value = sym_new_const(ctx, ptr); - null = sym_new_null(ctx); - } - - op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { - value = sym_new_const(ctx, ptr); - null = sym_new_null(ctx); - } - - op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { - assert(oparg > 0); - top = bottom; - } - - op(_SWAP, (bottom, unused[oparg-2], top -- - top, unused[oparg-2], bottom)) { - } - - op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { - int argcount = oparg; - - (void)callable; - - PyCodeObject *co = NULL; - assert((this_instr + 2)->opcode == _PUSH_FRAME); - uint64_t push_operand = (this_instr + 2)->operand; - if (push_operand & 1) { - co = (PyCodeObject *)(push_operand & ~1); - DPRINTF(3, "code=%p ", co); - assert(PyCode_Check(co)); - } - else { - PyFunctionObject *func = (PyFunctionObject *)push_operand; - DPRINTF(3, "func=%p ", func); - if (func == NULL) { - DPRINTF(3, "\n"); - DPRINTF(1, "Missing function\n"); - ctx->done = true; - break; - } - co = (PyCodeObject *)func->func_code; - DPRINTF(3, "code=%p ", co); - } - - assert(self_or_null != NULL); - assert(args != NULL); - new_frame = frame_new(ctx, co, 0, NULL, 0); - } - - op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { - /* The _Py_UOpsAbstractFrame design assumes that we can copy arguments across directly */ - (void)callable; - (void)self_or_null; - (void)args; - new_frame = NULL; - ctx->done = true; - } - - op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _Py_UOpsAbstractFrame *)) { - (void)callable; - (void)self_or_null; - (void)args; - (void)kwnames; - new_frame = NULL; - ctx->done = true; - 
} - - op(_CREATE_INIT_FRAME, (self, init, args[oparg] -- init_frame: _Py_UOpsAbstractFrame *)) { - (void)self; - (void)init; - (void)args; - init_frame = NULL; - ctx->done = true; - } - - op(_RETURN_VALUE, (retval -- res)) { - SYNC_SP(); - ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); - stack_pointer = ctx->frame->stack_pointer; - res = retval; - - /* Stack space handling */ - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; - - co = get_code(this_instr); - if (co == NULL) { - // might be impossible, but bailing is still safe - ctx->done = true; - } - } - - op(_RETURN_GENERATOR, ( -- res)) { - SYNC_SP(); - ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); - stack_pointer = ctx->frame->stack_pointer; - res = sym_new_unknown(ctx); - - /* Stack space handling */ - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; - - co = get_code(this_instr); - if (co == NULL) { - // might be impossible, but bailing is still safe - ctx->done = true; - } - } - - op(_YIELD_VALUE, (unused -- res)) { - res = sym_new_unknown(ctx); - } - - op(_FOR_ITER_GEN_FRAME, ( -- )) { - /* We are about to hit the end of the trace */ - ctx->done = true; - } - - op(_SEND_GEN_FRAME, ( -- )) { - // We are about to hit the end of the trace: - ctx->done = true; - } - - op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) { - SYNC_SP(); - ctx->frame->stack_pointer = stack_pointer; - ctx->frame = new_frame; - ctx->curr_frame_depth++; - stack_pointer = new_frame->stack_pointer; - co = get_code(this_instr); - if (co == NULL) { - // should be about to _EXIT_TRACE anyway - ctx->done = true; - break; - } - } - - op(_UNPACK_SEQUENCE, (seq -- values[oparg])) { - /* This has to be done manually */ - 
(void)seq; - for (int i = 0; i < oparg; i++) { - values[i] = sym_new_unknown(ctx); - } - } - - op(_UNPACK_EX, (seq -- values[oparg & 0xFF], unused, unused[oparg >> 8])) { - /* This has to be done manually */ - (void)seq; - int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1; - for (int i = 0; i < totalargs; i++) { - values[i] = sym_new_unknown(ctx); - } - } - - op(_JUMP_TO_TOP, (--)) { - ctx->done = true; - } - - op(_EXIT_TRACE, (exit_p/4 --)) { - (void)exit_p; - ctx->done = true; + override op (_CHECK_STACK_SPACE_OPERAND, ( -- )) { } // END BYTECODES // diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index 3b05eadd290111..ddf5f18ee054fa 100644 --- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -1,6 +1,6 @@ // This file is generated by Tools/cases_generator/partial_evaluator_generator.py // from: -// Python/partial_evaluator_bytecodes.c +// Python/optimizer_bytecodes.c, Python/partial_evaluator_bytecodes.c // Do not edit! case _NOP: { @@ -58,7 +58,7 @@ case _LOAD_CONST: { _Py_UopsSymbol *value; - // Should never happen. This should be run after the specializer pass. + // Should've all been converted by specializer. 
Py_UNREACHABLE(); stack_pointer[0] = value; stack_pointer += 1; @@ -70,6 +70,7 @@ _Py_UopsSymbol *value; value = stack_pointer[-1]; GETLOCAL(oparg) = value; + sym_set_locals_idx(value, oparg); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -114,40 +115,72 @@ } case _TO_BOOL: { + _Py_UopsSymbol *value; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (!optimize_to_bool(this_instr, ctx, value, &res)) { + res = sym_new_type(ctx, &PyBool_Type); + } stack_pointer[-1] = res; break; } case _TO_BOOL_BOOL: { + _Py_UopsSymbol *value; + _Py_UopsSymbol *res; + value = stack_pointer[-1]; + if (!optimize_to_bool(this_instr, ctx, value, &res)) { + sym_set_type(value, &PyBool_Type); + res = value; + } + stack_pointer[-1] = res; break; } case _TO_BOOL_INT: { + _Py_UopsSymbol *value; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (!optimize_to_bool(this_instr, ctx, value, &res)) { + sym_set_type(value, &PyLong_Type); + res = sym_new_type(ctx, &PyBool_Type); + } stack_pointer[-1] = res; break; } case _TO_BOOL_LIST: { + _Py_UopsSymbol *value; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (!optimize_to_bool(this_instr, ctx, value, &res)) { + sym_set_type(value, &PyList_Type); + res = sym_new_type(ctx, &PyBool_Type); + } stack_pointer[-1] = res; break; } case _TO_BOOL_NONE: { + _Py_UopsSymbol *value; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (!optimize_to_bool(this_instr, ctx, value, &res)) { + sym_set_const(value, Py_None); + res = sym_new_const(ctx, Py_False); + } stack_pointer[-1] = res; break; } case _TO_BOOL_STR: { + _Py_UopsSymbol *value; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + if (!optimize_to_bool(this_instr, ctx, value, &res)) { + res = sym_new_type(ctx, &PyBool_Type); + sym_set_type(value, &PyUnicode_Type); + } stack_pointer[-1] = res; break; } @@ -167,6 +200,25 
@@ } case _GUARD_BOTH_INT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_matches_type(left, &PyLong_Type)) { + if (sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + REPLACE_OP(this_instr, _GUARD_TOS_INT, 0, 0); + } + } + else { + if (sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _GUARD_NOS_INT, 0, 0); + } + } + sym_set_type(left, &PyLong_Type); + sym_set_type(right, &PyLong_Type); break; } @@ -179,8 +231,29 @@ } case _BINARY_OP_MULTIPLY_INT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { + assert(PyLong_CheckExact(sym_get_const(left))); + assert(PyLong_CheckExact(sym_get_const(right))); + PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(left), + (PyLongObject *)sym_get_const(right)); + if (temp == NULL) { + goto error; + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + // TODO gh-115506: + // replace opcode with constant propagated one and add tests! 
+ } + else { + res = sym_new_type(ctx, &PyLong_Type); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -188,8 +261,29 @@ } case _BINARY_OP_ADD_INT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { + assert(PyLong_CheckExact(sym_get_const(left))); + assert(PyLong_CheckExact(sym_get_const(right))); + PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), + (PyLongObject *)sym_get_const(right)); + if (temp == NULL) { + goto error; + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + // TODO gh-115506: + // replace opcode with constant propagated one and add tests! + } + else { + res = sym_new_type(ctx, &PyLong_Type); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -197,8 +291,29 @@ } case _BINARY_OP_SUBTRACT_INT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { + assert(PyLong_CheckExact(sym_get_const(left))); + assert(PyLong_CheckExact(sym_get_const(right))); + PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(left), + (PyLongObject *)sym_get_const(right)); + if (temp == NULL) { + goto error; + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + // TODO gh-115506: + // replace opcode with constant propagated one and add tests! 
+ } + else { + res = sym_new_type(ctx, &PyLong_Type); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -206,6 +321,25 @@ } case _GUARD_BOTH_FLOAT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_matches_type(left, &PyFloat_Type)) { + if (sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + REPLACE_OP(this_instr, _GUARD_TOS_FLOAT, 0, 0); + } + } + else { + if (sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _GUARD_NOS_FLOAT, 0, 0); + } + } + sym_set_type(left, &PyFloat_Type); + sym_set_type(right, &PyFloat_Type); break; } @@ -218,8 +352,30 @@ } case _BINARY_OP_MULTIPLY_FLOAT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { + assert(PyFloat_CheckExact(sym_get_const(left))); + assert(PyFloat_CheckExact(sym_get_const(right))); + PyObject *temp = PyFloat_FromDouble( + PyFloat_AS_DOUBLE(sym_get_const(left)) * + PyFloat_AS_DOUBLE(sym_get_const(right))); + if (temp == NULL) { + goto error; + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + // TODO gh-115506: + // replace opcode with constant propagated one and update tests! 
+ } + else { + res = sym_new_type(ctx, &PyFloat_Type); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -227,8 +383,30 @@ } case _BINARY_OP_ADD_FLOAT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { + assert(PyFloat_CheckExact(sym_get_const(left))); + assert(PyFloat_CheckExact(sym_get_const(right))); + PyObject *temp = PyFloat_FromDouble( + PyFloat_AS_DOUBLE(sym_get_const(left)) + + PyFloat_AS_DOUBLE(sym_get_const(right))); + if (temp == NULL) { + goto error; + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + // TODO gh-115506: + // replace opcode with constant propagated one and update tests! + } + else { + res = sym_new_type(ctx, &PyFloat_Type); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -236,8 +414,30 @@ } case _BINARY_OP_SUBTRACT_FLOAT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { + assert(PyFloat_CheckExact(sym_get_const(left))); + assert(PyFloat_CheckExact(sym_get_const(right))); + PyObject *temp = PyFloat_FromDouble( + PyFloat_AS_DOUBLE(sym_get_const(left)) - + PyFloat_AS_DOUBLE(sym_get_const(right))); + if (temp == NULL) { + goto error; + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + // TODO gh-115506: + // replace opcode with constant propagated one and update tests! 
+ } + else { + res = sym_new_type(ctx, &PyFloat_Type); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -245,12 +445,37 @@ } case _GUARD_BOTH_UNICODE: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_matches_type(left, &PyUnicode_Type) && + sym_matches_type(right, &PyUnicode_Type)) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + sym_set_type(left, &PyUnicode_Type); + sym_set_type(left, &PyUnicode_Type); break; } case _BINARY_OP_ADD_UNICODE: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyUnicode_Type) && sym_matches_type(right, &PyUnicode_Type)) { + PyObject *temp = PyUnicode_Concat(sym_get_const(left), sym_get_const(right)); + if (temp == NULL) { + goto error; + } + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + } + else { + res = sym_new_type(ctx, &PyUnicode_Type); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -328,8 +553,15 @@ } case _BINARY_SUBSCR_INIT_CALL: { - _PyInterpreterFrame *new_frame; - new_frame = sym_new_not_null(ctx); + _Py_UopsSymbol *sub; + _Py_UopsSymbol *container; + _Py_UOpsAbstractFrame *new_frame; + sub = stack_pointer[-1]; + container = stack_pointer[-2]; + (void)container; + (void)sub; + new_frame = NULL; + ctx->done = true; stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -780,10 +1012,15 @@ } case _LOAD_ATTR: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *self_or_null = NULL; + owner = stack_pointer[-1]; + (void)owner; attr = sym_new_not_null(ctx); - self_or_null = sym_new_not_null(ctx); + if (oparg & 1) { + self_or_null = sym_new_unknown(ctx); + } stack_pointer[-1] = attr; if (oparg & 1) stack_pointer[0] = self_or_null; stack_pointer 
+= (oparg & 1); @@ -792,6 +1029,28 @@ } case _GUARD_TYPE_VERSION: { + _Py_UopsSymbol *owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand; + assert(type_version); + if (sym_matches_type_version(owner, type_version)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } else { + // add watcher so that whenever the type changes we invalidate this + PyTypeObject *type = _PyType_LookupByVersion(type_version); + // if the type is null, it was not found in the cache (there was a conflict) + // with the key, in which case we can't trust the version + if (type) { + // if the type version was set properly, then add a watcher + // if it wasn't this means that the type version was previously set to something else + // and we set the owner to bottom, so we don't need to add a watcher because we must have + // already added one earlier. + if (sym_set_type_version(owner, type_version)) { + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } + } + } break; } @@ -800,10 +1059,15 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *null = NULL; + owner = stack_pointer[-1]; + uint16_t offset = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); null = sym_new_null(ctx); + (void)offset; + (void)owner; stack_pointer[-1] = attr; if (oparg & 1) stack_pointer[0] = null; stack_pointer += (oparg & 1); @@ -812,14 +1076,51 @@ } case _CHECK_ATTR_MODULE: { + _Py_UopsSymbol *owner; + owner = stack_pointer[-1]; + uint32_t dict_version = (uint32_t)this_instr->operand; + (void)dict_version; + if (sym_is_const(owner)) { + PyObject *cnst = sym_get_const(owner); + if (PyModule_CheckExact(cnst)) { + PyModuleObject *mod = (PyModuleObject *)cnst; + PyObject *dict = mod->md_dict; + uint64_t watched_mutations = get_mutations(dict); + if (watched_mutations < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { + PyDict_Watch(GLOBALS_WATCHER_ID, dict); + _Py_BloomFilter_Add(dependencies, 
dict); + this_instr->opcode = _NOP; + } + } + } break; } case _LOAD_ATTR_MODULE: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *null = NULL; - attr = sym_new_not_null(ctx); + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)this_instr->operand; + (void)index; null = sym_new_null(ctx); + attr = NULL; + if (this_instr[-1].opcode == _NOP) { + // Preceding _CHECK_ATTR_MODULE was removed: mod is const and dict is watched. + assert(sym_is_const(owner)); + PyModuleObject *mod = (PyModuleObject *)sym_get_const(owner); + assert(PyModule_CheckExact(mod)); + PyObject *dict = mod->md_dict; + PyObject *res = convert_global_to_const(this_instr, dict); + if (res != NULL) { + this_instr[-1].opcode = _POP_TOP; + attr = sym_new_const(ctx, res); + } + } + if (attr == NULL) { + /* No conversion made. We don't know what `attr` is. */ + attr = sym_new_not_null(ctx); + } stack_pointer[-1] = attr; if (oparg & 1) stack_pointer[0] = null; stack_pointer += (oparg & 1); @@ -832,10 +1133,15 @@ } case _LOAD_ATTR_WITH_HINT: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *null = NULL; + owner = stack_pointer[-1]; + uint16_t hint = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); null = sym_new_null(ctx); + (void)hint; + (void)owner; stack_pointer[-1] = attr; if (oparg & 1) stack_pointer[0] = null; stack_pointer += (oparg & 1); @@ -844,10 +1150,15 @@ } case _LOAD_ATTR_SLOT: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *null = NULL; + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); null = sym_new_null(ctx); + (void)index; + (void)owner; stack_pointer[-1] = attr; if (oparg & 1) stack_pointer[0] = null; stack_pointer += (oparg & 1); @@ -860,10 +1171,15 @@ } case _LOAD_ATTR_CLASS: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *null = NULL; + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand; attr = sym_new_not_null(ctx); null 
= sym_new_null(ctx); + (void)descr; + (void)owner; stack_pointer[-1] = attr; if (oparg & 1) stack_pointer[0] = null; stack_pointer += (oparg & 1); @@ -872,8 +1188,14 @@ } case _LOAD_ATTR_PROPERTY_FRAME: { - _PyInterpreterFrame *new_frame; - new_frame = sym_new_not_null(ctx); + _Py_UopsSymbol *owner; + _Py_UOpsAbstractFrame *new_frame; + owner = stack_pointer[-1]; + PyObject *fget = (PyObject *)this_instr->operand; + (void)fget; + (void)owner; + new_frame = NULL; + ctx->done = true; stack_pointer[-1] = (_Py_UopsSymbol *)new_frame; break; } @@ -903,8 +1225,19 @@ } case _COMPARE_OP: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + (void)left; + (void)right; + if (oparg & 16) { + res = sym_new_type(ctx, &PyBool_Type); + } + else { + res = _Py_uop_sym_new_not_null(ctx); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -912,8 +1245,14 @@ } case _COMPARE_OP_FLOAT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + (void)left; + (void)right; + res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -921,8 +1260,14 @@ } case _COMPARE_OP_INT: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + (void)left; + (void)right; + res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -930,8 +1275,14 @@ } case _COMPARE_OP_STR: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + (void)left; + (void)right; + res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; 
assert(WITHIN_STACK_BOUNDS()); @@ -939,18 +1290,30 @@ } case _IS_OP: { - _Py_UopsSymbol *b; - b = sym_new_not_null(ctx); - stack_pointer[-2] = b; + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; + _Py_UopsSymbol *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + (void)left; + (void)right; + res = sym_new_type(ctx, &PyBool_Type); + stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _CONTAINS_OP: { - _Py_UopsSymbol *b; - b = sym_new_not_null(ctx); - stack_pointer[-2] = b; + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; + _Py_UopsSymbol *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + (void)left; + (void)right; + res = sym_new_type(ctx, &PyBool_Type); + stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -1141,8 +1504,11 @@ } case _ITER_NEXT_RANGE: { + _Py_UopsSymbol *iter; _Py_UopsSymbol *next; - next = sym_new_not_null(ctx); + iter = stack_pointer[-1]; + next = sym_new_type(ctx, &PyLong_Type); + (void)iter; stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -1156,10 +1522,13 @@ } case _LOAD_SPECIAL: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *self_or_null; + owner = stack_pointer[-1]; + (void)owner; attr = sym_new_not_null(ctx); - self_or_null = sym_new_not_null(ctx); + self_or_null = sym_new_unknown(ctx); stack_pointer[-1] = attr; stack_pointer[0] = self_or_null; stack_pointer += 1; @@ -1197,10 +1566,14 @@ } case _LOAD_ATTR_METHOD_WITH_VALUES: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *self = NULL; + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand; + (void)descr; attr = sym_new_not_null(ctx); - self = sym_new_not_null(ctx); + self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; @@ -1209,10 +1582,14 @@ } case _LOAD_ATTR_METHOD_NO_DICT: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *self = NULL; + owner = 
stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand; + (void)descr; attr = sym_new_not_null(ctx); - self = sym_new_not_null(ctx); + self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; @@ -1239,10 +1616,14 @@ } case _LOAD_ATTR_METHOD_LAZY_DICT: { + _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *self = NULL; + owner = stack_pointer[-1]; + PyObject *descr = (PyObject *)this_instr->operand; + (void)descr; attr = sym_new_not_null(ctx); - self = sym_new_not_null(ctx); + self = owner; stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; @@ -1251,15 +1632,20 @@ } case _MAYBE_EXPAND_METHOD: { + _Py_UopsSymbol **args; + _Py_UopsSymbol *self_or_null; + _Py_UopsSymbol *callable; _Py_UopsSymbol *func; _Py_UopsSymbol *maybe_self; - _Py_UopsSymbol **args; args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + args = &stack_pointer[-oparg]; + (void)callable; + (void)self_or_null; + (void)args; func = sym_new_not_null(ctx); maybe_self = sym_new_not_null(ctx); - for (int _i = oparg; --_i >= 0;) { - args[_i] = sym_new_not_null(ctx); - } stack_pointer[-2 - oparg] = func; stack_pointer[-1 - oparg] = maybe_self; break; @@ -1321,12 +1707,21 @@ } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UopsSymbol *null; + _Py_UopsSymbol *callable; + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + sym_set_null(null); + sym_set_type(callable, &PyMethod_Type); break; } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + _Py_UopsSymbol *callable; _Py_UopsSymbol *func; _Py_UopsSymbol *self; + callable = stack_pointer[-2 - oparg]; + (void)callable; func = sym_new_not_null(ctx); self = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = func; @@ -1335,14 +1730,27 @@ } case _CHECK_PEP_523: { + /* Setting the eval frame function invalidates + * all executors, so no need to check dynamically */ + if (_PyInterpreterState_GET()->eval_frame == 
NULL) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } break; } case _CHECK_FUNCTION_EXACT_ARGS: { + _Py_UopsSymbol *self_or_null; + _Py_UopsSymbol *callable; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + sym_set_type(callable, &PyFunction_Type); + (void)self_or_null; break; } case _CHECK_STACK_SPACE: { + assert(corresponding_check_stack == NULL); + corresponding_check_stack = this_instr; break; } @@ -1378,7 +1786,16 @@ } assert(self_or_null != NULL); assert(args != NULL); - new_frame = frame_new(ctx, co, 0, NULL, 0); + if (sym_is_not_null(self_or_null)) { + // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM + args--; + argcount++; + } + if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { + new_frame = frame_new(ctx, co, 0, args, argcount); + } else { + new_frame = frame_new(ctx, co, 0, NULL, 0); + } stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -1400,6 +1817,24 @@ ctx->done = true; break; } + /* Stack space handling */ + int framesize = co->co_framesize; + assert(framesize > 0); + curr_space += framesize; + if (curr_space < 0 || curr_space > INT32_MAX) { + // won't fit in signed 32-bit int + ctx->done = true; + break; + } + max_space = curr_space > max_space ? 
curr_space : max_space; + if (first_valid_check_stack == NULL) { + first_valid_check_stack = corresponding_check_stack; + } + else if (corresponding_check_stack) { + // delete all but the first valid _CHECK_STACK_SPACE + corresponding_check_stack->opcode = _NOP; + } + corresponding_check_stack = NULL; break; } @@ -1431,14 +1866,22 @@ } case _CHECK_AND_ALLOCATE_OBJECT: { + _Py_UopsSymbol **args; + _Py_UopsSymbol *null; + _Py_UopsSymbol *callable; _Py_UopsSymbol *self; _Py_UopsSymbol *init; - _Py_UopsSymbol **args; + args = &stack_pointer[-oparg]; + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + args = &stack_pointer[-oparg]; + uint32_t type_version = (uint32_t)this_instr->operand; + (void)type_version; + (void)callable; + (void)null; + (void)args; self = sym_new_not_null(ctx); init = sym_new_not_null(ctx); - for (int _i = oparg; --_i >= 0;) { - args[_i] = sym_new_not_null(ctx); - } stack_pointer[-2 - oparg] = self; stack_pointer[-1 - oparg] = init; break; @@ -1714,8 +2157,27 @@ } case _BINARY_OP: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyTypeObject *ltype = sym_get_type(left); + PyTypeObject *rtype = sym_get_type(right); + if (ltype != NULL && (ltype == &PyLong_Type || ltype == &PyFloat_Type) && + rtype != NULL && (rtype == &PyLong_Type || rtype == &PyFloat_Type)) + { + if (oparg != NB_TRUE_DIVIDE && oparg != NB_INPLACE_TRUE_DIVIDE && + ltype == &PyLong_Type && rtype == &PyLong_Type) { + /* If both inputs are ints and the op is not division the result is an int */ + res = sym_new_type(ctx, &PyLong_Type); + } + else { + /* For any other op combining ints/floats the result is a float */ + res = sym_new_type(ctx, &PyFloat_Type); + } + } + res = sym_new_unknown(ctx); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1749,24 +2211,60 @@ /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable 
micro-op for tier 2 */ case _GUARD_IS_TRUE_POP: { + _Py_UopsSymbol *flag; + flag = stack_pointer[-1]; + if (sym_is_const(flag)) { + PyObject *value = sym_get_const(flag); + assert(value != NULL); + eliminate_pop_guard(this_instr, value != Py_True); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _GUARD_IS_FALSE_POP: { + _Py_UopsSymbol *flag; + flag = stack_pointer[-1]; + if (sym_is_const(flag)) { + PyObject *value = sym_get_const(flag); + assert(value != NULL); + eliminate_pop_guard(this_instr, value != Py_False); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _GUARD_IS_NONE_POP: { + _Py_UopsSymbol *flag; + flag = stack_pointer[-1]; + if (sym_is_const(flag)) { + PyObject *value = sym_get_const(flag); + assert(value != NULL); + eliminate_pop_guard(this_instr, !Py_IsNone(value)); + } + else if (sym_has_type(flag)) { + assert(!sym_matches_type(flag, &_PyNone_Type)); + eliminate_pop_guard(this_instr, true); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _GUARD_IS_NOT_NONE_POP: { + _Py_UopsSymbol *flag; + flag = stack_pointer[-1]; + if (sym_is_const(flag)) { + PyObject *value = sym_get_const(flag); + assert(value != NULL); + eliminate_pop_guard(this_instr, Py_IsNone(value)); + } + else if (sym_has_type(flag)) { + assert(!sym_matches_type(flag, &_PyNone_Type)); + eliminate_pop_guard(this_instr, false); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -1782,6 +2280,7 @@ } case _CHECK_STACK_SPACE_OPERAND: { + uint32_t framesize = (uint32_t)this_instr->operand; break; } diff --git a/Tools/cases_generator/partial_evaluator_generator.py b/Tools/cases_generator/partial_evaluator_generator.py index 67a96cc899fdf1..4e278b86136833 100644 --- a/Tools/cases_generator/partial_evaluator_generator.py +++ b/Tools/cases_generator/partial_evaluator_generator.py @@ -25,6 +25,7 @@ from stack import Local, Stack, StackError DEFAULT_OUTPUT = ROOT / "Python/partial_evaluator_cases.c.h" 
+DEFAULT_SPECIALIZER_INPUT = (ROOT / "Python/optimizer_bytecodes.c").absolute().as_posix() DEFAULT_ABSTRACT_INPUT = (ROOT / "Python/partial_evaluator_bytecodes.c").absolute().as_posix() @@ -211,8 +212,7 @@ def generate_tier2_abstract_from_files( "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT ) - -arg_parser.add_argument("input", nargs="*", help="Abstract interpreter definition file") +arg_parser.add_argument("input", nargs="*", help="Partial evaluator definition file") arg_parser.add_argument( "base", nargs="*", help="The base instruction definition file(s)" @@ -224,6 +224,7 @@ def generate_tier2_abstract_from_files( args = arg_parser.parse_args() if not args.input: args.base.append(DEFAULT_INPUT) + args.input.append(DEFAULT_SPECIALIZER_INPUT) args.input.append(DEFAULT_ABSTRACT_INPUT) else: args.base.append(args.input[-1]) From 2d6884d2bb2c5246f2769668fc0c01778441e8f7 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 3 Sep 2024 02:40:05 +0800 Subject: [PATCH 03/18] cleanup --- Makefile.pre.in | 2 +- Python/partial_evaluator_cases.c.h | 2 +- .../partial_evaluator_generator.py | 235 ------------------ 3 files changed, 2 insertions(+), 237 deletions(-) delete mode 100644 Tools/cases_generator/partial_evaluator_generator.py diff --git a/Makefile.pre.in b/Makefile.pre.in index 4c72011a07b358..d53866d8fb3ae2 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1999,7 +1999,7 @@ regen-optimizer-cases: .PHONY: regen-partial-evaluator-cases regen-partial-evaluator-cases: - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/partial_evaluator_generator.py \ + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/optimizer_generator.py \ -o $(srcdir)/Python/partial_evaluator_cases.c.h.new \ $(srcdir)/Python/optimizer_bytecodes.c \ $(srcdir)/Python/partial_evaluator_bytecodes.c \ diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index ddf5f18ee054fa..eb73d2d68b8cb2 
100644 --- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -1,4 +1,4 @@ -// This file is generated by Tools/cases_generator/partial_evaluator_generator.py +// This file is generated by Tools/cases_generator/optimizer_generator.py // from: // Python/optimizer_bytecodes.c, Python/partial_evaluator_bytecodes.c // Do not edit! diff --git a/Tools/cases_generator/partial_evaluator_generator.py b/Tools/cases_generator/partial_evaluator_generator.py deleted file mode 100644 index 4e278b86136833..00000000000000 --- a/Tools/cases_generator/partial_evaluator_generator.py +++ /dev/null @@ -1,235 +0,0 @@ -"""Generate the cases for the tier 2 optimizer. -Reads the instruction definitions from bytecodes.c and optimizer_bytecodes.c -Writes the cases to optimizer_cases.c.h, which is #included in Python/optimizer_analysis.c. -""" - -import argparse - -from analyzer import ( - Analysis, - Instruction, - Uop, - analyze_files, - StackItem, - analysis_error, -) -from generators_common import ( - DEFAULT_INPUT, - ROOT, - write_header, - Emitter, -) -from cwriter import CWriter -from typing import TextIO, Iterator -from lexer import Token -from stack import Local, Stack, StackError - -DEFAULT_OUTPUT = ROOT / "Python/partial_evaluator_cases.c.h" -DEFAULT_SPECIALIZER_INPUT = (ROOT / "Python/optimizer_bytecodes.c").absolute().as_posix() -DEFAULT_ABSTRACT_INPUT = (ROOT / "Python/partial_evaluator_bytecodes.c").absolute().as_posix() - - -def validate_uop(override: Uop, uop: Uop) -> None: - # To do - pass - - -def type_name(var: StackItem) -> str: - if var.is_array(): - return f"_Py_UopsSymbol **" - if var.type: - return var.type - return f"_Py_UopsSymbol *" - - -def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: - variables = {"unused"} - if not skip_inputs: - for var in reversed(uop.stack.inputs): - if var.name not in variables: - variables.add(var.name) - if var.condition: - out.emit(f"{type_name(var)}{var.name} = NULL;\n") - else: - 
out.emit(f"{type_name(var)}{var.name};\n") - for var in uop.stack.outputs: - if var.peek: - continue - if var.name not in variables: - variables.add(var.name) - if var.condition: - out.emit(f"{type_name(var)}{var.name} = NULL;\n") - else: - out.emit(f"{type_name(var)}{var.name};\n") - - -def decref_inputs( - out: CWriter, - tkn: Token, - tkn_iter: Iterator[Token], - uop: Uop, - stack: Stack, - inst: Instruction | None, -) -> None: - next(tkn_iter) - next(tkn_iter) - next(tkn_iter) - out.emit_at("", tkn) - - -def emit_default(out: CWriter, uop: Uop) -> None: - for i, var in enumerate(uop.stack.outputs): - if var.name != "unused" and not var.peek: - if var.is_array(): - out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n") - out.emit(f"{var.name}[_i] = sym_new_not_null(ctx);\n") - out.emit("}\n") - elif var.name == "null": - out.emit(f"{var.name} = sym_new_null(ctx);\n") - else: - out.emit(f"{var.name} = sym_new_not_null(ctx);\n") - - -class OptimizerEmitter(Emitter): - pass - - -def write_uop( - override: Uop | None, - uop: Uop, - out: CWriter, - stack: Stack, - debug: bool, - skip_inputs: bool, -) -> None: - locals: dict[str, Local] = {} - try: - prototype = override if override else uop - is_override = override is not None - out.start_line() - for var in reversed(prototype.stack.inputs): - code, local = stack.pop(var, extract_bits=True) - if not skip_inputs: - out.emit(code) - if local.defined: - locals[local.name] = local - out.emit(stack.define_output_arrays(prototype.stack.outputs)) - if debug: - args = [] - for var in prototype.stack.inputs: - if not var.peek or is_override: - args.append(var.name) - out.emit(f'DEBUG_PRINTF({", ".join(args)});\n') - if override: - for cache in uop.caches: - if cache.name != "unused": - if cache.size == 4: - type = cast = "PyObject *" - else: - type = f"uint{cache.size*16}_t " - cast = f"uint{cache.size*16}_t" - out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n") - if override: - emitter = OptimizerEmitter(out) - 
emitter.emit_tokens(override, stack, None) - else: - emit_default(out, uop) - - for var in prototype.stack.outputs: - if var.name in locals: - local = locals[var.name] - else: - local = Local.local(var) - stack.push(local) - out.start_line() - stack.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=True) - except StackError as ex: - raise analysis_error(ex.args[0], uop.body[0]) - - -SKIPS = ("_EXTENDED_ARG",) - - -def generate_abstract_interpreter( - filenames: list[str], - abstract: Analysis, - base: Analysis, - outfile: TextIO, - debug: bool, -) -> None: - write_header(__file__, filenames, outfile) - out = CWriter(outfile, 2, False) - out.emit("\n") - base_uop_names = set([uop.name for uop in base.uops.values()]) - for abstract_uop_name in abstract.uops: - assert ( - abstract_uop_name in base_uop_names - ), f"All abstract uops should override base uops, but {abstract_uop_name} is not." - - for uop in base.uops.values(): - override: Uop | None = None - if uop.name in abstract.uops: - override = abstract.uops[uop.name] - validate_uop(override, uop) - if uop.properties.tier == 1: - continue - if uop.replicates: - continue - if uop.is_super(): - continue - if not uop.is_viable(): - out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n") - continue - out.emit(f"case {uop.name}: {{\n") - if override: - declare_variables(override, out, skip_inputs=False) - else: - declare_variables(uop, out, skip_inputs=True) - stack = Stack() - write_uop(override, uop, out, stack, debug, skip_inputs=(override is None)) - out.start_line() - out.emit("break;\n") - out.emit("}") - out.emit("\n\n") - - -def generate_tier2_abstract_from_files( - filenames: list[str], outfilename: str, debug: bool = False -) -> None: - assert len(filenames) == 2, "Need a base file and an abstract cases file." 
- base = analyze_files([filenames[0]]) - abstract = analyze_files([filenames[1]]) - with open(outfilename, "w") as outfile: - generate_abstract_interpreter(filenames, abstract, base, outfile, debug) - - -arg_parser = argparse.ArgumentParser( - description="Generate the code for the tier 2 interpreter.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, -) - -arg_parser.add_argument( - "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT -) - -arg_parser.add_argument("input", nargs="*", help="Partial evaluator definition file") - -arg_parser.add_argument( - "base", nargs="*", help="The base instruction definition file(s)" -) - -arg_parser.add_argument("-d", "--debug", help="Insert debug calls", action="store_true") - -if __name__ == "__main__": - args = arg_parser.parse_args() - if not args.input: - args.base.append(DEFAULT_INPUT) - args.input.append(DEFAULT_SPECIALIZER_INPUT) - args.input.append(DEFAULT_ABSTRACT_INPUT) - else: - args.base.append(args.input[-1]) - args.input.pop() - abstract = analyze_files(args.input) - base = analyze_files(args.base) - with open(args.output, "w") as outfile: - generate_abstract_interpreter(args.input, abstract, base, outfile, args.debug) From e5e736422d8196c9f6352081778908c6662c9242 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 3 Sep 2024 04:10:24 +0800 Subject: [PATCH 04/18] basic copying setup --- Include/internal/pycore_optimizer.h | 3 +++ Python/optimizer_analysis.c | 28 ++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 54a08de252b4d0..8bd4d3201b2e9a 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -226,6 +226,9 @@ struct _Py_UOpsContext { _Py_UopsSymbol **n_consumed; _Py_UopsSymbol **limit; _Py_UopsSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; + + _PyUOpInstruction 
*trace_dest; + int n_trace_dest; }; typedef struct _Py_UOpsContext _Py_UOpsContext; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b871010ee6a141..945cce3ea06988 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -486,6 +486,10 @@ optimize_uops( } +#define WRITE_OP(INST, OP, ARG, OPERAND) \ + (INST)->opcode = OP; \ + (INST)->oparg = ARG; \ + (INST)->operand = OPERAND; /* 1 for success, 0 for not ready, cannot error at the moment. */ static int @@ -498,7 +502,10 @@ partial_evaluate_uops( ) { + _PyUOpInstruction trace_dest[UOP_MAX_TRACE_LENGTH]; _Py_UOpsContext context; + context.trace_dest = trace_dest; + context.n_trace_dest = 0; _Py_UOpsContext *ctx = &context; uint32_t opcode = UINT16_MAX; int curr_space = 0; @@ -521,9 +528,11 @@ partial_evaluate_uops( for (int i = 0; !ctx->done; i++) { assert(i < trace_len); this_instr = &trace[i]; + trace_dest[ctx->n_trace_dest] = *this_instr; int oparg = this_instr->oparg; opcode = this_instr->opcode; + uint64_t operand = this_instr->operand; _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; #ifdef Py_DEBUG @@ -546,6 +555,8 @@ partial_evaluate_uops( DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); + WRITE_OP(&trace_dest[ctx->n_trace_dest], opcode, oparg, operand); + ctx->n_trace_dest++; } if (ctx->out_of_space) { DPRINTF(3, "\n"); @@ -563,10 +574,19 @@ partial_evaluate_uops( return 0; } - /* Either reached the end or cannot optimize further, but there - * would be no benefit in retrying later */ - _Py_uop_abstractcontext_fini(ctx); - return trace_len; + if (ctx->out_of_space || !is_terminator(this_instr)) { + _Py_uop_abstractcontext_fini(ctx); + return trace_len; + } + else { + // We MUST not have bailed early here. + // That's the only time the PE's residual is valid. 
+ assert(ctx->n_trace_dest < UOP_MAX_TRACE_LENGTH); + assert(is_terminator(this_instr)); + memcpy(trace, trace_dest, ctx->n_trace_dest * sizeof(_PyUOpInstruction)); + _Py_uop_abstractcontext_fini(ctx); + return trace_len; + } error: DPRINTF(3, "\n"); From 9722b40db3d00803fbb12a1e735733e797dfa8fd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:39:15 +0800 Subject: [PATCH 05/18] fix compilation --- Include/internal/pycore_optimizer.h | 64 ++- Lib/test/test_capi/test_opt.py | 13 + Python/optimizer_analysis.c | 94 +++- Python/optimizer_bytecodes.c | 40 +- Python/optimizer_cases.c.h | 544 +++++++++--------- Python/optimizer_symbols.c | 157 +++--- Python/partial_evaluator_bytecodes.c | 16 +- Python/partial_evaluator_cases.c.h | 550 ++++++++++--------- Tools/cases_generator/optimizer_generator.py | 10 +- Tools/cases_generator/stack.py | 2 +- 10 files changed, 800 insertions(+), 690 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 8bd4d3201b2e9a..68b59d1c877785 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -152,8 +152,8 @@ struct _Py_UopsSymbol { PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!) unsigned int type_version; // currently stores type version - bool is_static; // used for binding-time analysis int locals_idx; + char is_static; // used for binding-time analysis }; #define UOP_FORMAT_TARGET 0 @@ -191,16 +191,22 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst) // handle before rejoining the rest of the program. 
#define MAX_CHAIN_DEPTH 4 + typedef struct _Py_UopsSymbol _Py_UopsSymbol; +typedef struct _Py_UopsLocalsPlusSlot { + _Py_UopsSymbol *sym; + char is_virtual; +} _Py_UopsLocalsPlusSlot; + struct _Py_UOpsAbstractFrame { // Max stacklen int stack_len; int locals_len; - _Py_UopsSymbol **stack_pointer; - _Py_UopsSymbol **stack; - _Py_UopsSymbol **locals; + _Py_UopsLocalsPlusSlot *stack_pointer; + _Py_UopsLocalsPlusSlot *stack; + _Py_UopsLocalsPlusSlot *locals; }; typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; @@ -223,9 +229,9 @@ struct _Py_UOpsContext { // Arena for the symbolic types. ty_arena t_arena; - _Py_UopsSymbol **n_consumed; - _Py_UopsSymbol **limit; - _Py_UopsSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; + _Py_UopsLocalsPlusSlot *n_consumed; + _Py_UopsLocalsPlusSlot *limit; + _Py_UopsLocalsPlusSlot locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; _PyUOpInstruction *trace_dest; int n_trace_dest; @@ -233,28 +239,28 @@ struct _Py_UOpsContext { typedef struct _Py_UOpsContext _Py_UOpsContext; -extern bool _Py_uop_sym_is_null(_Py_UopsSymbol *sym); -extern bool _Py_uop_sym_is_not_null(_Py_UopsSymbol *sym); -extern bool _Py_uop_sym_is_const(_Py_UopsSymbol *sym); -extern PyObject *_Py_uop_sym_get_const(_Py_UopsSymbol *sym); -extern _Py_UopsSymbol *_Py_uop_sym_new_unknown(_Py_UOpsContext *ctx); -extern _Py_UopsSymbol *_Py_uop_sym_new_not_null(_Py_UOpsContext *ctx); -extern _Py_UopsSymbol *_Py_uop_sym_new_type( +extern bool _Py_uop_sym_is_null(_Py_UopsLocalsPlusSlot sym); +extern bool _Py_uop_sym_is_not_null(_Py_UopsLocalsPlusSlot sym); +extern bool _Py_uop_sym_is_const(_Py_UopsLocalsPlusSlot sym); +extern PyObject *_Py_uop_sym_get_const(_Py_UopsLocalsPlusSlot sym); +extern _Py_UopsLocalsPlusSlot _Py_uop_sym_new_unknown(_Py_UOpsContext *ctx); +extern _Py_UopsLocalsPlusSlot _Py_uop_sym_new_not_null(_Py_UOpsContext *ctx); +extern _Py_UopsLocalsPlusSlot _Py_uop_sym_new_type( _Py_UOpsContext *ctx, PyTypeObject *typ); -extern _Py_UopsSymbol 
*_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val); -extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); -extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); -extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); -extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version); -extern void _Py_uop_sym_set_locals_idx(_Py_UopsSymbol *sym, int locals_idx); -extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); -extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); -extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); -extern bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version); -extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); -extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); -extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); -extern PyTypeObject *_Py_uop_sym_get_type(_Py_UopsSymbol *sym); +extern _Py_UopsLocalsPlusSlot _Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val); +extern _Py_UopsLocalsPlusSlot _Py_uop_sym_new_null(_Py_UOpsContext *ctx); +extern bool _Py_uop_sym_has_type(_Py_UopsLocalsPlusSlot sym); +extern bool _Py_uop_sym_matches_type(_Py_UopsLocalsPlusSlot sym, PyTypeObject *typ); +extern bool _Py_uop_sym_matches_type_version(_Py_UopsLocalsPlusSlot sym, unsigned int version); +extern void _Py_uop_sym_set_locals_idx(_Py_UopsLocalsPlusSlot sym, int locals_idx); +extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym); +extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym); +extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym, PyTypeObject *typ); +extern bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym, unsigned int version); +extern void 
_Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym, PyObject *const_val); +extern bool _Py_uop_sym_is_bottom(_Py_UopsLocalsPlusSlot sym); +extern int _Py_uop_sym_truthiness(_Py_UopsLocalsPlusSlot sym); +extern PyTypeObject *_Py_uop_sym_get_type(_Py_UopsLocalsPlusSlot sym); extern void _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx); @@ -264,7 +270,7 @@ extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( _Py_UOpsContext *ctx, PyCodeObject *co, int curr_stackentries, - _Py_UopsSymbol **args, + _Py_UopsLocalsPlusSlot *args, int arg_len); extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx); diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f1ab72180d714d..449d589b984de8 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1481,6 +1481,19 @@ def fn(a): fn(A()) + def test_pe_load_fast_pop_top(self): + def thing(a): + x = 0 + for i in range(20): + i + return i + + + res, ex = self._run_with_optimizer(thing, 1) + self.assertEqual(res, 19) + self.assertIsNotNone(ex) + self.assertEqual(list(iter_opnames(ex)).count("_POP_TOP"), 0) + self.assertTrue(ex.is_valid()) if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 945cce3ea06988..5758d404553565 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -334,8 +334,8 @@ static int optimize_to_bool( _PyUOpInstruction *this_instr, _Py_UOpsContext *ctx, - _Py_UopsSymbol *value, - _Py_UopsSymbol **result_ptr) + _Py_UopsLocalsPlusSlot value, + _Py_UopsLocalsPlusSlot *result_ptr) { if (sym_matches_type(value, &PyBool_Type)) { REPLACE_OP(this_instr, _NOP, 0, 0); @@ -386,6 +386,12 @@ get_code(_PyUOpInstruction *op) return co; } +static inline _Py_UopsLocalsPlusSlot +sym_to_slot(_Py_UopsSymbol *sym) +{ + return (_Py_UopsLocalsPlusSlot){sym, 0}; +} + /* 1 for success, 0 for not ready, cannot error at the moment. 
*/ static int optimize_uops( @@ -423,7 +429,7 @@ optimize_uops( int oparg = this_instr->oparg; opcode = this_instr->opcode; - _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; + _Py_UopsLocalsPlusSlot *stack_pointer = ctx->frame->stack_pointer; #ifdef Py_DEBUG if (get_lltrace() >= 3) { @@ -491,6 +497,44 @@ optimize_uops( (INST)->oparg = ARG; \ (INST)->operand = OPERAND; +#define SET_STATIC_INST() instr_is_truly_static = true; + +static void +reify_shadow_stack(_Py_UOpsContext *ctx) +{ + _PyUOpInstruction *trace_dest = ctx->trace_dest; + for (_Py_UopsLocalsPlusSlot *sp = ctx->frame->stack; sp < ctx->frame->stack_pointer; sp++) { + _Py_UopsSymbol *sym = sp->sym; + assert(sym != NULL); + // Need reifying. +// if (sym->is_virtual) { +// if (sym->const_val) { +// WRITE_OP(&trace_dest[ctx->n_trace_dest], _Py_IsImmortal(sym->const_val) ? +// _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE, sym->locals_idx, (uint64_t)sym->const_val); +// } +// else if (sym->locals_idx >= 0) { +// printf("pe reified LOAD_FAST %d\n", sym->locals_idx); +// WRITE_OP(&trace_dest[ctx->n_trace_dest], _LOAD_FAST, sym->locals_idx, 0); +// } +// else if (sym_is_null(sym)) { +// WRITE_OP(&trace_dest[ctx->n_trace_dest], _PUSH_NULL, sym->locals_idx, 0); +// } +// else { +// // Is static but not a constant value of locals or NULL. +// // How is that possible? +// Py_UNREACHABLE(); +// } +// ctx->n_trace_dest++; +// sym->is_virtual = false; +// } +// if (ctx->n_trace_dest >= UOP_MAX_TRACE_LENGTH) { +// ctx->out_of_space = true; +// ctx->done = true; +// return; +// } + } +} + /* 1 for success, 0 for not ready, cannot error at the moment. 
*/ static int partial_evaluate_uops( @@ -525,7 +569,9 @@ partial_evaluate_uops( ctx->contradiction = false; _PyUOpInstruction *this_instr = NULL; - for (int i = 0; !ctx->done; i++) { + int i = 0; + bool prev_instr_is_truly_static = false; + for (; !ctx->done; i++) { assert(i < trace_len); this_instr = &trace[i]; trace_dest[ctx->n_trace_dest] = *this_instr; @@ -533,7 +579,12 @@ partial_evaluate_uops( int oparg = this_instr->oparg; opcode = this_instr->opcode; uint64_t operand = this_instr->operand; - _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; + _Py_UopsLocalsPlusSlot *stack_pointer = ctx->frame->stack_pointer; + + // An instruction is candidate static if it has no escapes, and all its inputs + // are static. + // If so, whether it can be eliminated is up to whether it has an implementation. + bool instr_is_truly_static = false; #ifdef Py_DEBUG if (get_lltrace() >= 3) { @@ -555,8 +606,32 @@ partial_evaluate_uops( DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); - WRITE_OP(&trace_dest[ctx->n_trace_dest], opcode, oparg, operand); - ctx->n_trace_dest++; + if (ctx->done) { + break; + } + // Always write these instructions for bookkeeping. + if (opcode == _CHECK_VALIDITY_AND_SET_IP || opcode == _SET_IP || opcode == _CHECK_VALIDITY) { + WRITE_OP(&trace_dest[ctx->n_trace_dest], opcode, oparg, operand); + ctx->n_trace_dest++; + } + // If the instruction is not static, + // reify the shadow stack, and write the op. + else if (!instr_is_truly_static) { + reify_shadow_stack(ctx); + WRITE_OP(&trace_dest[ctx->n_trace_dest], opcode, oparg, operand); + ctx->n_trace_dest++; + } + else { +//#ifdef Py_DEBUG +// if (get_lltrace() >= 3) { + printf("%4d pe STATIC: ", (int) (this_instr - trace)); + _PyUOpPrint(this_instr); + printf("\n"); +// } +//#endif + // Inst is static. Nothing written :)! 
+ } + prev_instr_is_truly_static = instr_is_truly_static; } if (ctx->out_of_space) { DPRINTF(3, "\n"); @@ -583,7 +658,8 @@ partial_evaluate_uops( // That's the only time the PE's residual is valid. assert(ctx->n_trace_dest < UOP_MAX_TRACE_LENGTH); assert(is_terminator(this_instr)); - memcpy(trace, trace_dest, ctx->n_trace_dest * sizeof(_PyUOpInstruction)); + // Copy rest of trace to dest + memcpy(trace, trace_dest, ctx->n_trace_dest); _Py_uop_abstractcontext_fini(ctx); return trace_len; } @@ -644,7 +720,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } if (last->opcode == _LOAD_CONST_INLINE || last->opcode == _LOAD_CONST_INLINE_BORROW || - last->opcode == _LOAD_FAST || +// last->opcode == _LOAD_FAST || last->opcode == _COPY ) { last->opcode = _NOP; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 9a1b9da52f4bb5..77514cfd0627ff 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -85,7 +85,7 @@ dummy_func(void) { op(_LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); - _Py_UopsSymbol *temp = sym_new_null(ctx); + _Py_UopsLocalsPlusSlot temp = sym_new_null(ctx); GETLOCAL(oparg) = temp; } @@ -329,10 +329,10 @@ dummy_func(void) { } } - op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame)) { (void)container; (void)sub; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; } @@ -487,7 +487,7 @@ dummy_func(void) { op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { (void)index; null = sym_new_null(ctx); - attr = NULL; + attr = (_Py_UopsLocalsPlusSlot){NULL, 0}; if (this_instr[-1].opcode == _NOP) { // Preceding _CHECK_ATTR_MODULE was removed: mod is const and dict is watched. 
assert(sym_is_const(owner)); @@ -500,7 +500,7 @@ dummy_func(void) { attr = sym_new_const(ctx, res); } } - if (attr == NULL) { + if (attr.sym == NULL) { /* No conversion made. We don't know what `attr` is. */ attr = sym_new_not_null(ctx); } @@ -545,10 +545,10 @@ dummy_func(void) { self = owner; } - op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_LOAD_ATTR_PROPERTY_FRAME, (fget/4, owner -- new_frame)) { (void)fget; (void)owner; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; } @@ -568,7 +568,7 @@ dummy_func(void) { sym_set_type(callable, &PyMethod_Type); } - op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame)) { int argcount = oparg; (void)callable; @@ -594,7 +594,7 @@ dummy_func(void) { DPRINTF(3, "code=%p ", co); } - assert(self_or_null != NULL); + assert(self_or_null.sym != NULL); assert(args != NULL); if (sym_is_not_null(self_or_null)) { // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM @@ -603,9 +603,9 @@ dummy_func(void) { } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = frame_new(ctx, co, 0, args, argcount); + new_frame.sym = (_Py_UopsSymbol *)frame_new(ctx, co, 0, args, argcount); } else { - new_frame = frame_new(ctx, co, 0, NULL, 0); + new_frame.sym = (_Py_UopsSymbol *)frame_new(ctx, co, 0, NULL, 0); } } @@ -618,21 +618,21 @@ dummy_func(void) { maybe_self = sym_new_not_null(ctx); } - op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame)) { /* The _Py_UOpsAbstractFrame design assumes that we can copy arguments across directly */ (void)callable; (void)self_or_null; (void)args; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; } - 
op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) { (void)callable; (void)self_or_null; (void)args; (void)kwnames; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; } @@ -645,11 +645,11 @@ dummy_func(void) { init = sym_new_not_null(ctx); } - op(_CREATE_INIT_FRAME, (self, init, args[oparg] -- init_frame: _Py_UOpsAbstractFrame *)) { + op(_CREATE_INIT_FRAME, (self, init, args[oparg] -- init_frame)) { (void)self; (void)init; (void)args; - init_frame = NULL; + init_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; } @@ -723,12 +723,12 @@ dummy_func(void) { Py_UNREACHABLE(); } - op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) { + op(_PUSH_FRAME, (new_frame -- unused if (0))) { SYNC_SP(); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)new_frame.sym; ctx->curr_frame_depth++; - stack_pointer = new_frame->stack_pointer; + stack_pointer = ((_Py_UOpsAbstractFrame *)new_frame.sym)->stack_pointer; co = get_code(this_instr); if (co == NULL) { // should be about to _EXIT_TRACE anyway diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 672fec3946f2fb..86ea241d0cb048 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -24,7 +24,7 @@ /* _MONITOR_RESUME is not a viable micro-op for tier 2 */ case _LOAD_FAST_CHECK: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = GETLOCAL(oparg); // We guarantee this will error - just bail and don't optimize it. 
if (sym_is_null(value)) { @@ -37,7 +37,7 @@ } case _LOAD_FAST: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = GETLOCAL(oparg); stack_pointer[0] = value; stack_pointer += 1; @@ -46,9 +46,9 @@ } case _LOAD_FAST_AND_CLEAR: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = GETLOCAL(oparg); - _Py_UopsSymbol *temp = sym_new_null(ctx); + _Py_UopsLocalsPlusSlot temp = sym_new_null(ctx); GETLOCAL(oparg) = temp; stack_pointer[0] = value; stack_pointer += 1; @@ -57,7 +57,7 @@ } case _LOAD_CONST: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); @@ -69,7 +69,7 @@ } case _STORE_FAST: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = stack_pointer[-1]; GETLOCAL(oparg) = value; stack_pointer += -1; @@ -84,7 +84,7 @@ } case _PUSH_NULL: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -93,7 +93,7 @@ } case _END_SEND: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[-2] = value; stack_pointer += -1; @@ -102,22 +102,22 @@ } case _UNARY_NEGATIVE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _UNARY_NOT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _TO_BOOL: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { res = sym_new_type(ctx, &PyBool_Type); @@ -127,8 +127,8 @@ } case _TO_BOOL_BOOL: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value 
= stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { sym_set_type(value, &PyBool_Type); @@ -139,8 +139,8 @@ } case _TO_BOOL_INT: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { sym_set_type(value, &PyLong_Type); @@ -151,8 +151,8 @@ } case _TO_BOOL_LIST: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { sym_set_type(value, &PyList_Type); @@ -163,8 +163,8 @@ } case _TO_BOOL_NONE: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { sym_set_const(value, Py_None); @@ -175,8 +175,8 @@ } case _TO_BOOL_STR: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { res = sym_new_type(ctx, &PyBool_Type); @@ -187,22 +187,22 @@ } case _REPLACE_WITH_TRUE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _UNARY_INVERT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _GUARD_BOTH_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_matches_type(left, &PyLong_Type)) { @@ -232,9 +232,9 @@ } case _BINARY_OP_MULTIPLY_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = 
stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -262,9 +262,9 @@ } case _BINARY_OP_ADD_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -292,9 +292,9 @@ } case _BINARY_OP_SUBTRACT_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -322,8 +322,8 @@ } case _GUARD_BOTH_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_matches_type(left, &PyFloat_Type)) { @@ -353,9 +353,9 @@ } case _BINARY_OP_MULTIPLY_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -384,9 +384,9 @@ } case _BINARY_OP_ADD_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -415,9 +415,9 @@ } case _BINARY_OP_SUBTRACT_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -446,8 +446,8 @@ } case _GUARD_BOTH_UNICODE: { - _Py_UopsSymbol 
*right; - _Py_UopsSymbol *left; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_matches_type(left, &PyUnicode_Type) && @@ -460,9 +460,9 @@ } case _BINARY_OP_ADD_UNICODE: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -490,7 +490,7 @@ } case _BINARY_SUBSCR: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -499,7 +499,7 @@ } case _BINARY_SLICE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3] = res; stack_pointer += -2; @@ -514,7 +514,7 @@ } case _BINARY_SUBSCR_LIST_INT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -523,7 +523,7 @@ } case _BINARY_SUBSCR_STR_INT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -532,7 +532,7 @@ } case _BINARY_SUBSCR_TUPLE_INT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -541,7 +541,7 @@ } case _BINARY_SUBSCR_DICT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -554,16 +554,16 @@ } case _BINARY_SUBSCR_INIT_CALL: { - _Py_UopsSymbol *sub; - _Py_UopsSymbol *container; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot sub; + _Py_UopsLocalsPlusSlot container; + _Py_UopsLocalsPlusSlot new_frame; sub = stack_pointer[-1]; container = stack_pointer[-2]; (void)container; (void)sub; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = 
true; - stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-2] = new_frame; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -606,14 +606,14 @@ } case _CALL_INTRINSIC_1: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _CALL_INTRINSIC_2: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -622,8 +622,8 @@ } case _RETURN_VALUE: { - _Py_UopsSymbol *retval; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot retval; + _Py_UopsLocalsPlusSlot res; retval = stack_pointer[-1]; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -650,14 +650,14 @@ } case _GET_AITER: { - _Py_UopsSymbol *iter; + _Py_UopsLocalsPlusSlot iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; } case _GET_ANEXT: { - _Py_UopsSymbol *awaitable; + _Py_UopsLocalsPlusSlot awaitable; awaitable = sym_new_not_null(ctx); stack_pointer[0] = awaitable; stack_pointer += 1; @@ -666,7 +666,7 @@ } case _GET_AWAITABLE: { - _Py_UopsSymbol *iter; + _Py_UopsLocalsPlusSlot iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; @@ -681,7 +681,7 @@ } case _YIELD_VALUE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_unknown(ctx); stack_pointer[-1] = res; break; @@ -694,7 +694,7 @@ } case _LOAD_COMMON_CONSTANT: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[0] = value; stack_pointer += 1; @@ -703,7 +703,7 @@ } case _LOAD_BUILD_CLASS: { - _Py_UopsSymbol *bc; + _Py_UopsLocalsPlusSlot bc; bc = sym_new_not_null(ctx); stack_pointer[0] = bc; stack_pointer += 1; @@ -722,8 +722,8 @@ } case _UNPACK_SEQUENCE: { - _Py_UopsSymbol *seq; - _Py_UopsSymbol **values; + _Py_UopsLocalsPlusSlot seq; + _Py_UopsLocalsPlusSlot *values; seq = stack_pointer[-1]; values = &stack_pointer[-1]; /* This has to be done manually */ @@ -737,8 +737,8 @@ } case 
_UNPACK_SEQUENCE_TWO_TUPLE: { - _Py_UopsSymbol *val1; - _Py_UopsSymbol *val0; + _Py_UopsLocalsPlusSlot val1; + _Py_UopsLocalsPlusSlot val0; val1 = sym_new_not_null(ctx); val0 = sym_new_not_null(ctx); stack_pointer[-1] = val1; @@ -749,7 +749,7 @@ } case _UNPACK_SEQUENCE_TUPLE: { - _Py_UopsSymbol **values; + _Py_UopsLocalsPlusSlot *values; values = &stack_pointer[-1]; for (int _i = oparg; --_i >= 0;) { values[_i] = sym_new_not_null(ctx); @@ -760,7 +760,7 @@ } case _UNPACK_SEQUENCE_LIST: { - _Py_UopsSymbol **values; + _Py_UopsLocalsPlusSlot *values; values = &stack_pointer[-1]; for (int _i = oparg; --_i >= 0;) { values[_i] = sym_new_not_null(ctx); @@ -771,8 +771,8 @@ } case _UNPACK_EX: { - _Py_UopsSymbol *seq; - _Py_UopsSymbol **values; + _Py_UopsLocalsPlusSlot seq; + _Py_UopsLocalsPlusSlot *values; seq = stack_pointer[-1]; values = &stack_pointer[-1]; /* This has to be done manually */ @@ -809,7 +809,7 @@ } case _LOAD_LOCALS: { - _Py_UopsSymbol *locals; + _Py_UopsLocalsPlusSlot locals; locals = sym_new_not_null(ctx); stack_pointer[0] = locals; stack_pointer += 1; @@ -820,7 +820,7 @@ /* _LOAD_FROM_DICT_OR_GLOBALS is not a viable micro-op for tier 2 */ case _LOAD_NAME: { - _Py_UopsSymbol *v; + _Py_UopsLocalsPlusSlot v; v = sym_new_not_null(ctx); stack_pointer[0] = v; stack_pointer += 1; @@ -829,8 +829,8 @@ } case _LOAD_GLOBAL: { - _Py_UopsSymbol *res; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot res; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; res = sym_new_not_null(ctx); null = sym_new_null(ctx); stack_pointer[0] = res; @@ -849,8 +849,8 @@ } case _LOAD_GLOBAL_MODULE: { - _Py_UopsSymbol *res; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot res; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; res = sym_new_not_null(ctx); null = sym_new_null(ctx); stack_pointer[0] = res; @@ -861,8 +861,8 @@ } case _LOAD_GLOBAL_BUILTINS: { - _Py_UopsSymbol *res; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot res; + 
_Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; res = sym_new_not_null(ctx); null = sym_new_null(ctx); stack_pointer[0] = res; @@ -885,14 +885,14 @@ } case _LOAD_FROM_DICT_OR_DEREF: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[-1] = value; break; } case _LOAD_DEREF: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[0] = value; stack_pointer += 1; @@ -911,7 +911,7 @@ } case _BUILD_STRING: { - _Py_UopsSymbol *str; + _Py_UopsLocalsPlusSlot str; str = sym_new_not_null(ctx); stack_pointer[-oparg] = str; stack_pointer += 1 - oparg; @@ -920,7 +920,7 @@ } case _BUILD_TUPLE: { - _Py_UopsSymbol *tup; + _Py_UopsLocalsPlusSlot tup; tup = sym_new_not_null(ctx); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; @@ -929,7 +929,7 @@ } case _BUILD_LIST: { - _Py_UopsSymbol *list; + _Py_UopsLocalsPlusSlot list; list = sym_new_not_null(ctx); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; @@ -950,7 +950,7 @@ } case _BUILD_SET: { - _Py_UopsSymbol *set; + _Py_UopsLocalsPlusSlot set; set = sym_new_not_null(ctx); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; @@ -959,7 +959,7 @@ } case _BUILD_MAP: { - _Py_UopsSymbol *map; + _Py_UopsLocalsPlusSlot map; map = sym_new_not_null(ctx); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; @@ -992,7 +992,7 @@ /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ case _LOAD_SUPER_ATTR_ATTR: { - _Py_UopsSymbol *attr_st; + _Py_UopsLocalsPlusSlot attr_st; attr_st = sym_new_not_null(ctx); stack_pointer[-3] = attr_st; stack_pointer += -2; @@ -1001,8 +1001,8 @@ } case _LOAD_SUPER_ATTR_METHOD: { - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self_or_null; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self_or_null; attr = sym_new_not_null(ctx); self_or_null = sym_new_not_null(ctx); stack_pointer[-3] = attr; @@ -1013,9 +1013,9 @@ } case _LOAD_ATTR: { - _Py_UopsSymbol 
*owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self_or_null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self_or_null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; (void)owner; attr = sym_new_not_null(ctx); @@ -1030,7 +1030,7 @@ } case _GUARD_TYPE_VERSION: { - _Py_UopsSymbol *owner; + _Py_UopsLocalsPlusSlot owner; owner = stack_pointer[-1]; uint32_t type_version = (uint32_t)this_instr->operand; assert(type_version); @@ -1060,9 +1060,9 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; uint16_t offset = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); @@ -1077,7 +1077,7 @@ } case _CHECK_ATTR_MODULE: { - _Py_UopsSymbol *owner; + _Py_UopsLocalsPlusSlot owner; owner = stack_pointer[-1]; uint32_t dict_version = (uint32_t)this_instr->operand; (void)dict_version; @@ -1098,14 +1098,14 @@ } case _LOAD_ATTR_MODULE: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; uint16_t index = (uint16_t)this_instr->operand; (void)index; null = sym_new_null(ctx); - attr = NULL; + attr = (_Py_UopsLocalsPlusSlot){NULL, 0}; if (this_instr[-1].opcode == _NOP) { // Preceding _CHECK_ATTR_MODULE was removed: mod is const and dict is watched. assert(sym_is_const(owner)); @@ -1118,7 +1118,7 @@ attr = sym_new_const(ctx, res); } } - if (attr == NULL) { + if (attr.sym == NULL) { /* No conversion made. We don't know what `attr` is. 
*/ attr = sym_new_not_null(ctx); } @@ -1134,9 +1134,9 @@ } case _LOAD_ATTR_WITH_HINT: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; uint16_t hint = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); @@ -1151,9 +1151,9 @@ } case _LOAD_ATTR_SLOT: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; uint16_t index = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); @@ -1172,9 +1172,9 @@ } case _LOAD_ATTR_CLASS: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand; attr = sym_new_not_null(ctx); @@ -1189,15 +1189,15 @@ } case _LOAD_ATTR_PROPERTY_FRAME: { - _Py_UopsSymbol *owner; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot new_frame; owner = stack_pointer[-1]; PyObject *fget = (PyObject *)this_instr->operand; (void)fget; (void)owner; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-1] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-1] = new_frame; break; } @@ -1226,9 +1226,9 @@ } case _COMPARE_OP: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1246,9 +1246,9 @@ } case _COMPARE_OP_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; 
+ _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1261,9 +1261,9 @@ } case _COMPARE_OP_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1276,9 +1276,9 @@ } case _COMPARE_OP_STR: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1291,9 +1291,9 @@ } case _IS_OP: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1306,9 +1306,9 @@ } case _CONTAINS_OP: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1321,7 +1321,7 @@ } case _CONTAINS_OP_SET: { - _Py_UopsSymbol *b; + _Py_UopsLocalsPlusSlot b; b = sym_new_not_null(ctx); stack_pointer[-2] = b; stack_pointer += -1; @@ -1330,7 +1330,7 @@ } case _CONTAINS_OP_DICT: { - _Py_UopsSymbol *b; + _Py_UopsLocalsPlusSlot b; b = sym_new_not_null(ctx); stack_pointer[-2] = b; stack_pointer += -1; @@ -1339,8 +1339,8 @@ } case _CHECK_EG_MATCH: { - _Py_UopsSymbol *rest; - _Py_UopsSymbol *match; + _Py_UopsLocalsPlusSlot rest; + _Py_UopsLocalsPlusSlot match; rest = sym_new_not_null(ctx); match = sym_new_not_null(ctx); stack_pointer[-2] = rest; @@ -1349,14 +1349,14 @@ } case _CHECK_EXC_MATCH: { - _Py_UopsSymbol *b; + _Py_UopsLocalsPlusSlot b; b = sym_new_not_null(ctx); stack_pointer[-1] = b; 
break; } case _IMPORT_NAME: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -1365,7 +1365,7 @@ } case _IMPORT_FROM: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1378,14 +1378,14 @@ /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ case _IS_NONE: { - _Py_UopsSymbol *b; + _Py_UopsLocalsPlusSlot b; b = sym_new_not_null(ctx); stack_pointer[-1] = b; break; } case _GET_LEN: { - _Py_UopsSymbol *len; + _Py_UopsLocalsPlusSlot len; len = sym_new_not_null(ctx); stack_pointer[0] = len; stack_pointer += 1; @@ -1394,7 +1394,7 @@ } case _MATCH_CLASS: { - _Py_UopsSymbol *attrs; + _Py_UopsLocalsPlusSlot attrs; attrs = sym_new_not_null(ctx); stack_pointer[-3] = attrs; stack_pointer += -2; @@ -1403,7 +1403,7 @@ } case _MATCH_MAPPING: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1412,7 +1412,7 @@ } case _MATCH_SEQUENCE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1421,7 +1421,7 @@ } case _MATCH_KEYS: { - _Py_UopsSymbol *values_or_none; + _Py_UopsLocalsPlusSlot values_or_none; values_or_none = sym_new_not_null(ctx); stack_pointer[0] = values_or_none; stack_pointer += 1; @@ -1430,14 +1430,14 @@ } case _GET_ITER: { - _Py_UopsSymbol *iter; + _Py_UopsLocalsPlusSlot iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; } case _GET_YIELD_FROM_ITER: { - _Py_UopsSymbol *iter; + _Py_UopsLocalsPlusSlot iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; @@ -1446,7 +1446,7 @@ /* _FOR_ITER is not a viable micro-op for tier 2 */ case _FOR_ITER_TIER_TWO: { - _Py_UopsSymbol *next; + _Py_UopsLocalsPlusSlot next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1467,7 +1467,7 @@ } case 
_ITER_NEXT_LIST: { - _Py_UopsSymbol *next; + _Py_UopsLocalsPlusSlot next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1486,7 +1486,7 @@ } case _ITER_NEXT_TUPLE: { - _Py_UopsSymbol *next; + _Py_UopsLocalsPlusSlot next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1505,8 +1505,8 @@ } case _ITER_NEXT_RANGE: { - _Py_UopsSymbol *iter; - _Py_UopsSymbol *next; + _Py_UopsLocalsPlusSlot iter; + _Py_UopsLocalsPlusSlot next; iter = stack_pointer[-1]; next = sym_new_type(ctx, &PyLong_Type); (void)iter; @@ -1523,9 +1523,9 @@ } case _LOAD_SPECIAL: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self_or_null; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self_or_null; owner = stack_pointer[-1]; (void)owner; attr = sym_new_not_null(ctx); @@ -1538,7 +1538,7 @@ } case _WITH_EXCEPT_START: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1547,8 +1547,8 @@ } case _PUSH_EXC_INFO: { - _Py_UopsSymbol *prev_exc; - _Py_UopsSymbol *new_exc; + _Py_UopsLocalsPlusSlot prev_exc; + _Py_UopsLocalsPlusSlot new_exc; prev_exc = sym_new_not_null(ctx); new_exc = sym_new_not_null(ctx); stack_pointer[-1] = prev_exc; @@ -1567,9 +1567,9 @@ } case _LOAD_ATTR_METHOD_WITH_VALUES: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand; (void)descr; @@ -1583,9 +1583,9 @@ } case _LOAD_ATTR_METHOD_NO_DICT: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; PyObject *descr = (PyObject 
*)this_instr->operand; (void)descr; @@ -1599,14 +1599,14 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - _Py_UopsSymbol *attr; + _Py_UopsLocalsPlusSlot attr; attr = sym_new_not_null(ctx); stack_pointer[-1] = attr; break; } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - _Py_UopsSymbol *attr; + _Py_UopsLocalsPlusSlot attr; attr = sym_new_not_null(ctx); stack_pointer[-1] = attr; break; @@ -1617,9 +1617,9 @@ } case _LOAD_ATTR_METHOD_LAZY_DICT: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand; (void)descr; @@ -1633,11 +1633,11 @@ } case _MAYBE_EXPAND_METHOD: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; - _Py_UopsSymbol *func; - _Py_UopsSymbol *maybe_self; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot func; + _Py_UopsLocalsPlusSlot maybe_self; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1657,10 +1657,10 @@ /* _MONITOR_CALL is not a viable micro-op for tier 2 */ case _PY_FRAME_GENERAL: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot new_frame; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1668,9 +1668,9 @@ (void)callable; (void)self_or_null; (void)args; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; 
assert(WITHIN_STACK_BOUNDS()); break; @@ -1685,8 +1685,8 @@ } case _EXPAND_METHOD: { - _Py_UopsSymbol *method; - _Py_UopsSymbol *self; + _Py_UopsLocalsPlusSlot method; + _Py_UopsLocalsPlusSlot self; method = sym_new_not_null(ctx); self = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = method; @@ -1699,7 +1699,7 @@ } case _CALL_NON_PY_GENERAL: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1708,8 +1708,8 @@ } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - _Py_UopsSymbol *null; - _Py_UopsSymbol *callable; + _Py_UopsLocalsPlusSlot null; + _Py_UopsLocalsPlusSlot callable; null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; sym_set_null(null); @@ -1718,9 +1718,9 @@ } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - _Py_UopsSymbol *callable; - _Py_UopsSymbol *func; - _Py_UopsSymbol *self; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot func; + _Py_UopsLocalsPlusSlot self; callable = stack_pointer[-2 - oparg]; (void)callable; func = sym_new_not_null(ctx); @@ -1740,8 +1740,8 @@ } case _CHECK_FUNCTION_EXACT_ARGS: { - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; sym_set_type(callable, &PyFunction_Type); @@ -1756,10 +1756,10 @@ } case _INIT_CALL_PY_EXACT_ARGS: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot new_frame; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1785,7 +1785,7 @@ co = (PyCodeObject *)func->func_code; DPRINTF(3, "code=%p ", co); } - assert(self_or_null != NULL); + assert(self_or_null.sym != 
NULL); assert(args != NULL); if (sym_is_not_null(self_or_null)) { // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM @@ -1793,25 +1793,25 @@ argcount++; } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = frame_new(ctx, co, 0, args, argcount); + new_frame.sym = (_Py_UopsSymbol *)frame_new(ctx, co, 0, args, argcount); } else { - new_frame = frame_new(ctx, co, 0, NULL, 0); + new_frame.sym = (_Py_UopsSymbol *)frame_new(ctx, co, 0, NULL, 0); } - stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; } case _PUSH_FRAME: { - _Py_UOpsAbstractFrame *new_frame; - new_frame = (_Py_UOpsAbstractFrame *)stack_pointer[-1]; + _Py_UopsLocalsPlusSlot new_frame; + new_frame = stack_pointer[-1]; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)new_frame.sym; ctx->curr_frame_depth++; - stack_pointer = new_frame->stack_pointer; + stack_pointer = ((_Py_UOpsAbstractFrame *)new_frame.sym)->stack_pointer; co = get_code(this_instr); if (co == NULL) { // should be about to _EXIT_TRACE anyway @@ -1840,7 +1840,7 @@ } case _CALL_TYPE_1: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3] = res; stack_pointer += -2; @@ -1849,7 +1849,7 @@ } case _CALL_STR_1: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3] = res; stack_pointer += -2; @@ -1858,7 +1858,7 @@ } case _CALL_TUPLE_1: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3] = res; stack_pointer += -2; @@ -1867,11 +1867,11 @@ } case _CHECK_AND_ALLOCATE_OBJECT: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *null; - _Py_UopsSymbol *callable; - _Py_UopsSymbol *self; - _Py_UopsSymbol *init; + _Py_UopsLocalsPlusSlot *args; + 
_Py_UopsLocalsPlusSlot null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot self; + _Py_UopsLocalsPlusSlot init; args = &stack_pointer[-oparg]; null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1889,19 +1889,19 @@ } case _CREATE_INIT_FRAME: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *init; - _Py_UopsSymbol *self; - _Py_UOpsAbstractFrame *init_frame; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot init; + _Py_UopsLocalsPlusSlot self; + _Py_UopsLocalsPlusSlot init_frame; args = &stack_pointer[-oparg]; init = stack_pointer[-1 - oparg]; self = stack_pointer[-2 - oparg]; (void)self; (void)init; (void)args; - init_frame = NULL; + init_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)init_frame; + stack_pointer[-2 - oparg] = init_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -1914,7 +1914,7 @@ } case _CALL_BUILTIN_CLASS: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1923,7 +1923,7 @@ } case _CALL_BUILTIN_O: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1932,7 +1932,7 @@ } case _CALL_BUILTIN_FAST: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1941,7 +1941,7 @@ } case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1950,7 +1950,7 @@ } case _CALL_LEN: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1959,7 +1959,7 @@ } case _CALL_ISINSTANCE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res 
= sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1974,7 +1974,7 @@ } case _CALL_METHOD_DESCRIPTOR_O: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1983,7 +1983,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1992,7 +1992,7 @@ } case _CALL_METHOD_DESCRIPTOR_NOARGS: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2001,7 +2001,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2014,11 +2014,11 @@ /* _DO_CALL_KW is not a viable micro-op for tier 2 */ case _PY_FRAME_KW: { - _Py_UopsSymbol *kwnames; - _Py_UopsSymbol **args; - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot kwnames; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot new_frame; kwnames = stack_pointer[-1]; args = &stack_pointer[-1 - oparg]; self_or_null = stack_pointer[-2 - oparg]; @@ -2027,9 +2027,9 @@ (void)self_or_null; (void)args; (void)kwnames; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-3 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-3 - oparg] = new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -2044,9 +2044,9 @@ } case _EXPAND_METHOD_KW: { - _Py_UopsSymbol *method; - _Py_UopsSymbol *self; - _Py_UopsSymbol *kwnames; + _Py_UopsLocalsPlusSlot method; + _Py_UopsLocalsPlusSlot self; + _Py_UopsLocalsPlusSlot kwnames; 
method = sym_new_not_null(ctx); self = sym_new_not_null(ctx); kwnames = sym_new_not_null(ctx); @@ -2061,7 +2061,7 @@ } case _CALL_KW_NON_PY: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3 - oparg] = res; stack_pointer += -2 - oparg; @@ -2074,14 +2074,14 @@ /* __DO_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ case _MAKE_FUNCTION: { - _Py_UopsSymbol *func; + _Py_UopsLocalsPlusSlot func; func = sym_new_not_null(ctx); stack_pointer[-1] = func; break; } case _SET_FUNCTION_ATTRIBUTE: { - _Py_UopsSymbol *func_st; + _Py_UopsLocalsPlusSlot func_st; func_st = sym_new_not_null(ctx); stack_pointer[-2] = func_st; stack_pointer += -1; @@ -2090,7 +2090,7 @@ } case _RETURN_GENERATOR: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; ctx->frame->stack_pointer = stack_pointer; frame_pop(ctx); stack_pointer = ctx->frame->stack_pointer; @@ -2114,7 +2114,7 @@ } case _BUILD_SLICE: { - _Py_UopsSymbol *slice; + _Py_UopsLocalsPlusSlot slice; slice = sym_new_not_null(ctx); stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; stack_pointer += -1 - ((oparg == 3) ? 
1 : 0); @@ -2123,21 +2123,21 @@ } case _CONVERT_VALUE: { - _Py_UopsSymbol *result; + _Py_UopsLocalsPlusSlot result; result = sym_new_not_null(ctx); stack_pointer[-1] = result; break; } case _FORMAT_SIMPLE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _FORMAT_WITH_SPEC: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -2146,8 +2146,8 @@ } case _COPY: { - _Py_UopsSymbol *bottom; - _Py_UopsSymbol *top; + _Py_UopsLocalsPlusSlot bottom; + _Py_UopsLocalsPlusSlot top; bottom = stack_pointer[-1 - (oparg-1)]; assert(oparg > 0); top = bottom; @@ -2158,9 +2158,9 @@ } case _BINARY_OP: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; PyTypeObject *ltype = sym_get_type(left); @@ -2186,8 +2186,8 @@ } case _SWAP: { - _Py_UopsSymbol *top; - _Py_UopsSymbol *bottom; + _Py_UopsLocalsPlusSlot top; + _Py_UopsLocalsPlusSlot bottom; top = stack_pointer[-1]; bottom = stack_pointer[-2 - (oparg-2)]; stack_pointer[-2 - (oparg-2)] = top; @@ -2212,7 +2212,7 @@ /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ case _GUARD_IS_TRUE_POP: { - _Py_UopsSymbol *flag; + _Py_UopsLocalsPlusSlot flag; flag = stack_pointer[-1]; if (sym_is_const(flag)) { PyObject *value = sym_get_const(flag); @@ -2225,7 +2225,7 @@ } case _GUARD_IS_FALSE_POP: { - _Py_UopsSymbol *flag; + _Py_UopsLocalsPlusSlot flag; flag = stack_pointer[-1]; if (sym_is_const(flag)) { PyObject *value = sym_get_const(flag); @@ -2238,7 +2238,7 @@ } case _GUARD_IS_NONE_POP: { - _Py_UopsSymbol *flag; + _Py_UopsLocalsPlusSlot flag; flag = stack_pointer[-1]; if (sym_is_const(flag)) { PyObject *value = sym_get_const(flag); @@ -2255,7 +2255,7 @@ } case _GUARD_IS_NOT_NONE_POP: { - _Py_UopsSymbol 
*flag; + _Py_UopsLocalsPlusSlot flag; flag = stack_pointer[-1]; if (sym_is_const(flag)) { PyObject *value = sym_get_const(flag); @@ -2305,7 +2305,7 @@ } case _LOAD_CONST_INLINE: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); stack_pointer[0] = value; @@ -2315,7 +2315,7 @@ } case _LOAD_CONST_INLINE_BORROW: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); stack_pointer[0] = value; @@ -2325,15 +2325,15 @@ } case _POP_TOP_LOAD_CONST_INLINE_BORROW: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[-1] = value; break; } case _LOAD_CONST_INLINE_WITH_NULL: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *null; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot null; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); null = sym_new_null(ctx); @@ -2345,8 +2345,8 @@ } case _LOAD_CONST_INLINE_BORROW_WITH_NULL: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *null; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot null; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); null = sym_new_null(ctx); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 3962ced2dbecd2..2be4477cc11e34 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -55,12 +55,17 @@ static _Py_UopsSymbol NO_SPACE_SYMBOL = { .type_version = 0, }; -_Py_UopsSymbol * +static _Py_UopsLocalsPlusSlot NO_SPACE_SLOT = { + .sym = &NO_SPACE_SYMBOL, + .is_virtual = 0, +}; + +_Py_UopsLocalsPlusSlot out_of_space(_Py_UOpsContext *ctx) { ctx->done = true; ctx->out_of_space = true; - return &NO_SPACE_SYMBOL; + return NO_SPACE_SLOT; } static _Py_UopsSymbol * @@ -84,24 +89,25 @@ sym_new(_Py_UOpsContext *ctx) } static inline void -sym_set_flag(_Py_UopsSymbol *sym, int flag) 
+sym_set_flag(_Py_UopsLocalsPlusSlot sym, int flag) { - sym->flags |= flag; + sym.sym->flags |= flag; } static inline void -sym_set_bottom(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym) +sym_set_bottom(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym) { sym_set_flag(sym, IS_NULL | NOT_NULL); - sym->typ = NULL; - Py_CLEAR(sym->const_val); + sym.sym->typ = NULL; + Py_CLEAR(sym.sym->const_val); ctx->done = true; ctx->contradiction = true; } bool -_Py_uop_sym_is_bottom(_Py_UopsSymbol *sym) +_Py_uop_sym_is_bottom(_Py_UopsLocalsPlusSlot sym_l) { + _Py_UopsSymbol *sym = sym_l.sym; if ((sym->flags & IS_NULL) && (sym->flags & NOT_NULL)) { assert(sym->flags == (IS_NULL | NOT_NULL)); assert(sym->typ == NULL); @@ -112,98 +118,98 @@ _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym) } bool -_Py_uop_sym_is_not_null(_Py_UopsSymbol *sym) +_Py_uop_sym_is_not_null(_Py_UopsLocalsPlusSlot sym) { - return sym->flags == NOT_NULL; + return sym.sym->flags == NOT_NULL; } bool -_Py_uop_sym_is_null(_Py_UopsSymbol *sym) +_Py_uop_sym_is_null(_Py_UopsLocalsPlusSlot sym) { - return sym->flags == IS_NULL; + return sym.sym->flags == IS_NULL; } bool -_Py_uop_sym_is_const(_Py_UopsSymbol *sym) +_Py_uop_sym_is_const(_Py_UopsLocalsPlusSlot sym) { - return sym->const_val != NULL; + return sym.sym->const_val != NULL; } PyObject * -_Py_uop_sym_get_const(_Py_UopsSymbol *sym) +_Py_uop_sym_get_const(_Py_UopsLocalsPlusSlot sym) { - return sym->const_val; + return sym.sym->const_val; } void -_Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ) +_Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym, PyTypeObject *typ) { assert(typ != NULL && PyType_Check(typ)); - if (sym->flags & IS_NULL) { + if (sym.sym->flags & IS_NULL) { sym_set_bottom(ctx, sym); return; } - if (sym->typ != NULL) { - if (sym->typ != typ) { + if (sym.sym->typ != NULL) { + if (sym.sym->typ != typ) { sym_set_bottom(ctx, sym); return; } } else { sym_set_flag(sym, NOT_NULL); - sym->typ = typ; + sym.sym->typ = 
typ; } } bool -_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version) +_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym, unsigned int version) { // if the type version was already set, then it must be different and we should set it to bottom - if (sym->type_version) { + if (sym.sym->type_version) { sym_set_bottom(ctx, sym); return false; } - sym->type_version = version; + sym.sym->type_version = version; return true; } void -_Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val) +_Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym, PyObject *const_val) { assert(const_val != NULL); - if (sym->flags & IS_NULL) { + if (sym.sym->flags & IS_NULL) { sym_set_bottom(ctx, sym); } PyTypeObject *typ = Py_TYPE(const_val); - if (sym->typ != NULL && sym->typ != typ) { + if (sym.sym->typ != NULL && sym.sym->typ != typ) { sym_set_bottom(ctx, sym); } - if (sym->const_val != NULL) { - if (sym->const_val != const_val) { + if (sym.sym->const_val != NULL) { + if (sym.sym->const_val != const_val) { // TODO: What if they're equal? 
sym_set_bottom(ctx, sym); } } else { sym_set_flag(sym, NOT_NULL); - sym->typ = typ; - sym->const_val = Py_NewRef(const_val); + sym.sym->typ = typ; + sym.sym->const_val = Py_NewRef(const_val); } - sym->is_static = true; + sym.sym->is_static = true; } void -_Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym) +_Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym) { if (_Py_uop_sym_is_not_null(sym)) { sym_set_bottom(ctx, sym); } sym_set_flag(sym, IS_NULL); - sym->is_static = true; + sym.sym->is_static = true; } void -_Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym) +_Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym) { if (_Py_uop_sym_is_null(sym)) { sym_set_bottom(ctx, sym); @@ -212,36 +218,37 @@ _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym) } -_Py_UopsSymbol * +_Py_UopsLocalsPlusSlot _Py_uop_sym_new_unknown(_Py_UOpsContext *ctx) { - return sym_new(ctx); + return (_Py_UopsLocalsPlusSlot){sym_new(ctx), 0}; } -_Py_UopsSymbol * +_Py_UopsLocalsPlusSlot _Py_uop_sym_new_not_null(_Py_UOpsContext *ctx) { - _Py_UopsSymbol *res = _Py_uop_sym_new_unknown(ctx); - if (res == NULL) { + _Py_UopsLocalsPlusSlot res = _Py_uop_sym_new_unknown(ctx); + if (res.sym == NULL) { return out_of_space(ctx); } sym_set_flag(res, NOT_NULL); return res; } -_Py_UopsSymbol * +_Py_UopsLocalsPlusSlot _Py_uop_sym_new_type(_Py_UOpsContext *ctx, PyTypeObject *typ) { _Py_UopsSymbol *res = sym_new(ctx); if (res == NULL) { return out_of_space(ctx); } - _Py_uop_sym_set_type(ctx, res, typ); - return res; + _Py_UopsLocalsPlusSlot sym = {res, 0}; + _Py_uop_sym_set_type(ctx, sym, typ); + return sym; } // Adds a new reference to const_val, owned by the symbol. 
-_Py_UopsSymbol * +_Py_UopsLocalsPlusSlot _Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val) { assert(const_val != NULL); @@ -249,15 +256,16 @@ _Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val) if (res == NULL) { return out_of_space(ctx); } - _Py_uop_sym_set_const(ctx, res, const_val); - return res; + _Py_UopsLocalsPlusSlot sym = {res, 0}; + _Py_uop_sym_set_const(ctx, sym, const_val); + return sym; } -_Py_UopsSymbol * +_Py_UopsLocalsPlusSlot _Py_uop_sym_new_null(_Py_UOpsContext *ctx) { - _Py_UopsSymbol *null_sym = _Py_uop_sym_new_unknown(ctx); - if (null_sym == NULL) { + _Py_UopsLocalsPlusSlot null_sym = _Py_uop_sym_new_unknown(ctx); + if (null_sym.sym == NULL) { return out_of_space(ctx); } _Py_uop_sym_set_null(ctx, null_sym); @@ -265,51 +273,51 @@ _Py_uop_sym_new_null(_Py_UOpsContext *ctx) } PyTypeObject * -_Py_uop_sym_get_type(_Py_UopsSymbol *sym) +_Py_uop_sym_get_type(_Py_UopsLocalsPlusSlot sym) { if (_Py_uop_sym_is_bottom(sym)) { return NULL; } - return sym->typ; + return sym.sym->typ; } unsigned int -_Py_uop_sym_get_type_version(_Py_UopsSymbol *sym) +_Py_uop_sym_get_type_version(_Py_UopsLocalsPlusSlot sym) { - return sym->type_version; + return sym.sym->type_version; } bool -_Py_uop_sym_has_type(_Py_UopsSymbol *sym) +_Py_uop_sym_has_type(_Py_UopsLocalsPlusSlot sym) { if (_Py_uop_sym_is_bottom(sym)) { return false; } - return sym->typ != NULL; + return sym.sym->typ != NULL; } bool -_Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ) +_Py_uop_sym_matches_type(_Py_UopsLocalsPlusSlot sym, PyTypeObject *typ) { assert(typ != NULL && PyType_Check(typ)); return _Py_uop_sym_get_type(sym) == typ; } bool -_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version) +_Py_uop_sym_matches_type_version(_Py_UopsLocalsPlusSlot sym, unsigned int version) { return _Py_uop_sym_get_type_version(sym) == version; } void -_Py_uop_sym_set_locals_idx(_Py_UopsSymbol *sym, int locals_idx) 
+_Py_uop_sym_set_locals_idx(_Py_UopsLocalsPlusSlot sym, int locals_idx) { assert(locals_idx >= 0); - sym->locals_idx = locals_idx; + sym.sym->locals_idx = locals_idx; } int -_Py_uop_sym_truthiness(_Py_UopsSymbol *sym) +_Py_uop_sym_truthiness(_Py_UopsLocalsPlusSlot sym) { /* There are some non-constant values for * which `bool(val)` always evaluates to @@ -348,7 +356,7 @@ _Py_uop_frame_new( _Py_UOpsContext *ctx, PyCodeObject *co, int curr_stackentries, - _Py_UopsSymbol **args, + _Py_UopsLocalsPlusSlot *args, int arg_len) { assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); @@ -373,19 +381,17 @@ _Py_uop_frame_new( } for (int i = arg_len; i < co->co_nlocalsplus; i++) { - _Py_UopsSymbol *local = _Py_uop_sym_new_unknown(ctx); - frame->locals[i] = local; + frame->locals[i] = _Py_uop_sym_new_unknown(ctx);; } for (int i = 0; i < co->co_nlocalsplus; i++) { - frame->locals[i]->locals_idx = i; + frame->locals[i].sym->locals_idx = i; } // Initialize the stack as well for (int i = 0; i < curr_stackentries; i++) { - _Py_UopsSymbol *stackvar = _Py_uop_sym_new_unknown(ctx); - frame->stack[i] = stackvar; + frame->stack[i] = _Py_uop_sym_new_unknown(ctx); } return frame; @@ -411,7 +417,8 @@ _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx) ctx->n_consumed = ctx->locals_and_stack; #ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter. 
for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) { - ctx->locals_and_stack[i] = NULL; + _Py_UopsLocalsPlusSlot slot = {NULL, 0}; + ctx->locals_and_stack[i] = slot; } #endif @@ -445,10 +452,10 @@ do { \ } \ } while (0) -static _Py_UopsSymbol * +static _Py_UopsLocalsPlusSlot make_bottom(_Py_UOpsContext *ctx) { - _Py_UopsSymbol *sym = _Py_uop_sym_new_unknown(ctx); + _Py_UopsLocalsPlusSlot sym = _Py_uop_sym_new_unknown(ctx); _Py_uop_sym_set_null(ctx, sym); _Py_uop_sym_set_non_null(ctx, sym); return sym; @@ -464,8 +471,8 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) PyObject *val_43 = NULL; // Use a single 'sym' variable so copy-pasting tests is easier. - _Py_UopsSymbol *sym = _Py_uop_sym_new_unknown(ctx); - if (sym == NULL) { + _Py_UopsLocalsPlusSlot sym = _Py_uop_sym_new_unknown(ctx); + if (sym.sym == NULL) { goto fail; } TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "top is NULL"); @@ -476,7 +483,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) TEST_PREDICATE(!_Py_uop_sym_is_bottom(sym), "top is bottom"); sym = make_bottom(ctx); - if (sym == NULL) { + if (sym.sym == NULL) { goto fail; } TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "bottom is NULL is not false"); @@ -487,7 +494,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "bottom isn't bottom"); sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + if (sym.sym == NULL) { goto fail; } TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "int is NULL"); @@ -512,7 +519,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) assert(_Py_IsImmortal(val_43)); sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + if (sym.sym == NULL) { goto fail; } _Py_uop_sym_set_const(ctx, sym, val_42); @@ -533,7 +540,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and 
float) isn't bottom"); sym = _Py_uop_sym_new_type(ctx, &PyLong_Type); - if (sym == NULL) { + if (sym.sym == NULL) { goto fail; } _Py_uop_sym_set_const(ctx, sym, val_42); diff --git a/Python/partial_evaluator_bytecodes.c b/Python/partial_evaluator_bytecodes.c index 2364db07f342fa..a5aff06f442e71 100644 --- a/Python/partial_evaluator_bytecodes.c +++ b/Python/partial_evaluator_bytecodes.c @@ -75,12 +75,12 @@ dummy_func(void) { override op(_LOAD_FAST, (-- value)) { value = GETLOCAL(oparg); + SET_STATIC_INST(); } override op(_LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); - _Py_UopsSymbol *temp = sym_new_null(ctx); - GETLOCAL(oparg) = temp; + GETLOCAL(oparg) = sym_new_null(ctx); } override op(_STORE_FAST, (value --)) { @@ -88,15 +88,19 @@ dummy_func(void) { sym_set_locals_idx(value, oparg); } - override op(_PUSH_NULL, (-- res)) { - res = sym_new_null(ctx); - } - override op(_LOAD_CONST, (-- value)) { // Should've all been converted by specializer. Py_UNREACHABLE(); } + + override op(_POP_TOP, (pop --)) { +// if (sym_is_virtual(pop)) { +// SET_STATIC_INST(); +// } + } + + override op (_CHECK_STACK_SPACE_OPERAND, ( -- )) { } diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index eb73d2d68b8cb2..a9f9ea59ebece4 100644 --- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -24,7 +24,7 @@ /* _MONITOR_RESUME is not a viable micro-op for tier 2 */ case _LOAD_FAST_CHECK: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = GETLOCAL(oparg); // We guarantee this will error - just bail and don't optimize it. 
if (sym_is_null(value)) { @@ -37,8 +37,9 @@ } case _LOAD_FAST: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = GETLOCAL(oparg); + SET_STATIC_INST(); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -46,10 +47,9 @@ } case _LOAD_FAST_AND_CLEAR: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = GETLOCAL(oparg); - _Py_UopsSymbol *temp = sym_new_null(ctx); - GETLOCAL(oparg) = temp; + GETLOCAL(oparg) = sym_new_null(ctx); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -57,7 +57,7 @@ } case _LOAD_CONST: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; // Should've all been converted by specializer. Py_UNREACHABLE(); stack_pointer[0] = value; @@ -67,7 +67,7 @@ } case _STORE_FAST: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = stack_pointer[-1]; GETLOCAL(oparg) = value; sym_set_locals_idx(value, oparg); @@ -77,13 +77,17 @@ } case _POP_TOP: { + _Py_UopsLocalsPlusSlot pop; + // if (sym_is_virtual(pop)) { + // SET_STATIC_INST(); + // } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } case _PUSH_NULL: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -92,7 +96,7 @@ } case _END_SEND: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[-2] = value; stack_pointer += -1; @@ -101,22 +105,22 @@ } case _UNARY_NEGATIVE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _UNARY_NOT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _TO_BOOL: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { res = sym_new_type(ctx, &PyBool_Type); @@ -126,8 
+130,8 @@ } case _TO_BOOL_BOOL: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { sym_set_type(value, &PyBool_Type); @@ -138,8 +142,8 @@ } case _TO_BOOL_INT: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { sym_set_type(value, &PyLong_Type); @@ -150,8 +154,8 @@ } case _TO_BOOL_LIST: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { sym_set_type(value, &PyList_Type); @@ -162,8 +166,8 @@ } case _TO_BOOL_NONE: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { sym_set_const(value, Py_None); @@ -174,8 +178,8 @@ } case _TO_BOOL_STR: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { res = sym_new_type(ctx, &PyBool_Type); @@ -186,22 +190,22 @@ } case _REPLACE_WITH_TRUE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _UNARY_INVERT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _GUARD_BOTH_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_matches_type(left, &PyLong_Type)) { @@ -231,9 +235,9 @@ } case _BINARY_OP_MULTIPLY_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - 
_Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -261,9 +265,9 @@ } case _BINARY_OP_ADD_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -291,9 +295,9 @@ } case _BINARY_OP_SUBTRACT_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -321,8 +325,8 @@ } case _GUARD_BOTH_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_matches_type(left, &PyFloat_Type)) { @@ -352,9 +356,9 @@ } case _BINARY_OP_MULTIPLY_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -383,9 +387,9 @@ } case _BINARY_OP_ADD_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -414,9 +418,9 @@ } case _BINARY_OP_SUBTRACT_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; 
left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -445,8 +449,8 @@ } case _GUARD_BOTH_UNICODE: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_matches_type(left, &PyUnicode_Type) && @@ -459,9 +463,9 @@ } case _BINARY_OP_ADD_UNICODE: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(left) && sym_is_const(right) && @@ -489,7 +493,7 @@ } case _BINARY_SUBSCR: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -498,7 +502,7 @@ } case _BINARY_SLICE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3] = res; stack_pointer += -2; @@ -513,7 +517,7 @@ } case _BINARY_SUBSCR_LIST_INT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -522,7 +526,7 @@ } case _BINARY_SUBSCR_STR_INT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -531,7 +535,7 @@ } case _BINARY_SUBSCR_TUPLE_INT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -540,7 +544,7 @@ } case _BINARY_SUBSCR_DICT: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -553,16 +557,16 @@ } case _BINARY_SUBSCR_INIT_CALL: { - _Py_UopsSymbol *sub; - _Py_UopsSymbol *container; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot sub; + _Py_UopsLocalsPlusSlot container; + _Py_UopsLocalsPlusSlot new_frame; sub = stack_pointer[-1]; 
container = stack_pointer[-2]; (void)container; (void)sub; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-2] = new_frame; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -605,14 +609,14 @@ } case _CALL_INTRINSIC_1: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _CALL_INTRINSIC_2: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -621,8 +625,8 @@ } case _RETURN_VALUE: { - _Py_UopsSymbol *retval; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot retval; + _Py_UopsLocalsPlusSlot res; retval = stack_pointer[-1]; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -649,14 +653,14 @@ } case _GET_AITER: { - _Py_UopsSymbol *iter; + _Py_UopsLocalsPlusSlot iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; } case _GET_ANEXT: { - _Py_UopsSymbol *awaitable; + _Py_UopsLocalsPlusSlot awaitable; awaitable = sym_new_not_null(ctx); stack_pointer[0] = awaitable; stack_pointer += 1; @@ -665,7 +669,7 @@ } case _GET_AWAITABLE: { - _Py_UopsSymbol *iter; + _Py_UopsLocalsPlusSlot iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; @@ -680,7 +684,7 @@ } case _YIELD_VALUE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_unknown(ctx); stack_pointer[-1] = res; break; @@ -693,7 +697,7 @@ } case _LOAD_COMMON_CONSTANT: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[0] = value; stack_pointer += 1; @@ -702,7 +706,7 @@ } case _LOAD_BUILD_CLASS: { - _Py_UopsSymbol *bc; + _Py_UopsLocalsPlusSlot bc; bc = sym_new_not_null(ctx); stack_pointer[0] = bc; stack_pointer += 1; @@ -721,8 +725,8 @@ } case _UNPACK_SEQUENCE: { - _Py_UopsSymbol *seq; - _Py_UopsSymbol **values; + _Py_UopsLocalsPlusSlot seq; + 
_Py_UopsLocalsPlusSlot *values; seq = stack_pointer[-1]; values = &stack_pointer[-1]; /* This has to be done manually */ @@ -736,8 +740,8 @@ } case _UNPACK_SEQUENCE_TWO_TUPLE: { - _Py_UopsSymbol *val1; - _Py_UopsSymbol *val0; + _Py_UopsLocalsPlusSlot val1; + _Py_UopsLocalsPlusSlot val0; val1 = sym_new_not_null(ctx); val0 = sym_new_not_null(ctx); stack_pointer[-1] = val1; @@ -748,7 +752,7 @@ } case _UNPACK_SEQUENCE_TUPLE: { - _Py_UopsSymbol **values; + _Py_UopsLocalsPlusSlot *values; values = &stack_pointer[-1]; for (int _i = oparg; --_i >= 0;) { values[_i] = sym_new_not_null(ctx); @@ -759,7 +763,7 @@ } case _UNPACK_SEQUENCE_LIST: { - _Py_UopsSymbol **values; + _Py_UopsLocalsPlusSlot *values; values = &stack_pointer[-1]; for (int _i = oparg; --_i >= 0;) { values[_i] = sym_new_not_null(ctx); @@ -770,8 +774,8 @@ } case _UNPACK_EX: { - _Py_UopsSymbol *seq; - _Py_UopsSymbol **values; + _Py_UopsLocalsPlusSlot seq; + _Py_UopsLocalsPlusSlot *values; seq = stack_pointer[-1]; values = &stack_pointer[-1]; /* This has to be done manually */ @@ -808,7 +812,7 @@ } case _LOAD_LOCALS: { - _Py_UopsSymbol *locals; + _Py_UopsLocalsPlusSlot locals; locals = sym_new_not_null(ctx); stack_pointer[0] = locals; stack_pointer += 1; @@ -819,7 +823,7 @@ /* _LOAD_FROM_DICT_OR_GLOBALS is not a viable micro-op for tier 2 */ case _LOAD_NAME: { - _Py_UopsSymbol *v; + _Py_UopsLocalsPlusSlot v; v = sym_new_not_null(ctx); stack_pointer[0] = v; stack_pointer += 1; @@ -828,8 +832,8 @@ } case _LOAD_GLOBAL: { - _Py_UopsSymbol *res; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot res; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; res = sym_new_not_null(ctx); null = sym_new_null(ctx); stack_pointer[0] = res; @@ -848,8 +852,8 @@ } case _LOAD_GLOBAL_MODULE: { - _Py_UopsSymbol *res; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot res; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; res = sym_new_not_null(ctx); null = sym_new_null(ctx); stack_pointer[0] = 
res; @@ -860,8 +864,8 @@ } case _LOAD_GLOBAL_BUILTINS: { - _Py_UopsSymbol *res; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot res; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; res = sym_new_not_null(ctx); null = sym_new_null(ctx); stack_pointer[0] = res; @@ -884,14 +888,14 @@ } case _LOAD_FROM_DICT_OR_DEREF: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[-1] = value; break; } case _LOAD_DEREF: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[0] = value; stack_pointer += 1; @@ -910,7 +914,7 @@ } case _BUILD_STRING: { - _Py_UopsSymbol *str; + _Py_UopsLocalsPlusSlot str; str = sym_new_not_null(ctx); stack_pointer[-oparg] = str; stack_pointer += 1 - oparg; @@ -919,7 +923,7 @@ } case _BUILD_TUPLE: { - _Py_UopsSymbol *tup; + _Py_UopsLocalsPlusSlot tup; tup = sym_new_not_null(ctx); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; @@ -928,7 +932,7 @@ } case _BUILD_LIST: { - _Py_UopsSymbol *list; + _Py_UopsLocalsPlusSlot list; list = sym_new_not_null(ctx); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; @@ -949,7 +953,7 @@ } case _BUILD_SET: { - _Py_UopsSymbol *set; + _Py_UopsLocalsPlusSlot set; set = sym_new_not_null(ctx); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; @@ -958,7 +962,7 @@ } case _BUILD_MAP: { - _Py_UopsSymbol *map; + _Py_UopsLocalsPlusSlot map; map = sym_new_not_null(ctx); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; @@ -991,7 +995,7 @@ /* _INSTRUMENTED_LOAD_SUPER_ATTR is not a viable micro-op for tier 2 */ case _LOAD_SUPER_ATTR_ATTR: { - _Py_UopsSymbol *attr_st; + _Py_UopsLocalsPlusSlot attr_st; attr_st = sym_new_not_null(ctx); stack_pointer[-3] = attr_st; stack_pointer += -2; @@ -1000,8 +1004,8 @@ } case _LOAD_SUPER_ATTR_METHOD: { - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self_or_null; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self_or_null; attr = 
sym_new_not_null(ctx); self_or_null = sym_new_not_null(ctx); stack_pointer[-3] = attr; @@ -1012,9 +1016,9 @@ } case _LOAD_ATTR: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self_or_null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self_or_null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; (void)owner; attr = sym_new_not_null(ctx); @@ -1029,7 +1033,7 @@ } case _GUARD_TYPE_VERSION: { - _Py_UopsSymbol *owner; + _Py_UopsLocalsPlusSlot owner; owner = stack_pointer[-1]; uint32_t type_version = (uint32_t)this_instr->operand; assert(type_version); @@ -1059,9 +1063,9 @@ } case _LOAD_ATTR_INSTANCE_VALUE: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; uint16_t offset = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); @@ -1076,7 +1080,7 @@ } case _CHECK_ATTR_MODULE: { - _Py_UopsSymbol *owner; + _Py_UopsLocalsPlusSlot owner; owner = stack_pointer[-1]; uint32_t dict_version = (uint32_t)this_instr->operand; (void)dict_version; @@ -1097,14 +1101,14 @@ } case _LOAD_ATTR_MODULE: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; uint16_t index = (uint16_t)this_instr->operand; (void)index; null = sym_new_null(ctx); - attr = NULL; + attr = (_Py_UopsLocalsPlusSlot){NULL, 0}; if (this_instr[-1].opcode == _NOP) { // Preceding _CHECK_ATTR_MODULE was removed: mod is const and dict is watched. assert(sym_is_const(owner)); @@ -1117,7 +1121,7 @@ attr = sym_new_const(ctx, res); } } - if (attr == NULL) { + if (attr.sym == NULL) { /* No conversion made. We don't know what `attr` is. 
*/ attr = sym_new_not_null(ctx); } @@ -1133,9 +1137,9 @@ } case _LOAD_ATTR_WITH_HINT: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; uint16_t hint = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); @@ -1150,9 +1154,9 @@ } case _LOAD_ATTR_SLOT: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; uint16_t index = (uint16_t)this_instr->operand; attr = sym_new_not_null(ctx); @@ -1171,9 +1175,9 @@ } case _LOAD_ATTR_CLASS: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *null = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand; attr = sym_new_not_null(ctx); @@ -1188,15 +1192,15 @@ } case _LOAD_ATTR_PROPERTY_FRAME: { - _Py_UopsSymbol *owner; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot new_frame; owner = stack_pointer[-1]; PyObject *fget = (PyObject *)this_instr->operand; (void)fget; (void)owner; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-1] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-1] = new_frame; break; } @@ -1225,9 +1229,9 @@ } case _COMPARE_OP: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1245,9 +1249,9 @@ } case _COMPARE_OP_FLOAT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; 
+ _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1260,9 +1264,9 @@ } case _COMPARE_OP_INT: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1275,9 +1279,9 @@ } case _COMPARE_OP_STR: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1290,9 +1294,9 @@ } case _IS_OP: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1305,9 +1309,9 @@ } case _CONTAINS_OP: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; (void)left; @@ -1320,7 +1324,7 @@ } case _CONTAINS_OP_SET: { - _Py_UopsSymbol *b; + _Py_UopsLocalsPlusSlot b; b = sym_new_not_null(ctx); stack_pointer[-2] = b; stack_pointer += -1; @@ -1329,7 +1333,7 @@ } case _CONTAINS_OP_DICT: { - _Py_UopsSymbol *b; + _Py_UopsLocalsPlusSlot b; b = sym_new_not_null(ctx); stack_pointer[-2] = b; stack_pointer += -1; @@ -1338,8 +1342,8 @@ } case _CHECK_EG_MATCH: { - _Py_UopsSymbol *rest; - _Py_UopsSymbol *match; + _Py_UopsLocalsPlusSlot rest; + _Py_UopsLocalsPlusSlot match; rest = sym_new_not_null(ctx); match = sym_new_not_null(ctx); stack_pointer[-2] = rest; @@ -1348,14 +1352,14 @@ } case _CHECK_EXC_MATCH: { - _Py_UopsSymbol *b; + _Py_UopsLocalsPlusSlot b; b = sym_new_not_null(ctx); stack_pointer[-1] = b; 
break; } case _IMPORT_NAME: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -1364,7 +1368,7 @@ } case _IMPORT_FROM: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1377,14 +1381,14 @@ /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */ case _IS_NONE: { - _Py_UopsSymbol *b; + _Py_UopsLocalsPlusSlot b; b = sym_new_not_null(ctx); stack_pointer[-1] = b; break; } case _GET_LEN: { - _Py_UopsSymbol *len; + _Py_UopsLocalsPlusSlot len; len = sym_new_not_null(ctx); stack_pointer[0] = len; stack_pointer += 1; @@ -1393,7 +1397,7 @@ } case _MATCH_CLASS: { - _Py_UopsSymbol *attrs; + _Py_UopsLocalsPlusSlot attrs; attrs = sym_new_not_null(ctx); stack_pointer[-3] = attrs; stack_pointer += -2; @@ -1402,7 +1406,7 @@ } case _MATCH_MAPPING: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1411,7 +1415,7 @@ } case _MATCH_SEQUENCE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1420,7 +1424,7 @@ } case _MATCH_KEYS: { - _Py_UopsSymbol *values_or_none; + _Py_UopsLocalsPlusSlot values_or_none; values_or_none = sym_new_not_null(ctx); stack_pointer[0] = values_or_none; stack_pointer += 1; @@ -1429,14 +1433,14 @@ } case _GET_ITER: { - _Py_UopsSymbol *iter; + _Py_UopsLocalsPlusSlot iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; } case _GET_YIELD_FROM_ITER: { - _Py_UopsSymbol *iter; + _Py_UopsLocalsPlusSlot iter; iter = sym_new_not_null(ctx); stack_pointer[-1] = iter; break; @@ -1445,7 +1449,7 @@ /* _FOR_ITER is not a viable micro-op for tier 2 */ case _FOR_ITER_TIER_TWO: { - _Py_UopsSymbol *next; + _Py_UopsLocalsPlusSlot next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1466,7 +1470,7 @@ } case 
_ITER_NEXT_LIST: { - _Py_UopsSymbol *next; + _Py_UopsLocalsPlusSlot next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1485,7 +1489,7 @@ } case _ITER_NEXT_TUPLE: { - _Py_UopsSymbol *next; + _Py_UopsLocalsPlusSlot next; next = sym_new_not_null(ctx); stack_pointer[0] = next; stack_pointer += 1; @@ -1504,8 +1508,8 @@ } case _ITER_NEXT_RANGE: { - _Py_UopsSymbol *iter; - _Py_UopsSymbol *next; + _Py_UopsLocalsPlusSlot iter; + _Py_UopsLocalsPlusSlot next; iter = stack_pointer[-1]; next = sym_new_type(ctx, &PyLong_Type); (void)iter; @@ -1522,9 +1526,9 @@ } case _LOAD_SPECIAL: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self_or_null; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self_or_null; owner = stack_pointer[-1]; (void)owner; attr = sym_new_not_null(ctx); @@ -1537,7 +1541,7 @@ } case _WITH_EXCEPT_START: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[0] = res; stack_pointer += 1; @@ -1546,8 +1550,8 @@ } case _PUSH_EXC_INFO: { - _Py_UopsSymbol *prev_exc; - _Py_UopsSymbol *new_exc; + _Py_UopsLocalsPlusSlot prev_exc; + _Py_UopsLocalsPlusSlot new_exc; prev_exc = sym_new_not_null(ctx); new_exc = sym_new_not_null(ctx); stack_pointer[-1] = prev_exc; @@ -1566,9 +1570,9 @@ } case _LOAD_ATTR_METHOD_WITH_VALUES: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand; (void)descr; @@ -1582,9 +1586,9 @@ } case _LOAD_ATTR_METHOD_NO_DICT: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; PyObject *descr = (PyObject 
*)this_instr->operand; (void)descr; @@ -1598,14 +1602,14 @@ } case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { - _Py_UopsSymbol *attr; + _Py_UopsLocalsPlusSlot attr; attr = sym_new_not_null(ctx); stack_pointer[-1] = attr; break; } case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { - _Py_UopsSymbol *attr; + _Py_UopsLocalsPlusSlot attr; attr = sym_new_not_null(ctx); stack_pointer[-1] = attr; break; @@ -1616,9 +1620,9 @@ } case _LOAD_ATTR_METHOD_LAZY_DICT: { - _Py_UopsSymbol *owner; - _Py_UopsSymbol *attr; - _Py_UopsSymbol *self = NULL; + _Py_UopsLocalsPlusSlot owner; + _Py_UopsLocalsPlusSlot attr; + _Py_UopsLocalsPlusSlot self = (_Py_UopsLocalsPlusSlot){NULL, 0}; owner = stack_pointer[-1]; PyObject *descr = (PyObject *)this_instr->operand; (void)descr; @@ -1632,11 +1636,11 @@ } case _MAYBE_EXPAND_METHOD: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; - _Py_UopsSymbol *func; - _Py_UopsSymbol *maybe_self; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot func; + _Py_UopsLocalsPlusSlot maybe_self; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1656,10 +1660,10 @@ /* _MONITOR_CALL is not a viable micro-op for tier 2 */ case _PY_FRAME_GENERAL: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot new_frame; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1667,9 +1671,9 @@ (void)callable; (void)self_or_null; (void)args; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; 
assert(WITHIN_STACK_BOUNDS()); break; @@ -1684,8 +1688,8 @@ } case _EXPAND_METHOD: { - _Py_UopsSymbol *method; - _Py_UopsSymbol *self; + _Py_UopsLocalsPlusSlot method; + _Py_UopsLocalsPlusSlot self; method = sym_new_not_null(ctx); self = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = method; @@ -1698,7 +1702,7 @@ } case _CALL_NON_PY_GENERAL: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1707,8 +1711,8 @@ } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - _Py_UopsSymbol *null; - _Py_UopsSymbol *callable; + _Py_UopsLocalsPlusSlot null; + _Py_UopsLocalsPlusSlot callable; null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; sym_set_null(null); @@ -1717,9 +1721,9 @@ } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - _Py_UopsSymbol *callable; - _Py_UopsSymbol *func; - _Py_UopsSymbol *self; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot func; + _Py_UopsLocalsPlusSlot self; callable = stack_pointer[-2 - oparg]; (void)callable; func = sym_new_not_null(ctx); @@ -1739,8 +1743,8 @@ } case _CHECK_FUNCTION_EXACT_ARGS: { - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; sym_set_type(callable, &PyFunction_Type); @@ -1755,10 +1759,10 @@ } case _INIT_CALL_PY_EXACT_ARGS: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot new_frame; args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1784,7 +1788,7 @@ co = (PyCodeObject *)func->func_code; DPRINTF(3, "code=%p ", co); } - assert(self_or_null != NULL); + assert(self_or_null.sym != 
NULL); assert(args != NULL); if (sym_is_not_null(self_or_null)) { // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM @@ -1792,25 +1796,25 @@ argcount++; } if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - new_frame = frame_new(ctx, co, 0, args, argcount); + new_frame.sym = (_Py_UopsSymbol *)frame_new(ctx, co, 0, args, argcount); } else { - new_frame = frame_new(ctx, co, 0, NULL, 0); + new_frame.sym = (_Py_UopsSymbol *)frame_new(ctx, co, 0, NULL, 0); } - stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; } case _PUSH_FRAME: { - _Py_UOpsAbstractFrame *new_frame; - new_frame = (_Py_UOpsAbstractFrame *)stack_pointer[-1]; + _Py_UopsLocalsPlusSlot new_frame; + new_frame = stack_pointer[-1]; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; - ctx->frame = new_frame; + ctx->frame = (_Py_UOpsAbstractFrame *)new_frame.sym; ctx->curr_frame_depth++; - stack_pointer = new_frame->stack_pointer; + stack_pointer = ((_Py_UOpsAbstractFrame *)new_frame.sym)->stack_pointer; co = get_code(this_instr); if (co == NULL) { // should be about to _EXIT_TRACE anyway @@ -1839,7 +1843,7 @@ } case _CALL_TYPE_1: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3] = res; stack_pointer += -2; @@ -1848,7 +1852,7 @@ } case _CALL_STR_1: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3] = res; stack_pointer += -2; @@ -1857,7 +1861,7 @@ } case _CALL_TUPLE_1: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3] = res; stack_pointer += -2; @@ -1866,11 +1870,11 @@ } case _CHECK_AND_ALLOCATE_OBJECT: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *null; - _Py_UopsSymbol *callable; - _Py_UopsSymbol *self; - _Py_UopsSymbol *init; + _Py_UopsLocalsPlusSlot *args; + 
_Py_UopsLocalsPlusSlot null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot self; + _Py_UopsLocalsPlusSlot init; args = &stack_pointer[-oparg]; null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; @@ -1888,19 +1892,19 @@ } case _CREATE_INIT_FRAME: { - _Py_UopsSymbol **args; - _Py_UopsSymbol *init; - _Py_UopsSymbol *self; - _Py_UOpsAbstractFrame *init_frame; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot init; + _Py_UopsLocalsPlusSlot self; + _Py_UopsLocalsPlusSlot init_frame; args = &stack_pointer[-oparg]; init = stack_pointer[-1 - oparg]; self = stack_pointer[-2 - oparg]; (void)self; (void)init; (void)args; - init_frame = NULL; + init_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)init_frame; + stack_pointer[-2 - oparg] = init_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -1913,7 +1917,7 @@ } case _CALL_BUILTIN_CLASS: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1922,7 +1926,7 @@ } case _CALL_BUILTIN_O: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1931,7 +1935,7 @@ } case _CALL_BUILTIN_FAST: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1940,7 +1944,7 @@ } case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1949,7 +1953,7 @@ } case _CALL_LEN: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1958,7 +1962,7 @@ } case _CALL_ISINSTANCE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res 
= sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1973,7 +1977,7 @@ } case _CALL_METHOD_DESCRIPTOR_O: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1982,7 +1986,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -1991,7 +1995,7 @@ } case _CALL_METHOD_DESCRIPTOR_NOARGS: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2000,7 +2004,7 @@ } case _CALL_METHOD_DESCRIPTOR_FAST: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; @@ -2013,11 +2017,11 @@ /* _DO_CALL_KW is not a viable micro-op for tier 2 */ case _PY_FRAME_KW: { - _Py_UopsSymbol *kwnames; - _Py_UopsSymbol **args; - _Py_UopsSymbol *self_or_null; - _Py_UopsSymbol *callable; - _Py_UOpsAbstractFrame *new_frame; + _Py_UopsLocalsPlusSlot kwnames; + _Py_UopsLocalsPlusSlot *args; + _Py_UopsLocalsPlusSlot self_or_null; + _Py_UopsLocalsPlusSlot callable; + _Py_UopsLocalsPlusSlot new_frame; kwnames = stack_pointer[-1]; args = &stack_pointer[-1 - oparg]; self_or_null = stack_pointer[-2 - oparg]; @@ -2026,9 +2030,9 @@ (void)self_or_null; (void)args; (void)kwnames; - new_frame = NULL; + new_frame = (_Py_UopsLocalsPlusSlot){NULL, 0}; ctx->done = true; - stack_pointer[-3 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer[-3 - oparg] = new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); break; @@ -2043,9 +2047,9 @@ } case _EXPAND_METHOD_KW: { - _Py_UopsSymbol *method; - _Py_UopsSymbol *self; - _Py_UopsSymbol *kwnames; + _Py_UopsLocalsPlusSlot method; + _Py_UopsLocalsPlusSlot self; + _Py_UopsLocalsPlusSlot kwnames; 
method = sym_new_not_null(ctx); self = sym_new_not_null(ctx); kwnames = sym_new_not_null(ctx); @@ -2060,7 +2064,7 @@ } case _CALL_KW_NON_PY: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-3 - oparg] = res; stack_pointer += -2 - oparg; @@ -2073,14 +2077,14 @@ /* __DO_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ case _MAKE_FUNCTION: { - _Py_UopsSymbol *func; + _Py_UopsLocalsPlusSlot func; func = sym_new_not_null(ctx); stack_pointer[-1] = func; break; } case _SET_FUNCTION_ATTRIBUTE: { - _Py_UopsSymbol *func_st; + _Py_UopsLocalsPlusSlot func_st; func_st = sym_new_not_null(ctx); stack_pointer[-2] = func_st; stack_pointer += -1; @@ -2089,7 +2093,7 @@ } case _RETURN_GENERATOR: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; ctx->frame->stack_pointer = stack_pointer; frame_pop(ctx); stack_pointer = ctx->frame->stack_pointer; @@ -2113,7 +2117,7 @@ } case _BUILD_SLICE: { - _Py_UopsSymbol *slice; + _Py_UopsLocalsPlusSlot slice; slice = sym_new_not_null(ctx); stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; stack_pointer += -1 - ((oparg == 3) ? 
1 : 0); @@ -2122,21 +2126,21 @@ } case _CONVERT_VALUE: { - _Py_UopsSymbol *result; + _Py_UopsLocalsPlusSlot result; result = sym_new_not_null(ctx); stack_pointer[-1] = result; break; } case _FORMAT_SIMPLE: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-1] = res; break; } case _FORMAT_WITH_SPEC: { - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot res; res = sym_new_not_null(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -2145,8 +2149,8 @@ } case _COPY: { - _Py_UopsSymbol *bottom; - _Py_UopsSymbol *top; + _Py_UopsLocalsPlusSlot bottom; + _Py_UopsLocalsPlusSlot top; bottom = stack_pointer[-1 - (oparg-1)]; assert(oparg > 0); top = bottom; @@ -2157,9 +2161,9 @@ } case _BINARY_OP: { - _Py_UopsSymbol *right; - _Py_UopsSymbol *left; - _Py_UopsSymbol *res; + _Py_UopsLocalsPlusSlot right; + _Py_UopsLocalsPlusSlot left; + _Py_UopsLocalsPlusSlot res; right = stack_pointer[-1]; left = stack_pointer[-2]; PyTypeObject *ltype = sym_get_type(left); @@ -2185,8 +2189,8 @@ } case _SWAP: { - _Py_UopsSymbol *top; - _Py_UopsSymbol *bottom; + _Py_UopsLocalsPlusSlot top; + _Py_UopsLocalsPlusSlot bottom; top = stack_pointer[-1]; bottom = stack_pointer[-2 - (oparg-2)]; stack_pointer[-2 - (oparg-2)] = top; @@ -2211,7 +2215,7 @@ /* _INSTRUMENTED_POP_JUMP_IF_NOT_NONE is not a viable micro-op for tier 2 */ case _GUARD_IS_TRUE_POP: { - _Py_UopsSymbol *flag; + _Py_UopsLocalsPlusSlot flag; flag = stack_pointer[-1]; if (sym_is_const(flag)) { PyObject *value = sym_get_const(flag); @@ -2224,7 +2228,7 @@ } case _GUARD_IS_FALSE_POP: { - _Py_UopsSymbol *flag; + _Py_UopsLocalsPlusSlot flag; flag = stack_pointer[-1]; if (sym_is_const(flag)) { PyObject *value = sym_get_const(flag); @@ -2237,7 +2241,7 @@ } case _GUARD_IS_NONE_POP: { - _Py_UopsSymbol *flag; + _Py_UopsLocalsPlusSlot flag; flag = stack_pointer[-1]; if (sym_is_const(flag)) { PyObject *value = sym_get_const(flag); @@ -2254,7 +2258,7 @@ } case _GUARD_IS_NOT_NONE_POP: { - _Py_UopsSymbol 
*flag; + _Py_UopsLocalsPlusSlot flag; flag = stack_pointer[-1]; if (sym_is_const(flag)) { PyObject *value = sym_get_const(flag); @@ -2300,7 +2304,7 @@ } case _LOAD_CONST_INLINE: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); stack_pointer[0] = value; @@ -2310,7 +2314,7 @@ } case _LOAD_CONST_INLINE_BORROW: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); stack_pointer[0] = value; @@ -2320,15 +2324,15 @@ } case _POP_TOP_LOAD_CONST_INLINE_BORROW: { - _Py_UopsSymbol *value; + _Py_UopsLocalsPlusSlot value; value = sym_new_not_null(ctx); stack_pointer[-1] = value; break; } case _LOAD_CONST_INLINE_WITH_NULL: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *null; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot null; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); null = sym_new_null(ctx); @@ -2340,8 +2344,8 @@ } case _LOAD_CONST_INLINE_BORROW_WITH_NULL: { - _Py_UopsSymbol *value; - _Py_UopsSymbol *null; + _Py_UopsLocalsPlusSlot value; + _Py_UopsLocalsPlusSlot null; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); null = sym_new_null(ctx); diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index b74f627235ad84..8e3e6e2d6c6022 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -35,10 +35,10 @@ def validate_uop(override: Uop, uop: Uop) -> None: def type_name(var: StackItem) -> str: if var.is_array(): - return f"_Py_UopsSymbol **" + return f"_Py_UopsLocalsPlusSlot *" if var.type: return var.type - return f"_Py_UopsSymbol *" + return f"_Py_UopsLocalsPlusSlot " def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: @@ -48,7 +48,7 @@ def declare_variables(uop: Uop, out: CWriter, skip_inputs: 
bool) -> None: if var.name not in variables: variables.add(var.name) if var.condition: - out.emit(f"{type_name(var)}{var.name} = NULL;\n") + out.emit(f"{type_name(var)}{var.name} = (_Py_UopsLocalsPlusSlot){{NULL, 0}};\n") else: out.emit(f"{type_name(var)}{var.name};\n") for var in uop.stack.outputs: @@ -57,7 +57,7 @@ def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: if var.name not in variables: variables.add(var.name) if var.condition: - out.emit(f"{type_name(var)}{var.name} = NULL;\n") + out.emit(f"{type_name(var)}{var.name} = (_Py_UopsLocalsPlusSlot){{NULL, 0}};\n") else: out.emit(f"{type_name(var)}{var.name};\n") @@ -141,7 +141,7 @@ def write_uop( local = Local.local(var) stack.push(local) out.start_line() - stack.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=True) + stack.flush(out, cast_type="", extract_bits=True) except StackError as ex: raise analysis_error(ex.args[0], uop.body[0]) diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 34bf597f2f552d..91ef47c65c46a6 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -249,7 +249,7 @@ def _do_emit( cast_type: str = "uintptr_t", extract_bits: bool = False, ) -> None: - cast = f"({cast_type})" if var.type else "" + cast = f"({cast_type})" if var.type and cast_type else "" bits = ".bits" if cast and not extract_bits else "" if var.condition == "0": return From d8732fc19b8bb4473da7cccde7938ac92b164279 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 3 Sep 2024 20:43:36 +0800 Subject: [PATCH 06/18] baby pe --- Include/internal/pycore_opcode_metadata.h | 6 +- Include/internal/pycore_optimizer.h | 3 + Include/internal/pycore_uop_metadata.h | 22 +-- Python/bytecodes.c | 6 +- Python/optimizer_analysis.c | 139 ++++++++++-------- Python/partial_evaluator_bytecodes.c | 13 +- Python/partial_evaluator_cases.c.h | 12 +- Tools/cases_generator/analyzer.py | 2 + 
Tools/cases_generator/generators_common.py | 2 + Tools/cases_generator/lexer.py | 1 + .../opcode_metadata_generator.py | 1 + 11 files changed, 127 insertions(+), 80 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 97a8e4a00a9d55..b0fa37381e9b69 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -974,6 +974,7 @@ enum InstructionFormat { #define HAS_PASSTHROUGH_FLAG (4096) #define HAS_OPARG_AND_1_FLAG (8192) #define HAS_ERROR_NO_POP_FLAG (16384) +#define HAS_STATIC_FLAG (32768) #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) #define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) @@ -989,6 +990,7 @@ enum InstructionFormat { #define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG)) #define OPCODE_HAS_OPARG_AND_1(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_OPARG_AND_1_FLAG)) #define OPCODE_HAS_ERROR_NO_POP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_NO_POP_FLAG)) +#define OPCODE_HAS_STATIC(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_STATIC_FLAG)) #define OPARG_FULL 0 #define OPARG_CACHE_1 1 @@ -1145,7 +1147,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [LOAD_COMMON_CONSTANT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | 
HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, @@ -1225,7 +1227,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [_DO_CALL_FUNCTION_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_NO_INTERRUPT] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 68b59d1c877785..906931dc2abffb 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -158,6 +158,7 @@ struct _Py_UopsSymbol { #define UOP_FORMAT_TARGET 0 #define UOP_FORMAT_JUMP 1 +void _PyUOpPrint(const _PyUOpInstruction*); static inline uint32_t uop_get_target(const _PyUOpInstruction *inst) { @@ -207,6 +208,8 @@ struct _Py_UOpsAbstractFrame { _Py_UopsLocalsPlusSlot *stack_pointer; _Py_UopsLocalsPlusSlot *stack; _Py_UopsLocalsPlusSlot *locals; + + void *instr_ptr; }; typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index e2cba4dc0dfc81..9300d48b025a65 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -19,20 +19,20 @@ extern int _PyUop_num_popped(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { - [_NOP] = HAS_PURE_FLAG, + [_NOP] = HAS_PURE_FLAG | HAS_STATIC_FLAG, [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, 
[_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_3] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_4] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_5] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_STATIC_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, @@ -47,7 +47,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_POP_TOP] = HAS_PURE_FLAG, + [_POP_TOP] = HAS_PURE_FLAG | HAS_STATIC_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_SEND] = HAS_PURE_FLAG, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 01e88a34d10b6a..a07fcf055332e7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -140,7 +140,7 @@ dummy_func( switch (opcode) { // BEGIN 
BYTECODES // - pure inst(NOP, (--)) { + pure _static inst(NOP, (--)) { } family(RESUME, 0) = { @@ -239,7 +239,7 @@ dummy_func( value = PyStackRef_DUP(value_s); } - replicate(8) pure inst(LOAD_FAST, (-- value)) { + replicate(8) _static inst(LOAD_FAST, (-- value)) { assert(!PyStackRef_IsNull(GETLOCAL(oparg))); value = PyStackRef_DUP(GETLOCAL(oparg)); } @@ -283,7 +283,7 @@ dummy_func( SETLOCAL(oparg2, value2); } - pure inst(POP_TOP, (value --)) { + pure _static inst(POP_TOP, (value --)) { DECREF_INPUTS(); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5758d404553565..c9061eabe96bed 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -39,12 +39,13 @@ extern void _PyUOpPrint(const _PyUOpInstruction *uop); static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; static inline int get_lltrace(void) { - char *uop_debug = Py_GETENV(DEBUG_ENV); - int lltrace = 0; - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = *uop_debug - '0'; // TODO: Parse an int and all that - } - return lltrace; + return 5; +// char *uop_debug = Py_GETENV(DEBUG_ENV); +// int lltrace = 0; +// if (uop_debug != NULL && *uop_debug >= '0') { +// lltrace = *uop_debug - '0'; // TODO: Parse an int and all that +// } +// return lltrace; } #define DPRINTF(level, ...) \ if (get_lltrace() >= (level)) { printf(__VA_ARGS__); } @@ -501,37 +502,41 @@ optimize_uops( static void reify_shadow_stack(_Py_UOpsContext *ctx) -{ +{; + bool wrote_inst = false; _PyUOpInstruction *trace_dest = ctx->trace_dest; for (_Py_UopsLocalsPlusSlot *sp = ctx->frame->stack; sp < ctx->frame->stack_pointer; sp++) { - _Py_UopsSymbol *sym = sp->sym; - assert(sym != NULL); + _Py_UopsLocalsPlusSlot slot = *sp; + assert(slot.sym != NULL); // Need reifying. -// if (sym->is_virtual) { -// if (sym->const_val) { -// WRITE_OP(&trace_dest[ctx->n_trace_dest], _Py_IsImmortal(sym->const_val) ? 
-// _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE, sym->locals_idx, (uint64_t)sym->const_val); -// } -// else if (sym->locals_idx >= 0) { -// printf("pe reified LOAD_FAST %d\n", sym->locals_idx); -// WRITE_OP(&trace_dest[ctx->n_trace_dest], _LOAD_FAST, sym->locals_idx, 0); -// } -// else if (sym_is_null(sym)) { -// WRITE_OP(&trace_dest[ctx->n_trace_dest], _PUSH_NULL, sym->locals_idx, 0); -// } -// else { -// // Is static but not a constant value of locals or NULL. -// // How is that possible? -// Py_UNREACHABLE(); -// } -// ctx->n_trace_dest++; -// sym->is_virtual = false; -// } -// if (ctx->n_trace_dest >= UOP_MAX_TRACE_LENGTH) { -// ctx->out_of_space = true; -// ctx->done = true; -// return; -// } + if (slot.is_virtual) { + wrote_inst = true; + if (slot.sym->const_val) { + WRITE_OP(&trace_dest[ctx->n_trace_dest], _Py_IsImmortal(slot.sym->const_val) ? + _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE, 0, (uint64_t)slot.sym->const_val); + } + else if (slot.sym->locals_idx >= 0) { + DPRINTF(3, "reifying LOAD_FAST %d\n", slot.sym->locals_idx); + WRITE_OP(&trace_dest[ctx->n_trace_dest], _LOAD_FAST, slot.sym->locals_idx, 0); + trace_dest[ctx->n_trace_dest].format = UOP_FORMAT_TARGET; + trace_dest[ctx->n_trace_dest].jump_target = 0; + } + else if (sym_is_null(slot)) { + WRITE_OP(&trace_dest[ctx->n_trace_dest], _PUSH_NULL, 0, 0); + } + else { + // Is static but not a constant value of locals or NULL. + // How is that possible? 
+ Py_UNREACHABLE(); + } + ctx->n_trace_dest++; + if (ctx->n_trace_dest >= UOP_MAX_TRACE_LENGTH) { + ctx->out_of_space = true; + ctx->done = true; + return; + } + sp->is_virtual = false; + } } } @@ -570,21 +575,23 @@ partial_evaluate_uops( _PyUOpInstruction *this_instr = NULL; int i = 0; - bool prev_instr_is_truly_static = false; for (; !ctx->done; i++) { assert(i < trace_len); this_instr = &trace[i]; - trace_dest[ctx->n_trace_dest] = *this_instr; int oparg = this_instr->oparg; opcode = this_instr->opcode; uint64_t operand = this_instr->operand; _Py_UopsLocalsPlusSlot *stack_pointer = ctx->frame->stack_pointer; + _Py_UopsLocalsPlusSlot *old_sp = stack_pointer; // An instruction is candidate static if it has no escapes, and all its inputs // are static. // If so, whether it can be eliminated is up to whether it has an implementation. bool instr_is_truly_static = false; + if (!(_PyUop_Flags[opcode] & HAS_STATIC_FLAG)) { + reify_shadow_stack(ctx); + } #ifdef Py_DEBUG if (get_lltrace() >= 3) { @@ -609,29 +616,25 @@ partial_evaluate_uops( if (ctx->done) { break; } - // Always write these instructions for bookkeeping. - if (opcode == _CHECK_VALIDITY_AND_SET_IP || opcode == _SET_IP || opcode == _CHECK_VALIDITY) { - WRITE_OP(&trace_dest[ctx->n_trace_dest], opcode, oparg, operand); - ctx->n_trace_dest++; - } - // If the instruction is not static, - // reify the shadow stack, and write the op. - else if (!instr_is_truly_static) { - reify_shadow_stack(ctx); - WRITE_OP(&trace_dest[ctx->n_trace_dest], opcode, oparg, operand); + if (!instr_is_truly_static) { + trace_dest[ctx->n_trace_dest] = *this_instr; ctx->n_trace_dest++; + if (ctx->n_trace_dest >= UOP_MAX_TRACE_LENGTH) { + ctx->out_of_space = true; + ctx->done = true; + } } else { -//#ifdef Py_DEBUG -// if (get_lltrace() >= 3) { + // Inst is static. Nothing written :)! 
+ assert((_PyUop_Flags[opcode] & HAS_STATIC_FLAG)); +#ifdef Py_DEBUG + if (get_lltrace() >= 3) { printf("%4d pe STATIC: ", (int) (this_instr - trace)); _PyUOpPrint(this_instr); printf("\n"); -// } -//#endif - // Inst is static. Nothing written :)! + } +#endif } - prev_instr_is_truly_static = instr_is_truly_static; } if (ctx->out_of_space) { DPRINTF(3, "\n"); @@ -658,10 +661,28 @@ partial_evaluate_uops( // That's the only time the PE's residual is valid. assert(ctx->n_trace_dest < UOP_MAX_TRACE_LENGTH); assert(is_terminator(this_instr)); - // Copy rest of trace to dest - memcpy(trace, trace_dest, ctx->n_trace_dest); + + // Copy rest of trace into trace_dest + memcpy(&trace_dest[ctx->n_trace_dest], &trace[i], (trace_len - i) * sizeof(_PyUOpInstruction )); + + printf("Optimized trace_dest (length %d):\n", ctx->n_trace_dest); + for (int x = 0; x < (trace_len - i) + ctx->n_trace_dest; x++) { + printf("%4d OPTIMIZED: ", x); + _PyUOpPrint(&trace_dest[x]); + printf("\n"); + } + + // Copy trace_dest into trace. + memcpy(trace, trace_dest, trace_len * sizeof(_PyUOpInstruction )); + printf("Optimized trace (length %d):\n", ctx->n_trace_dest); +// for (int i = 0; i < trace_len; i++) { +// printf("%4d OPTIMIZED: ", i); +// _PyUOpPrint(&trace[i]); +// printf("\n"); +// } + int trace_dest_len = ctx->n_trace_dest; _Py_uop_abstractcontext_fini(ctx); - return trace_len; + return (trace_len - i) + trace_dest_len; } error: @@ -720,7 +741,6 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } if (last->opcode == _LOAD_CONST_INLINE || last->opcode == _LOAD_CONST_INLINE_BORROW || -// last->opcode == _LOAD_FAST || last->opcode == _COPY ) { last->opcode = _NOP; @@ -787,6 +807,11 @@ _Py_uop_analyze_and_optimize( return length; } + // Help the PE by removing as many _CHECK_VALIDITY as possible, + // Since PE treats that as non-static since it can deopt arbitrarily. 
+ + length = remove_unneeded_uops(buffer, length); + length = partial_evaluate_uops( _PyFrame_GetCode(frame), buffer, length, curr_stacklen, dependencies); @@ -795,8 +820,6 @@ _Py_uop_analyze_and_optimize( return length; } - length = remove_unneeded_uops(buffer, length); - assert(length > 0); OPT_STAT_INC(optimizer_successes); return length; diff --git a/Python/partial_evaluator_bytecodes.c b/Python/partial_evaluator_bytecodes.c index a5aff06f442e71..3116cbc3ff1841 100644 --- a/Python/partial_evaluator_bytecodes.c +++ b/Python/partial_evaluator_bytecodes.c @@ -76,6 +76,7 @@ dummy_func(void) { override op(_LOAD_FAST, (-- value)) { value = GETLOCAL(oparg); SET_STATIC_INST(); + value.is_virtual = true; } override op(_LOAD_FAST_AND_CLEAR, (-- value)) { @@ -95,11 +96,17 @@ dummy_func(void) { override op(_POP_TOP, (pop --)) { -// if (sym_is_virtual(pop)) { -// SET_STATIC_INST(); -// } + if (pop.is_virtual) { + SET_STATIC_INST(); + } + else { + reify_shadow_stack(ctx); + } } + override op(_NOP, (--)) { + SET_STATIC_INST(); + } override op (_CHECK_STACK_SPACE_OPERAND, ( -- )) { } diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index a9f9ea59ebece4..bd0a6a3e89d59e 100644 --- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -4,6 +4,7 @@ // Do not edit! 
case _NOP: { + SET_STATIC_INST(); break; } @@ -40,6 +41,7 @@ _Py_UopsLocalsPlusSlot value; value = GETLOCAL(oparg); SET_STATIC_INST(); + value.is_virtual = true; stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -78,9 +80,13 @@ case _POP_TOP: { _Py_UopsLocalsPlusSlot pop; - // if (sym_is_virtual(pop)) { - // SET_STATIC_INST(); - // } + pop = stack_pointer[-1]; + if (pop.is_virtual) { + SET_STATIC_INST(); + } + else { + reify_shadow_stack(ctx); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 3cc36b6b5841bd..60d7accdc40f4d 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -23,6 +23,7 @@ class Properties: has_free: bool side_exit: bool pure: bool + static: bool = False tier: int | None = None oparg_and_1: bool = False const_oparg: int = -1 @@ -674,6 +675,7 @@ def compute_properties(op: parser.InstDef) -> Properties: and not has_free, has_free=has_free, pure="pure" in op.annotations, + static="_static" in op.annotations, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index dd4057c931ca19..560a6795c95380 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -262,6 +262,8 @@ def cflags(p: Properties) -> str: flags.append("HAS_PURE_FLAG") if p.oparg_and_1: flags.append("HAS_OPARG_AND_1_FLAG") + if p.static: + flags.append("HAS_STATIC_FLAG") if flags: return " | ".join(flags) else: diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index d5831593215f76..c171e0b94da5ed 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -226,6 +226,7 @@ def choice(*opts: str) -> str: "replicate", "tier1", "tier2", + "_static", } __all__ = [] diff --git 
a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 9b1bc98b5c08d7..58fffa3a5ac483 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -52,6 +52,7 @@ "PASSTHROUGH", "OPARG_AND_1", "ERROR_NO_POP", + "STATIC", ] From a6bc1a0dbdad294d204f167f27e14659b4cd8993 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 4 Sep 2024 03:16:25 +0800 Subject: [PATCH 07/18] dead store elimination --- Include/internal/pycore_opcode_metadata.h | 14 +++--- Include/internal/pycore_optimizer.h | 1 + Include/internal/pycore_uop_metadata.h | 22 ++++----- Lib/test/test_capi/test_opt.py | 14 ++++++ Python/bytecodes.c | 6 +-- Python/optimizer_analysis.c | 56 +++++++++-------------- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_symbols.c | 6 +++ Python/partial_evaluator_bytecodes.c | 15 ++++-- Python/partial_evaluator_cases.c.h | 9 +++- 10 files changed, 84 insertions(+), 61 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index b0fa37381e9b69..960d2fd4c915f8 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1083,7 +1083,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [DICT_MERGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DICT_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [END_ASYNC_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [END_FOR] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [END_FOR] = { true, INSTR_FMT_IX, 0 }, [END_SEND] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [EXIT_INIT_CHECK] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1169,13 +1169,13 @@ const 
struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 }, [MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 }, - [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [NOP] = { true, INSTR_FMT_IX, 0 }, [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_TOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [POP_TOP] = { true, INSTR_FMT_IX, 0 }, [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, [PUSH_NULL] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1228,10 +1228,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_NO_INTERRUPT] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, - [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, - [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, - [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, - [SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [POP_BLOCK] = { true, -1, 0 }, + [SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG }, + [SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG }, + [SETUP_WITH] = { true, -1, HAS_ARG_FLAG }, [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, }; #endif diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 906931dc2abffb..f2e15987c83b57 100644 --- a/Include/internal/pycore_optimizer.h +++ 
b/Include/internal/pycore_optimizer.h @@ -256,6 +256,7 @@ extern bool _Py_uop_sym_has_type(_Py_UopsLocalsPlusSlot sym); extern bool _Py_uop_sym_matches_type(_Py_UopsLocalsPlusSlot sym, PyTypeObject *typ); extern bool _Py_uop_sym_matches_type_version(_Py_UopsLocalsPlusSlot sym, unsigned int version); extern void _Py_uop_sym_set_locals_idx(_Py_UopsLocalsPlusSlot sym, int locals_idx); +extern int _Py_uop_sym_get_locals_idx(_Py_UopsLocalsPlusSlot sym); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsLocalsPlusSlot sym, PyTypeObject *typ); diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 9300d48b025a65..8d1d4af29ae7d1 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -19,7 +19,7 @@ extern int _PyUop_num_popped(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { - [_NOP] = HAS_PURE_FLAG | HAS_STATIC_FLAG, + [_NOP] = HAS_STATIC_FLAG, [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, @@ -36,18 +36,18 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_0] = HAS_LOCAL_FLAG, - [_STORE_FAST_1] = HAS_LOCAL_FLAG, - [_STORE_FAST_2] = HAS_LOCAL_FLAG, - [_STORE_FAST_3] = HAS_LOCAL_FLAG, - [_STORE_FAST_4] = HAS_LOCAL_FLAG, - [_STORE_FAST_5] = HAS_LOCAL_FLAG, - [_STORE_FAST_6] = HAS_LOCAL_FLAG, - [_STORE_FAST_7] = HAS_LOCAL_FLAG, - [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_STORE_FAST_0] = 
HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_1] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_2] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_3] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_4] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_5] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_6] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_7] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_STATIC_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_POP_TOP] = HAS_PURE_FLAG | HAS_STATIC_FLAG, + [_POP_TOP] = HAS_STATIC_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_SEND] = HAS_PURE_FLAG, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 449d589b984de8..302c214b74b28b 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1495,5 +1495,19 @@ def thing(a): self.assertEqual(list(iter_opnames(ex)).count("_POP_TOP"), 0) self.assertTrue(ex.is_valid()) + def test_pe_dead_store_elimination(self): + def thing(a): + x = 0 + for i in range(20): + x = x + return i + + + res, ex = self._run_with_optimizer(thing, 1) + self.assertEqual(res, 19) + self.assertIsNotNone(ex) + self.assertEqual(list(iter_opnames(ex)).count("_LOAD_FAST_1"), 0) + self.assertTrue(ex.is_valid()) + if __name__ == "__main__": unittest.main() diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a07fcf055332e7..337af0a35c821c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -140,7 +140,7 @@ dummy_func( switch (opcode) { // BEGIN BYTECODES // - pure _static inst(NOP, (--)) { + _static inst(NOP, (--)) { } family(RESUME, 0) = { @@ -261,7 +261,7 @@ dummy_func( value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); } - replicate(8) inst(STORE_FAST, (value --)) { + replicate(8) _static inst(STORE_FAST, (value --)) { SETLOCAL(oparg, 
value); } @@ -283,7 +283,7 @@ dummy_func( SETLOCAL(oparg2, value2); } - pure _static inst(POP_TOP, (value --)) { + _static inst(POP_TOP, (value --)) { DECREF_INPUTS(); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index c9061eabe96bed..569ad14f161d9d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -39,13 +39,12 @@ extern void _PyUOpPrint(const _PyUOpInstruction *uop); static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; static inline int get_lltrace(void) { - return 5; -// char *uop_debug = Py_GETENV(DEBUG_ENV); -// int lltrace = 0; -// if (uop_debug != NULL && *uop_debug >= '0') { -// lltrace = *uop_debug - '0'; // TODO: Parse an int and all that -// } -// return lltrace; + char *uop_debug = Py_GETENV(DEBUG_ENV); + int lltrace = 0; + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = *uop_debug - '0'; // TODO: Parse an int and all that + } + return lltrace; } #define DPRINTF(level, ...) \ if (get_lltrace() >= (level)) { printf(__VA_ARGS__); } @@ -326,6 +325,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_set_locals_idx _Py_uop_sym_set_locals_idx +#define sym_get_locals_idx _Py_uop_sym_get_locals_idx #define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness #define frame_new _Py_uop_frame_new @@ -512,16 +512,20 @@ reify_shadow_stack(_Py_UOpsContext *ctx) if (slot.is_virtual) { wrote_inst = true; if (slot.sym->const_val) { + DPRINTF(3, "reifying LOAD_CONST_INLINE\n"); WRITE_OP(&trace_dest[ctx->n_trace_dest], _Py_IsImmortal(slot.sym->const_val) ? 
_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE, 0, (uint64_t)slot.sym->const_val); + trace_dest[ctx->n_trace_dest].format = UOP_FORMAT_TARGET; + trace_dest[ctx->n_trace_dest].target = 100; } else if (slot.sym->locals_idx >= 0) { DPRINTF(3, "reifying LOAD_FAST %d\n", slot.sym->locals_idx); WRITE_OP(&trace_dest[ctx->n_trace_dest], _LOAD_FAST, slot.sym->locals_idx, 0); trace_dest[ctx->n_trace_dest].format = UOP_FORMAT_TARGET; - trace_dest[ctx->n_trace_dest].jump_target = 0; + trace_dest[ctx->n_trace_dest].target = 100; } else if (sym_is_null(slot)) { + DPRINTF(3, "reifying PUSH_NULL\n"); WRITE_OP(&trace_dest[ctx->n_trace_dest], _PUSH_NULL, 0, 0); } else { @@ -613,9 +617,6 @@ partial_evaluate_uops( DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); - if (ctx->done) { - break; - } if (!instr_is_truly_static) { trace_dest[ctx->n_trace_dest] = *this_instr; ctx->n_trace_dest++; @@ -625,16 +626,17 @@ partial_evaluate_uops( } } else { - // Inst is static. Nothing written :)! - assert((_PyUop_Flags[opcode] & HAS_STATIC_FLAG)); + // Inst is static. Nothing written :)! + assert((_PyUop_Flags[opcode] & HAS_STATIC_FLAG)); #ifdef Py_DEBUG if (get_lltrace() >= 3) { - printf("%4d pe STATIC: ", (int) (this_instr - trace)); - _PyUOpPrint(this_instr); - printf("\n"); + printf("%4d pe -STATIC-\n", (int) (this_instr - trace)); } #endif } + if (ctx->done) { + break; + } } if (ctx->out_of_space) { DPRINTF(3, "\n"); @@ -661,28 +663,13 @@ partial_evaluate_uops( // That's the only time the PE's residual is valid. 
assert(ctx->n_trace_dest < UOP_MAX_TRACE_LENGTH); assert(is_terminator(this_instr)); - - // Copy rest of trace into trace_dest - memcpy(&trace_dest[ctx->n_trace_dest], &trace[i], (trace_len - i) * sizeof(_PyUOpInstruction )); - - printf("Optimized trace_dest (length %d):\n", ctx->n_trace_dest); - for (int x = 0; x < (trace_len - i) + ctx->n_trace_dest; x++) { - printf("%4d OPTIMIZED: ", x); - _PyUOpPrint(&trace_dest[x]); - printf("\n"); - } + assert(ctx->n_trace_dest <= trace_len); // Copy trace_dest into trace. - memcpy(trace, trace_dest, trace_len * sizeof(_PyUOpInstruction )); - printf("Optimized trace (length %d):\n", ctx->n_trace_dest); -// for (int i = 0; i < trace_len; i++) { -// printf("%4d OPTIMIZED: ", i); -// _PyUOpPrint(&trace[i]); -// printf("\n"); -// } + memcpy(trace, trace_dest, ctx->n_trace_dest * sizeof(_PyUOpInstruction )); int trace_dest_len = ctx->n_trace_dest; _Py_uop_abstractcontext_fini(ctx); - return (trace_len - i) + trace_dest_len; + return trace_dest_len; } error: @@ -741,6 +728,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } if (last->opcode == _LOAD_CONST_INLINE || last->opcode == _LOAD_CONST_INLINE_BORROW || + last->opcode == _LOAD_FAST || last->opcode == _COPY ) { last->opcode = _NOP; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 77514cfd0627ff..9877deadad2941 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -89,7 +89,7 @@ dummy_func(void) { GETLOCAL(oparg) = temp; } - op(_STORE_FAST, (value --)) { + _static op(_STORE_FAST, (value --)) { GETLOCAL(oparg) = value; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 2be4477cc11e34..22145330dd3b64 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -316,6 +316,12 @@ _Py_uop_sym_set_locals_idx(_Py_UopsLocalsPlusSlot sym, int locals_idx) sym.sym->locals_idx = locals_idx; } +int +_Py_uop_sym_get_locals_idx(_Py_UopsLocalsPlusSlot sym) +{ + return 
sym.sym->locals_idx; +} + int _Py_uop_sym_truthiness(_Py_UopsLocalsPlusSlot sym) { diff --git a/Python/partial_evaluator_bytecodes.c b/Python/partial_evaluator_bytecodes.c index 3116cbc3ff1841..d4f9a2083a61fc 100644 --- a/Python/partial_evaluator_bytecodes.c +++ b/Python/partial_evaluator_bytecodes.c @@ -75,6 +75,7 @@ dummy_func(void) { override op(_LOAD_FAST, (-- value)) { value = GETLOCAL(oparg); + sym_set_locals_idx(value, oparg); SET_STATIC_INST(); value.is_virtual = true; } @@ -84,16 +85,22 @@ dummy_func(void) { GETLOCAL(oparg) = sym_new_null(ctx); } - override op(_STORE_FAST, (value --)) { - GETLOCAL(oparg) = value; - sym_set_locals_idx(value, oparg); - } override op(_LOAD_CONST, (-- value)) { // Should've all been converted by specializer. Py_UNREACHABLE(); } + override op(_STORE_FAST, (value --)) { + // Gets rid of stores by the same load + if (value.is_virtual && oparg == sym_get_locals_idx(value)) { + SET_STATIC_INST(); + } + else { + reify_shadow_stack(ctx); + } + GETLOCAL(oparg) = value; + } override op(_POP_TOP, (pop --)) { if (pop.is_virtual) { diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index bd0a6a3e89d59e..9a1e5e75f9b250 100644 --- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -40,6 +40,7 @@ case _LOAD_FAST: { _Py_UopsLocalsPlusSlot value; value = GETLOCAL(oparg); + sym_set_locals_idx(value, oparg); SET_STATIC_INST(); value.is_virtual = true; stack_pointer[0] = value; @@ -71,8 +72,14 @@ case _STORE_FAST: { _Py_UopsLocalsPlusSlot value; value = stack_pointer[-1]; + // Gets rid of stores by the same load + if (value.is_virtual && oparg == sym_get_locals_idx(value)) { + SET_STATIC_INST(); + } + else { + reify_shadow_stack(ctx); + } GETLOCAL(oparg) = value; - sym_set_locals_idx(value, oparg); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; From 6a6dbcec5382ccd9a949ec4979c28b28162da5a1 Mon Sep 17 00:00:00 2001 From: Ken Jin 
<28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 4 Sep 2024 03:28:27 +0800 Subject: [PATCH 08/18] cleanup --- Include/internal/pycore_optimizer.h | 1 - Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 4 ++-- Python/optimizer_analysis.c | 8 ++------ Python/partial_evaluator_bytecodes.c | 18 ++---------------- Python/partial_evaluator_cases.c.h | 1 + 6 files changed, 9 insertions(+), 27 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index f2e15987c83b57..adde51eebe6423 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -192,7 +192,6 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst) // handle before rejoining the rest of the program. #define MAX_CHAIN_DEPTH 4 - typedef struct _Py_UopsSymbol _Py_UopsSymbol; typedef struct _Py_UopsLocalsPlusSlot { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 337af0a35c821c..9ae41136ad95a0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4711,7 +4711,7 @@ dummy_func( if (lltrace >= 2) { printf("SIDE EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", + printf(", exit %ld, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.as_counter, (int)(target - _PyCode_CODE(code)), _PyOpcode_OpName[target->op.code]); @@ -4801,7 +4801,7 @@ dummy_func( if (lltrace >= 2) { printf("DYNAMIC EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", + printf(", exit %ld, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.as_counter, (int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))), _PyOpcode_OpName[target->op.code]); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0de5c8a0408d8c..7751f0e751ac2c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5277,7 +5277,7 @@ if (lltrace >= 2) { printf("SIDE EXIT: 
[UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", + printf(", exit %ld, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.as_counter, (int)(target - _PyCode_CODE(code)), _PyOpcode_OpName[target->op.code]); @@ -5416,7 +5416,7 @@ if (lltrace >= 2) { printf("DYNAMIC EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", + printf(", exit %ld, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.as_counter, (int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))), _PyOpcode_OpName[target->op.code]); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 569ad14f161d9d..508525cae48dc4 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -502,15 +502,13 @@ optimize_uops( static void reify_shadow_stack(_Py_UOpsContext *ctx) -{; - bool wrote_inst = false; +{ _PyUOpInstruction *trace_dest = ctx->trace_dest; for (_Py_UopsLocalsPlusSlot *sp = ctx->frame->stack; sp < ctx->frame->stack_pointer; sp++) { _Py_UopsLocalsPlusSlot slot = *sp; assert(slot.sym != NULL); // Need reifying. if (slot.is_virtual) { - wrote_inst = true; if (slot.sym->const_val) { DPRINTF(3, "reifying LOAD_CONST_INLINE\n"); WRITE_OP(&trace_dest[ctx->n_trace_dest], _Py_IsImmortal(slot.sym->const_val) ? @@ -585,9 +583,7 @@ partial_evaluate_uops( int oparg = this_instr->oparg; opcode = this_instr->opcode; - uint64_t operand = this_instr->operand; _Py_UopsLocalsPlusSlot *stack_pointer = ctx->frame->stack_pointer; - _Py_UopsLocalsPlusSlot *old_sp = stack_pointer; // An instruction is candidate static if it has no escapes, and all its inputs // are static. @@ -797,8 +793,8 @@ _Py_uop_analyze_and_optimize( // Help the PE by removing as many _CHECK_VALIDITY as possible, // Since PE treats that as non-static since it can deopt arbitrarily. 
- length = remove_unneeded_uops(buffer, length); + assert(length > 0); length = partial_evaluate_uops( _PyFrame_GetCode(frame), buffer, diff --git a/Python/partial_evaluator_bytecodes.c b/Python/partial_evaluator_bytecodes.c index d4f9a2083a61fc..b6e634f0fbd7f1 100644 --- a/Python/partial_evaluator_bytecodes.c +++ b/Python/partial_evaluator_bytecodes.c @@ -48,21 +48,6 @@ extern PyCodeObject *get_code(_PyUOpInstruction *op); static int dummy_func(void) { - PyCodeObject *co; - int oparg; - _Py_UopsSymbol *flag; - _Py_UopsSymbol *left; - _Py_UopsSymbol *right; - _Py_UopsSymbol *value; - _Py_UopsSymbol *res; - _Py_UopsSymbol *iter; - _Py_UopsSymbol *top; - _Py_UopsSymbol *bottom; - _Py_UOpsAbstractFrame *frame; - _Py_UOpsAbstractFrame *new_frame; - _Py_UOpsContext *ctx; - _PyUOpInstruction *this_instr; - // BEGIN BYTECODES // override op(_LOAD_FAST_CHECK, (-- value)) { @@ -115,7 +100,8 @@ dummy_func(void) { SET_STATIC_INST(); } - override op (_CHECK_STACK_SPACE_OPERAND, ( -- )) { + override op(_CHECK_STACK_SPACE_OPERAND, ( -- )) { + (void)framesize; } // END BYTECODES // diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index 9a1e5e75f9b250..89ad5fb7fa7d1f 100644 --- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -2298,6 +2298,7 @@ case _CHECK_STACK_SPACE_OPERAND: { uint32_t framesize = (uint32_t)this_instr->operand; + (void)framesize; break; } From 7562c75b8679e6a2e17368a64a833145c3e05b96 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 4 Sep 2024 03:36:53 +0800 Subject: [PATCH 09/18] Create 2024-09-04-03-36-48.gh-issue-120619.yE7lQb.rst --- .../2024-09-04-03-36-48.gh-issue-120619.yE7lQb.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-09-04-03-36-48.gh-issue-120619.yE7lQb.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-09-04-03-36-48.gh-issue-120619.yE7lQb.rst b/Misc/NEWS.d/next/Core and 
Builtins/2024-09-04-03-36-48.gh-issue-120619.yE7lQb.rst new file mode 100644 index 00000000000000..6029db0998ebe9 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-09-04-03-36-48.gh-issue-120619.yE7lQb.rst @@ -0,0 +1 @@ +Set up a tier 2 partial evaluation pass. Patch by Ken Jin. From 5200bceb3186ceae7609ccac22711badd21503eb Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 4 Sep 2024 03:39:44 +0800 Subject: [PATCH 10/18] fix tests --- Lib/test/test_generated_cases.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 7f821810aea00c..11c456e7707f69 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1196,15 +1196,15 @@ def test_overridden_abstract_args(self): """ output = """ case OP: { - _Py_UopsSymbol *arg1; - _Py_UopsSymbol *out; + _Py_UopsLocalsPlusSlot arg1; + _Py_UopsLocalsPlusSlot out; eggs(); stack_pointer[-1] = out; break; } case OP2: { - _Py_UopsSymbol *out; + _Py_UopsLocalsPlusSlot out; out = sym_new_not_null(ctx); stack_pointer[-1] = out; break; @@ -1228,15 +1228,15 @@ def test_no_overridden_case(self): """ output = """ case OP: { - _Py_UopsSymbol *out; + _Py_UopsLocalsPlusSlot out; out = sym_new_not_null(ctx); stack_pointer[-1] = out; break; } case OP2: { - _Py_UopsSymbol *arg1; - _Py_UopsSymbol *out; + _Py_UopsLocalsPlusSlot arg1; + _Py_UopsLocalsPlusSlot out; stack_pointer[-1] = out; break; } From 23e4b7cbf0e217f43c94abd1b6371d13c1e2e222 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 13 Sep 2024 21:03:33 +0800 Subject: [PATCH 11/18] Update partial_evaluator_cases.c.h --- Python/partial_evaluator_cases.c.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index 89ad5fb7fa7d1f..18945827b2b974 100644 
--- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -1702,11 +1702,13 @@ case _EXPAND_METHOD: { _Py_UopsLocalsPlusSlot method; - _Py_UopsLocalsPlusSlot self; + _Py_UopsLocalsPlusSlot *self; + self = &stack_pointer[-1 - oparg]; method = sym_new_not_null(ctx); - self = sym_new_not_null(ctx); + for (int _i = 1; --_i >= 0;) { + self[_i] = sym_new_not_null(ctx); + } stack_pointer[-2 - oparg] = method; - stack_pointer[-1 - oparg] = self; break; } @@ -2061,13 +2063,15 @@ case _EXPAND_METHOD_KW: { _Py_UopsLocalsPlusSlot method; - _Py_UopsLocalsPlusSlot self; + _Py_UopsLocalsPlusSlot *self; _Py_UopsLocalsPlusSlot kwnames; + self = &stack_pointer[-2 - oparg]; method = sym_new_not_null(ctx); - self = sym_new_not_null(ctx); + for (int _i = 1; --_i >= 0;) { + self[_i] = sym_new_not_null(ctx); + } kwnames = sym_new_not_null(ctx); stack_pointer[-3 - oparg] = method; - stack_pointer[-2 - oparg] = self; stack_pointer[-1] = kwnames; break; } From 0a1d12ec98533bdae01f2e06318d6426eb8c0ee3 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 15 Sep 2024 18:11:10 +0800 Subject: [PATCH 12/18] Update partial_evaluator_cases.c.h --- Python/partial_evaluator_cases.c.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index 18945827b2b974..de42d05691572c 100644 --- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -845,11 +845,13 @@ } case _LOAD_GLOBAL: { - _Py_UopsLocalsPlusSlot res; + _Py_UopsLocalsPlusSlot *res; _Py_UopsLocalsPlusSlot null = (_Py_UopsLocalsPlusSlot){NULL, 0}; - res = sym_new_not_null(ctx); + res = &stack_pointer[0]; + for (int _i = 1; --_i >= 0;) { + res[_i] = sym_new_not_null(ctx); + } null = sym_new_null(ctx); - stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); assert(WITHIN_STACK_BOUNDS()); From 
bfbf6080ee4529788465aa4e03b19b76c1fc5e9d Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 16 Sep 2024 00:07:06 +0800 Subject: [PATCH 13/18] reorder reifications --- Python/optimizer_analysis.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 508525cae48dc4..f8f63d82e38ea4 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -509,16 +509,16 @@ reify_shadow_stack(_Py_UOpsContext *ctx) assert(slot.sym != NULL); // Need reifying. if (slot.is_virtual) { - if (slot.sym->const_val) { - DPRINTF(3, "reifying LOAD_CONST_INLINE\n"); - WRITE_OP(&trace_dest[ctx->n_trace_dest], _Py_IsImmortal(slot.sym->const_val) ? - _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE, 0, (uint64_t)slot.sym->const_val); + if (slot.sym->locals_idx >= 0) { + DPRINTF(3, "reifying LOAD_FAST %d\n", slot.sym->locals_idx); + WRITE_OP(&trace_dest[ctx->n_trace_dest], _LOAD_FAST, slot.sym->locals_idx, 0); trace_dest[ctx->n_trace_dest].format = UOP_FORMAT_TARGET; trace_dest[ctx->n_trace_dest].target = 100; } - else if (slot.sym->locals_idx >= 0) { - DPRINTF(3, "reifying LOAD_FAST %d\n", slot.sym->locals_idx); - WRITE_OP(&trace_dest[ctx->n_trace_dest], _LOAD_FAST, slot.sym->locals_idx, 0); + else if (slot.sym->const_val) { + DPRINTF(3, "reifying LOAD_CONST_INLINE\n"); + WRITE_OP(&trace_dest[ctx->n_trace_dest], _Py_IsImmortal(slot.sym->const_val) ? 
+ _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE, 0, (uint64_t)slot.sym->const_val); trace_dest[ctx->n_trace_dest].format = UOP_FORMAT_TARGET; trace_dest[ctx->n_trace_dest].target = 100; } From 8fe279ef6ef6fbf02704ba5c600daccd0b7dc912 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 16 Sep 2024 00:43:14 +0800 Subject: [PATCH 14/18] fix c-analzyer --- Tools/c-analyzer/cpython/_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 3a73f65f8ff7b3..6c8250f67073b4 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -84,6 +84,7 @@ def clean_lines(text): Python/generated_cases.c.h Python/executor_cases.c.h Python/optimizer_cases.c.h +Python/partial_evaluator_cases.c.h # not actually source Python/bytecodes.c From 4361821fab8c38d0d54c0d95790e79b6717bf6c6 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 18 Sep 2024 02:32:13 +0800 Subject: [PATCH 15/18] remove static, remove some pure --- Include/internal/pycore_opcode_metadata.h | 34 ++++---- Include/internal/pycore_uop_metadata.h | 86 +++++++++---------- Python/bytecodes.c | 44 +++++----- Python/optimizer_analysis.c | 4 +- Python/optimizer_bytecodes.c | 2 +- Tools/cases_generator/analyzer.py | 2 - Tools/cases_generator/generators_common.py | 2 - Tools/cases_generator/lexer.py | 1 - .../opcode_metadata_generator.py | 1 - 9 files changed, 84 insertions(+), 92 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 3e9fa5588673be..f49f19cd6407a4 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -974,7 +974,6 @@ enum InstructionFormat { #define HAS_PASSTHROUGH_FLAG (4096) #define HAS_OPARG_AND_1_FLAG (8192) #define HAS_ERROR_NO_POP_FLAG (16384) -#define HAS_STATIC_FLAG (32768) 
#define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) #define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) @@ -990,7 +989,6 @@ enum InstructionFormat { #define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG)) #define OPCODE_HAS_OPARG_AND_1(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_OPARG_AND_1_FLAG)) #define OPCODE_HAS_ERROR_NO_POP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_NO_POP_FLAG)) -#define OPCODE_HAS_STATIC(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_STATIC_FLAG)) #define OPARG_FULL 0 #define OPARG_CACHE_1 1 @@ -1072,7 +1070,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [CONTAINS_OP_DICT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CONTAINS_OP_SET] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CONVERT_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, - [COPY] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, + [COPY] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [COPY_FREE_VARS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [DELETE_ATTR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DELETE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1083,8 +1081,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [DICT_MERGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DICT_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [END_ASYNC_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [END_FOR] = { true, INSTR_FMT_IX, 0 }, - [END_SEND] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [END_FOR] = { 
true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [END_SEND] = { true, INSTR_FMT_IX, 0 }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [EXIT_INIT_CHECK] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1147,7 +1145,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [LOAD_COMMON_CONSTANT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, @@ -1169,15 +1167,15 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 }, [MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 }, - [NOP] = { true, INSTR_FMT_IX, 0 }, + [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_TOP] = { true, INSTR_FMT_IX, 0 }, + [POP_TOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, - [PUSH_NULL] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [PUSH_NULL] = { true, INSTR_FMT_IX, 0 
}, [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RERAISE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RESERVED] = { true, INSTR_FMT_IX, 0 }, @@ -1197,7 +1195,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_EXIT_FLAG }, [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG }, - [STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [STORE_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [STORE_FAST_STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [STORE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1206,7 +1204,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [STORE_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, - [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, + [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, @@ -1216,7 +1214,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNARY_NEGATIVE] = { true, INSTR_FMT_IX, 
HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [UNARY_NOT] = { true, INSTR_FMT_IX, 0 }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, @@ -1227,12 +1225,12 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [_DO_CALL_FUNCTION_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_NO_INTERRUPT] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, - [POP_BLOCK] = { true, -1, 0 }, - [SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG }, - [SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG }, - [SETUP_WITH] = { true, -1, HAS_ARG_FLAG }, - [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, + [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, }; #endif diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 06f51803ab13db..6616936f9ca692 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -19,39 +19,39 @@ extern int _PyUop_num_popped(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { - [_NOP] = HAS_STATIC_FLAG, + [_NOP] = HAS_PURE_FLAG, [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | 
HAS_ESCAPES_FLAG, [_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_LOAD_FAST_3] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_LOAD_FAST_4] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_LOAD_FAST_5] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_0] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_STORE_FAST_1] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_STORE_FAST_2] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_STORE_FAST_3] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_STORE_FAST_4] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_STORE_FAST_5] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_STORE_FAST_6] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_STORE_FAST_7] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, - [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_STORE_FAST_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + 
[_STORE_FAST_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_STORE_FAST_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_STORE_FAST_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_STORE_FAST_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_STORE_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_STORE_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_POP_TOP] = HAS_STATIC_FLAG, - [_PUSH_NULL] = HAS_PURE_FLAG, - [_END_SEND] = HAS_PURE_FLAG, + [_POP_TOP] = HAS_PURE_FLAG, + [_PUSH_NULL] = 0, + [_END_SEND] = 0, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_UNARY_NOT] = HAS_PURE_FLAG, + [_UNARY_NOT] = 0, [_TO_BOOL] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_TO_BOOL_BOOL] = HAS_EXIT_FLAG, [_TO_BOOL_INT] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, @@ -63,17 +63,17 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_BOTH_INT] = HAS_EXIT_FLAG, [_GUARD_NOS_INT] = HAS_EXIT_FLAG, [_GUARD_TOS_INT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG, + [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG, + [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG, [_GUARD_BOTH_FLOAT] = HAS_EXIT_FLAG, [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, - [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, - [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_MULTIPLY_FLOAT] = 0, + [_BINARY_OP_ADD_FLOAT] = 0, + [_BINARY_OP_SUBTRACT_FLOAT] = 0, [_GUARD_BOTH_UNICODE] = HAS_EXIT_FLAG, - [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR] = 
HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -215,12 +215,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = HAS_DEOPT_FLAG, [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_0] = HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_1] = HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_2] = HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_3] = HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_4] = HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_0] = 0, + [_INIT_CALL_PY_EXACT_ARGS_1] = 0, + [_INIT_CALL_PY_EXACT_ARGS_2] = 0, + [_INIT_CALL_PY_EXACT_ARGS_3] = 0, + [_INIT_CALL_PY_EXACT_ARGS_4] = 0, + [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG, [_PUSH_FRAME] = 0, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -252,9 +252,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_COPY] = HAS_ARG_FLAG, [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_SWAP] = HAS_ARG_FLAG, [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_NONE_POP] = HAS_EXIT_FLAG, @@ -265,11 +265,11 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_ESCAPES_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, - [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, - [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, - [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, - [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, - [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE] = 0, + 
[_LOAD_CONST_INLINE_BORROW] = 0, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = 0, + [_LOAD_CONST_INLINE_WITH_NULL] = 0, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8f00fdb0158d29..6c1fd178ef9184 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -140,7 +140,7 @@ dummy_func( switch (opcode) { // BEGIN BYTECODES // - _static inst(NOP, (--)) { + pure inst(NOP, (--)) { } family(RESUME, 0) = { @@ -239,7 +239,7 @@ dummy_func( value = PyStackRef_DUP(value_s); } - replicate(8) _static inst(LOAD_FAST, (-- value)) { + replicate(8) pure inst(LOAD_FAST, (-- value)) { assert(!PyStackRef_IsNull(GETLOCAL(oparg))); value = PyStackRef_DUP(GETLOCAL(oparg)); } @@ -261,7 +261,7 @@ dummy_func( value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); } - replicate(8) _static inst(STORE_FAST, (value --)) { + replicate(8) pure inst(STORE_FAST, (value --)) { SETLOCAL(oparg, value); } @@ -283,11 +283,11 @@ dummy_func( SETLOCAL(oparg2, value2); } - _static inst(POP_TOP, (value --)) { + pure inst(POP_TOP, (value --)) { DECREF_INPUTS(); } - pure inst(PUSH_NULL, (-- res)) { + inst(PUSH_NULL, (-- res)) { res = PyStackRef_NULL; } @@ -305,7 +305,7 @@ dummy_func( DECREF_INPUTS(); } - pure inst(END_SEND, (receiver, value -- value)) { + inst(END_SEND, (receiver, value -- value)) { (void)receiver; PyStackRef_CLOSE(receiver); } @@ -328,7 +328,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure inst(UNARY_NOT, (value -- res)) { + inst(UNARY_NOT, (value -- res)) { assert(PyStackRef_BoolCheck(value)); res = PyStackRef_Is(value, PyStackRef_False) ? 
PyStackRef_True : PyStackRef_False; @@ -458,7 +458,7 @@ dummy_func( EXIT_IF(!PyLong_CheckExact(value_o)); } - pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { + op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -470,7 +470,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure op(_BINARY_OP_ADD_INT, (left, right -- res)) { + op(_BINARY_OP_ADD_INT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -482,7 +482,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { + op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -518,7 +518,7 @@ dummy_func( EXIT_IF(!PyFloat_CheckExact(value_o)); } - pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { + op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -531,7 +531,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { + op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -544,7 +544,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { + op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -572,7 +572,7 @@ dummy_func( EXIT_IF(!PyUnicode_CheckExact(right_o)); } - pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { + op(_BINARY_OP_ADD_UNICODE, (left, right -- 
res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -3474,7 +3474,7 @@ dummy_func( DEOPT_IF(tstate->py_recursion_remaining <= 1); } - replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null[1], args[oparg] -- new_frame: _PyInterpreterFrame*)) { + replicate(5) op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null[1], args[oparg] -- new_frame: _PyInterpreterFrame*)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int has_self = !PyStackRef_IsNull(self_or_null[0]); STAT_INC(CALL, hit); @@ -4475,7 +4475,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { + inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = PyStackRef_DUP(bottom); } @@ -4507,7 +4507,7 @@ dummy_func( macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + _BINARY_OP; - pure inst(SWAP, (bottom, unused[oparg-2], top -- + inst(SWAP, (bottom, unused[oparg-2], top -- top, unused[oparg-2], bottom)) { assert(oparg >= 2); } @@ -4747,25 +4747,25 @@ dummy_func( DEOPT_IF(!current_executor->vm_data.valid); } - tier2 pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + tier2 op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { value = PyStackRef_FromPyObjectNew(ptr); } - tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + tier2 op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { value = PyStackRef_FromPyObjectImmortal(ptr); } - tier2 pure op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { + tier2 op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { PyStackRef_CLOSE(pop); value = PyStackRef_FromPyObjectImmortal(ptr); } - tier2 pure op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + tier2 op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { value = PyStackRef_FromPyObjectNew(ptr); null = PyStackRef_NULL; } - tier2 pure op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 
-- value, null)) { + tier2 op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { value = PyStackRef_FromPyObjectImmortal(ptr); null = PyStackRef_NULL; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index f8f63d82e38ea4..9f63b98198eb3c 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -589,7 +589,7 @@ partial_evaluate_uops( // are static. // If so, whether it can be eliminated is up to whether it has an implementation. bool instr_is_truly_static = false; - if (!(_PyUop_Flags[opcode] & HAS_STATIC_FLAG)) { + if (!(_PyUop_Flags[opcode] & HAS_PURE_FLAG)) { reify_shadow_stack(ctx); } @@ -623,7 +623,7 @@ partial_evaluate_uops( } else { // Inst is static. Nothing written :)! - assert((_PyUop_Flags[opcode] & HAS_STATIC_FLAG)); + assert((_PyUop_Flags[opcode] & HAS_PURE_FLAG)); #ifdef Py_DEBUG if (get_lltrace() >= 3) { printf("%4d pe -STATIC-\n", (int) (this_instr - trace)); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 9877deadad2941..77514cfd0627ff 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -89,7 +89,7 @@ dummy_func(void) { GETLOCAL(oparg) = temp; } - _static op(_STORE_FAST, (value --)) { + op(_STORE_FAST, (value --)) { GETLOCAL(oparg) = value; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 60d7accdc40f4d..3cc36b6b5841bd 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -23,7 +23,6 @@ class Properties: has_free: bool side_exit: bool pure: bool - static: bool = False tier: int | None = None oparg_and_1: bool = False const_oparg: int = -1 @@ -675,7 +674,6 @@ def compute_properties(op: parser.InstDef) -> Properties: and not has_free, has_free=has_free, pure="pure" in op.annotations, - static="_static" in op.annotations, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), ) diff --git a/Tools/cases_generator/generators_common.py 
b/Tools/cases_generator/generators_common.py index c98cfce0b7adc4..2f8fccec2ea409 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -265,8 +265,6 @@ def cflags(p: Properties) -> str: flags.append("HAS_PURE_FLAG") if p.oparg_and_1: flags.append("HAS_OPARG_AND_1_FLAG") - if p.static: - flags.append("HAS_STATIC_FLAG") if flags: return " | ".join(flags) else: diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index c171e0b94da5ed..d5831593215f76 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -226,7 +226,6 @@ def choice(*opts: str) -> str: "replicate", "tier1", "tier2", - "_static", } __all__ = [] diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 58fffa3a5ac483..9b1bc98b5c08d7 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -52,7 +52,6 @@ "PASSTHROUGH", "OPARG_AND_1", "ERROR_NO_POP", - "STATIC", ] From 5df786d61c4513d1e6fd965568de1c857c3ee51a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 18 Sep 2024 02:34:05 +0800 Subject: [PATCH 16/18] Revert "remove static, remove some pure" This reverts commit 4361821fab8c38d0d54c0d95790e79b6717bf6c6. 
--- Include/internal/pycore_opcode_metadata.h | 34 ++++---- Include/internal/pycore_uop_metadata.h | 86 +++++++++---------- Python/bytecodes.c | 44 +++++----- Python/optimizer_analysis.c | 4 +- Python/optimizer_bytecodes.c | 2 +- Tools/cases_generator/analyzer.py | 2 + Tools/cases_generator/generators_common.py | 2 + Tools/cases_generator/lexer.py | 1 + .../opcode_metadata_generator.py | 1 + 9 files changed, 92 insertions(+), 84 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index f49f19cd6407a4..3e9fa5588673be 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -974,6 +974,7 @@ enum InstructionFormat { #define HAS_PASSTHROUGH_FLAG (4096) #define HAS_OPARG_AND_1_FLAG (8192) #define HAS_ERROR_NO_POP_FLAG (16384) +#define HAS_STATIC_FLAG (32768) #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) #define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) @@ -989,6 +990,7 @@ enum InstructionFormat { #define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG)) #define OPCODE_HAS_OPARG_AND_1(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_OPARG_AND_1_FLAG)) #define OPCODE_HAS_ERROR_NO_POP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_NO_POP_FLAG)) +#define OPCODE_HAS_STATIC(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_STATIC_FLAG)) #define OPARG_FULL 0 #define OPARG_CACHE_1 1 @@ -1070,7 +1072,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [CONTAINS_OP_DICT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CONTAINS_OP_SET] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CONVERT_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, - [COPY] = { 
true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [COPY] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, [COPY_FREE_VARS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [DELETE_ATTR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DELETE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1081,8 +1083,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [DICT_MERGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DICT_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [END_ASYNC_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [END_FOR] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, - [END_SEND] = { true, INSTR_FMT_IX, 0 }, + [END_FOR] = { true, INSTR_FMT_IX, 0 }, + [END_SEND] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [EXIT_INIT_CHECK] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1145,7 +1147,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [LOAD_COMMON_CONSTANT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, @@ -1167,15 +1169,15 @@ const struct opcode_metadata 
_PyOpcode_opcode_metadata[264] = { [MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 }, [MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 }, - [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [NOP] = { true, INSTR_FMT_IX, 0 }, [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_TOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [POP_TOP] = { true, INSTR_FMT_IX, 0 }, [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, - [PUSH_NULL] = { true, INSTR_FMT_IX, 0 }, + [PUSH_NULL] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RERAISE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RESERVED] = { true, INSTR_FMT_IX, 0 }, @@ -1195,7 +1197,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_EXIT_FLAG }, [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG }, - [STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [STORE_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [STORE_FAST_STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [STORE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1204,7 +1206,7 @@ const struct 
opcode_metadata _PyOpcode_opcode_metadata[264] = { [STORE_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, - [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, [TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, @@ -1214,7 +1216,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNARY_NEGATIVE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNARY_NOT] = { true, INSTR_FMT_IX, 0 }, + [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, @@ -1225,12 +1227,12 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[264] = { [_DO_CALL_FUNCTION_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_NO_INTERRUPT] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, - [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, - [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, - [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, - [SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, - 
[STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [POP_BLOCK] = { true, -1, 0 }, + [SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG }, + [SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG }, + [SETUP_WITH] = { true, -1, HAS_ARG_FLAG }, + [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, }; #endif diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 6616936f9ca692..06f51803ab13db 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -19,39 +19,39 @@ extern int _PyUop_num_popped(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { - [_NOP] = HAS_PURE_FLAG, + [_NOP] = HAS_STATIC_FLAG, [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_3] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_4] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_5] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + 
[_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_STATIC_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_STORE_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_STORE_FAST_0] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_1] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_2] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_3] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_4] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_5] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_6] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST_7] = HAS_LOCAL_FLAG | HAS_STATIC_FLAG, + [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_STATIC_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_POP_TOP] = HAS_PURE_FLAG, - [_PUSH_NULL] = 0, - [_END_SEND] = 0, + [_POP_TOP] = HAS_STATIC_FLAG, + [_PUSH_NULL] = HAS_PURE_FLAG, + [_END_SEND] = HAS_PURE_FLAG, [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_UNARY_NOT] = 0, + [_UNARY_NOT] = HAS_PURE_FLAG, [_TO_BOOL] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_TO_BOOL_BOOL] = HAS_EXIT_FLAG, [_TO_BOOL_INT] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, @@ -63,17 +63,17 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_BOTH_INT] = HAS_EXIT_FLAG, [_GUARD_NOS_INT] = HAS_EXIT_FLAG, [_GUARD_TOS_INT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_INT] = 
HAS_ERROR_FLAG, - [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG, - [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG, + [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_GUARD_BOTH_FLOAT] = HAS_EXIT_FLAG, [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_FLOAT] = 0, - [_BINARY_OP_ADD_FLOAT] = 0, - [_BINARY_OP_SUBTRACT_FLOAT] = 0, + [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, [_GUARD_BOTH_UNICODE] = HAS_EXIT_FLAG, - [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG, + [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -215,12 +215,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = HAS_DEOPT_FLAG, [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_0] = 0, - [_INIT_CALL_PY_EXACT_ARGS_1] = 0, - [_INIT_CALL_PY_EXACT_ARGS_2] = 0, - [_INIT_CALL_PY_EXACT_ARGS_3] = 0, - [_INIT_CALL_PY_EXACT_ARGS_4] = 0, - [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_0] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_1] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_2] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_3] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_4] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_PUSH_FRAME] = 0, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -252,9 +252,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, 
[_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_COPY] = HAS_ARG_FLAG, + [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_SWAP] = HAS_ARG_FLAG, + [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_NONE_POP] = HAS_EXIT_FLAG, @@ -265,11 +265,11 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_ESCAPES_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, - [_LOAD_CONST_INLINE] = 0, - [_LOAD_CONST_INLINE_BORROW] = 0, - [_POP_TOP_LOAD_CONST_INLINE_BORROW] = 0, - [_LOAD_CONST_INLINE_WITH_NULL] = 0, - [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0, + [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6c1fd178ef9184..8f00fdb0158d29 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -140,7 +140,7 @@ dummy_func( switch (opcode) { // BEGIN BYTECODES // - pure inst(NOP, (--)) { + _static inst(NOP, (--)) { } family(RESUME, 0) = { @@ -239,7 +239,7 @@ dummy_func( value = PyStackRef_DUP(value_s); } - replicate(8) pure inst(LOAD_FAST, (-- value)) { + replicate(8) _static inst(LOAD_FAST, (-- value)) { assert(!PyStackRef_IsNull(GETLOCAL(oparg))); value = PyStackRef_DUP(GETLOCAL(oparg)); } @@ -261,7 +261,7 @@ dummy_func( value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); } - replicate(8) pure inst(STORE_FAST, (value --)) { + replicate(8) _static inst(STORE_FAST, (value --)) { SETLOCAL(oparg, value); } @@ -283,11 +283,11 @@ dummy_func( SETLOCAL(oparg2, value2); 
} - pure inst(POP_TOP, (value --)) { + _static inst(POP_TOP, (value --)) { DECREF_INPUTS(); } - inst(PUSH_NULL, (-- res)) { + pure inst(PUSH_NULL, (-- res)) { res = PyStackRef_NULL; } @@ -305,7 +305,7 @@ dummy_func( DECREF_INPUTS(); } - inst(END_SEND, (receiver, value -- value)) { + pure inst(END_SEND, (receiver, value -- value)) { (void)receiver; PyStackRef_CLOSE(receiver); } @@ -328,7 +328,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - inst(UNARY_NOT, (value -- res)) { + pure inst(UNARY_NOT, (value -- res)) { assert(PyStackRef_BoolCheck(value)); res = PyStackRef_Is(value, PyStackRef_False) ? PyStackRef_True : PyStackRef_False; @@ -458,7 +458,7 @@ dummy_func( EXIT_IF(!PyLong_CheckExact(value_o)); } - op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { + pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -470,7 +470,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_BINARY_OP_ADD_INT, (left, right -- res)) { + pure op(_BINARY_OP_ADD_INT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -482,7 +482,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { + pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -518,7 +518,7 @@ dummy_func( EXIT_IF(!PyFloat_CheckExact(value_o)); } - op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { + pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -531,7 +531,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { + pure op(_BINARY_OP_ADD_FLOAT, 
(left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -544,7 +544,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { + pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -572,7 +572,7 @@ dummy_func( EXIT_IF(!PyUnicode_CheckExact(right_o)); } - op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { + pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); @@ -3474,7 +3474,7 @@ dummy_func( DEOPT_IF(tstate->py_recursion_remaining <= 1); } - replicate(5) op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null[1], args[oparg] -- new_frame: _PyInterpreterFrame*)) { + replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null[1], args[oparg] -- new_frame: _PyInterpreterFrame*)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int has_self = !PyStackRef_IsNull(self_or_null[0]); STAT_INC(CALL, hit); @@ -4475,7 +4475,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { + pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = PyStackRef_DUP(bottom); } @@ -4507,7 +4507,7 @@ dummy_func( macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + _BINARY_OP; - inst(SWAP, (bottom, unused[oparg-2], top -- + pure inst(SWAP, (bottom, unused[oparg-2], top -- top, unused[oparg-2], bottom)) { assert(oparg >= 2); } @@ -4747,25 +4747,25 @@ dummy_func( DEOPT_IF(!current_executor->vm_data.valid); } - tier2 op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + tier2 pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { value = PyStackRef_FromPyObjectNew(ptr); } - tier2 op(_LOAD_CONST_INLINE_BORROW, 
(ptr/4 -- value)) { + tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { value = PyStackRef_FromPyObjectImmortal(ptr); } - tier2 op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { + tier2 pure op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { PyStackRef_CLOSE(pop); value = PyStackRef_FromPyObjectImmortal(ptr); } - tier2 op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { + tier2 pure op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) { value = PyStackRef_FromPyObjectNew(ptr); null = PyStackRef_NULL; } - tier2 op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { + tier2 pure op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) { value = PyStackRef_FromPyObjectImmortal(ptr); null = PyStackRef_NULL; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 9f63b98198eb3c..f8f63d82e38ea4 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -589,7 +589,7 @@ partial_evaluate_uops( // are static. // If so, whether it can be eliminated is up to whether it has an implementation. bool instr_is_truly_static = false; - if (!(_PyUop_Flags[opcode] & HAS_PURE_FLAG)) { + if (!(_PyUop_Flags[opcode] & HAS_STATIC_FLAG)) { reify_shadow_stack(ctx); } @@ -623,7 +623,7 @@ partial_evaluate_uops( } else { // Inst is static. Nothing written :)! 
- assert((_PyUop_Flags[opcode] & HAS_PURE_FLAG)); + assert((_PyUop_Flags[opcode] & HAS_STATIC_FLAG)); #ifdef Py_DEBUG if (get_lltrace() >= 3) { printf("%4d pe -STATIC-\n", (int) (this_instr - trace)); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 77514cfd0627ff..9877deadad2941 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -89,7 +89,7 @@ dummy_func(void) { GETLOCAL(oparg) = temp; } - op(_STORE_FAST, (value --)) { + _static op(_STORE_FAST, (value --)) { GETLOCAL(oparg) = value; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 3cc36b6b5841bd..60d7accdc40f4d 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -23,6 +23,7 @@ class Properties: has_free: bool side_exit: bool pure: bool + static: bool = False tier: int | None = None oparg_and_1: bool = False const_oparg: int = -1 @@ -674,6 +675,7 @@ def compute_properties(op: parser.InstDef) -> Properties: and not has_free, has_free=has_free, pure="pure" in op.annotations, + static="_static" in op.annotations, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 2f8fccec2ea409..c98cfce0b7adc4 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -265,6 +265,8 @@ def cflags(p: Properties) -> str: flags.append("HAS_PURE_FLAG") if p.oparg_and_1: flags.append("HAS_OPARG_AND_1_FLAG") + if p.static: + flags.append("HAS_STATIC_FLAG") if flags: return " | ".join(flags) else: diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index d5831593215f76..c171e0b94da5ed 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -226,6 +226,7 @@ def choice(*opts: str) -> str: "replicate", "tier1", "tier2", + "_static", } __all__ = [] diff --git 
a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 9b1bc98b5c08d7..58fffa3a5ac483 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -52,6 +52,7 @@ "PASSTHROUGH", "OPARG_AND_1", "ERROR_NO_POP", + "STATIC", ] From e8b402f60973227a84a725c32c998e2c0b891f21 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 18 Sep 2024 03:31:22 +0800 Subject: [PATCH 17/18] make LOAD_CONST static as well --- Include/internal/pycore_uop_metadata.h | 4 ++-- Python/bytecodes.c | 4 ++-- Python/optimizer_analysis.c | 12 ++++++------ Python/partial_evaluator_bytecodes.c | 22 +++++++++++++--------- Python/partial_evaluator_cases.c.h | 5 +++++ 5 files changed, 28 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 06f51803ab13db..d15a72da3885bb 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -265,8 +265,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_ESCAPES_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, - [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, - [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE] = HAS_STATIC_FLAG, + [_LOAD_CONST_INLINE_BORROW] = HAS_STATIC_FLAG, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8f00fdb0158d29..b3c6aabe02cf1e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4747,11 +4747,11 @@ dummy_func( DEOPT_IF(!current_executor->vm_data.valid); } - tier2 pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + tier2 _static op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { value = PyStackRef_FromPyObjectNew(ptr); } - tier2 pure 
op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + tier2 _static op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { value = PyStackRef_FromPyObjectImmortal(ptr); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index f8f63d82e38ea4..8edf4d75adadc9 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -509,21 +509,22 @@ reify_shadow_stack(_Py_UOpsContext *ctx) assert(slot.sym != NULL); // Need reifying. if (slot.is_virtual) { + sp->is_virtual = false; if (slot.sym->locals_idx >= 0) { - DPRINTF(3, "reifying LOAD_FAST %d\n", slot.sym->locals_idx); + DPRINTF(3, "reifying %d LOAD_FAST %d\n", (int)(sp - ctx->frame->stack), slot.sym->locals_idx); WRITE_OP(&trace_dest[ctx->n_trace_dest], _LOAD_FAST, slot.sym->locals_idx, 0); trace_dest[ctx->n_trace_dest].format = UOP_FORMAT_TARGET; - trace_dest[ctx->n_trace_dest].target = 100; + trace_dest[ctx->n_trace_dest].target = 0; } else if (slot.sym->const_val) { - DPRINTF(3, "reifying LOAD_CONST_INLINE\n"); + DPRINTF(3, "reifying %d LOAD_CONST_INLINE %p\n", (int)(sp - ctx->frame->stack), slot.sym->const_val); WRITE_OP(&trace_dest[ctx->n_trace_dest], _Py_IsImmortal(slot.sym->const_val) ? 
_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE, 0, (uint64_t)slot.sym->const_val); trace_dest[ctx->n_trace_dest].format = UOP_FORMAT_TARGET; - trace_dest[ctx->n_trace_dest].target = 100; + trace_dest[ctx->n_trace_dest].target = 0; } else if (sym_is_null(slot)) { - DPRINTF(3, "reifying PUSH_NULL\n"); + DPRINTF(3, "reifying %d PUSH_NULL\n", (int)(sp - ctx->frame->stack)); WRITE_OP(&trace_dest[ctx->n_trace_dest], _PUSH_NULL, 0, 0); } else { @@ -537,7 +538,6 @@ reify_shadow_stack(_Py_UOpsContext *ctx) ctx->done = true; return; } - sp->is_virtual = false; } } } diff --git a/Python/partial_evaluator_bytecodes.c b/Python/partial_evaluator_bytecodes.c index b6e634f0fbd7f1..8ef020e0b7250b 100644 --- a/Python/partial_evaluator_bytecodes.c +++ b/Python/partial_evaluator_bytecodes.c @@ -50,14 +50,6 @@ dummy_func(void) { // BEGIN BYTECODES // - override op(_LOAD_FAST_CHECK, (-- value)) { - value = GETLOCAL(oparg); - // We guarantee this will error - just bail and don't optimize it. - if (sym_is_null(value)) { - ctx->done = true; - } - } - override op(_LOAD_FAST, (-- value)) { value = GETLOCAL(oparg); sym_set_locals_idx(value, oparg); @@ -70,12 +62,23 @@ dummy_func(void) { GETLOCAL(oparg) = sym_new_null(ctx); } - override op(_LOAD_CONST, (-- value)) { // Should've all been converted by specializer. 
Py_UNREACHABLE(); } + override op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + value = sym_new_const(ctx, ptr); + SET_STATIC_INST(); + value.is_virtual = true; + } + + override op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + value = sym_new_const(ctx, ptr); + SET_STATIC_INST(); + value.is_virtual = true; + } + override op(_STORE_FAST, (value --)) { // Gets rid of stores by the same load if (value.is_virtual && oparg == sym_get_locals_idx(value)) { @@ -83,6 +86,7 @@ dummy_func(void) { } else { reify_shadow_stack(ctx); + value.is_virtual = false; } GETLOCAL(oparg) = value; } diff --git a/Python/partial_evaluator_cases.c.h b/Python/partial_evaluator_cases.c.h index de42d05691572c..6781a9f3a13373 100644 --- a/Python/partial_evaluator_cases.c.h +++ b/Python/partial_evaluator_cases.c.h @@ -78,6 +78,7 @@ } else { reify_shadow_stack(ctx); + value.is_virtual = false; } GETLOCAL(oparg) = value; stack_pointer += -1; @@ -2327,6 +2328,8 @@ _Py_UopsLocalsPlusSlot value; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); + SET_STATIC_INST(); + value.is_virtual = true; stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -2337,6 +2340,8 @@ _Py_UopsLocalsPlusSlot value; PyObject *ptr = (PyObject *)this_instr->operand; value = sym_new_const(ctx, ptr); + SET_STATIC_INST(); + value.is_virtual = true; stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); From 096d392ff9c4c9e764f068cd0b1f57424a2424aa Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 2 Oct 2024 23:52:34 +0800 Subject: [PATCH 18/18] fix makefile --- Makefile.pre.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index e98e7b8c8b22d3..028a0272ef4d28 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2051,7 +2051,8 @@ Python/optimizer.o: \ Python/optimizer_analysis.o: \ $(srcdir)/Include/internal/pycore_opcode_metadata.h \ 
$(srcdir)/Include/internal/pycore_optimizer.h \ - $(srcdir)/Python/optimizer_cases.c.h + $(srcdir)/Python/optimizer_cases.c.h \ + $(srcdir)/Python/partial_evaluator_cases.c.h Python/frozen.o: $(FROZEN_FILES_OUT)