Skip to content

Commit 51fc725

Browse files
authored
gh-104584: Baby steps towards generating and executing traces (#105924)
Added a new, experimental, tracing optimizer and interpreter (a.k.a. "tier 2"). This currently pessimizes, so don't use yet -- this is infrastructure so we can experiment with optimizing passes. To enable it, pass ``-Xuops`` or set ``PYTHONUOPS=1``. To get debug output, set ``PYTHONUOPSDEBUG=N`` where ``N`` is a debug level (0-4, where 0 is no debug output and 4 is excessively verbose). All of this code is likely to change dramatically before the 3.13 feature freeze. But this is a first step.
1 parent d3af83b commit 51fc725

21 files changed

+2559
-305
lines changed

.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ Parser/token.c generated
8686
Programs/test_frozenmain.h generated
8787
Python/Python-ast.c generated
8888
Python/generated_cases.c.h generated
89+
Python/executor_cases.c.h generated
8990
Python/opcode_targets.h generated
9091
Python/stdlib_module_names.h generated
9192
Tools/peg_generator/pegen/grammar_parser.py generated

Include/cpython/optimizer.h

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ extern _PyOptimizerObject _PyOptimizer_Default;
4545

4646
/* For testing */
4747
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
48+
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
4849

4950
#define OPTIMIZER_BITS_IN_COUNTER 4
5051

Include/internal/pycore_uops.h

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#ifndef Py_INTERNAL_UOPS_H
2+
#define Py_INTERNAL_UOPS_H
3+
#ifdef __cplusplus
4+
extern "C" {
5+
#endif
6+
7+
#ifndef Py_BUILD_CORE
8+
# error "this header requires Py_BUILD_CORE define"
9+
#endif
10+
11+
// Number of _PyUOpInstruction slots in the fixed-size `trace` array of
// _PyUOpExecutorObject.
#define _Py_UOP_MAX_TRACE_LENGTH 16
12+
13+
// One entry of a uop trace: an opcode paired with a 64-bit operand.
typedef struct {
14+
int opcode;
15+
uint64_t operand; // Sometimes oparg, sometimes a cache entry
16+
} _PyUOpInstruction;
17+
18+
// Executor object that stores its uop trace inline.
// `base` is the first member, so a _PyExecutorObject pointer to such an
// object can be cast to _PyUOpExecutorObject * (as _PyUopExecute does).
typedef struct {
19+
_PyExecutorObject base;
20+
_PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH]; // TODO: variable length
21+
} _PyUOpExecutorObject;
22+
23+
// Entry point of the uop interpreter.
//   executor      - the executor whose trace is run
//   frame         - the interpreter frame the trace operates on
//   stack_pointer - current top of the frame's value stack
// Returns a frame pointer; the definition in this commit returns NULL on error.
_PyInterpreterFrame *_PyUopExecute(
24+
_PyExecutorObject *executor,
25+
_PyInterpreterFrame *frame,
26+
PyObject **stack_pointer);
27+
28+
#ifdef __cplusplus
29+
}
30+
#endif
31+
#endif /* !Py_INTERNAL_UOPS_H */

Include/pystats.h

+3
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ typedef struct _object_stats {
7171
uint64_t type_cache_dunder_misses;
7272
uint64_t type_cache_collisions;
7373
uint64_t optimization_attempts;
74+
uint64_t optimization_traces_created;
75+
uint64_t optimization_traces_executed;
76+
uint64_t optimization_uops_executed;
7477
} ObjectStats;
7578

7679
typedef struct _stats {

Makefile.pre.in

+9-12
Original file line numberDiff line numberDiff line change
@@ -1542,19 +1542,9 @@ regen-opcode-targets:
15421542

15431543
.PHONY: regen-cases
15441544
regen-cases:
1545-
# Regenerate Python/generated_cases.c.h
1546-
# and Python/opcode_metadata.h
1547-
# from Python/bytecodes.c
1548-
# using Tools/cases_generator/generate_cases.py
1545+
# Regenerate various files from Python/bytecodes.c
15491546
PYTHONPATH=$(srcdir)/Tools/cases_generator \
1550-
$(PYTHON_FOR_REGEN) \
1551-
$(srcdir)/Tools/cases_generator/generate_cases.py \
1552-
--emit-line-directives \
1553-
-o $(srcdir)/Python/generated_cases.c.h.new \
1554-
-m $(srcdir)/Python/opcode_metadata.h.new \
1555-
$(srcdir)/Python/bytecodes.c
1556-
$(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new
1557-
$(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new
1547+
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/generate_cases.py -l
15581548

15591549
Python/compile.o: $(srcdir)/Python/opcode_metadata.h
15601550

@@ -1565,6 +1555,13 @@ Python/ceval.o: \
15651555
$(srcdir)/Python/opcode_metadata.h \
15661556
$(srcdir)/Python/opcode_targets.h
15671557

1558+
Python/flowgraph.o: \
1559+
$(srcdir)/Python/opcode_metadata.h
1560+
1561+
Python/optimizer.o: \
1562+
$(srcdir)/Python/executor_cases.c.h \
1563+
$(srcdir)/Python/opcode_metadata.h
1564+
15681565
Python/frozen.o: $(FROZEN_FILES_OUT)
15691566

15701567
# Generate DTrace probe macros, then rename them (PYTHON_ -> PyDTrace_) to
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added a new, experimental, tracing optimizer and interpreter (a.k.a. "tier 2"). This currently pessimizes, so don't use yet -- this is infrastructure so we can experiment with optimizing passes. To enable it, pass ``-Xuops`` or set ``PYTHONUOPS=1``. To get debug output, set ``PYTHONUOPSDEBUG=N`` where ``N`` is a debug level (0-4, where 0 is no debug output and 4 is excessively verbose).

Modules/_testinternalcapi.c

+7
Original file line numberDiff line numberDiff line change
@@ -830,6 +830,12 @@ get_counter_optimizer(PyObject *self, PyObject *arg)
830830
return PyUnstable_Optimizer_NewCounter();
831831
}
832832

833+
// Test helper registered in module_functions as "get_uop_optimizer"
// (METH_NOARGS). Forwards the result of PyUnstable_Optimizer_NewUOpOptimizer()
// unchanged; neither `self` nor `arg` is used.
static PyObject *
834+
get_uop_optimizer(PyObject *self, PyObject *arg)
835+
{
836+
return PyUnstable_Optimizer_NewUOpOptimizer();
837+
}
838+
833839
static PyObject *
834840
set_optimizer(PyObject *self, PyObject *opt)
835841
{
@@ -994,6 +1000,7 @@ static PyMethodDef module_functions[] = {
9941000
{"get_optimizer", get_optimizer, METH_NOARGS, NULL},
9951001
{"set_optimizer", set_optimizer, METH_O, NULL},
9961002
{"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL},
1003+
{"get_uop_optimizer", get_uop_optimizer, METH_NOARGS, NULL},
9971004
{"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc),
9981005
METH_VARARGS | METH_KEYWORDS},
9991006
// {"pending_fd_identify", pending_fd_identify, METH_VARARGS, NULL},

Python/bytecodes.c

+1-3
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,6 @@
5252
#define family(name, ...) static int family_##name
5353
#define pseudo(name) static int pseudo_##name
5454

55-
typedef PyObject *(*convertion_func_ptr)(PyObject *);
56-
5755
// Dummy variables for stack effects.
5856
static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub;
5957
static PyObject *container, *start, *stop, *v, *lhs, *rhs, *res2;
@@ -2182,7 +2180,7 @@ dummy_func(
21822180
frame = executor->execute(executor, frame, stack_pointer);
21832181
if (frame == NULL) {
21842182
frame = cframe.current_frame;
2185-
goto error;
2183+
goto resume_with_error;
21862184
}
21872185
goto resume_frame;
21882186
}

Python/ceval.c

+129-8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "pycore_sysmodule.h" // _PySys_Audit()
2323
#include "pycore_tuple.h" // _PyTuple_ITEMS()
2424
#include "pycore_typeobject.h" // _PySuper_Lookup()
25+
#include "pycore_uops.h" // _PyUOpExecutorObject
2526
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
2627

2728
#include "pycore_dict.h"
@@ -223,14 +224,6 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
223224
static void
224225
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
225226

226-
typedef PyObject *(*convertion_func_ptr)(PyObject *);
227-
228-
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
229-
[FVC_STR] = PyObject_Str,
230-
[FVC_REPR] = PyObject_Repr,
231-
[FVC_ASCII] = PyObject_ASCII
232-
};
233-
234227
#define UNBOUNDLOCAL_ERROR_MSG \
235228
"cannot access local variable '%s' where it is not associated with a value"
236229
#define UNBOUNDFREE_ERROR_MSG \
@@ -2771,3 +2764,131 @@ void Py_LeaveRecursiveCall(void)
27712764
{
27722765
_Py_LeaveRecursiveCall();
27732766
}
2767+
2768+
///////////////////// Experimental UOp Interpreter /////////////////////
2769+
2770+
// UPDATE_MISS_STATS (called by DEOPT_IF) uses next_instr
2771+
// TODO: Make it do something useful
2772+
#undef UPDATE_MISS_STATS
2773+
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
2774+
2775+
// Interpret the uop trace stored in `executor` against `frame`.
//
// `executor` is cast to _PyUOpExecutorObject and its `trace` array is walked
// from index 0, dispatching on each entry's opcode.
//
// Returns:
//   - `frame` when the trace reaches EXIT_TRACE or control arrives at the
//     deopt code at the end of the function;
//   - NULL when control reaches the `error` label.
// On every return path the stack pointer is stored back into the frame and
// one reference to the executor is released with Py_DECREF(self).
_PyInterpreterFrame *
2776+
_PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
2777+
{
2778+
#ifdef LLTRACE
2779+
// Debug level comes from the first character of PYTHONUOPSDEBUG only.
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
2780+
int lltrace = 0;
2781+
// NOTE(review): there is no upper bound check, so any first character above
// '9' (e.g. a letter) produces a level greater than 9 instead of being rejected.
if (uop_debug != NULL && *uop_debug >= '0') {
2782+
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
2783+
}
2784+
if (lltrace >= 2) {
2785+
PyCodeObject *code = _PyFrame_GetCode(frame);
2786+
// The reported offset is relative to the start of co_code_adaptive,
// measured in code units, for the instruction after prev_instr.
_Py_CODEUNIT *instr = frame->prev_instr + 1;
2787+
fprintf(stderr,
2788+
"Entering _PyUopExecute for %s (%s:%d) at offset %ld\n",
2789+
PyUnicode_AsUTF8(code->co_qualname),
2790+
PyUnicode_AsUTF8(code->co_filename),
2791+
code->co_firstlineno,
2792+
(long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
2793+
}
2794+
#endif
2795+
2796+
PyThreadState *tstate = _PyThreadState_GET();
2797+
_PyUOpExecutorObject *self = (_PyUOpExecutorObject *)executor;
2798+
2799+
// Equivalent to CHECK_EVAL_BREAKER()
2800+
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
2801+
if (_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker)) {
2802+
if (_Py_HandlePending(tstate) != 0) {
2803+
// NOTE(review): this jump happens before `opcode` and `operand` are
// assigned; with LLTRACE and lltrace >= 2 the `error` label prints them
// while they are still uninitialized.
goto error;
2804+
}
2805+
}
2806+
2807+
OBJECT_STAT_INC(optimization_traces_executed);
2808+
// One code unit before the start of the bytecode; SET_IP below stores
// ip_offset + oparg into frame->prev_instr.
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive - 1;
2809+
// Index of the next trace entry to fetch.
int pc = 0;
2810+
int opcode;
2811+
uint64_t operand;
2812+
int oparg;
2813+
// NOTE(review): no check of `pc` against _Py_UOP_MAX_TRACE_LENGTH is visible
// here; presumably every trace is terminated by EXIT_TRACE -- confirm in the
// code that builds traces.
for (;;) {
2814+
opcode = self->trace[pc].opcode;
2815+
operand = self->trace[pc].operand;
2816+
// The 64-bit operand is narrowed to int for cases that use it as an oparg.
oparg = (int)operand;
2817+
#ifdef LLTRACE
2818+
if (lltrace >= 3) {
2819+
// Names are only looked up for opcodes below 256; others print an empty name.
const char *opname = opcode < 256 ? _PyOpcode_OpName[opcode] : "";
2820+
int stack_level = (int)(stack_pointer - _PyFrame_Stackbase(frame));
2821+
fprintf(stderr, " uop %s %d, operand %" PRIu64 ", stack_level %d\n",
2822+
opname, opcode, operand, stack_level);
2823+
}
2824+
#endif
2825+
// Advance before dispatch: while a case body runs, `pc` already refers to
// the following trace entry.
pc++;
2826+
OBJECT_STAT_INC(optimization_uops_executed);
2827+
switch (opcode) {
2828+
2829+
// ENABLE_SPECIALIZATION is forced to 0 for the generated cases included below.
#undef ENABLE_SPECIALIZATION
2830+
#define ENABLE_SPECIALIZATION 0
2831+
#include "executor_cases.c.h"
2832+
2833+
// Hand-written case: record the instruction position in the frame.
case SET_IP:
2834+
{
2835+
frame->prev_instr = ip_offset + oparg;
2836+
break;
2837+
}
2838+
2839+
// Hand-written case: normal end of the trace; save the stack pointer, drop
// the executor reference and hand the frame back to the caller.
case EXIT_TRACE:
2840+
{
2841+
_PyFrame_SetStackPointer(frame, stack_pointer);
2842+
Py_DECREF(self);
2843+
return frame;
2844+
}
2845+
2846+
default:
2847+
{
2848+
fprintf(stderr, "Unknown uop %d, operand %" PRIu64 "\n", opcode, operand);
2849+
Py_FatalError("Unknown uop");
2850+
abort(); // Unreachable
2851+
for (;;) {}
2852+
// Really unreachable
2853+
}
2854+
2855+
}
2856+
}
2857+
2858+
// The pop_N_error labels fall through one into the next, each shrinking the
// stack by one entry, so entering at pop_N_error shrinks it by N before
// reaching `error`.
pop_4_error:
2859+
STACK_SHRINK(1);
2860+
pop_3_error:
2861+
STACK_SHRINK(1);
2862+
pop_2_error:
2863+
STACK_SHRINK(1);
2864+
pop_1_error:
2865+
STACK_SHRINK(1);
2866+
error:
2867+
// On ERROR_IF we return NULL as the frame.
2868+
// The caller recovers the frame from cframe.current_frame.
2869+
#ifdef LLTRACE
2870+
if (lltrace >= 2) {
2871+
fprintf(stderr, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
2872+
}
2873+
#endif
2874+
_PyFrame_SetStackPointer(frame, stack_pointer);
2875+
Py_DECREF(self);
2876+
return NULL;
2877+
2878+
// NOTE(review): PREDICTED() is defined outside this view; presumably each use
// below emits a label that DEOPT_IF in the included cases jumps to -- confirm
// against ceval_macros.h.
PREDICTED(UNPACK_SEQUENCE)
2879+
PREDICTED(COMPARE_OP)
2880+
PREDICTED(LOAD_SUPER_ATTR)
2881+
PREDICTED(STORE_SUBSCR)
2882+
PREDICTED(BINARY_SUBSCR)
2883+
PREDICTED(BINARY_OP)
2884+
// On DEOPT_IF we just repeat the last instruction.
2885+
// This presumes nothing was popped from the stack (nor pushed).
2886+
#ifdef LLTRACE
2887+
if (lltrace >= 2) {
2888+
fprintf(stderr, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
2889+
}
2890+
#endif
2891+
_PyFrame_SetStackPointer(frame, stack_pointer);
2892+
Py_DECREF(self);
2893+
return frame;
2894+
}

Python/ceval_macros.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Macros needed by ceval.c and bytecodes.c
1+
// Macros and other things needed by ceval.c and bytecodes.c
22

33
/* Computed GOTOs, or
44
the-optimization-commonly-but-improperly-known-as-"threaded code"
@@ -339,3 +339,11 @@ do { \
339339
goto error; \
340340
} \
341341
} while (0);
342+
343+
// Pointer to a function that takes one object and returns an object; the
// element type of CONVERSION_FUNCTIONS.
typedef PyObject *(*convertion_func_ptr)(PyObject *);
344+
345+
// Table of conversion functions indexed by the FVC_* codes.
// Four slots are declared but only three are initialized; the remaining slot
// is a NULL pointer because static storage is zero-initialized.
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
346+
[FVC_STR] = PyObject_Str,
347+
[FVC_REPR] = PyObject_Repr,
348+
[FVC_ASCII] = PyObject_ASCII
349+
};

0 commit comments

Comments
 (0)