Skip to content

GH-113860: All executors are now defined in terms of micro ops. Convert counter executor to use uops. #113864

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Include/cpython/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ typedef struct {

/* Common header of an executor object: a variable-size Python object with an
 * entry point, a VM-owned data area, and room for executor-private data. */
typedef struct _PyExecutorObject {
PyObject_VAR_HEAD
/* WARNING: execute consumes a reference to self. This is necessary to allow executors to tail call into each other. */
/* The caller visible in Python/bytecodes.c stores the result in next_instr
 * and treats a NULL result as an error. */
_Py_CODEUNIT *(*execute)(struct _PyExecutorObject *self, struct _PyInterpreterFrame *frame, PyObject **stack_pointer);
_PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
/* Data needed by the executor goes here, but is opaque to the VM */
} _PyExecutorObject;
Expand All @@ -52,6 +50,12 @@ typedef struct _PyOptimizerObject {
/* Data needed by the optimizer goes here, but is opaque to the VM */
} _PyOptimizerObject;

/** Test support **/
/* Optimizer object that extends the base optimizer with a 64-bit counter. */
typedef struct {
/* Kept as the first member: code casts between this type and
 * _PyOptimizerObject* / PyObject*. */
_PyOptimizerObject base;
/* Incremented by the _INTERNAL_INCREMENT_OPT_COUNTER uop and reported by
 * the optimizer's get_count() method. */
int64_t count;
} _PyCounterOptimizerObject;

PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor);

PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Include/internal/pycore_uop_ids.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Include/internal/pycore_uop_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_EXIT_TRACE] = HAS_DEOPT_FLAG,
[_INSERT] = HAS_ARG_FLAG,
[_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
[_LOAD_CONST_INLINE_BORROW] = 0,
[_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
};

const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
Expand Down Expand Up @@ -303,6 +305,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS",
[_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS",
[_INSERT] = "_INSERT",
[_INTERNAL_INCREMENT_OPT_COUNTER] = "_INTERNAL_INCREMENT_OPT_COUNTER",
[_IS_NONE] = "_IS_NONE",
[_IS_OP] = "_IS_OP",
[_ITER_CHECK_LIST] = "_ITER_CHECK_LIST",
Expand All @@ -328,6 +331,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT",
[_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS",
[_LOAD_CONST] = "_LOAD_CONST",
[_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW",
[_LOAD_DEREF] = "_LOAD_DEREF",
[_LOAD_FAST] = "_LOAD_FAST",
[_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR",
Expand Down
5 changes: 0 additions & 5 deletions Include/internal/pycore_uops.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@ typedef struct {
_PyUOpInstruction trace[1];
} _PyUOpExecutorObject;

_Py_CODEUNIT *_PyUOpExecute(
_PyExecutorObject *executor,
_PyInterpreterFrame *frame,
PyObject **stack_pointer);

#ifdef __cplusplus
}
#endif
Expand Down
22 changes: 12 additions & 10 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2366,16 +2366,8 @@ dummy_func(
_PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255];
if (executor->vm_data.valid) {
Py_INCREF(executor);
if (executor->execute == _PyUOpExecute) {
current_executor = (_PyUOpExecutorObject *)executor;
GOTO_TIER_TWO();
}
next_instr = executor->execute(executor, frame, stack_pointer);
frame = tstate->current_frame;
if (next_instr == NULL) {
goto resume_with_error;
}
stack_pointer = _PyFrame_GetStackPointer(frame);
current_executor = (_PyUOpExecutorObject *)executor;
GOTO_TIER_TWO();
}
else {
code->co_executors->executors[oparg & 255] = NULL;
Expand Down Expand Up @@ -4066,6 +4058,16 @@ dummy_func(
DEOPT_IF(!current_executor->base.vm_data.valid);
}

// Produce `value` from the pointer carried inline with the uop. The body is
// a plain copy with no Py_INCREF, so the result is a borrowed reference.
// NOTE(review): `ptr/4` presumably declares a 4-code-unit inline operand --
// confirm against the cases generator documentation.
op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
value = ptr;
}

/* Internal -- for testing executors */
op(_INTERNAL_INCREMENT_OPT_COUNTER, (opt --)) {
_PyCounterOptimizerObject *exe = (_PyCounterOptimizerObject *)opt;
exe->count++;
}


// END BYTECODES //

Expand Down
18 changes: 18 additions & 0 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 2 additions & 10 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

189 changes: 79 additions & 110 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,27 +212,6 @@ PyUnstable_GetExecutor(PyCodeObject *code, int offset)
return NULL;
}

/** Test support **/


/* Optimizer object that extends the base optimizer with a 64-bit counter. */
typedef struct {
/* Kept as the first member: code casts between this type and
 * _PyOptimizerObject* / PyObject*. */
_PyOptimizerObject base;
/* Incremented by counter_execute() and reported by get_count(). */
int64_t count;
} _PyCounterOptimizerObject;

/* Executor created by counter_optimize(). */
typedef struct {
/* Base header; first member so the object can be cast to _PyExecutorObject*. */
_PyExecutorObject executor;
/* Strong reference: counter_optimize() INCREFs it, counter_dealloc() DECREFs it. */
_PyCounterOptimizerObject *optimizer;
/* Instruction pointer handed back by counter_execute(). */
_Py_CODEUNIT *next_instr;
} _PyCounterExecutorObject;

/* tp_dealloc for counting executors.
 * Runs _Py_ExecutorClear() on the base header, releases the reference to the
 * owning optimizer, and finally frees the object's memory. */
static void
counter_dealloc(_PyCounterExecutorObject *self)
{
    _PyExecutorObject *base = (_PyExecutorObject *)self;
    _PyCounterOptimizerObject *owner;

    _Py_ExecutorClear(base);
    owner = self->optimizer;
    Py_DECREF(owner);
    PyObject_Free(self);
}

static PyObject *
is_valid(PyObject *self, PyObject *Py_UNUSED(ignored))
{
Expand All @@ -244,84 +223,6 @@ static PyMethodDef executor_methods[] = {
{ NULL, NULL },
};

/* Type object for the executors allocated by counter_optimize(). */
PyTypeObject _PyCounterExecutor_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "counting_executor",
/* Fixed-size instances: the whole struct and no per-item storage. */
.tp_basicsize = sizeof(_PyCounterExecutorObject),
.tp_itemsize = 0,
/* Instances are only created from C; instantiation via the type is disallowed. */
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
.tp_dealloc = (destructor)counter_dealloc,
/* Method table shared with other executor types in this file. */
.tp_methods = executor_methods,
};

/* execute() implementation for counting executors.
 *
 * Increments the owning optimizer's counter, stores the stack pointer in the
 * frame, and returns the instruction recorded when the executor was created.
 *
 * Consumes the caller's reference to `self`, as the execute() contract
 * requires.
 */
static _Py_CODEUNIT *
counter_execute(_PyExecutorObject *self, _PyInterpreterFrame *frame, PyObject **stack_pointer)
{
    _PyCounterExecutorObject *counter = (_PyCounterExecutorObject *)self;

    counter->optimizer->count++;
    _PyFrame_SetStackPointer(frame, stack_pointer);

    /* Fetch the result before dropping our reference: if this was the last
     * reference, Py_DECREF() frees `self` and reading the field afterwards
     * would be a use-after-free. */
    _Py_CODEUNIT *next_instr = counter->next_instr;
    Py_DECREF(self);
    return next_instr;
}

/* optimize() hook of the counter optimizer.
 *
 * Allocates a counting executor that remembers `instr` and holds a strong
 * reference to the optimizer, initialises it with an empty bloom filter, and
 * stores it in *exec_ptr.
 *
 * Returns 1 on success, -1 if the executor could not be allocated.
 */
static int
counter_optimize(
    _PyOptimizerObject* self,
    PyCodeObject *code,
    _Py_CODEUNIT *instr,
    _PyExecutorObject **exec_ptr,
    int Py_UNUSED(curr_stackentries)
)
{
    _PyCounterExecutorObject *counting =
        (_PyCounterExecutorObject *)_PyObject_New(&_PyCounterExecutor_Type);
    if (!counting) {
        return -1;
    }
    _PyExecutorObject *as_executor = (_PyExecutorObject *)counting;

    counting->executor.execute = counter_execute;
    /* The executor keeps the optimizer alive; released in counter_dealloc(). */
    Py_INCREF(self);
    counting->optimizer = (_PyCounterOptimizerObject *)self;
    counting->next_instr = instr;
    *exec_ptr = as_executor;

    _PyBloomFilter no_dependencies;
    _Py_BloomFilter_Init(&no_dependencies);
    _Py_ExecutorInit(as_executor, &no_dependencies);
    return 1;
}

/* Implementation of get_count(): returns the optimizer's counter as a
 * Python int. `args` is unused (the method is registered as METH_NOARGS). */
static PyObject *
counter_get_counter(PyObject *self, PyObject *args)
{
    _PyCounterOptimizerObject *counter_opt = (_PyCounterOptimizerObject *)self;
    return PyLong_FromLongLong(counter_opt->count);
}

/* Methods exposed on counter optimizer objects. */
static PyMethodDef counter_optimizer_methods[] = {
/* get_count(): takes no arguments, implemented by counter_get_counter(). */
{ "get_count", counter_get_counter, METH_NOARGS, NULL },
/* Sentinel terminating the table. */
{ NULL, NULL },
};

/* Type object for the optimizer returned by PyUnstable_Optimizer_NewCounter(). */
PyTypeObject _PyCounterOptimizer_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "Counter optimizer",
/* Fixed-size instances: the whole struct and no per-item storage. */
.tp_basicsize = sizeof(_PyCounterOptimizerObject),
.tp_itemsize = 0,
/* Instances are only created from C; instantiation via the type is disallowed. */
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
.tp_methods = counter_optimizer_methods,
/* Instances are released directly with PyObject_Del(). */
.tp_dealloc = (destructor)PyObject_Del,
};

PyObject *
PyUnstable_Optimizer_NewCounter(void)
{
_PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&_PyCounterOptimizer_Type);
if (opt == NULL) {
return NULL;
}
opt->base.optimize = counter_optimize;
opt->base.resume_threshold = INT16_MAX;
opt->base.backedge_threshold = 0;
opt->count = 0;
return (PyObject *)opt;
}

///////////////////// Experimental UOp Optimizer /////////////////////

static void
Expand Down Expand Up @@ -381,7 +282,7 @@ PySequenceMethods uop_as_sequence = {
PyTypeObject _PyUOpExecutor_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "uop_executor",
.tp_basicsize = sizeof(_PyUOpExecutorObject) - sizeof(_PyUOpInstruction),
.tp_basicsize = offsetof(_PyUOpExecutorObject, trace),
.tp_itemsize = sizeof(_PyUOpInstruction),
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
.tp_dealloc = (destructor)uop_dealloc,
Expand Down Expand Up @@ -843,7 +744,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
dest--;
}
assert(dest == -1);
executor->base.execute = _PyUOpExecute;
_Py_ExecutorInit((_PyExecutorObject *)executor, dependencies);
#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
Expand Down Expand Up @@ -899,15 +799,6 @@ uop_optimize(
return 1;
}

/* Dummy execute() function for UOp Executor.
 * The actual implementation is inlined in ceval.c,
 * in _PyEval_EvalFrameDefault().
 * The dispatch code in bytecodes.c compares executor->execute against this
 * function's address to recognise uop executors; actually calling it
 * terminates the process through Py_FatalError(). */
_Py_CODEUNIT *
_PyUOpExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
{
Py_FatalError("Tier 2 is now inlined into Tier 1");
}

/* Releases the memory of `self` with PyObject_Free().
 * NOTE(review): presumably the tp_dealloc of the uop optimizer type -- the
 * type object that references it is outside this view. */
static void
uop_opt_dealloc(PyObject *self) {
PyObject_Free(self);
Expand Down Expand Up @@ -937,6 +828,84 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
return (PyObject *)opt;
}

/* tp_dealloc for counting executors.
 *
 * The operand of the first trace instruction is the optimizer pointer placed
 * there by counter_optimize(), which also took a strong reference to it.
 * Release that reference, then let uop_dealloc() tear down the executor.
 */
static void
counter_dealloc(_PyUOpExecutorObject *self)
{
    PyObject *counter_optimizer = (PyObject *)self->trace[0].operand;

    Py_DECREF(counter_optimizer);
    uop_dealloc(self);
}

/* Type object assigned by counter_optimize() to the executors it builds. */
PyTypeObject _PyCounterExecutor_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "counting_executor",
/* Variable-size layout: the header up to `trace`, then one
 * _PyUOpInstruction per item. */
.tp_basicsize = offsetof(_PyUOpExecutorObject, trace),
.tp_itemsize = sizeof(_PyUOpInstruction),
/* Instances are only created from C; instantiation via the type is disallowed. */
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
/* Drops the optimizer reference before delegating to uop_dealloc(). */
.tp_dealloc = (destructor)counter_dealloc,
.tp_methods = executor_methods,
};

/* optimize() hook of the counter optimizer.
 *
 * Builds a three-instruction uop trace that loads the optimizer pointer,
 * increments its counter, and exits the trace at `instr`; the resulting
 * executor is retyped as a counting executor and stored in *exec_ptr.
 *
 * Returns 1 on success, -1 if the executor could not be created.
 */
static int
counter_optimize(
    _PyOptimizerObject* self,
    PyCodeObject *code,
    _Py_CODEUNIT *instr,
    _PyExecutorObject **exec_ptr,
    int Py_UNUSED(curr_stackentries)
)
{
    /* Offset of `instr` from the start of the code object's instructions. */
    const uint32_t exit_target = (uint32_t)(instr - _PyCode_CODE(code));
    _PyUOpInstruction trace[3] = {
        { .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
        { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
        { .opcode = _EXIT_TRACE, .target = exit_target }
    };

    _PyBloomFilter no_dependencies;
    _Py_BloomFilter_Init(&no_dependencies);

    _PyExecutorObject *counting = make_executor_from_uops(trace, &no_dependencies);
    if (!counting) {
        return -1;
    }
    /* The trace embeds a raw pointer to the optimizer; keep it alive until
     * counter_dealloc() releases this reference. */
    Py_INCREF(self);
    Py_SET_TYPE(counting, &_PyCounterExecutor_Type);
    *exec_ptr = counting;
    return 1;
}

/* Implementation of get_count(): returns the optimizer's counter as a
 * Python int. `args` is unused (the method is registered as METH_NOARGS). */
static PyObject *
counter_get_counter(PyObject *self, PyObject *args)
{
    _PyCounterOptimizerObject *counter_opt = (_PyCounterOptimizerObject *)self;
    return PyLong_FromLongLong(counter_opt->count);
}

/* Methods exposed on counter optimizer objects. */
static PyMethodDef counter_optimizer_methods[] = {
/* get_count(): takes no arguments, implemented by counter_get_counter(). */
{ "get_count", counter_get_counter, METH_NOARGS, NULL },
/* Sentinel terminating the table. */
{ NULL, NULL },
};

/* Type object for the optimizer returned by PyUnstable_Optimizer_NewCounter(). */
PyTypeObject _PyCounterOptimizer_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "Counter optimizer",
/* Fixed-size instances: the whole struct and no per-item storage. */
.tp_basicsize = sizeof(_PyCounterOptimizerObject),
.tp_itemsize = 0,
/* Instances are only created from C; instantiation via the type is disallowed. */
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
.tp_methods = counter_optimizer_methods,
/* Instances are released directly with PyObject_Del(). */
.tp_dealloc = (destructor)PyObject_Del,
};

PyObject *
PyUnstable_Optimizer_NewCounter(void)
{
_PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&_PyCounterOptimizer_Type);
if (opt == NULL) {
return NULL;
}
opt->base.optimize = counter_optimize;
opt->base.resume_threshold = INT16_MAX;
opt->base.backedge_threshold = 0;
opt->count = 0;
return (PyObject *)opt;
}


/*****************************************
* Executor management
Expand Down