Skip to content

Reduce the overhead of tracing, profiling, and quickening checks for calls #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ def jabs_op(name, op):
def_op('DICT_UPDATE', 165)
def_op('CALL_METHOD_KW', 166)

def_op('START_FUNCTION', 167) # Check for tracing/profiling
def_op('RETURN_VALUE_QUICK', 168) # RETURN_VALUE without tracing/profiling checks

del def_op, name_op, jrel_op, jabs_op

_nb_ops = [
Expand Down
48 changes: 33 additions & 15 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1537,10 +1537,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
STAT_INC(LOAD_##attr_or_method, hit); \
Py_INCREF(res);

#define TRACE_FUNCTION_EXIT() \
#define TRACE_FUNCTION_EXIT(return_value) \
if (cframe.use_tracing) { \
if (trace_function_exit(tstate, frame, retval)) { \
Py_DECREF(retval); \
PyObject *trace_retval = (return_value); \
if (trace_function_exit(tstate, frame, trace_retval)) { \
Py_DECREF(trace_retval); \
goto exit_unwind; \
} \
}
Expand Down Expand Up @@ -1668,7 +1669,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
int opcode; /* Current opcode */
int oparg; /* Current opcode argument, if any */
_Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker;

CFrame cframe;

/* WARNING: Because the CFrame lives on the C stack,
Expand Down Expand Up @@ -1734,12 +1734,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
assert(tstate->cframe == &cframe);
assert(frame == cframe.current_frame);

TRACE_FUNCTION_ENTRY();
DTRACE_FUNCTION_ENTRY();

if (_Py_IncrementCountAndMaybeQuicken(frame->f_code) < 0) {
goto exit_unwind;
}
frame->f_state = FRAME_EXECUTING;

resume_frame:
Expand Down Expand Up @@ -1826,6 +1820,17 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DISPATCH();
}

TARGET(START_FUNCTION) {
TRACE_FUNCTION_ENTRY();
DTRACE_FUNCTION_ENTRY();

if (_Py_IncrementCountAndMaybeQuicken(frame->f_code) < 0) {
goto exit_unwind;
}

DISPATCH();
}

TARGET(LOAD_CLOSURE) {
/* We keep LOAD_CLOSURE so that the bytecode stays more readable. */
PyObject *value = GETLOCAL(oparg);
Expand Down Expand Up @@ -2314,6 +2319,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
new_frame->depth = frame->depth + 1;
/* FIXME(lpereira): We're not tracing anymore by jumping to this label. */
goto start_frame;
}

Expand Down Expand Up @@ -2472,13 +2478,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
goto error;
}

TARGET(RETURN_VALUE) {
TARGET(RETURN_VALUE_QUICK)
return_value_without_tracing: {
PyObject *retval = POP();
assert(EMPTY());
frame->f_state = FRAME_RETURNED;
_PyFrame_SetStackPointer(frame, stack_pointer);
TRACE_FUNCTION_EXIT();
DTRACE_FUNCTION_EXIT();
_Py_LeaveRecursiveCall(tstate);
if (frame->depth) {
frame = cframe.current_frame = pop_frame(tstate, frame);
Expand All @@ -2493,6 +2498,17 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
return retval;
}

TARGET(RETURN_VALUE) {
frame->f_state = FRAME_RETURNED;
/* FIXME(lpereira): The stack pointer and f_state will be set to the same
* value here, and when jumping to return_value_without_tracing above. This
* is redundant, but saves us from copying some code. */
Comment on lines +2503 to +2505

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I would just duplicate the code. You also redundantly have an extra variable retval, which is only used in the TRACE_FUNCTION_EXIT() macro. Maybe something needs to be refactored again here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did some refactoring and the variable that's only used when tracing is enabled is now gone. (It's now inside the macro, which now takes a parameter.)

I dislike both copying code and doing what has been done in this PR, to be honest... while I do have a slight preference to the way I've done things as it's quite a bit of code that don't need to be copied around.

Copy link
Member

@brandtbucher brandtbucher Dec 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another possible option: split the opcode in two? So we have one opcode which does the tracing, and one which does the return.

The original code would be EXIT_FUNCTION + RETURN_VALUE, and the quickening step would replace EXIT_FUNCTION with NOP (or, even better, RETURN_VALUE).

I'm pretty sure that could work correctly here, but I might be missing some edge case where an eval break could leave us in a weird state where we trace a return that never happens (or something).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, never mind. I don't know how much that would actually clean this up.

_PyFrame_SetStackPointer(frame, stack_pointer - 1);
TRACE_FUNCTION_EXIT(TOP());
DTRACE_FUNCTION_EXIT();
goto return_value_without_tracing;
}

TARGET(GET_AITER) {
unaryfunc getter = NULL;
PyObject *iter = NULL;
Expand Down Expand Up @@ -2680,7 +2696,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
frame->f_lasti -= 1;
frame->f_state = FRAME_SUSPENDED;
_PyFrame_SetStackPointer(frame, stack_pointer);
TRACE_FUNCTION_EXIT();
TRACE_FUNCTION_EXIT(retval);
DTRACE_FUNCTION_EXIT();
_Py_LeaveRecursiveCall(tstate);
/* Restore previous cframe and return. */
Expand All @@ -2706,7 +2722,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
}
frame->f_state = FRAME_SUSPENDED;
_PyFrame_SetStackPointer(frame, stack_pointer);
TRACE_FUNCTION_EXIT();
TRACE_FUNCTION_EXIT(retval);
DTRACE_FUNCTION_EXIT();
_Py_LeaveRecursiveCall(tstate);
/* Restore previous cframe and return. */
Expand Down Expand Up @@ -4672,6 +4688,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
new_frame->previous = frame;
cframe.current_frame = frame = new_frame;
new_frame->depth = frame->depth + 1;
/* FIXME(lpereira): We're not tracing anymore by jumping to this label. */
goto start_frame;
}
}
Expand Down Expand Up @@ -4761,6 +4778,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
new_frame->depth = frame->depth + 1;
/* FIXME(lpereira): We're not tracing anymore by jumping to this label. */
goto start_frame;
}

Expand Down
23 changes: 13 additions & 10 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -993,6 +993,7 @@ stack_effect(int opcode, int oparg, int jump)
{
switch (opcode) {
case NOP:
case START_FUNCTION:
case EXTENDED_ARG:
return 0;

Expand Down Expand Up @@ -2365,7 +2366,7 @@ compiler_check_debug_args(struct compiler *c, arguments_ty args)
}

static int
compiler_function(struct compiler *c, stmt_ty s, int is_async)
compiler_function(struct compiler *c, stmt_ty s)
{
PyCodeObject *co;
PyObject *qualname, *docstring = NULL;
Expand All @@ -2379,26 +2380,27 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async)
int scope_type;
int firstlineno;

if (is_async) {
assert(s->kind == AsyncFunctionDef_kind);

switch (s->kind) {
case AsyncFunctionDef_kind:
args = s->v.AsyncFunctionDef.args;
returns = s->v.AsyncFunctionDef.returns;
decos = s->v.AsyncFunctionDef.decorator_list;
name = s->v.AsyncFunctionDef.name;
body = s->v.AsyncFunctionDef.body;

scope_type = COMPILER_SCOPE_ASYNC_FUNCTION;
} else {
assert(s->kind == FunctionDef_kind);

break;
case FunctionDef_kind:
args = s->v.FunctionDef.args;
returns = s->v.FunctionDef.returns;
decos = s->v.FunctionDef.decorator_list;
name = s->v.FunctionDef.name;
body = s->v.FunctionDef.body;

scope_type = COMPILER_SCOPE_FUNCTION;
break;
default:
Py_UNREACHABLE();
}

if (!compiler_check_debug_args(c, args))
Expand Down Expand Up @@ -2438,6 +2440,8 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async)
return 0;
}

ADDOP(c, START_FUNCTION);

c->u->u_argcount = asdl_seq_LEN(args->args);
c->u->u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
c->u->u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs);
Expand Down Expand Up @@ -3571,8 +3575,9 @@ compiler_visit_stmt(struct compiler *c, stmt_ty s)
SET_LOC(c, s);

switch (s->kind) {
case AsyncFunctionDef_kind:
case FunctionDef_kind:
return compiler_function(c, s, 0);
return compiler_function(c, s);
case ClassDef_kind:
return compiler_class(c, s);
case Return_kind:
Expand Down Expand Up @@ -3637,8 +3642,6 @@ compiler_visit_stmt(struct compiler *c, stmt_ty s)
return compiler_continue(c);
case With_kind:
return compiler_with(c, s, 0);
case AsyncFunctionDef_kind:
return compiler_function(c, s, 1);
case AsyncWith_kind:
return compiler_async_with(c, s, 0);
case AsyncFor_kind:
Expand Down
5 changes: 3 additions & 2 deletions Python/makeopcodetargets.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ def write_contents(f):
while targets[next_op] != '_unknown_opcode':
next_op += 1
targets[next_op] = "TARGET_%s" % opname
f.write("static void *opcode_targets[256] = {\n")
f.write(",\n".join([" &&%s" % s for s in targets]))
f.write("/* Automatically generated by makeopcodetargets.py. Do not edit! */\n")
f.write("static const void *opcode_targets[256] = {\n")
f.write(",\n".join([" [%d] = &&%s" % (index, s) for index, s in enumerate(targets)]))
f.write("\n};\n")


Expand Down
Loading