From 78b869edb8f372112eb11f429c553ef8641792b2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 11 Oct 2021 12:10:58 +0100 Subject: [PATCH 01/12] Avoid making C calls for most calls to Python functions. --- Python/ceval.c | 162 ++++++++++++++++--------------------------------- 1 file changed, 53 insertions(+), 109 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 0af233c0ba485d..8bba7ba0e9c454 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -50,9 +50,9 @@ _Py_IDENTIFIER(__name__); /* Forward declarations */ -Py_LOCAL_INLINE(PyObject *) call_function( - PyThreadState *tstate, PyObject ***pp_stack, - Py_ssize_t oparg, PyObject *kwnames, int use_tracing); +static PyObject *trace_call_function( + PyThreadState *tstate, PyObject *callable, PyObject **stack, + Py_ssize_t oparg, PyObject *kwnames); static PyObject * do_call_core( PyThreadState *tstate, PyObject *func, PyObject *callargs, PyObject *kwdict, int use_tracing); @@ -4538,12 +4538,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } + /* Declare variables used for making calls */ + PyObject *kwnames; + int nargs; + int stackadj; + TARGET(CALL_METHOD) { /* Designed to work in tamdem with LOAD_METHOD. */ - PyObject **sp, *res; - int meth_found; - - sp = stack_pointer; /* `meth` is NULL when LOAD_METHOD thinks that it's not a method call. @@ -4569,58 +4570,53 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr We'll be passing `oparg + 1` to call_function, to make it accept the `self` as a first argument. */ - meth_found = (PEEK(oparg + 2) != NULL); - res = call_function(tstate, &sp, oparg + meth_found, NULL, cframe.use_tracing); - stack_pointer = sp; - - STACK_SHRINK(1 - meth_found); - PUSH(res); - if (res == NULL) { - goto error; - } - CHECK_EVAL_BREAKER(); - DISPATCH(); + int is_method = (PEEK(oparg + 2) != NULL); + oparg += is_method; + nargs = oparg; + kwnames = NULL; + stackadj = 2-is_method; + goto call_function; } TARGET(CALL_METHOD_KW) { /* Designed to work in tandem with LOAD_METHOD. Same as CALL_METHOD but pops TOS to get a tuple of keyword names. */ - PyObject **sp, *res; - PyObject *names = NULL; - int meth_found; - - names = POP(); - - sp = stack_pointer; - meth_found = (PEEK(oparg + 2) != NULL); - res = call_function(tstate, &sp, oparg + meth_found, names, cframe.use_tracing); - stack_pointer = sp; - - STACK_SHRINK(1 - meth_found); - PUSH(res); - Py_DECREF(names); - if (res == NULL) { - goto error; - } - CHECK_EVAL_BREAKER(); - DISPATCH(); + kwnames = POP(); + int is_method = (PEEK(oparg + 2) != NULL); + oparg += is_method; + nargs = oparg - PyTuple_GET_SIZE(kwnames); + stackadj = 2-is_method; + goto call_function; } TARGET(CALL_FUNCTION) { PREDICTED(CALL_FUNCTION); - PyObject *res; + nargs = oparg; + kwnames = NULL; + stackadj = 1; + goto call_function; + } + TARGET(CALL_FUNCTION_KW) { + kwnames = POP(); + nargs = oparg - PyTuple_GET_SIZE(kwnames); + stackadj = 1; + goto call_function; + } + + call_function: + { // Check if the call can be inlined or not PyObject *function = PEEK(oparg + 1); if (Py_TYPE(function) == &PyFunction_Type && tstate->interp->eval_frame == NULL) { int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(function))->co_flags; - PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : PyFunction_GET_GLOBALS(function); int is_generator = code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR); if (!is_generator) { + PyObject *locals = code_flags & CO_OPTIMIZED ? 
NULL : PyFunction_GET_GLOBALS(function); InterpreterFrame *new_frame = _PyEvalFramePushAndInit( tstate, PyFunction_AS_FRAME_CONSTRUCTOR(function), locals, stack_pointer-oparg, - oparg, NULL, 1); + nargs, kwnames, 1); if (new_frame == NULL) { // When we exit here, we own all variables in the stack // (the frame creation has not stolen any variable) so @@ -4629,54 +4625,36 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr goto error; } - STACK_SHRINK(oparg + 1); + STACK_SHRINK(oparg + stackadj); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. + Py_XDECREF(kwnames); Py_DECREF(function); _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->depth = frame->depth + 1; tstate->frame = frame = new_frame; goto start_frame; } - else { - /* Callable is a generator or coroutine function: create - * coroutine or generator. */ - res = make_coro(tstate, PyFunction_AS_FRAME_CONSTRUCTOR(function), - locals, stack_pointer-oparg, oparg, NULL); - STACK_SHRINK(oparg + 1); - for (int i = 0; i < oparg + 1; i++) { - Py_DECREF(stack_pointer[i]); - } - } + } + PyObject *res; + /* Callable is not a normal Python function */ + if (cframe.use_tracing) { + res = trace_call_function(tstate, function, stack_pointer-oparg, nargs, kwnames); } else { - /* Callable is not a Python function */ - PyObject **sp = stack_pointer; - res = call_function(tstate, &sp, oparg, NULL, cframe.use_tracing); - stack_pointer = sp; + res = PyObject_Vectorcall(function, stack_pointer-oparg, + nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames); } - - PUSH(res); - if (res == NULL) { - goto error; + assert((res != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + Py_DECREF(function); + Py_XDECREF(kwnames); + /* Clear the stack */ + STACK_SHRINK(oparg); + for (int i = 0; i < oparg; i++) { + Py_DECREF(stack_pointer[i]); } - CHECK_EVAL_BREAKER(); - DISPATCH(); - } - - TARGET(CALL_FUNCTION_KW) { - PyObject **sp, *res, *names; - - names = POP(); - assert(PyTuple_Check(names)); - assert(PyTuple_GET_SIZE(names) <= oparg); - /* We assume without checking that names contains only strings */ - sp = stack_pointer; - res = call_function(tstate, &sp, oparg, names, cframe.use_tracing); - stack_pointer = sp; + STACK_SHRINK(stackadj); PUSH(res); - Py_DECREF(names); - if (res == NULL) { goto error; } @@ -6536,40 +6514,6 @@ trace_call_function(PyThreadState *tstate, return PyObject_Vectorcall(func, args, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames); } -/* Issue #29227: Inline call_function() into _PyEval_EvalFrameDefault() - to reduce the stack consumption. */ -Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION -call_function(PyThreadState *tstate, - PyObject ***pp_stack, - Py_ssize_t oparg, - PyObject *kwnames, - int use_tracing) -{ - PyObject **pfunc = (*pp_stack) - oparg - 1; - PyObject *func = *pfunc; - PyObject *x, *w; - Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames); - Py_ssize_t nargs = oparg - nkwargs; - PyObject **stack = (*pp_stack) - nargs - nkwargs; - - if (use_tracing) { - x = trace_call_function(tstate, func, stack, nargs, kwnames); - } - else { - x = PyObject_Vectorcall(func, stack, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames); - } - - assert((x != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - - /* Clear the stack of the function object. 
*/ - while ((*pp_stack) > pfunc) { - w = EXT_POP(*pp_stack); - Py_DECREF(w); - } - - return x; -} - static PyObject * do_call_core(PyThreadState *tstate, PyObject *func, From 73041b5e7594f3ead689c766124f2eee08cef5e1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 13 Oct 2021 14:54:50 +0100 Subject: [PATCH 02/12] Refactor a bit to help parsing of instructions. --- Python/ceval.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 8bba7ba0e9c454..7f76448a689d47 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1702,6 +1702,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr switch (opcode) { #endif + /* Variables used for making calls */ + PyObject *kwnames; + int nargs; + int stackadj; + /* BEWARE! It is essential that any operation that fails must goto error and that all operation that succeed call DISPATCH() ! */ @@ -4538,11 +4543,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } - /* Declare variables used for making calls */ - PyObject *kwnames; - int nargs; - int stackadj; - TARGET(CALL_METHOD) { /* Designed to work in tamdem with LOAD_METHOD. */ /* `meth` is NULL when LOAD_METHOD thinks that it's not @@ -4589,14 +4589,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr goto call_function; } - TARGET(CALL_FUNCTION) { - PREDICTED(CALL_FUNCTION); - nargs = oparg; - kwnames = NULL; - stackadj = 1; - goto call_function; - } - TARGET(CALL_FUNCTION_KW) { kwnames = POP(); nargs = oparg - PyTuple_GET_SIZE(kwnames); @@ -4604,10 +4596,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr goto call_function; } - call_function: - { + TARGET(CALL_FUNCTION) { + PREDICTED(CALL_FUNCTION); + PyObject *function; + nargs = oparg; + kwnames = NULL; + stackadj = 1; + call_function: // Check if the call can be inlined or not - PyObject *function = PEEK(oparg + 1); + function = PEEK(oparg + 1); if (Py_TYPE(function) == &PyFunction_Type && tstate->interp->eval_frame == NULL) { int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(function))->co_flags; int is_generator = code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR); From 5cc54429083fb4ae977391079abf43c7caa21c8b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 13 Oct 2021 16:40:30 +0100 Subject: [PATCH 03/12] Update gdb test to account for fewer C calls. 
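The py-up/py-down samples relied on every Python-to-Python call creating a new C frame. With those calls now inlined into the interpreter loop, a chain of pure-Python calls shares a single C frame, so the old gdb_sample.py no longer gives py-up anything to step across. The test therefore switches to a sample that routes one call through a genuine C function. A minimal sketch of the shape that matters (it assumes a build where the _testcapi module is available, as in the sample below):

    from _testcapi import pyobject_fastcall

    def baz(*args):
        id(42)                      # breakpoint target

    # foo() -> bar() -> pyobject_fastcall() is a real C call -> baz(),
    # so gdb still has a C frame boundary to walk over.
    pyobject_fastcall(baz, (1, 2, 3))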
--- Lib/test/test_gdb.py | 52 +++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py index fb0f1295574b9b..2805eaf9f95674 100644 --- a/Lib/test/test_gdb.py +++ b/Lib/test/test_gdb.py @@ -724,24 +724,36 @@ def test_two_abs_args(self): ' 3 def foo(a, b, c):\n', bt) +SAMPLE_WITH_C_CALL = """ + +from _testcapi import pyobject_fastcall + +def foo(a, b, c): + bar(a, b, c) + +def bar(a, b, c): + pyobject_fastcall(baz, (a, b, c)) + +def baz(*args): + id(42) + +foo(1, 2, 3) + +""" + + class StackNavigationTests(DebuggerTests): @unittest.skipUnless(HAS_PYUP_PYDOWN, "test requires py-up/py-down commands") @unittest.skipIf(python_is_optimized(), "Python was compiled with optimizations") def test_pyup_command(self): 'Verify that the "py-up" command works' - bt = self.get_stack_trace(script=self.get_sample_script(), + bt = self.get_stack_trace(source=SAMPLE_WITH_C_CALL, cmds_after_breakpoint=['py-up', 'py-up']) self.assertMultilineMatches(bt, r'''^.* -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\) - id\(42\) -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\) - baz\(a, b, c\) -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\) - bar\(a=a, b=b, c=c\) -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 12, in \(\) - foo\(1, 2, 3\) +#[0-9]+ Frame 0x-?[0-9a-f]+, for file , line 12, in baz \(args=\(1, 2, 3\)\) +#[0-9]+ $''') @unittest.skipUnless(HAS_PYUP_PYDOWN, "test requires py-up/py-down commands") @@ -765,22 +777,13 @@ def test_up_at_top(self): "Python was compiled with optimizations") def test_up_then_down(self): 'Verify "py-up" followed by "py-down"' - bt = self.get_stack_trace(script=self.get_sample_script(), + bt = self.get_stack_trace(source=SAMPLE_WITH_C_CALL, cmds_after_breakpoint=['py-up', 'py-up', 'py-down']) self.assertMultilineMatches(bt, r'''^.* -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\) - id\(42\) -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\) - baz\(a, b, c\) -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\) - bar\(a=a, b=b, c=c\) -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 12, in \(\) - foo\(1, 2, 3\) -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\) - id\(42\) -#[0-9]+ Frame 0x-?[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\) - baz\(a, b, c\) +#[0-9]+ Frame 0x-?[0-9a-f]+, for file , line 12, in baz \(args=\(1, 2, 3\)\) +#[0-9]+ +#[0-9]+ Frame 0x-?[0-9a-f]+, for file , line 12, in baz \(args=\(1, 2, 3\)\) $''') class PyBtTests(DebuggerTests): @@ -970,13 +973,12 @@ def __init__(self): self.assertRegex(gdb_output, r" Date: Thu, 14 Oct 2021 12:40:42 +0100 Subject: [PATCH 04/12] Change initialize_locals(steal=true) and _PyTuple_FromArraySteal to consume the argument references regardless of whether they succeed or fail. 
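With steal_args=true, initialize_locals() (and _PyTuple_FromArraySteal) now consume the argument references on both the success and the failure paths, so a failing call no longer leaves the caller owning the stack arguments. The old fail-path code that re-incref'd everything already copied into localsplus goes away; instead each failure label releases exactly the references that have not yet been moved. For example, if tuple_alloc() fails inside _PyTuple_FromArraySteal, the function itself now decrefs src[0..n-1] before returning NULL, and the caller only has to propagate the error.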
--- Objects/tupleobject.c | 4 +- Python/ceval.c | 91 ++++++++++++++++++++++--------------------- 2 files changed, 50 insertions(+), 45 deletions(-) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 018e738af06e38..051683086ea2c5 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -490,9 +490,11 @@ _PyTuple_FromArraySteal(PyObject *const *src, Py_ssize_t n) if (n == 0) { return tuple_get_empty(); } - PyTupleObject *tuple = tuple_alloc(n); if (tuple == NULL) { + for (Py_ssize_t i = 0; i < n; i++) { + Py_DECREF(src[i]); + } return NULL; } PyObject **dst = tuple->ob_item; diff --git a/Python/ceval.c b/Python/ceval.c index 7f76448a689d47..152ac9a1bdd26a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4610,23 +4610,19 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr int is_generator = code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR); if (!is_generator) { PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : PyFunction_GET_GLOBALS(function); + STACK_SHRINK(oparg); InterpreterFrame *new_frame = _PyEvalFramePushAndInit( tstate, PyFunction_AS_FRAME_CONSTRUCTOR(function), locals, - stack_pointer-oparg, + stack_pointer, nargs, kwnames, 1); - if (new_frame == NULL) { - // When we exit here, we own all variables in the stack - // (the frame creation has not stolen any variable) so - // we need to clean the whole stack (done in the - // "error" label). - goto error; - } - - STACK_SHRINK(oparg + stackadj); + STACK_SHRINK(stackadj); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. Py_XDECREF(kwnames); Py_DECREF(function); + if (new_frame == NULL) { + goto error; + } _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->depth = frame->depth + 1; tstate->frame = frame = new_frame; @@ -5397,7 +5393,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, if (co->co_flags & CO_VARKEYWORDS) { kwdict = PyDict_New(); if (kwdict == NULL) { - goto fail; + goto fail_early; } i = total_args; if (co->co_flags & CO_VARARGS) { @@ -5436,11 +5432,19 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, u = _PyTuple_FromArray(args + n, argcount - n); } if (u == NULL) { - goto fail; + goto fail_post_positional; } assert(localsplus[total_args] == NULL); localsplus[total_args] = u; } + else if (argcount > n) { + /* Too many postional args. Error is reported later */ + if (steal_args) { + for (j = n; j < argcount; j++) { + Py_DECREF(args[j]); + } + } + } /* Handle keyword arguments */ if (kwnames != NULL) { @@ -5455,7 +5459,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, _PyErr_Format(tstate, PyExc_TypeError, "%U() keywords must be strings", con->fc_qualname); - goto fail; + goto kw_fail; } /* Speed hack: do raw pointer compares. 
As names are @@ -5476,7 +5480,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, goto kw_found; } else if (cmp < 0) { - goto fail; + goto kw_fail; } } @@ -5488,29 +5492,38 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, kwcount, kwnames, con->fc_qualname)) { - goto fail; + goto kw_fail; } _PyErr_Format(tstate, PyExc_TypeError, "%U() got an unexpected keyword argument '%S'", con->fc_qualname, keyword); - goto fail; + goto kw_fail; } if (PyDict_SetItem(kwdict, keyword, value) == -1) { - goto fail; + goto kw_fail; } if (steal_args) { Py_DECREF(value); } continue; + kw_fail: + if (steal_args) { + for (;i < kwcount; i++) { + PyObject *value = args[i+argcount]; + Py_DECREF(value); + } + } + goto fail_late; + kw_found: if (localsplus[j] != NULL) { _PyErr_Format(tstate, PyExc_TypeError, "%U() got multiple values for argument '%S'", con->fc_qualname, keyword); - goto fail; + goto kw_fail; } if (!steal_args) { Py_INCREF(value); @@ -5523,7 +5536,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, if ((argcount > co->co_argcount) && !(co->co_flags & CO_VARARGS)) { too_many_positional(tstate, co, argcount, con->fc_defaults, localsplus, con->fc_qualname); - goto fail; + goto fail_late; } /* Add missing positional arguments (copy default values from defs) */ @@ -5539,7 +5552,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, if (missing) { missing_arguments(tstate, co, missing, defcount, localsplus, con->fc_qualname); - goto fail; + goto fail_late; } if (n > m) i = n - m; @@ -5572,7 +5585,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, continue; } else if (_PyErr_Occurred(tstate)) { - goto fail; + goto fail_late; } } missing++; @@ -5580,7 +5593,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, if (missing) { missing_arguments(tstate, co, missing, -1, localsplus, con->fc_qualname); - goto fail; + goto fail_late; } } @@ -5593,33 +5606,23 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, return 0; -fail: /* Jump here from prelude on failure */ +fail_early: if (steal_args) { - // If we failed to initialize locals, make sure the caller still own all the - // arguments that were on the stack. We need to increment the reference count - // of everything we copied (everything in localsplus) that came from the stack - // (everything that is present in the "args" array). - Py_ssize_t kwcount = kwnames != NULL ? PyTuple_GET_SIZE(kwnames) : 0; - for (Py_ssize_t k=0; k < total_args; k++) { - PyObject* arg = localsplus[k]; - for (Py_ssize_t j=0; j < argcount + kwcount; j++) { - if (args[j] == arg) { - Py_XINCREF(arg); - break; - } - } + for (j = 0; j < argcount; j++) { + Py_DECREF(args[j]); } - // Restore all the **kwargs we placed into the kwargs dictionary - if (kwdict) { - PyObject *key, *value; - Py_ssize_t pos = 0; - while (PyDict_Next(kwdict, &pos, &key, &value)) { - Py_INCREF(value); - } + } + /* fall through */ +fail_post_positional: + if (steal_args) { + Py_ssize_t kwcount = kwnames != NULL ? PyTuple_GET_SIZE(kwnames) : 0; + for (j = argcount; j < argcount+kwcount; j++) { + Py_DECREF(args[j]); } } + /* fall through */ +fail_late: return -1; - } static InterpreterFrame * From 03e7ad9fa0fd5235bfea7785817a4685663ee5bd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 14 Oct 2021 16:12:13 +0100 Subject: [PATCH 05/12] Fix compiler warnings on Windows. 
--- Python/ceval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 152ac9a1bdd26a..fcb12841d8d3ca 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4584,14 +4584,14 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr kwnames = POP(); int is_method = (PEEK(oparg + 2) != NULL); oparg += is_method; - nargs = oparg - PyTuple_GET_SIZE(kwnames); + nargs = oparg - (int)PyTuple_GET_SIZE(kwnames); stackadj = 2-is_method; goto call_function; } TARGET(CALL_FUNCTION_KW) { kwnames = POP(); - nargs = oparg - PyTuple_GET_SIZE(kwnames); + nargs = oparg - (int)PyTuple_GET_SIZE(kwnames); stackadj = 1; goto call_function; } From 82f5093cbf5a5ed8535f92ce4218df831ec5af92 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 14 Oct 2021 18:12:29 +0100 Subject: [PATCH 06/12] Rename labels and variable for clarity. --- Python/ceval.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index fcb12841d8d3ca..cbeb6e67da89f4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1705,7 +1705,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr /* Variables used for making calls */ PyObject *kwnames; int nargs; - int stackadj; + int postcall_shrink; /* BEWARE! It is essential that any operation that fails must goto error @@ -4574,7 +4574,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr oparg += is_method; nargs = oparg; kwnames = NULL; - stackadj = 2-is_method; + postcall_shrink = 2-is_method; goto call_function; } @@ -4585,14 +4585,14 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr int is_method = (PEEK(oparg + 2) != NULL); oparg += is_method; nargs = oparg - (int)PyTuple_GET_SIZE(kwnames); - stackadj = 2-is_method; + postcall_shrink = 2-is_method; goto call_function; } TARGET(CALL_FUNCTION_KW) { kwnames = POP(); nargs = oparg - (int)PyTuple_GET_SIZE(kwnames); - stackadj = 1; + postcall_shrink = 1; goto call_function; } @@ -4601,7 +4601,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *function; nargs = oparg; kwnames = NULL; - stackadj = 1; + postcall_shrink = 1; call_function: // Check if the call can be inlined or not function = PEEK(oparg + 1); @@ -4615,7 +4615,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr tstate, PyFunction_AS_FRAME_CONSTRUCTOR(function), locals, stack_pointer, nargs, kwnames, 1); - STACK_SHRINK(stackadj); + STACK_SHRINK(postcall_shrink); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. 
Py_XDECREF(kwnames); @@ -4629,8 +4629,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr goto start_frame; } } - PyObject *res; /* Callable is not a normal Python function */ + PyObject *res; if (cframe.use_tracing) { res = trace_call_function(tstate, function, stack_pointer-oparg, nargs, kwnames); } @@ -4646,7 +4646,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr for (int i = 0; i < oparg; i++) { Py_DECREF(stack_pointer[i]); } - STACK_SHRINK(stackadj); + STACK_SHRINK(postcall_shrink); PUSH(res); if (res == NULL) { goto error; @@ -5393,7 +5393,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, if (co->co_flags & CO_VARKEYWORDS) { kwdict = PyDict_New(); if (kwdict == NULL) { - goto fail_early; + goto fail_pre_positional; } i = total_args; if (co->co_flags & CO_VARARGS) { @@ -5516,7 +5516,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, Py_DECREF(value); } } - goto fail_late; + goto fail_noclean; kw_found: if (localsplus[j] != NULL) { @@ -5536,7 +5536,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, if ((argcount > co->co_argcount) && !(co->co_flags & CO_VARARGS)) { too_many_positional(tstate, co, argcount, con->fc_defaults, localsplus, con->fc_qualname); - goto fail_late; + goto fail_noclean; } /* Add missing positional arguments (copy default values from defs) */ @@ -5552,7 +5552,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, if (missing) { missing_arguments(tstate, co, missing, defcount, localsplus, con->fc_qualname); - goto fail_late; + goto fail_noclean; } if (n > m) i = n - m; @@ -5585,7 +5585,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, continue; } else if (_PyErr_Occurred(tstate)) { - goto fail_late; + goto fail_noclean; } } missing++; @@ -5593,7 +5593,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, if (missing) { missing_arguments(tstate, co, missing, -1, localsplus, con->fc_qualname); - goto fail_late; + goto fail_noclean; } } @@ -5606,7 +5606,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, return 0; -fail_early: +fail_pre_positional: if (steal_args) { for (j = 0; j < argcount; j++) { Py_DECREF(args[j]); @@ -5621,7 +5621,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, } } /* fall through */ -fail_late: +fail_noclean: return -1; } From e4160c954123bbdeccb73247011f68219954fdb2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 15 Oct 2021 11:30:17 +0100 Subject: [PATCH 07/12] Add capability for specializing CALL_FUNCTION. 
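CALL_FUNCTION sites now quicken to CALL_FUNCTION_ADAPTIVE with a single _PyAdaptiveEntry cache slot. While the counter is non-zero the instruction defers to the generic call_function path; when it reaches zero, _Py_Specialize_CallFunction() inspects the callable and either rewrites the instruction to a specialized form or records a failure reason and backs off. This patch only adds the machinery and the failure stats; the concrete specializations land in later commits. A rough sketch of the kind of call site this is aimed at (names are illustrative only):

    def inc(x):
        return x + 1

    def run(n):
        total = 0
        for _ in range(n):
            total = inc(total)   # hot, monomorphic CALL_FUNCTION site
        return total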
--- Include/internal/pycore_code.h | 1 + Include/opcode.h | 11 +++--- Lib/opcode.py | 1 + Python/ceval.c | 23 +++++++++++++ Python/opcode_targets.h | 10 +++--- Python/specialize.c | 61 ++++++++++++++++++++++++++++++++++ 6 files changed, 97 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 0b127ed28993b7..39586825c19951 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -308,6 +308,7 @@ int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNI int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); +int _Py_Specialize_CallFunction(PyThreadState *tstate, PyObject *callable, int nargs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/opcode.h b/Include/opcode.h index 15f722630dc5da..abad8c16472532 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -163,11 +163,12 @@ extern "C" { #define STORE_ATTR_INSTANCE_VALUE 122 #define STORE_ATTR_SLOT 123 #define STORE_ATTR_WITH_HINT 127 -#define LOAD_FAST__LOAD_FAST 128 -#define STORE_FAST__LOAD_FAST 134 -#define LOAD_FAST__LOAD_CONST 140 -#define LOAD_CONST__LOAD_FAST 143 -#define STORE_FAST__STORE_FAST 149 +#define CALL_FUNCTION_ADAPTIVE 128 +#define LOAD_FAST__LOAD_FAST 134 +#define STORE_FAST__LOAD_FAST 140 +#define LOAD_FAST__LOAD_CONST 143 +#define LOAD_CONST__LOAD_FAST 149 +#define STORE_FAST__STORE_FAST 150 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index efd6aefccc5713..b6ae9cd4e75b45 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -247,6 +247,7 @@ def jabs_op(name, op): "STORE_ATTR_INSTANCE_VALUE", "STORE_ATTR_SLOT", "STORE_ATTR_WITH_HINT", + "CALL_FUNCTION_ADAPTIVE", # Super instructions "LOAD_FAST__LOAD_FAST", "STORE_FAST__LOAD_FAST", diff --git a/Python/ceval.c b/Python/ceval.c index cbeb6e67da89f4..370d1bb130327e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4598,6 +4598,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr TARGET(CALL_FUNCTION) { PREDICTED(CALL_FUNCTION); + STAT_INC(CALL_FUNCTION, unquickened); PyObject *function; nargs = oparg; kwnames = NULL; @@ -4655,6 +4656,28 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } + TARGET(CALL_FUNCTION_ADAPTIVE) { + assert(cframe.use_tracing == 0); + SpecializedCacheEntry *cache = GET_CACHE(); + nargs = cache->adaptive.original_oparg; + if (cache->adaptive.counter == 0) { + PyObject *callable = PEEK(nargs+1); + next_instr--; + if (_Py_Specialize_CallFunction(tstate, callable, nargs, next_instr, cache) < 0) { + goto error; + } + DISPATCH(); + } + else { + STAT_INC(CALL_FUNCTION, deferred); + cache->adaptive.counter--; + oparg = nargs; + kwnames = NULL; + postcall_shrink = 1; + goto call_function; + } + } + TARGET(CALL_FUNCTION_EX) { PREDICTED(CALL_FUNCTION_EX); PyObject *func, *callargs, *kwargs = NULL, *result; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 773f9254cf6244..23fe2ff4a99dc6 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -127,30 +127,30 @@ static void *opcode_targets[256] = { 
&&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, &&TARGET_STORE_ATTR_WITH_HINT, - &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_CALL_FUNCTION_ADAPTIVE, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_MATCH_CLASS, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 6efee7643a4554..803a98a6ab90a4 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -232,6 +232,7 @@ static uint8_t adaptive_opcodes[256] = { [BINARY_ADD] = BINARY_ADD_ADAPTIVE, [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, + [CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE, }; /* The number of cache entries required for a "family" of instructions. */ @@ -242,6 +243,7 @@ static uint8_t cache_requirements[256] = { [BINARY_ADD] = 0, [BINARY_SUBSCR] = 0, [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ + [CALL_FUNCTION] = 1 /* _PyAdaptiveEntry */ }; /* Return the oparg for the cache_offset and instruction index. @@ -452,6 +454,11 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_NON_FUNCTION_SCOPE 11 #define SPEC_FAIL_DIFFERENT_TYPES 12 +/* Calls */ +#define SPEC_FAIL_BUILTIN_FUNCTION 7 +#define SPEC_FAIL_CLASS 8 +#define SPEC_FAIL_PYTHON_FUNCTION 9 + static int specialize_module_load_attr( @@ -1188,3 +1195,57 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) assert(!PyErr_Occurred()); return 0; } + +static int specialize_c_call( + PyObject *callable, int nargs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_BUILTIN_FUNCTION); + return -1; +} + +static int specialize_class_call( + PyObject *callable, int nargs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS); + return -1; +} + +static int specialize_py_call(PyThreadState *tstate, + PyObject *callable, int nargs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYTHON_FUNCTION); + return -1; +} + +int +_Py_Specialize_CallFunction( + PyThreadState *tstate, PyObject *callable, int nargs, + _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + _PyAdaptiveEntry *cache0 = &cache->adaptive; + int fail; + if (PyCFunction_CheckExact(callable)) { + fail = specialize_c_call(callable, nargs, instr, cache); + } + else if (PyFunction_Check(callable)) { + fail = specialize_py_call(tstate, callable, nargs, instr, cache); + } + else if (PyType_Check(callable)) { + fail = specialize_class_call(callable, nargs, instr, cache); + } + else { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OTHER); + fail = -1; + } + if (fail) { + STAT_INC(CALL_FUNCTION, specialization_failure); + assert(!PyErr_Occurred()); + cache_backoff(cache0); + } + else { + STAT_INC(CALL_FUNCTION, specialization_success); + assert(!PyErr_Occurred()); + cache0->counter = saturating_start(); + } + return 0; +} From 
d5add72681b11594f7a919a4a888139976c33b9e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 15 Oct 2021 12:36:54 +0100 Subject: [PATCH 08/12] Specialize for calls to simple Python functions with perfectly matching arguments. --- Include/internal/pycore_code.h | 3 +- Include/opcode.h | 11 +++--- Lib/opcode.py | 1 + Python/ceval.c | 33 +++++++++++++++- Python/opcode_targets.h | 8 ++-- Python/specialize.c | 69 +++++++++++++++++++++++++++------- 6 files changed, 100 insertions(+), 25 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 39586825c19951..071b63f96c2b06 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -17,6 +17,7 @@ typedef struct { uint8_t original_oparg; uint8_t counter; uint16_t index; + uint32_t version; } _PyAdaptiveEntry; @@ -308,7 +309,7 @@ int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNI int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); -int _Py_Specialize_CallFunction(PyThreadState *tstate, PyObject *callable, int nargs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/opcode.h b/Include/opcode.h index abad8c16472532..5d97e864b89ac0 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -164,11 +164,12 @@ extern "C" { #define STORE_ATTR_SLOT 123 #define STORE_ATTR_WITH_HINT 127 #define CALL_FUNCTION_ADAPTIVE 128 -#define LOAD_FAST__LOAD_FAST 134 -#define STORE_FAST__LOAD_FAST 140 -#define LOAD_FAST__LOAD_CONST 143 -#define LOAD_CONST__LOAD_FAST 149 -#define STORE_FAST__STORE_FAST 150 +#define CALL_FUNCTION_PY_SIMPLE 134 +#define LOAD_FAST__LOAD_FAST 140 +#define STORE_FAST__LOAD_FAST 143 +#define LOAD_FAST__LOAD_CONST 149 +#define LOAD_CONST__LOAD_FAST 150 +#define STORE_FAST__STORE_FAST 151 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index b6ae9cd4e75b45..9d1598bb3dd861 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -248,6 +248,7 @@ def jabs_op(name, op): "STORE_ATTR_SLOT", "STORE_ATTR_WITH_HINT", "CALL_FUNCTION_ADAPTIVE", + "CALL_FUNCTION_PY_SIMPLE", # Super instructions "LOAD_FAST__LOAD_FAST", "STORE_FAST__LOAD_FAST", diff --git a/Python/ceval.c b/Python/ceval.c index 370d1bb130327e..93bd37290626dd 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4663,7 +4663,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr if (cache->adaptive.counter == 0) { PyObject *callable = PEEK(nargs+1); next_instr--; - if (_Py_Specialize_CallFunction(tstate, callable, nargs, next_instr, cache) < 0) { + if (_Py_Specialize_CallFunction(callable, next_instr, nargs, cache) < 0) { goto error; } DISPATCH(); @@ -4678,6 +4678,36 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } } + TARGET(CALL_FUNCTION_PY_SIMPLE) { + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + int argcount = cache0->original_oparg; + PyObject *callable = PEEK(argcount+1); + DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION); + 
PyFunctionObject *func = (PyFunctionObject *)callable; + DEOPT_IF(func->func_version != cache0->version, CALL_FUNCTION); + /* PEP 523 */ + DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION); + STAT_INC(CALL_FUNCTION, hit); + record_cache_hit(cache0); + InterpreterFrame *new_frame = _PyThreadState_PushFrame( + tstate, PyFunction_AS_FRAME_CONSTRUCTOR(func), NULL); + if (new_frame == NULL) { + goto error; + } + STACK_SHRINK(argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = stack_pointer[i]; + } + STACK_SHRINK(1); + Py_DECREF(func); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = tstate->frame; + new_frame->depth = frame->depth + 1; + tstate->frame = frame = new_frame; + goto start_frame; + } + TARGET(CALL_FUNCTION_EX) { PREDICTED(CALL_FUNCTION_EX); PyObject *func, *callargs, *kwargs = NULL, *result; @@ -4946,6 +4976,7 @@ MISS_WITH_CACHE(LOAD_ATTR) MISS_WITH_CACHE(STORE_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(LOAD_METHOD) +MISS_WITH_CACHE(CALL_FUNCTION) MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR) MISS_WITH_OPARG_COUNTER(BINARY_ADD) diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 23fe2ff4a99dc6..548d81f98e9c32 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -133,24 +133,24 @@ static void *opcode_targets[256] = { &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_CALL_FUNCTION_PY_SIMPLE, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, &&TARGET_MATCH_CLASS, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 803a98a6ab90a4..bf6cb5c71fa4d0 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -457,7 +457,11 @@ _Py_Quicken(PyCodeObject *code) { /* Calls */ #define SPEC_FAIL_BUILTIN_FUNCTION 7 #define SPEC_FAIL_CLASS 8 -#define SPEC_FAIL_PYTHON_FUNCTION 9 +#define SPEC_FAIL_GENERATOR 9 +#define SPEC_FAIL_COMPLEX_PARAMETERS 10 +#define SPEC_FAIL_WRONG_NUMBER_ARGUMENTS 10 +#define SPEC_FAIL_CO_NOT_OPTIMIZED 11 +#define SPEC_FAIL_FREE_VARS 12 static int @@ -1196,42 +1200,79 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) return 0; } -static int specialize_c_call( - PyObject *callable, int nargs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +static int +specialize_c_call( + PyObject *callable, _Py_CODEUNIT *instr, + int nargs, SpecializedCacheEntry *cache) { SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_BUILTIN_FUNCTION); return -1; } -static int specialize_class_call( - PyObject *callable, int nargs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +static int +specialize_class_call( + PyObject *callable, _Py_CODEUNIT *instr, + int nargs, SpecializedCacheEntry *cache) { SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS); return -1; } -static int specialize_py_call(PyThreadState *tstate, - PyObject *callable, int nargs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +static int +specialize_py_call( + PyFunctionObject *func, _Py_CODEUNIT *instr, + int nargs, 
SpecializedCacheEntry *cache) { - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYTHON_FUNCTION); - return -1; + /* Exclude generator or coroutines for now */ + PyCodeObject *code = (PyCodeObject *)func->func_code; + int flags = code->co_flags; + if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR); + return -1; + } + if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS); + return -1; + } + if (code->co_argcount != nargs) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return -1; + } + if ((flags & CO_OPTIMIZED) == 0) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED); + return -1; + } + if (code->co_nfreevars) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS); + return -1; + } + _PyAdaptiveEntry *cache0 = &cache->adaptive; + int version = _PyFunction_GetVersionForCurrentState(func); + if (version == 0) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_VERSIONS); + return -1; + } + cache0->version = version; + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_PY_SIMPLE, _Py_OPARG(*instr)); + return 0; } + int _Py_Specialize_CallFunction( - PyThreadState *tstate, PyObject *callable, int nargs, - _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) + PyObject *callable, _Py_CODEUNIT *instr, + int nargs, SpecializedCacheEntry *cache) { _PyAdaptiveEntry *cache0 = &cache->adaptive; int fail; if (PyCFunction_CheckExact(callable)) { - fail = specialize_c_call(callable, nargs, instr, cache); + fail = specialize_c_call(callable, instr, nargs, cache); } else if (PyFunction_Check(callable)) { - fail = specialize_py_call(tstate, callable, nargs, instr, cache); + fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, cache); } else if (PyType_Check(callable)) { - fail = specialize_class_call(callable, nargs, instr, cache); + fail = specialize_class_call(callable, instr, nargs, cache); } else { SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OTHER); From 1ef71324ea9251ad71fd9cdefe563044d75f0938 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 15 Oct 2021 12:41:44 +0100 Subject: [PATCH 09/12] Add CALL_FUNCTION to stats. --- Python/specialize.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/specialize.c b/Python/specialize.c index bf6cb5c71fa4d0..4ec7fc4cdf321d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -126,6 +126,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, BINARY_ADD, "binary_add"); err += add_stat_dict(stats, BINARY_SUBSCR, "binary_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); + err += add_stat_dict(stats, CALL_FUNCTION, "call_function"); if (err < 0) { Py_DECREF(stats); return NULL; @@ -182,6 +183,7 @@ _Py_PrintSpecializationStats(void) print_stats(out, &_specialization_stats[BINARY_ADD], "binary_add"); print_stats(out, &_specialization_stats[BINARY_SUBSCR], "binary_subscr"); print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); + print_stats(out, &_specialization_stats[CALL_FUNCTION], "call_function"); if (out != stderr) { fclose(out); } From 5dbedd8a8c640999ce6e6a0b816cb4cc0f74093f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 18 Oct 2021 12:38:15 +0100 Subject: [PATCH 10/12] Handle default arguments in CALL_FUNCTION_PY_SIMPLE. 
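CALL_FUNCTION_PY_SIMPLE no longer requires the number of positional arguments to match co_argcount exactly. The cache now stores the function's version plus defaults_start/defaults_len, and the instruction copies the missing trailing arguments straight out of func_defaults into localsplus. A small illustration of the call shapes this admits (the function and values are made up for the example):

    def scale(value, factor=2, offset=0):
        return value * factor + offset

    scale(10)      # nargs=1: defaults_start=0, defaults_len=2
    scale(10, 3)   # nargs=2: defaults_start=1, defaults_len=1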
--- Include/internal/pycore_code.h | 8 +++++++- Python/ceval.c | 9 ++++++++- Python/specialize.c | 31 +++++++++++++++++++++++-------- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 071b63f96c2b06..3fc636634c4e51 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -17,7 +17,6 @@ typedef struct { uint8_t original_oparg; uint8_t counter; uint16_t index; - uint32_t version; } _PyAdaptiveEntry; @@ -36,6 +35,12 @@ typedef struct { PyObject *obj; } _PyObjectCache; +typedef struct { + uint32_t func_version; + uint16_t defaults_start; + uint16_t defaults_len; +} _PyCallCache; + /* Add specialized versions of entries to this union. * * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 @@ -52,6 +57,7 @@ typedef union { _PyAttrCache attr; _PyLoadGlobalCache load_global; _PyObjectCache obj; + _PyCallCache call; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) diff --git a/Python/ceval.c b/Python/ceval.c index 93bd37290626dd..9994bef200440a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4682,10 +4682,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; int argcount = cache0->original_oparg; + _PyCallCache *cache1 = &caches[-1].call; PyObject *callable = PEEK(argcount+1); DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION); PyFunctionObject *func = (PyFunctionObject *)callable; - DEOPT_IF(func->func_version != cache0->version, CALL_FUNCTION); + DEOPT_IF(func->func_version != cache1->func_version, CALL_FUNCTION); /* PEP 523 */ DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION); STAT_INC(CALL_FUNCTION, hit); @@ -4700,6 +4701,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr new_frame->localsplus[i] = stack_pointer[i]; } STACK_SHRINK(1); + int deflen = cache1->defaults_len; + for (int i = 0; i < deflen; i++) { + PyObject *def = PyTuple_GET_ITEM(func->func_defaults, cache1->defaults_start+i); + Py_INCREF(def); + new_frame->localsplus[argcount+i] = def; + } Py_DECREF(func); _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->previous = tstate->frame; diff --git a/Python/specialize.c b/Python/specialize.c index 4ec7fc4cdf321d..0a2311c969b61e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -245,7 +245,7 @@ static uint8_t cache_requirements[256] = { [BINARY_ADD] = 0, [BINARY_SUBSCR] = 0, [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ - [CALL_FUNCTION] = 1 /* _PyAdaptiveEntry */ + [CALL_FUNCTION] = 2 /* _PyAdaptiveEntry and _PyCallCache */ }; /* Return the oparg for the cache_offset and instruction index. 
@@ -1225,6 +1225,7 @@ specialize_py_call( PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache) { + _PyCallCache *cache1 = &cache[-1].call; /* Exclude generator or coroutines for now */ PyCodeObject *code = (PyCodeObject *)func->func_code; int flags = code->co_flags; @@ -1236,10 +1237,6 @@ specialize_py_call( SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS); return -1; } - if (code->co_argcount != nargs) { - SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return -1; - } if ((flags & CO_OPTIMIZED) == 0) { SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED); return -1; @@ -1248,13 +1245,31 @@ specialize_py_call( SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS); return -1; } - _PyAdaptiveEntry *cache0 = &cache->adaptive; + int argcount = code->co_argcount; + int defcount = func->func_defaults == NULL ? 0 : (int)PyTuple_GET_SIZE(func->func_defaults); + assert(defcount <= argcount); + int min_args = argcount-defcount; + if (nargs > argcount || nargs < min_args) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return -1; + } + assert(nargs <= argcount && nargs >= min_args); + int defstart = nargs - min_args; + int deflen = argcount - nargs; + assert(defstart >= 0 && deflen >= 0); + assert(deflen == 0 || func->func_defaults != NULL); + if (defstart > 0xffff || deflen > 0xffff) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE); + return -1; + } int version = _PyFunction_GetVersionForCurrentState(func); if (version == 0) { SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_VERSIONS); return -1; } - cache0->version = version; + cache1->func_version = version; + cache1->defaults_start = defstart; + cache1->defaults_len = deflen; *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_PY_SIMPLE, _Py_OPARG(*instr)); return 0; } @@ -1265,7 +1280,6 @@ _Py_Specialize_CallFunction( PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; int fail; if (PyCFunction_CheckExact(callable)) { fail = specialize_c_call(callable, instr, nargs, cache); @@ -1280,6 +1294,7 @@ _Py_Specialize_CallFunction( SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OTHER); fail = -1; } + _PyAdaptiveEntry *cache0 = &cache->adaptive; if (fail) { STAT_INC(CALL_FUNCTION, specialization_failure); assert(!PyErr_Occurred()); From b844d5546508d85b691cd8d84d5933eff9ba1f23 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 20 Oct 2021 11:57:40 +0100 Subject: [PATCH 11/12] Add NEWS. --- .../Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst new file mode 100644 index 00000000000000..e1ab98a32849c1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst @@ -0,0 +1,2 @@ +Specialize simple calls to Python functions (no * or *args, no closure, +etc.) From 6f1bfab39a20961ef5ce0170f90b1c2bfea766de Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 20 Oct 2021 12:09:41 +0100 Subject: [PATCH 12/12] Tidy up news item. 
--- .../Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst index e1ab98a32849c1..6ab1d05603db87 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst @@ -1,2 +1 @@ -Specialize simple calls to Python functions (no * or *args, no closure, -etc.) +Specialize simple calls to Python functions (no starargs, keyword dict, or closure)