Skip to content

Commit 8857baa

Browse files
committed
bpo-36616: optimize handling of thread state in function call code
Thanks to Mark Shannon for the idea
1 parent f1464f4 commit 8857baa

File tree

1 file changed

+109
-94
lines changed

1 file changed

+109
-94
lines changed

Python/ceval.c

Lines changed: 109 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,10 @@ extern int _PyObject_GetMethod(PyObject *, PyObject *, PyObject **);
3636
typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
3737

3838
/* Forward declarations */
39-
Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t,
40-
PyObject *);
41-
static PyObject * do_call_core(PyObject *, PyObject *, PyObject *);
39+
static PyObject * profile_call(PyThreadState *, PyObject *,
40+
PyObject *, PyObject *);
41+
Py_LOCAL_INLINE(PyObject *) call_function(PyThreadState *,
42+
PyObject ***, Py_ssize_t, PyObject *);
4243

4344
#ifdef LLTRACE
4445
static int lltrace;
@@ -3241,9 +3242,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
32413242

32423243
case TARGET(CALL_METHOD): {
32433244
/* Designed to work in tandem with LOAD_METHOD. */
3244-
PyObject **sp, *res, *meth;
3245-
3246-
sp = stack_pointer;
3245+
PyObject *res, *meth;
32473246

32483247
meth = PEEK(oparg + 2);
32493248
if (meth == NULL) {
@@ -3261,8 +3260,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
32613260
`callable` will be POPed by call_function.
32623261
NULL will be POPed manually later.
32633262
*/
3264-
res = call_function(&sp, oparg, NULL);
3265-
stack_pointer = sp;
3263+
res = call_function(tstate, &stack_pointer, oparg, NULL);
32663264
(void)POP(); /* POP the NULL. */
32673265
}
32683266
else {
@@ -3278,8 +3276,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
32783276
We'll be passing `oparg + 1` to call_function, to
32793277
make it accept the `self` as a first argument.
32803278
*/
3281-
res = call_function(&sp, oparg + 1, NULL);
3282-
stack_pointer = sp;
3279+
res = call_function(tstate, &stack_pointer, oparg + 1, NULL);
32833280
}
32843281

32853282
PUSH(res);
@@ -3290,10 +3287,8 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
32903287

32913288
case TARGET(CALL_FUNCTION): {
32923289
PREDICTED(CALL_FUNCTION);
3293-
PyObject **sp, *res;
3294-
sp = stack_pointer;
3295-
res = call_function(&sp, oparg, NULL);
3296-
stack_pointer = sp;
3290+
PyObject *res;
3291+
res = call_function(tstate, &stack_pointer, oparg, NULL);
32973292
PUSH(res);
32983293
if (res == NULL) {
32993294
goto error;
@@ -3302,13 +3297,11 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
33023297
}
33033298

33043299
case TARGET(CALL_FUNCTION_KW): {
3305-
PyObject **sp, *res, *names;
3300+
PyObject *res, *names;
33063301

33073302
names = POP();
33083303
assert(PyTuple_CheckExact(names) && PyTuple_GET_SIZE(names) <= oparg);
3309-
sp = stack_pointer;
3310-
res = call_function(&sp, oparg, names);
3311-
stack_pointer = sp;
3304+
res = call_function(tstate, &stack_pointer, oparg, names);
33123305
PUSH(res);
33133306
Py_DECREF(names);
33143307

@@ -3351,7 +3344,18 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
33513344
}
33523345
assert(PyTuple_CheckExact(callargs));
33533346

3354-
result = do_call_core(func, callargs, kwargs);
3347+
if (tstate->use_tracing) {
3348+
result = profile_call(tstate, func, callargs, kwargs);
3349+
}
3350+
else if (PyCFunction_Check(func)) {
3351+
result = _PyCFunction_FastCallDict(func,
3352+
_PyTuple_ITEMS(callargs),
3353+
PyTuple_GET_SIZE(callargs),
3354+
kwargs);
3355+
}
3356+
else {
3357+
result = PyObject_Call(func, callargs, kwargs);
3358+
}
33553359
Py_DECREF(func);
33563360
Py_DECREF(callargs);
33573361
Py_XDECREF(kwargs);
@@ -4624,7 +4628,7 @@ PyEval_GetFuncDesc(PyObject *func)
46244628
}
46254629

46264630
#define C_TRACE(x, call) \
4627-
if (tstate->use_tracing && tstate->c_profilefunc) { \
4631+
if (tstate->c_profilefunc) { \
46284632
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
46294633
tstate, tstate->frame, \
46304634
PyTrace_C_CALL, func)) { \
@@ -4652,55 +4656,103 @@ if (tstate->use_tracing && tstate->c_profilefunc) { \
46524656
} \
46534657
} else { \
46544658
x = call; \
4659+
}
4660+
4661+
4662+
/* Call function when profiling is enabled */
4663+
_Py_NO_INLINE static PyObject *
4664+
profile_fastcall(PyThreadState *tstate, PyObject *func,
4665+
PyObject *const *argv, Py_ssize_t nargs,
4666+
PyObject *kwnames)
4667+
{
4668+
PyObject *result;
4669+
if (PyCFunction_Check(func)) {
4670+
C_TRACE(result, _PyCFunction_FastCallKeywords(func, argv, nargs, kwnames));
4671+
return result;
46554672
}
4673+
else if (Py_TYPE(func) == &PyMethodDescr_Type && nargs > 0) {
4674+
/* We need to create a temporary bound method as argument
4675+
for profiling.
4676+
4677+
If nargs == 0, then this cannot work because we have no
4678+
"self". In any case, the call itself would raise
4679+
TypeError (foo needs an argument), so we just skip
4680+
profiling. */
4681+
PyObject *self = *(argv++);
4682+
nargs--;
4683+
func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject*)Py_TYPE(self));
4684+
if (func == NULL) {
4685+
return NULL;
4686+
}
4687+
C_TRACE(result, _PyCFunction_FastCallKeywords(func,
4688+
argv, nargs,
4689+
kwnames));
4690+
Py_DECREF(func);
4691+
return result;
4692+
}
4693+
return _PyObject_FastCallKeywords(func, argv, nargs, kwnames);
4694+
}
4695+
4696+
/* Call function when profiling is enabled */
4697+
_Py_NO_INLINE static PyObject *
4698+
profile_call(PyThreadState *tstate, PyObject *func,
4699+
PyObject *args, PyObject *kwdict)
4700+
{
4701+
PyObject *result;
4702+
PyObject * const* argv = _PyTuple_ITEMS(args);
4703+
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
4704+
4705+
if (PyCFunction_Check(func)) {
4706+
C_TRACE(result, _PyCFunction_FastCallDict(func, argv, nargs, kwdict));
4707+
return result;
4708+
}
4709+
else if (Py_TYPE(func) == &PyMethodDescr_Type && nargs > 0) {
4710+
/* We need to create a temporary bound method as argument
4711+
for profiling.
4712+
4713+
If nargs == 0, then this cannot work because we have no
4714+
"self". In any case, the call itself would raise
4715+
TypeError (foo needs an argument), so we just skip
4716+
profiling. */
4717+
PyObject *self = *(argv++);
4718+
nargs--;
4719+
func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject*)Py_TYPE(self));
4720+
if (func == NULL) {
4721+
return NULL;
4722+
}
4723+
C_TRACE(result, _PyCFunction_FastCallDict(func,
4724+
argv, nargs,
4725+
kwdict));
4726+
Py_DECREF(func);
4727+
return result;
4728+
}
4729+
return PyObject_Call(func, args, kwdict);
4730+
}
4731+
46564732

46574733
/* Issue #29227: Inline call_function() into _PyEval_EvalFrameDefault()
46584734
to reduce the stack consumption. */
46594735
Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION
4660-
call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
4736+
call_function(PyThreadState *tstate, PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
46614737
{
4662-
PyObject **pfunc = (*pp_stack) - oparg - 1;
4738+
PyObject **argv = (*pp_stack) - oparg;
4739+
PyObject **pfunc = argv - 1;
46634740
PyObject *func = *pfunc;
46644741
PyObject *x, *w;
46654742
Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
46664743
Py_ssize_t nargs = oparg - nkwargs;
4667-
PyObject **stack = (*pp_stack) - nargs - nkwargs;
46684744

4669-
/* Always dispatch PyCFunction first, because these are
4670-
presumed to be the most frequent callable object.
4671-
*/
4672-
if (PyCFunction_Check(func)) {
4673-
PyThreadState *tstate = _PyThreadState_GET();
4674-
C_TRACE(x, _PyCFunction_FastCallKeywords(func, stack, nargs, kwnames));
4745+
if (tstate->use_tracing) {
4746+
x = profile_fastcall(tstate, func, argv, nargs, kwnames);
4747+
}
4748+
else if (PyCFunction_Check(func)) {
4749+
x = _PyCFunction_FastCallKeywords(func, argv, nargs, kwnames);
46754750
}
46764751
else if (Py_TYPE(func) == &PyMethodDescr_Type) {
4677-
PyThreadState *tstate = _PyThreadState_GET();
4678-
if (nargs > 0 && tstate->use_tracing) {
4679-
/* We need to create a temporary bound method as argument
4680-
for profiling.
4681-
4682-
If nargs == 0, then this cannot work because we have no
4683-
"self". In any case, the call itself would raise
4684-
TypeError (foo needs an argument), so we just skip
4685-
profiling. */
4686-
PyObject *self = stack[0];
4687-
func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject*)Py_TYPE(self));
4688-
if (func != NULL) {
4689-
C_TRACE(x, _PyCFunction_FastCallKeywords(func,
4690-
stack+1, nargs-1,
4691-
kwnames));
4692-
Py_DECREF(func);
4693-
}
4694-
else {
4695-
x = NULL;
4696-
}
4697-
}
4698-
else {
4699-
x = _PyMethodDescr_FastCallKeywords(func, stack, nargs, kwnames);
4700-
}
4752+
x = _PyMethodDescr_FastCallKeywords(func, argv, nargs, kwnames);
47014753
}
47024754
else {
4703-
if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
4755+
if (PyMethod_Check(func)) {
47044756
/* Optimize access to bound methods. Reuse the Python stack
47054757
to pass 'self' as the first argument, replace 'func'
47064758
with 'self'. It avoids the creation of a new temporary tuple
@@ -4712,17 +4764,17 @@ call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
47124764
Py_INCREF(func);
47134765
Py_SETREF(*pfunc, self);
47144766
nargs++;
4715-
stack--;
4767+
argv--;
47164768
}
47174769
else {
47184770
Py_INCREF(func);
47194771
}
47204772

47214773
if (PyFunction_Check(func)) {
4722-
x = _PyFunction_FastCallKeywords(func, stack, nargs, kwnames);
4774+
x = _PyFunction_FastCallKeywords(func, argv, nargs, kwnames);
47234775
}
47244776
else {
4725-
x = _PyObject_FastCallKeywords(func, stack, nargs, kwnames);
4777+
x = _PyObject_FastCallKeywords(func, argv, nargs, kwnames);
47264778
}
47274779
Py_DECREF(func);
47284780
}
@@ -4738,43 +4790,6 @@ call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
47384790
return x;
47394791
}
47404792

4741-
static PyObject *
4742-
do_call_core(PyObject *func, PyObject *callargs, PyObject *kwdict)
4743-
{
4744-
PyObject *result;
4745-
4746-
if (PyCFunction_Check(func)) {
4747-
PyThreadState *tstate = _PyThreadState_GET();
4748-
C_TRACE(result, PyCFunction_Call(func, callargs, kwdict));
4749-
return result;
4750-
}
4751-
else if (Py_TYPE(func) == &PyMethodDescr_Type) {
4752-
PyThreadState *tstate = _PyThreadState_GET();
4753-
Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
4754-
if (nargs > 0 && tstate->use_tracing) {
4755-
/* We need to create a temporary bound method as argument
4756-
for profiling.
4757-
4758-
If nargs == 0, then this cannot work because we have no
4759-
"self". In any case, the call itself would raise
4760-
TypeError (foo needs an argument), so we just skip
4761-
profiling. */
4762-
PyObject *self = PyTuple_GET_ITEM(callargs, 0);
4763-
func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject*)Py_TYPE(self));
4764-
if (func == NULL) {
4765-
return NULL;
4766-
}
4767-
4768-
C_TRACE(result, _PyCFunction_FastCallDict(func,
4769-
&_PyTuple_ITEMS(callargs)[1],
4770-
nargs - 1,
4771-
kwdict));
4772-
Py_DECREF(func);
4773-
return result;
4774-
}
4775-
}
4776-
return PyObject_Call(func, callargs, kwdict);
4777-
}
47784793

47794794
/* Extract a slice index from a PyLong or an object with the
47804795
nb_index slot defined, and store in *pi.

0 commit comments

Comments
 (0)