From 65349e5bad19f1f3bae85d9fe95b00d1e4040fd6 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 13 Apr 2021 11:20:26 +0100 Subject: [PATCH 01/21] Remove 'zombie' frames. We won't need them once we are allocating fixed-size frames. --- Include/cpython/code.h | 1 - Lib/test/test_gdb.py | 11 ++++---- Objects/codeobject.c | 3 --- Objects/frameobject.c | 61 +++++++++++------------------------------- 4 files changed, 21 insertions(+), 55 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 330f1f54d15203..1b45d5269dec82 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -41,7 +41,6 @@ struct PyCodeObject { PyObject *co_linetable; /* string (encoding addr<->lineno mapping) See Objects/lnotab_notes.txt for details. */ PyObject *co_exceptiontable; /* Byte string encoding exception handling table */ - void *co_zombieframe; /* for optimization only (see frameobject.c) */ PyObject *co_weakreflist; /* to support weakrefs to code objects */ /* Scratch space for extra data relating to the code object. 
Type is a void* to keep the format private in codeobject.c to force diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py index 22c75bae987219..7bdef25c76384a 100644 --- a/Lib/test/test_gdb.py +++ b/Lib/test/test_gdb.py @@ -666,15 +666,16 @@ def test_builtin_method(self): def test_frames(self): gdb_output = self.get_stack_trace(''' +import sys def foo(a, b, c): - pass + return sys._getframe(0) -foo(3, 4, 5) -id(foo.__code__)''', +f = foo(3, 4, 5) +id(f)''', breakpoint='builtin_id', - cmds_after_breakpoint=['print (PyFrameObject*)(((PyCodeObject*)v)->co_zombieframe)'] + cmds_after_breakpoint=['print (PyFrameObject*)v'] ) - self.assertTrue(re.match(r'.*\s+\$1 =\s+Frame 0x-?[0-9a-f]+, for file , line 3, in foo \(\)\s+.*', + self.assertTrue(re.match(r'.*\s+\$1 =\s+Frame 0x-?[0-9a-f]+, for file , line 4, in foo \(a=3.*', gdb_output, re.DOTALL), 'Unexpected gdb representation: %r\n%s' % (gdb_output, gdb_output)) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index e981e39aaf1240..bd6b439bbea378 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -263,7 +263,6 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, co->co_linetable = linetable; Py_INCREF(exceptiontable); co->co_exceptiontable = exceptiontable; - co->co_zombieframe = NULL; co->co_weakreflist = NULL; co->co_extra = NULL; @@ -674,8 +673,6 @@ code_dealloc(PyCodeObject *co) Py_XDECREF(co->co_exceptiontable); if (co->co_cell2arg != NULL) PyMem_Free(co->co_cell2arg); - if (co->co_zombieframe != NULL) - PyObject_GC_Del(co->co_zombieframe); if (co->co_weakreflist != NULL) PyObject_ClearWeakRefs((PyObject*)co); PyObject_Free(co); diff --git a/Objects/frameobject.c b/Objects/frameobject.c index ae8cdcfb92d6b2..b1a100d8bd3181 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -558,29 +558,10 @@ static PyGetSetDef frame_getsetlist[] = { }; /* Stack frames are allocated and deallocated at a considerable rate. 
- In an attempt to improve the speed of function calls, we: - - 1. Hold a single "zombie" frame on each code object. This retains - the allocated and initialised frame object from an invocation of - the code object. The zombie is reanimated the next time we need a - frame object for that code object. Doing this saves the malloc/ - realloc required when using a free_list frame that isn't the - correct size. It also saves some field initialisation. - - In zombie mode, no field of PyFrameObject holds a reference, but - the following fields are still valid: - - * ob_type, ob_size, f_code, f_valuestack; - - * f_locals, f_trace are NULL; - - * f_localsplus does not require re-allocation and - the local variables in f_localsplus are NULL. - - 2. We also maintain a separate free list of stack frames (just like - floats are allocated in a special way -- see floatobject.c). When - a stack frame is on the free list, only the following members have - a meaning: + In an attempt to improve the speed of function calls, we maintain + a separate free list of stack frames (just like floats are + allocated in a special way -- see floatobject.c). 
When a stack + frame is on the free list, only the following members have a meaning: ob_type == &Frametype f_back next item on free list, or NULL f_stacksize size of value stack @@ -628,23 +609,18 @@ frame_dealloc(PyFrameObject *f) Py_CLEAR(f->f_trace); PyCodeObject *co = f->f_code; - if (co->co_zombieframe == NULL) { - co->co_zombieframe = f; - } - else { - struct _Py_frame_state *state = get_frame_state(); + struct _Py_frame_state *state = get_frame_state(); #ifdef Py_DEBUG - // frame_dealloc() must not be called after _PyFrame_Fini() - assert(state->numfree != -1); + // frame_dealloc() must not be called after _PyFrame_Fini() + assert(state->numfree != -1); #endif - if (state->numfree < PyFrame_MAXFREELIST) { - ++state->numfree; - f->f_back = state->free_list; - state->free_list = f; - } - else { - PyObject_GC_Del(f); - } + if (state->numfree < PyFrame_MAXFREELIST) { + ++state->numfree; + f->f_back = state->free_list; + state->free_list = f; + } + else { + PyObject_GC_Del(f); } Py_DECREF(co); @@ -804,14 +780,7 @@ _Py_IDENTIFIER(__builtins__); static inline PyFrameObject* frame_alloc(PyCodeObject *code) { - PyFrameObject *f = code->co_zombieframe; - if (f != NULL) { - code->co_zombieframe = NULL; - _Py_NewReference((PyObject *)f); - assert(f->f_code == code); - return f; - } - + PyFrameObject *f; Py_ssize_t ncells = PyTuple_GET_SIZE(code->co_cellvars); Py_ssize_t nfrees = PyTuple_GET_SIZE(code->co_freevars); Py_ssize_t extras = code->co_stacksize + code->co_nlocals + ncells + nfrees; From c1ac3ef2a2a76dda219ada9aec074c41b4e8379b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 13 Apr 2021 12:23:06 +0100 Subject: [PATCH 02/21] Add co_nlocalsplus field to code object to avoid recomputing size of locals + frees + cells. 
--- Include/cpython/code.h | 1 + Objects/codeobject.c | 2 ++ Objects/frameobject.c | 20 +++++--------------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 1b45d5269dec82..575a4b72b2e0bc 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -40,6 +40,7 @@ struct PyCodeObject { PyObject *co_name; /* unicode (name, for reference) */ PyObject *co_linetable; /* string (encoding addr<->lineno mapping) See Objects/lnotab_notes.txt for details. */ + int co_nlocalsplus; /* Number of locals + free + cell variables */ PyObject *co_exceptiontable; /* Byte string encoding exception handling table */ PyObject *co_weakreflist; /* to support weakrefs to code objects */ /* Scratch space for extra data relating to the code object. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index bd6b439bbea378..0df1cde2ef66b8 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -239,6 +239,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, co->co_posonlyargcount = posonlyargcount; co->co_kwonlyargcount = kwonlyargcount; co->co_nlocals = nlocals; + co->co_nlocalsplus = nlocals + + PyTuple_GET_SIZE(freevars) + PyTuple_GET_SIZE(cellvars); co->co_stacksize = stacksize; co->co_flags = flags; Py_INCREF(code); diff --git a/Objects/frameobject.c b/Objects/frameobject.c index b1a100d8bd3181..4faa1fa2368a42 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -631,9 +631,7 @@ static inline Py_ssize_t frame_nslots(PyFrameObject *frame) { PyCodeObject *code = frame->f_code; - return (code->co_nlocals - + PyTuple_GET_SIZE(code->co_cellvars) - + PyTuple_GET_SIZE(code->co_freevars)); + return code->co_nlocalsplus; } static int @@ -707,14 +705,10 @@ PyDoc_STRVAR(clear__doc__, static PyObject * frame_sizeof(PyFrameObject *f, PyObject *Py_UNUSED(ignored)) { - Py_ssize_t res, extras, ncells, nfrees; - + Py_ssize_t res; PyCodeObject *code = f->f_code; - ncells = 
PyTuple_GET_SIZE(code->co_cellvars); - nfrees = PyTuple_GET_SIZE(code->co_freevars); - extras = code->co_stacksize + code->co_nlocals + ncells + nfrees; /* subtract one as it is already included in PyFrameObject */ - res = sizeof(PyFrameObject) + (extras-1) * sizeof(PyObject *); + res = sizeof(PyFrameObject) + (code->co_nlocalsplus+code->co_stacksize-1) * sizeof(PyObject *); return PyLong_FromSsize_t(res); } @@ -781,9 +775,7 @@ static inline PyFrameObject* frame_alloc(PyCodeObject *code) { PyFrameObject *f; - Py_ssize_t ncells = PyTuple_GET_SIZE(code->co_cellvars); - Py_ssize_t nfrees = PyTuple_GET_SIZE(code->co_freevars); - Py_ssize_t extras = code->co_stacksize + code->co_nlocals + ncells + nfrees; + Py_ssize_t extras = code->co_nlocalsplus + code->co_stacksize; struct _Py_frame_state *state = get_frame_state(); if (state->free_list == NULL) { @@ -811,9 +803,7 @@ frame_alloc(PyCodeObject *code) } _Py_NewReference((PyObject *)f); } - - extras = code->co_nlocals + ncells + nfrees; - f->f_valuestack = f->f_localsplus + extras; + f->f_valuestack = f->f_localsplus + code->co_nlocalsplus; for (Py_ssize_t i=0; i < extras; i++) { f->f_localsplus[i] = NULL; } From 431e1cbef279274d0a805c07709b7eb34f63305d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 13 Apr 2021 13:57:29 +0100 Subject: [PATCH 03/21] Move locals, cells and freevars out of frame object into separate memory buffer. 
--- Include/cpython/frameobject.h | 5 ++-- Objects/frameobject.c | 55 ++++++++++++++++++----------------- Objects/typeobject.c | 6 ++-- Python/ceval.c | 12 ++++---- Tools/gdb/libpython.py | 2 +- 5 files changed, 42 insertions(+), 38 deletions(-) diff --git a/Include/cpython/frameobject.h b/Include/cpython/frameobject.h index 581664775cdedc..434b4cc598b6d3 100644 --- a/Include/cpython/frameobject.h +++ b/Include/cpython/frameobject.h @@ -20,7 +20,7 @@ enum _framestate { typedef signed char PyFrameState; struct _frame { - PyObject_VAR_HEAD + PyObject_HEAD struct _frame *f_back; /* previous frame, or NULL */ PyCodeObject *f_code; /* code segment */ PyObject *f_builtins; /* builtin symbol table (PyDictObject) */ @@ -36,7 +36,8 @@ struct _frame { PyFrameState f_state; /* What state the frame is in */ char f_trace_lines; /* Emit per-line trace events? */ char f_trace_opcodes; /* Emit per-opcode trace events? */ - PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */ + char f_own_locals_memory; /* This frame owns the memory for the locals */ + PyObject **f_localsptr; /* Pointer to locals, cells, free */ }; static inline int _PyFrame_IsRunnable(struct _frame *f) { diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 4faa1fa2368a42..8e3dc6d34cb498 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -590,25 +590,28 @@ frame_dealloc(PyFrameObject *f) } Py_TRASHCAN_SAFE_BEGIN(f) + PyCodeObject *co = f->f_code; + /* Kill all local variables */ - PyObject **valuestack = f->f_valuestack; - for (PyObject **p = f->f_localsplus; p < valuestack; p++) { - Py_CLEAR(*p); + for (int i = 0; i < co->co_nlocalsplus; i++) { + Py_CLEAR(f->f_localsptr[i]); } - /* Free stack */ + /* Free items on stack */ for (int i = 0; i < f->f_stackdepth; i++) { Py_XDECREF(f->f_valuestack[i]); } f->f_stackdepth = 0; - Py_XDECREF(f->f_back); Py_DECREF(f->f_builtins); Py_DECREF(f->f_globals); Py_CLEAR(f->f_locals); Py_CLEAR(f->f_trace); - - PyCodeObject *co = f->f_code; + 
if (f->f_own_locals_memory) { + PyMem_Free(f->f_localsptr); + f->f_localsptr = NULL; + f->f_own_locals_memory = 0; + } struct _Py_frame_state *state = get_frame_state(); #ifdef Py_DEBUG // frame_dealloc() must not be called after _PyFrame_Fini() @@ -645,7 +648,7 @@ frame_traverse(PyFrameObject *f, visitproc visit, void *arg) Py_VISIT(f->f_trace); /* locals */ - PyObject **fastlocals = f->f_localsplus; + PyObject **fastlocals = f->f_localsptr; for (Py_ssize_t i = frame_nslots(f); --i >= 0; ++fastlocals) { Py_VISIT(*fastlocals); } @@ -670,7 +673,7 @@ frame_tp_clear(PyFrameObject *f) Py_CLEAR(f->f_trace); /* locals */ - PyObject **fastlocals = f->f_localsplus; + PyObject **fastlocals = f->f_localsptr; for (Py_ssize_t i = frame_nslots(f); --i >= 0; ++fastlocals) { Py_CLEAR(*fastlocals); } @@ -708,8 +711,10 @@ frame_sizeof(PyFrameObject *f, PyObject *Py_UNUSED(ignored)) Py_ssize_t res; PyCodeObject *code = f->f_code; /* subtract one as it is already included in PyFrameObject */ - res = sizeof(PyFrameObject) + (code->co_nlocalsplus+code->co_stacksize-1) * sizeof(PyObject *); - + res = sizeof(PyFrameObject); + if (f->f_own_locals_memory) { + res += (code->co_nlocalsplus+code->co_stacksize) * sizeof(PyObject *); + } return PyLong_FromSsize_t(res); } @@ -775,11 +780,15 @@ static inline PyFrameObject* frame_alloc(PyCodeObject *code) { PyFrameObject *f; - Py_ssize_t extras = code->co_nlocalsplus + code->co_stacksize; + PyObject **locals = PyMem_Malloc(sizeof(PyObject *)*(code->co_nlocalsplus+code->co_stacksize)); + if (locals == NULL) { + PyErr_NoMemory(); + return NULL; + } struct _Py_frame_state *state = get_frame_state(); if (state->free_list == NULL) { - f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras); + f = PyObject_GC_New(PyFrameObject, &PyFrame_Type); if (f == NULL) { return NULL; } @@ -793,19 +802,13 @@ frame_alloc(PyCodeObject *code) --state->numfree; f = state->free_list; state->free_list = state->free_list->f_back; - if (Py_SIZE(f) < extras) { - 
PyFrameObject *new_f = PyObject_GC_Resize(PyFrameObject, f, extras); - if (new_f == NULL) { - PyObject_GC_Del(f); - return NULL; - } - f = new_f; - } _Py_NewReference((PyObject *)f); } - f->f_valuestack = f->f_localsplus + code->co_nlocalsplus; - for (Py_ssize_t i=0; i < extras; i++) { - f->f_localsplus[i] = NULL; + f->f_localsptr = locals; + f->f_own_locals_memory = 1; + f->f_valuestack = f->f_localsptr + code->co_nlocalsplus; + for (Py_ssize_t i=0; i < code->co_nlocalsplus; i++) { + f->f_localsptr[i] = NULL; } return f; } @@ -995,7 +998,7 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f) Py_TYPE(map)->tp_name); return -1; } - fast = f->f_localsplus; + fast = f->f_localsptr; j = PyTuple_GET_SIZE(map); if (j > co->co_nlocals) j = co->co_nlocals; @@ -1059,7 +1062,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear) if (!PyTuple_Check(map)) return; PyErr_Fetch(&error_type, &error_value, &error_traceback); - fast = f->f_localsplus; + fast = f->f_localsptr; j = PyTuple_GET_SIZE(map); if (j > co->co_nlocals) j = co->co_nlocals; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e511cf9ebfc7e8..84be0a1a2f5238 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -8836,14 +8836,14 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co, return -1; } - PyObject *obj = f->f_localsplus[0]; + PyObject *obj = f->f_localsptr[0]; Py_ssize_t i, n; if (obj == NULL && co->co_cell2arg) { /* The first argument might be a cell. 
*/ n = PyTuple_GET_SIZE(co->co_cellvars); for (i = 0; i < n; i++) { if (co->co_cell2arg[i] == 0) { - PyObject *cell = f->f_localsplus[co->co_nlocals + i]; + PyObject *cell = f->f_localsptr[co->co_nlocals + i]; assert(PyCell_Check(cell)); obj = PyCell_GET(cell); break; @@ -8871,7 +8871,7 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co, if (_PyUnicode_EqualToASCIIId(name, &PyId___class__)) { Py_ssize_t index = co->co_nlocals + PyTuple_GET_SIZE(co->co_cellvars) + i; - PyObject *cell = f->f_localsplus[index]; + PyObject *cell = f->f_localsptr[index]; if (cell == NULL || !PyCell_Check(cell)) { PyErr_SetString(PyExc_RuntimeError, "super(): bad __class__ cell"); diff --git a/Python/ceval.c b/Python/ceval.c index 2e15eea48003e0..957c6fc10925ff 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1641,8 +1641,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) names = co->co_names; consts = co->co_consts; - fastlocals = f->f_localsplus; - freevars = f->f_localsplus + co->co_nlocals; + fastlocals = f->f_localsptr; + freevars = f->f_localsptr + co->co_nlocals; assert(PyBytes_Check(co->co_code)); assert(PyBytes_GET_SIZE(co->co_code) <= INT_MAX); assert(PyBytes_GET_SIZE(co->co_code) % sizeof(_Py_CODEUNIT) == 0); @@ -4879,8 +4879,8 @@ _PyEval_MakeFrameVector(PyThreadState *tstate, if (f == NULL) { return NULL; } - PyObject **fastlocals = f->f_localsplus; - PyObject **freevars = f->f_localsplus + co->co_nlocals; + PyObject **fastlocals = f->f_localsptr; + PyObject **freevars = f->f_localsptr + co->co_nlocals; /* Create a dictionary for keyword parameters (**kwags) */ PyObject *kwdict; @@ -6429,14 +6429,14 @@ unicode_concatenate(PyThreadState *tstate, PyObject *v, PyObject *w, switch (opcode) { case STORE_FAST: { - PyObject **fastlocals = f->f_localsplus; + PyObject **fastlocals = f->f_localsptr; if (GETLOCAL(oparg) == v) SETLOCAL(oparg, NULL); break; } case STORE_DEREF: { - PyObject **freevars = (f->f_localsplus + + PyObject 
**freevars = (f->f_localsptr + f->f_code->co_nlocals); PyObject *c = freevars[oparg]; if (PyCell_GET(c) == v) { diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 270aeb426eb5b3..ae2c1799eb4423 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -879,7 +879,7 @@ def iter_locals(self): if self.is_optimized_out(): return - f_localsplus = self.field('f_localsplus') + f_localsplus = self.field('f_localsptr') for i in safe_range(self.co_nlocals): pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) if not pyop_value.is_null(): From 24a77d8be32ab1cb6908096e8b6094cc9021251f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 13 Apr 2021 16:45:46 +0100 Subject: [PATCH 04/21] Use per-threadstate allocated memory chunks for local variables. Dumb and slow implementation. --- Include/cpython/frameobject.h | 4 +- Include/internal/pycore_pystate.h | 3 ++ Lib/test/test_sys.py | 6 +-- Objects/frameobject.c | 82 ++++++++++++++++++---------- Python/ceval.c | 89 +++++++++++++++++++------------ Python/pystate.c | 25 +++++++++ 6 files changed, 141 insertions(+), 68 deletions(-) diff --git a/Include/cpython/frameobject.h b/Include/cpython/frameobject.h index 434b4cc598b6d3..16257450a39b65 100644 --- a/Include/cpython/frameobject.h +++ b/Include/cpython/frameobject.h @@ -63,7 +63,7 @@ PyAPI_FUNC(PyFrameObject *) PyFrame_New(PyThreadState *, PyCodeObject *, /* only internal use */ PyFrameObject* -_PyFrame_New_NoTrack(PyThreadState *, PyFrameConstructor *, PyObject *); +_PyFrame_New_NoTrack(PyThreadState *, PyFrameConstructor *, PyObject *, PyObject **); /* The rest of the interface is specific for frame objects */ @@ -78,3 +78,5 @@ PyAPI_FUNC(void) PyFrame_FastToLocals(PyFrameObject *); PyAPI_FUNC(void) _PyFrame_DebugMallocStats(FILE *out); PyAPI_FUNC(PyFrameObject *) PyFrame_GetBack(PyFrameObject *frame); + +int _PyFrame_MakeCopyOfLocals(PyFrameObject *f); diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 
4b894f3eff4967..6601ce2f80bf7e 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -147,6 +147,9 @@ PyAPI_FUNC(int) _PyState_AddModule( PyAPI_FUNC(int) _PyOS_InterruptOccurred(PyThreadState *tstate); +PyObject **_PyThreadState_PushLocals(PyThreadState *, int size); +void _PyThreadState_PopLocals(PyThreadState *, PyObject **); + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 4f266894bfceef..e497c52e13666d 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1274,11 +1274,7 @@ class C(object): pass # frame import inspect x = inspect.currentframe() - ncells = len(x.f_code.co_cellvars) - nfrees = len(x.f_code.co_freevars) - localsplus = x.f_code.co_stacksize + x.f_code.co_nlocals +\ - ncells + nfrees - check(x, vsize('8P3i3c' + localsplus*'P')) + check(x, size('8P3i4cP')) # function def func(): pass check(func, size('14P')) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 8e3dc6d34cb498..0eda8fd4240f2a 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -593,13 +593,16 @@ frame_dealloc(PyFrameObject *f) PyCodeObject *co = f->f_code; /* Kill all local variables */ - for (int i = 0; i < co->co_nlocalsplus; i++) { - Py_CLEAR(f->f_localsptr[i]); - } - - /* Free items on stack */ - for (int i = 0; i < f->f_stackdepth; i++) { - Py_XDECREF(f->f_valuestack[i]); + if (f->f_own_locals_memory) { + for (int i = 0; i < co->co_nlocalsplus; i++) { + Py_CLEAR(f->f_localsptr[i]); + } + /* Free items on stack */ + for (int i = 0; i < f->f_stackdepth; i++) { + Py_XDECREF(f->f_valuestack[i]); + } + PyMem_Free(f->f_localsptr); + f->f_own_locals_memory = 0; } f->f_stackdepth = 0; Py_XDECREF(f->f_back); @@ -607,11 +610,6 @@ frame_dealloc(PyFrameObject *f) Py_DECREF(f->f_globals); Py_CLEAR(f->f_locals); Py_CLEAR(f->f_trace); - if (f->f_own_locals_memory) { - PyMem_Free(f->f_localsptr); - f->f_localsptr = NULL; - f->f_own_locals_memory = 0; - } struct _Py_frame_state 
*state = get_frame_state(); #ifdef Py_DEBUG // frame_dealloc() must not be called after _PyFrame_Fini() @@ -709,10 +707,9 @@ static PyObject * frame_sizeof(PyFrameObject *f, PyObject *Py_UNUSED(ignored)) { Py_ssize_t res; - PyCodeObject *code = f->f_code; - /* subtract one as it is already included in PyFrameObject */ res = sizeof(PyFrameObject); if (f->f_own_locals_memory) { + PyCodeObject *code = f->f_code; res += (code->co_nlocalsplus+code->co_stacksize) * sizeof(PyObject *); } return PyLong_FromSsize_t(res); @@ -777,19 +774,32 @@ PyTypeObject PyFrame_Type = { _Py_IDENTIFIER(__builtins__); static inline PyFrameObject* -frame_alloc(PyCodeObject *code) +frame_alloc(PyCodeObject *code, PyObject **localsarray) { + int owns; PyFrameObject *f; - PyObject **locals = PyMem_Malloc(sizeof(PyObject *)*(code->co_nlocalsplus+code->co_stacksize)); - if (locals == NULL) { - PyErr_NoMemory(); - return NULL; + if (localsarray == NULL) { + localsarray = PyMem_Malloc(sizeof(PyObject *)*(code->co_nlocalsplus+code->co_stacksize)); + if (localsarray == NULL) { + PyErr_NoMemory(); + return NULL; + } + for (Py_ssize_t i=0; i < code->co_nlocalsplus; i++) { + localsarray[i] = NULL; + } + owns = 1; + } + else { + owns = 0; } struct _Py_frame_state *state = get_frame_state(); if (state->free_list == NULL) { f = PyObject_GC_New(PyFrameObject, &PyFrame_Type); if (f == NULL) { + if (owns) { + PyMem_Free(localsarray); + } return NULL; } } @@ -804,18 +814,36 @@ frame_alloc(PyCodeObject *code) state->free_list = state->free_list->f_back; _Py_NewReference((PyObject *)f); } - f->f_localsptr = locals; - f->f_own_locals_memory = 1; + f->f_localsptr = localsarray; + f->f_own_locals_memory = owns; f->f_valuestack = f->f_localsptr + code->co_nlocalsplus; - for (Py_ssize_t i=0; i < code->co_nlocalsplus; i++) { - f->f_localsptr[i] = NULL; - } return f; } +int +_PyFrame_MakeCopyOfLocals(PyFrameObject *f) +{ + if (f->f_own_locals_memory) { + return 0; + } + PyObject **copy = PyMem_Malloc(sizeof(PyObject 
*)*(f->f_code->co_nlocalsplus+f->f_code->co_stacksize)); + if (copy == NULL) { + PyErr_NoMemory(); + return -1; + } + for (int i = 0; i < f->f_code->co_nlocalsplus+f->f_stackdepth; i++) { + PyObject *o = f->f_localsptr[i]; + Py_XINCREF(o); + copy[i] = o; + } + f->f_own_locals_memory = 1; + f->f_localsptr = copy; + f->f_valuestack = f->f_localsptr + f->f_code->co_nlocalsplus; + return 0; +} PyFrameObject* _Py_HOT_FUNCTION -_PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *locals) +_PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *locals, PyObject **localsarray) { assert(con != NULL); assert(con->fc_globals != NULL); @@ -823,7 +851,7 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *l assert(con->fc_code != NULL); assert(locals == NULL || PyMapping_Check(locals)); - PyFrameObject *f = frame_alloc((PyCodeObject *)con->fc_code); + PyFrameObject *f = frame_alloc((PyCodeObject *)con->fc_code, localsarray); if (f == NULL) { return NULL; } @@ -865,7 +893,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, .fc_kwdefaults = NULL, .fc_closure = NULL }; - PyFrameObject *f = _PyFrame_New_NoTrack(tstate, &desc, locals); + PyFrameObject *f = _PyFrame_New_NoTrack(tstate, &desc, locals, NULL); if (f) { _PyObject_GC_TRACK(f); } diff --git a/Python/ceval.c b/Python/ceval.c index 957c6fc10925ff..29c3b7e0b06b51 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4771,10 +4771,6 @@ positional_only_passed_as_keyword(PyThreadState *tstate, PyCodeObject *co, } -/* Exception table parsing code. - * See Objects/exception_table_notes.txt for details. 
- */ - static inline unsigned char * parse_varint(unsigned char *p, int *result) { int val = p[0] & 63; @@ -4862,25 +4858,14 @@ get_exception_handler(PyCodeObject *code, int index, int *level, int *handler, i return 0; } -PyFrameObject * -_PyEval_MakeFrameVector(PyThreadState *tstate, - PyFrameConstructor *con, PyObject *locals, - PyObject *const *args, Py_ssize_t argcount, - PyObject *kwnames) +static int +initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, + PyObject **fastlocals, PyObject *const *args, + Py_ssize_t argcount, PyObject *kwnames) { - assert(is_tstate_valid(tstate)); - PyCodeObject *co = (PyCodeObject*)con->fc_code; - assert(con->fc_defaults == NULL || PyTuple_CheckExact(con->fc_defaults)); const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount; - - /* Create the frame */ - PyFrameObject *f = _PyFrame_New_NoTrack(tstate, con, locals); - if (f == NULL) { - return NULL; - } - PyObject **fastlocals = f->f_localsptr; - PyObject **freevars = f->f_localsptr + co->co_nlocals; + PyObject **freevars = fastlocals + co->co_nlocals; /* Create a dictionary for keyword parameters (**kwags) */ PyObject *kwdict; @@ -5086,25 +5071,33 @@ _PyEval_MakeFrameVector(PyThreadState *tstate, freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o; } - return f; + return 0; fail: /* Jump here from prelude on failure */ + return -1; - /* decref'ing the frame can cause __del__ methods to get invoked, - which can call back into Python. While we're done with the - current Python frame (f), the associated C stack is still in use, - so recursion_depth must be boosted for the duration. 
- */ - if (Py_REFCNT(f) > 1) { - Py_DECREF(f); - _PyObject_GC_TRACK(f); +} + + +PyFrameObject * +_PyEval_MakeFrameVector(PyThreadState *tstate, + PyFrameConstructor *con, PyObject *locals, + PyObject *const *args, Py_ssize_t argcount, + PyObject *kwnames, PyObject** localsarray) +{ + assert(is_tstate_valid(tstate)); + assert(con->fc_defaults == NULL || PyTuple_CheckExact(con->fc_defaults)); + + /* Create the frame */ + PyFrameObject *f = _PyFrame_New_NoTrack(tstate, con, locals, localsarray); + if (f == NULL) { + return NULL; } - else { - ++tstate->recursion_depth; + if (initialize_locals(tstate, con, f->f_localsptr, args, argcount, kwnames)) { Py_DECREF(f); - --tstate->recursion_depth; + return NULL; } - return NULL; + return f; } static PyObject * @@ -5142,15 +5135,31 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con, PyObject* const* args, size_t argcount, PyObject *kwnames) { + PyObject **localsarray; + PyCodeObject *code = (PyCodeObject *)con->fc_code; + int is_coro = code->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR); + if (is_coro) { + localsarray = NULL; + } + else { + localsarray = _PyThreadState_PushLocals(tstate, code->co_nlocalsplus + code->co_stacksize); + if (localsarray == NULL) { + return NULL; + } + } PyFrameObject *f = _PyEval_MakeFrameVector( - tstate, con, locals, args, argcount, kwnames); + tstate, con, locals, args, argcount, kwnames, localsarray); if (f == NULL) { + if (!is_coro) { + _PyThreadState_PopLocals(tstate, localsarray); + } return NULL; } - if (((PyCodeObject *)con->fc_code)->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { + if (is_coro) { return make_coro(con, f); } PyObject *retval = _PyEval_EvalFrame(tstate, f, 0); + assert(f->f_stackdepth == 0); /* decref'ing the frame can cause __del__ methods to get invoked, which can call back into Python. 
While we're done with the @@ -5160,12 +5169,22 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con, if (Py_REFCNT(f) > 1) { Py_DECREF(f); _PyObject_GC_TRACK(f); + if (_PyFrame_MakeCopyOfLocals(f)) { + Py_XDECREF(retval); + return NULL; + } } else { ++tstate->recursion_depth; Py_DECREF(f); --tstate->recursion_depth; } + assert (!is_coro); + + for (int i = 0; i < code->co_nlocalsplus; i++) { + Py_XDECREF(localsarray[i]); + } + _PyThreadState_PopLocals(tstate, localsarray); return retval; } diff --git a/Python/pystate.c b/Python/pystate.c index aeebd6f61c6d7f..adbcca2530110e 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1969,6 +1969,31 @@ _Py_GetConfig(void) return _PyInterpreterState_GetConfig(tstate->interp); } +/* Dumbest possible (and very inefficient) implementation */ + +PyObject ** +_PyThreadState_PushLocals(PyThreadState *tstate, int size) +{ + (void)tstate; + PyObject **res = PyMem_Malloc(sizeof(PyObject **)*size); + if (res == NULL) { + PyErr_NoMemory(); + return NULL; + } + for (Py_ssize_t i=0; i < size; i++) { + res[i] = NULL; + } + return res; +} + +void +_PyThreadState_PopLocals(PyThreadState *tstate, PyObject **locals) +{ + (void)tstate; + PyMem_Free(locals); +} + + #ifdef __cplusplus } #endif From 19ac31a822ccd49bae196e04737baf94c2cd418d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 14 Apr 2021 10:25:00 +0100 Subject: [PATCH 05/21] Make per-thread data-stack a contiguous block of memory. 
--- Include/cpython/pystate.h | 4 ++++ Include/internal/pycore_pymem.h | 4 ++++ Objects/obmalloc.c | 14 +++++++++++- Python/ceval.c | 3 +++ Python/pystate.c | 40 +++++++++++++++++++++++++-------- 5 files changed, 55 insertions(+), 10 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index e3ccc543560849..8e27c51514a034 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -149,6 +149,10 @@ struct _ts { CFrame root_cframe; + PyObject **datastack_base; + PyObject **datastack_top; + PyObject **datastack_soft_limit; + PyObject **datastack_hard_limit; /* XXX signal handlers should also be here */ }; diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index e4e35c16ce8eda..d1fa158e3a803c 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -95,6 +95,10 @@ struct _PyTraceMalloc_Config { PyAPI_DATA(struct _PyTraceMalloc_Config) _Py_tracemalloc_config; +void *_PyObject_VirtualAlloc(size_t size); +void _PyObject_VirtualFree(void *, size_t size); + + #ifdef __cplusplus } #endif diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index c1c12797aba111..bd607161d55476 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -552,6 +552,18 @@ PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator) *allocator = _PyObject_Arena; } +void * +_PyObject_VirtualAlloc(size_t size) +{ + return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size); +} + +void +_PyObject_VirtualFree(void *obj, size_t size) +{ + return _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size); +} + void PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator) { @@ -3035,7 +3047,7 @@ _PyObject_DebugMallocStats(FILE *out) fputc('\n', out); - /* Account for what all of those arena bytes are being used for. */ + /* Account for what all of those arena bytes are being used for. 
*/ total = printone(out, "# bytes in allocated blocks", allocated_bytes); total += printone(out, "# bytes in available blocks", available_bytes); diff --git a/Python/ceval.c b/Python/ceval.c index 29c3b7e0b06b51..571bb41c4ef8a2 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -5171,6 +5171,9 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con, _PyObject_GC_TRACK(f); if (_PyFrame_MakeCopyOfLocals(f)) { Py_XDECREF(retval); + if (!is_coro) { + _PyThreadState_PopLocals(tstate, localsarray); + } return NULL; } } diff --git a/Python/pystate.c b/Python/pystate.c index adbcca2530110e..44894207f49b8b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -607,6 +607,9 @@ PyInterpreterState_GetDict(PyInterpreterState *interp) return interp->dict; } +#define DATA_STACK_SIZE (62*1024) +#define DATA_STACK_HEADROOM (2*1024) + static PyThreadState * new_threadstate(PyInterpreterState *interp, int init) { @@ -658,6 +661,15 @@ new_threadstate(PyInterpreterState *interp, int init) tstate->context = NULL; tstate->context_ver = 1; + size_t total_size = (DATA_STACK_SIZE+DATA_STACK_HEADROOM); + tstate->datastack_base = _PyObject_VirtualAlloc(sizeof(PyObject *)*total_size); + if (tstate->datastack_base == NULL) { + PyMem_RawFree(tstate); + return NULL; + } + tstate->datastack_top = tstate->datastack_base; + tstate->datastack_hard_limit = tstate->datastack_base + total_size; + tstate->datastack_soft_limit = tstate->datastack_base + DATA_STACK_SIZE; if (init) { _PyThreadState_Init(tstate); @@ -906,7 +918,6 @@ tstate_delete_common(PyThreadState *tstate, } } - static void _PyThreadState_Delete(PyThreadState *tstate, int check_current) { @@ -917,6 +928,7 @@ _PyThreadState_Delete(PyThreadState *tstate, int check_current) } } tstate_delete_common(tstate, gilstate); + _PyObject_VirtualFree(tstate->datastack_base, sizeof(PyObject *)*DATA_STACK_SIZE); PyMem_RawFree(tstate); } @@ -1969,17 +1981,27 @@ _Py_GetConfig(void) return _PyInterpreterState_GetConfig(tstate->interp); } -/* 
Dumbest possible (and very inefficient) implementation */ - PyObject ** _PyThreadState_PushLocals(PyThreadState *tstate, int size) { - (void)tstate; - PyObject **res = PyMem_Malloc(sizeof(PyObject **)*size); - if (res == NULL) { - PyErr_NoMemory(); + PyObject **res = tstate->datastack_top; + PyObject **top = res + size; + if (top >= tstate->datastack_soft_limit) { + if (top >= tstate->datastack_hard_limit) { + if (tstate->recursion_headroom) { + Py_FatalError("Cannot recover from data-stack overflow."); + } + else { + Py_FatalError("Rapid data-stack overflow."); + } + } + tstate->recursion_headroom++; + _PyErr_Format(tstate, PyExc_RecursionError, + "data stack overflow"); + tstate->recursion_headroom--; return NULL; } + tstate->datastack_top = top; for (Py_ssize_t i=0; i < size; i++) { res[i] = NULL; } @@ -1989,8 +2011,8 @@ _PyThreadState_PushLocals(PyThreadState *tstate, int size) void _PyThreadState_PopLocals(PyThreadState *tstate, PyObject **locals) { - (void)tstate; - PyMem_Free(locals); + assert(tstate->datastack_top >= locals); + tstate->datastack_top = locals; } From 2d1a9db1c1da252db34da0d7115c7efbcaf5839d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 14 Apr 2021 16:17:27 +0100 Subject: [PATCH 06/21] Add comments about data stack sizes. --- Python/pystate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/pystate.c b/Python/pystate.c index 44894207f49b8b..3e2263d2bc69bf 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -607,7 +607,11 @@ PyInterpreterState_GetDict(PyInterpreterState *interp) return interp->dict; } +/* Size of data stack + * Experimentally this can be set as low as 12k and have all the tests + * pass (64bit linux). 
*/ #define DATA_STACK_SIZE (62*1024) +/* Additional stack space for error recovery */ #define DATA_STACK_HEADROOM (2*1024) static PyThreadState * From 73c49d5aa53c8ebe0b23e3dafe3f0ad91b0a8dcf Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 4 May 2021 14:20:07 +0100 Subject: [PATCH 07/21] Use chunked stack, allows larger stack when needed with reduced memory use most of the time. --- Include/cpython/pystate.h | 11 ++-- Include/internal/pycore_pystate.h | 2 +- Python/pystate.c | 88 ++++++++++++++++++++----------- 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 8e27c51514a034..63ba60074d56c7 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -57,6 +57,12 @@ typedef struct _err_stackitem { } _PyErr_StackItem; +typedef struct _stack_chunk { + struct _stack_chunk *previous; + size_t size; + size_t top; + PyObject * data[1]; /* Variable sized */ +} _PyStackChunk; // The PyThreadState typedef is in Include/pystate.h. 
struct _ts { @@ -149,10 +155,9 @@ struct _ts { CFrame root_cframe; - PyObject **datastack_base; + _PyStackChunk *datastack_chunk; PyObject **datastack_top; - PyObject **datastack_soft_limit; - PyObject **datastack_hard_limit; + PyObject **datastack_limit; /* XXX signal handlers should also be here */ }; diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 6601ce2f80bf7e..58e24774e0a3e9 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -147,7 +147,7 @@ PyAPI_FUNC(int) _PyState_AddModule( PyAPI_FUNC(int) _PyOS_InterruptOccurred(PyThreadState *tstate); -PyObject **_PyThreadState_PushLocals(PyThreadState *, int size); +PyObject **_PyThreadState_PushLocals(PyThreadState *, size_t size); void _PyThreadState_PopLocals(PyThreadState *, PyObject **); #ifdef __cplusplus diff --git a/Python/pystate.c b/Python/pystate.c index 3e2263d2bc69bf..4f681fddbaa1f7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -607,12 +607,22 @@ PyInterpreterState_GetDict(PyInterpreterState *interp) return interp->dict; } -/* Size of data stack - * Experimentally this can be set as low as 12k and have all the tests - * pass (64bit linux). 
*/ -#define DATA_STACK_SIZE (62*1024) -/* Additional stack space for error recovery */ -#define DATA_STACK_HEADROOM (2*1024) +/* Minimum size of data stack chunk */ +#define DATA_STACK_CHUNK_SIZE (16*1024) + +static _PyStackChunk* +allocate_chunk(int size_in_bytes, _PyStackChunk* previous) +{ + assert(size_in_bytes % sizeof(PyObject **) == 0); + _PyStackChunk *res = _PyObject_VirtualAlloc(size_in_bytes); + if (res == NULL) { + return NULL; + } + res->previous = previous; + res->size = size_in_bytes; + res->top = 0; + return res; +} static PyThreadState * new_threadstate(PyInterpreterState *interp, int init) @@ -665,15 +675,14 @@ new_threadstate(PyInterpreterState *interp, int init) tstate->context = NULL; tstate->context_ver = 1; - size_t total_size = (DATA_STACK_SIZE+DATA_STACK_HEADROOM); - tstate->datastack_base = _PyObject_VirtualAlloc(sizeof(PyObject *)*total_size); - if (tstate->datastack_base == NULL) { + tstate->datastack_chunk = allocate_chunk(DATA_STACK_CHUNK_SIZE, NULL); + if (tstate->datastack_chunk == NULL) { PyMem_RawFree(tstate); return NULL; } - tstate->datastack_top = tstate->datastack_base; - tstate->datastack_hard_limit = tstate->datastack_base + total_size; - tstate->datastack_soft_limit = tstate->datastack_base + DATA_STACK_SIZE; + /* If top points to entry 0, then _PyThreadState_PopLocals will try to pop this chunk */ + tstate->datastack_top = &tstate->datastack_chunk->data[1]; + tstate->datastack_limit = (PyObject **)(((char *)tstate->datastack_chunk) + DATA_STACK_CHUNK_SIZE); if (init) { _PyThreadState_Init(tstate); @@ -932,7 +941,7 @@ _PyThreadState_Delete(PyThreadState *tstate, int check_current) } } tstate_delete_common(tstate, gilstate); - _PyObject_VirtualFree(tstate->datastack_base, sizeof(PyObject *)*DATA_STACK_SIZE); + _PyObject_VirtualFree(tstate->datastack_chunk, tstate->datastack_chunk->size); PyMem_RawFree(tstate); } @@ -1985,28 +1994,34 @@ _Py_GetConfig(void) return _PyInterpreterState_GetConfig(tstate->interp); } +#define 
MINIMUM_OVERHEAD 1000 + PyObject ** -_PyThreadState_PushLocals(PyThreadState *tstate, int size) +_PyThreadState_PushLocals(PyThreadState *tstate, size_t size) { PyObject **res = tstate->datastack_top; PyObject **top = res + size; - if (top >= tstate->datastack_soft_limit) { - if (top >= tstate->datastack_hard_limit) { - if (tstate->recursion_headroom) { - Py_FatalError("Cannot recover from data-stack overflow."); - } - else { - Py_FatalError("Rapid data-stack overflow."); - } + if (top >= tstate->datastack_limit) { + size_t allocate_size = DATA_STACK_CHUNK_SIZE; + while (allocate_size < sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { + allocate_size *= 2; } - tstate->recursion_headroom++; - _PyErr_Format(tstate, PyExc_RecursionError, - "data stack overflow"); - tstate->recursion_headroom--; - return NULL; + _PyStackChunk *new = allocate_chunk(allocate_size, tstate->datastack_chunk); + if (new == NULL) { + _PyErr_SetString(tstate, PyExc_MemoryError, "Out of memory"); + return NULL; + } + printf("Pushing chunk\n"); + tstate->datastack_chunk->top = tstate->datastack_top - &tstate->datastack_chunk->data[0]; + tstate->datastack_chunk = new; + tstate->datastack_limit = (PyObject **)(((char *)new) + allocate_size); + res = &new->data[0]; + tstate->datastack_top = res + size; + } + else { + tstate->datastack_top = top; } - tstate->datastack_top = top; - for (Py_ssize_t i=0; i < size; i++) { + for (size_t i=0; i < size; i++) { res[i] = NULL; } return res; @@ -2015,8 +2030,19 @@ _PyThreadState_PushLocals(PyThreadState *tstate, int size) void _PyThreadState_PopLocals(PyThreadState *tstate, PyObject **locals) { - assert(tstate->datastack_top >= locals); - tstate->datastack_top = locals; + if (locals == &tstate->datastack_chunk->data[0]) { + printf("Popping chunk\n"); + _PyStackChunk *chunk = tstate->datastack_chunk; + _PyStackChunk *previous = chunk->previous; + tstate->datastack_top = &previous->data[previous->top]; + tstate->datastack_chunk = previous; + 
_PyObject_VirtualFree(chunk, chunk->size); + tstate->datastack_limit = (PyObject **)(((char *)tstate->datastack_chunk) + DATA_STACK_CHUNK_SIZE); + } + else { + assert(tstate->datastack_top >= locals); + tstate->datastack_top = locals; + } } From 8a9ae0156310962abb630a339e53083311404cd0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 4 May 2021 14:37:37 +0100 Subject: [PATCH 08/21] Delete obsolete comment and debug print statements --- Objects/frameobject.c | 15 +-------------- Python/pystate.c | 2 -- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 0eda8fd4240f2a..236314ddc80983 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -564,21 +564,8 @@ static PyGetSetDef frame_getsetlist[] = { frame is on the free list, only the following members have a meaning: ob_type == &Frametype f_back next item on free list, or NULL - f_stacksize size of value stack - ob_size size of localsplus - Note that the value and block stacks are preserved -- this can save - another malloc() call or two (and two free() calls as well!). - Also note that, unlike for integers, each frame object is a - malloc'ed object in its own right -- it is only the actual calls to - malloc() that we are trying to save here, not the administration. - After all, while a typical program may make millions of calls, a - call depth of more than 20 or 30 is probably already exceptional - unless the program contains run-away recursion. I hope. - - Later, PyFrame_MAXFREELIST was added to bound the # of frames saved on - free_list. Else programs creating lots of cyclic trash involving - frames could provoke free_list into growing without bound. 
*/ + /* max value for numfree */ #define PyFrame_MAXFREELIST 200 diff --git a/Python/pystate.c b/Python/pystate.c index 4f681fddbaa1f7..5e83703f6105f8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2011,7 +2011,6 @@ _PyThreadState_PushLocals(PyThreadState *tstate, size_t size) _PyErr_SetString(tstate, PyExc_MemoryError, "Out of memory"); return NULL; } - printf("Pushing chunk\n"); tstate->datastack_chunk->top = tstate->datastack_top - &tstate->datastack_chunk->data[0]; tstate->datastack_chunk = new; tstate->datastack_limit = (PyObject **)(((char *)new) + allocate_size); @@ -2031,7 +2030,6 @@ void _PyThreadState_PopLocals(PyThreadState *tstate, PyObject **locals) { if (locals == &tstate->datastack_chunk->data[0]) { - printf("Popping chunk\n"); _PyStackChunk *chunk = tstate->datastack_chunk; _PyStackChunk *previous = chunk->previous; tstate->datastack_top = &previous->data[previous->top]; From 5a4803c2cdbc9b043d5d7789b7842cc418ecf3d7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 5 May 2021 14:37:38 +0100 Subject: [PATCH 09/21] Move globals and builtins from frame object to per-thread stack. 
--- Include/cpython/frameobject.h | 13 +++-- Include/genobject.h | 2 +- Lib/test/test_sys.py | 2 +- Objects/frameobject.c | 93 ++++++++++++++++++++++++++--------- Objects/genobject.c | 12 ++--- Python/_warnings.c | 2 +- Python/ceval.c | 86 ++++++++++++++++---------------- Python/suggestions.c | 7 +-- Tools/gdb/libpython.py | 18 ++++++- 9 files changed, 152 insertions(+), 83 deletions(-) diff --git a/Include/cpython/frameobject.h b/Include/cpython/frameobject.h index 16257450a39b65..69e57e23c99d58 100644 --- a/Include/cpython/frameobject.h +++ b/Include/cpython/frameobject.h @@ -19,12 +19,16 @@ enum _framestate { typedef signed char PyFrameState; +enum { + FRAME_SPECIALS_GLOBALS_OFFSET = 0, + FRAME_SPECIALS_BUILTINS_OFFSET = 1, + FRAME_SPECIALS_SIZE = 2 +}; + struct _frame { PyObject_HEAD struct _frame *f_back; /* previous frame, or NULL */ PyCodeObject *f_code; /* code segment */ - PyObject *f_builtins; /* builtin symbol table (PyDictObject) */ - PyObject *f_globals; /* global symbol table (PyDictObject) */ PyObject *f_locals; /* local symbol table (any mapping) */ PyObject **f_valuestack; /* points after the last local */ PyObject *f_trace; /* Trace function */ @@ -79,4 +83,7 @@ PyAPI_FUNC(void) _PyFrame_DebugMallocStats(FILE *out); PyAPI_FUNC(PyFrameObject *) PyFrame_GetBack(PyFrameObject *frame); -int _PyFrame_MakeCopyOfLocals(PyFrameObject *f); +int _PyFrame_StealLocals(PyFrameObject *f); + +PyObject *_PyFrame_GetGlobals(PyFrameObject *f); +PyObject *_PyFrame_GetBuiltins(PyFrameObject *f); diff --git a/Include/genobject.h b/Include/genobject.h index e965334a0140c8..094d4e14fbe7cf 100644 --- a/Include/genobject.h +++ b/Include/genobject.h @@ -18,7 +18,7 @@ extern "C" { /* Note: gi_frame can be NULL if the generator is "finished" */ \ PyFrameObject *prefix##_frame; \ /* The code object backing the generator */ \ - PyObject *prefix##_code; \ + PyCodeObject *prefix##_code; \ /* List of weak reference. */ \ PyObject *prefix##_weakreflist; \ /* Name of the generator. 
*/ \ diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index e497c52e13666d..5a9d4c8f0b5f00 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1274,7 +1274,7 @@ class C(object): pass # frame import inspect x = inspect.currentframe() - check(x, size('8P3i4cP')) + check(x, size('6P3i4cP')) # function def func(): pass check(func, size('14P')) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 236314ddc80983..a0a453091e1808 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -13,9 +13,6 @@ static PyMemberDef frame_memberlist[] = { {"f_back", T_OBJECT, OFF(f_back), READONLY}, - {"f_code", T_OBJECT, OFF(f_code), READONLY|PY_AUDIT_READ}, - {"f_builtins", T_OBJECT, OFF(f_builtins), READONLY}, - {"f_globals", T_OBJECT, OFF(f_globals), READONLY}, {"f_trace_lines", T_BOOL, OFF(f_trace_lines), 0}, {"f_trace_opcodes", T_BOOL, OFF(f_trace_opcodes), 0}, {NULL} /* Sentinel */ @@ -38,6 +35,11 @@ frame_getlocals(PyFrameObject *f, void *closure) return f->f_locals; } +static inline PyObject ** +_PyFrame_Specials(PyFrameObject *f) { + return &f->f_valuestack[-FRAME_SPECIALS_SIZE]; +} + int PyFrame_GetLineNumber(PyFrameObject *f) { @@ -71,6 +73,50 @@ frame_getlasti(PyFrameObject *f, void *closure) return PyLong_FromLong(f->f_lasti*2); } +/* Returns a *borrowed* reference. Not part of the API. */ +PyObject * +_PyFrame_GetGlobals(PyFrameObject *f) +{ + return _PyFrame_Specials(f)[FRAME_SPECIALS_GLOBALS_OFFSET]; +} + +/* Returns a *borrowed* reference. Not part of the API. 
*/ +PyObject * +_PyFrame_GetBuiltins(PyFrameObject *f) +{ + return _PyFrame_Specials(f)[FRAME_SPECIALS_BUILTINS_OFFSET]; +} + +static PyObject * +frame_getglobals(PyFrameObject *f, void *closure) +{ + PyObject *globals = _PyFrame_GetGlobals(f); + if (globals == NULL) { + globals = Py_None; + } + Py_INCREF(globals); + return globals; +} + +static PyObject * +frame_getbuiltins(PyFrameObject *f, void *closure) +{ + PyObject *builtins = _PyFrame_GetBuiltins(f); + if (builtins == NULL) { + builtins = Py_None; + } + Py_INCREF(builtins); + return builtins; +} + +static PyObject * +frame_getcode(PyFrameObject *f, void *closure) +{ + if (PySys_Audit("object.__getattr__", "Os", f, "f_code") < 0) { + return NULL; + } + return (PyObject *)PyFrame_GetCode(f); +} /* Given the index of the effective opcode, scan back to construct the oparg with EXTENDED_ARG */ @@ -554,6 +600,9 @@ static PyGetSetDef frame_getsetlist[] = { (setter)frame_setlineno, NULL}, {"f_trace", (getter)frame_gettrace, (setter)frame_settrace, NULL}, {"f_lasti", (getter)frame_getlasti, NULL, NULL}, + {"f_globals", (getter)frame_getglobals, NULL, NULL}, + {"f_builtins", (getter)frame_getbuiltins, NULL, NULL}, + {"f_code", (getter)frame_getcode, NULL, NULL}, {0} }; @@ -581,7 +630,7 @@ frame_dealloc(PyFrameObject *f) /* Kill all local variables */ if (f->f_own_locals_memory) { - for (int i = 0; i < co->co_nlocalsplus; i++) { + for (int i = 0; i < co->co_nlocalsplus+FRAME_SPECIALS_SIZE; i++) { Py_CLEAR(f->f_localsptr[i]); } /* Free items on stack */ @@ -593,8 +642,6 @@ frame_dealloc(PyFrameObject *f) } f->f_stackdepth = 0; Py_XDECREF(f->f_back); - Py_DECREF(f->f_builtins); - Py_DECREF(f->f_globals); Py_CLEAR(f->f_locals); Py_CLEAR(f->f_trace); struct _Py_frame_state *state = get_frame_state(); @@ -618,17 +665,13 @@ frame_dealloc(PyFrameObject *f) static inline Py_ssize_t frame_nslots(PyFrameObject *frame) { - PyCodeObject *code = frame->f_code; - return code->co_nlocalsplus; + return frame->f_valuestack - 
frame->f_localsptr; } static int frame_traverse(PyFrameObject *f, visitproc visit, void *arg) { Py_VISIT(f->f_back); - Py_VISIT(f->f_code); - Py_VISIT(f->f_builtins); - Py_VISIT(f->f_globals); Py_VISIT(f->f_locals); Py_VISIT(f->f_trace); @@ -766,7 +809,8 @@ frame_alloc(PyCodeObject *code, PyObject **localsarray) int owns; PyFrameObject *f; if (localsarray == NULL) { - localsarray = PyMem_Malloc(sizeof(PyObject *)*(code->co_nlocalsplus+code->co_stacksize)); + int size = code->co_nlocalsplus+code->co_stacksize + FRAME_SPECIALS_SIZE; + localsarray = PyMem_Malloc(sizeof(PyObject *)*size); if (localsarray == NULL) { PyErr_NoMemory(); return NULL; @@ -803,29 +847,32 @@ frame_alloc(PyCodeObject *code, PyObject **localsarray) } f->f_localsptr = localsarray; f->f_own_locals_memory = owns; - f->f_valuestack = f->f_localsptr + code->co_nlocalsplus; + f->f_valuestack = f->f_localsptr + code->co_nlocalsplus + FRAME_SPECIALS_SIZE; return f; } int -_PyFrame_MakeCopyOfLocals(PyFrameObject *f) +_PyFrame_StealLocals(PyFrameObject *f) { - if (f->f_own_locals_memory) { - return 0; - } - PyObject **copy = PyMem_Malloc(sizeof(PyObject *)*(f->f_code->co_nlocalsplus+f->f_code->co_stacksize)); + assert(f->f_own_locals_memory == 0); + assert(f->f_stackdepth == 0); + int size = frame_nslots(f); + PyObject **copy = PyMem_Malloc(sizeof(PyObject *)*size); if (copy == NULL) { + for (int i = 0; i < size; i++) { + PyObject *o = f->f_localsptr[i]; + Py_XDECREF(o); + } PyErr_NoMemory(); return -1; } - for (int i = 0; i < f->f_code->co_nlocalsplus+f->f_stackdepth; i++) { + for (int i = 0; i < size; i++) { PyObject *o = f->f_localsptr[i]; - Py_XINCREF(o); copy[i] = o; } f->f_own_locals_memory = 1; f->f_localsptr = copy; - f->f_valuestack = f->f_localsptr + f->f_code->co_nlocalsplus; + f->f_valuestack = copy + size; return 0; } @@ -845,8 +892,8 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *l f->f_back = (PyFrameObject*)Py_XNewRef(tstate->frame); f->f_code = 
(PyCodeObject *)Py_NewRef(con->fc_code); - f->f_builtins = Py_NewRef(con->fc_builtins); - f->f_globals = Py_NewRef(con->fc_globals); + _PyFrame_Specials(f)[FRAME_SPECIALS_BUILTINS_OFFSET] = Py_NewRef(con->fc_builtins); + _PyFrame_Specials(f)[FRAME_SPECIALS_GLOBALS_OFFSET] = Py_NewRef(con->fc_globals); f->f_locals = Py_XNewRef(locals); // f_valuestack initialized by frame_alloc() f->f_trace = NULL; diff --git a/Objects/genobject.c b/Objects/genobject.c index 1889df1d137786..db00d19a3464e0 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -176,7 +176,7 @@ gen_send_ex2(PyGenObject *gen, PyObject *arg, PyObject **presult, } assert(_PyFrame_IsRunnable(f)); - assert(f->f_lasti >= 0 || ((unsigned char *)PyBytes_AS_STRING(f->f_code->co_code))[0] == GEN_START); + assert(f->f_lasti >= 0 || ((unsigned char *)PyBytes_AS_STRING(gen->gi_code->co_code))[0] == GEN_START); /* Push arg onto the frame's value stack */ result = arg ? arg : Py_None; Py_INCREF(result); @@ -331,7 +331,7 @@ _PyGen_yf(PyGenObject *gen) PyFrameObject *f = gen->gi_frame; if (f) { - PyObject *bytecode = f->f_code->co_code; + PyObject *bytecode = gen->gi_code->co_code; unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode); if (f->f_lasti < 0) { @@ -826,8 +826,7 @@ gen_new_with_qualname(PyTypeObject *type, PyFrameObject *f, } gen->gi_frame = f; f->f_gen = (PyObject *) gen; - Py_INCREF(f->f_code); - gen->gi_code = (PyObject *)(f->f_code); + gen->gi_code = PyFrame_GetCode(f); gen->gi_weakreflist = NULL; gen->gi_exc_state.exc_type = NULL; gen->gi_exc_state.exc_value = NULL; @@ -836,7 +835,7 @@ gen_new_with_qualname(PyTypeObject *type, PyFrameObject *f, if (name != NULL) gen->gi_name = name; else - gen->gi_name = ((PyCodeObject *)gen->gi_code)->co_name; + gen->gi_name = gen->gi_code->co_name; Py_INCREF(gen->gi_name); if (qualname != NULL) gen->gi_qualname = qualname; @@ -1167,11 +1166,12 @@ compute_cr_origin(int origin_depth) } frame = PyEval_GetFrame(); for (int i = 0; i < frame_count; 
++i) { - PyCodeObject *code = frame->f_code; + PyCodeObject *code = PyFrame_GetCode(frame); PyObject *frameinfo = Py_BuildValue("OiO", code->co_filename, PyFrame_GetLineNumber(frame), code->co_name); + Py_DECREF(code); if (!frameinfo) { Py_DECREF(cr_origin); return NULL; diff --git a/Python/_warnings.c b/Python/_warnings.c index 2c9a2a76872676..4d8db730b473cf 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -853,7 +853,7 @@ setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno, *lineno = 1; } else { - globals = f->f_globals; + globals = _PyFrame_GetGlobals(f); PyCodeObject *code = PyFrame_GetCode(f); *filename = code->co_filename; Py_DECREF(code); diff --git a/Python/ceval.c b/Python/ceval.c index 571bb41c4ef8a2..09748e4bf0a3e0 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1547,6 +1547,8 @@ eval_frame_handle_pending(PyThreadState *tstate) #endif +#define GLOBALS() specials[FRAME_SPECIALS_GLOBALS_OFFSET] +#define BUILTINS() specials[FRAME_SPECIALS_BUILTINS_OFFSET] PyObject* _Py_HOT_FUNCTION _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) @@ -1565,7 +1567,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) const _Py_CODEUNIT *next_instr; int opcode; /* Current opcode */ int oparg; /* Current opcode argument, if any */ - PyObject **fastlocals, **freevars; + PyObject **fastlocals, **freevars, **specials; PyObject *retval = NULL; /* Return value */ _Py_atomic_int * const eval_breaker = &tstate->interp->ceval.eval_breaker; PyCodeObject *co; @@ -1598,6 +1600,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* push frame */ tstate->frame = f; + specials = f->f_valuestack - FRAME_SPECIALS_SIZE; co = f->f_code; if (trace_info.cframe.use_tracing) { @@ -1692,7 +1695,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) #ifdef LLTRACE { - int r = _PyDict_ContainsId(f->f_globals, &PyId___ltrace__); + int r = 
_PyDict_ContainsId(GLOBALS(), &PyId___ltrace__); if (r < 0) { goto exit_eval_frame; } @@ -2726,8 +2729,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) _Py_IDENTIFIER(__build_class__); PyObject *bc; - if (PyDict_CheckExact(f->f_builtins)) { - bc = _PyDict_GetItemIdWithError(f->f_builtins, &PyId___build_class__); + if (PyDict_CheckExact(BUILTINS())) { + bc = _PyDict_GetItemIdWithError(BUILTINS(), &PyId___build_class__); if (bc == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_SetString(tstate, PyExc_NameError, @@ -2741,7 +2744,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject *build_class_str = _PyUnicode_FromId(&PyId___build_class__); if (build_class_str == NULL) goto error; - bc = PyObject_GetItem(f->f_builtins, build_class_str); + bc = PyObject_GetItem(BUILTINS(), build_class_str); if (bc == NULL) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) _PyErr_SetString(tstate, PyExc_NameError, @@ -2868,7 +2871,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject *name = GETITEM(names, oparg); PyObject *v = POP(); int err; - err = PyDict_SetItem(f->f_globals, name, v); + err = PyDict_SetItem(GLOBALS(), name, v); Py_DECREF(v); if (err != 0) goto error; @@ -2878,7 +2881,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(DELETE_GLOBAL): { PyObject *name = GETITEM(names, oparg); int err; - err = PyDict_DelItem(f->f_globals, name); + err = PyDict_DelItem(GLOBALS(), name); if (err != 0) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { format_exc_check_arg(tstate, PyExc_NameError, @@ -2916,7 +2919,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } } if (v == NULL) { - v = PyDict_GetItemWithError(f->f_globals, name); + v = PyDict_GetItemWithError(GLOBALS(), name); if (v != NULL) { Py_INCREF(v); } @@ -2924,8 +2927,8 @@ _PyEval_EvalFrameDefault(PyThreadState 
*tstate, PyFrameObject *f, int throwflag) goto error; } else { - if (PyDict_CheckExact(f->f_builtins)) { - v = PyDict_GetItemWithError(f->f_builtins, name); + if (PyDict_CheckExact(BUILTINS())) { + v = PyDict_GetItemWithError(BUILTINS(), name); if (v == NULL) { if (!_PyErr_Occurred(tstate)) { format_exc_check_arg( @@ -2937,7 +2940,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) Py_INCREF(v); } else { - v = PyObject_GetItem(f->f_builtins, name); + v = PyObject_GetItem(BUILTINS(), name); if (v == NULL) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { format_exc_check_arg( @@ -2956,17 +2959,17 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(LOAD_GLOBAL): { PyObject *name; PyObject *v; - if (PyDict_CheckExact(f->f_globals) - && PyDict_CheckExact(f->f_builtins)) + if (PyDict_CheckExact(GLOBALS()) + && PyDict_CheckExact(BUILTINS())) { OPCACHE_CHECK(); if (co_opcache != NULL && co_opcache->optimized > 0) { _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg; if (lg->globals_ver == - ((PyDictObject *)f->f_globals)->ma_version_tag + ((PyDictObject *)GLOBALS())->ma_version_tag && lg->builtins_ver == - ((PyDictObject *)f->f_builtins)->ma_version_tag) + ((PyDictObject *)BUILTINS())->ma_version_tag) { PyObject *ptr = lg->ptr; OPCACHE_STAT_GLOBAL_HIT(); @@ -2978,8 +2981,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } name = GETITEM(names, oparg); - v = _PyDict_LoadGlobal((PyDictObject *)f->f_globals, - (PyDictObject *)f->f_builtins, + v = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(), + (PyDictObject *)BUILTINS(), name); if (v == NULL) { if (!_PyErr_Occurred(tstate)) { @@ -3003,9 +3006,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) co_opcache->optimized = 1; lg->globals_ver = - ((PyDictObject *)f->f_globals)->ma_version_tag; + ((PyDictObject *)GLOBALS())->ma_version_tag; lg->builtins_ver = - ((PyDictObject 
*)f->f_builtins)->ma_version_tag; + ((PyDictObject *)BUILTINS())->ma_version_tag; lg->ptr = v; /* borrowed */ } @@ -3016,7 +3019,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) /* namespace 1: globals */ name = GETITEM(names, oparg); - v = PyObject_GetItem(f->f_globals, name); + v = PyObject_GetItem(GLOBALS(), name); if (v == NULL) { if (!_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { goto error; @@ -3024,7 +3027,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) _PyErr_Clear(tstate); /* namespace 2: builtins */ - v = PyObject_GetItem(f->f_builtins, name); + v = PyObject_GetItem(BUILTINS(), name); if (v == NULL) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { format_exc_check_arg( @@ -4297,7 +4300,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject *qualname = POP(); PyObject *codeobj = POP(); PyFunctionObject *func = (PyFunctionObject *) - PyFunction_NewWithQualName(codeobj, f->f_globals, qualname); + PyFunction_NewWithQualName(codeobj, GLOBALS(), qualname); Py_DECREF(codeobj); Py_DECREF(qualname); @@ -5137,12 +5140,15 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con, { PyObject **localsarray; PyCodeObject *code = (PyCodeObject *)con->fc_code; - int is_coro = code->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR); + int is_coro = code->co_flags & + (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR); if (is_coro) { localsarray = NULL; } else { - localsarray = _PyThreadState_PushLocals(tstate, code->co_nlocalsplus + code->co_stacksize); + int size = code->co_nlocalsplus + code->co_stacksize + + FRAME_SPECIALS_SIZE; + localsarray = _PyThreadState_PushLocals(tstate, size); if (localsarray == NULL) { return NULL; } @@ -5166,27 +5172,24 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con, current Python frame (f), the associated C stack is still in use, so recursion_depth must be boosted for the 
duration. */ + assert (!is_coro); + assert(f->f_own_locals_memory == 0); + assert(f->f_stackdepth == 0); if (Py_REFCNT(f) > 1) { Py_DECREF(f); _PyObject_GC_TRACK(f); - if (_PyFrame_MakeCopyOfLocals(f)) { - Py_XDECREF(retval); - if (!is_coro) { - _PyThreadState_PopLocals(tstate, localsarray); - } - return NULL; + if (_PyFrame_StealLocals(f)) { + Py_CLEAR(retval); } } else { + for (int i = 0; i < code->co_nlocalsplus + FRAME_SPECIALS_SIZE; i++) { + Py_XDECREF(localsarray[i]); + } ++tstate->recursion_depth; Py_DECREF(f); --tstate->recursion_depth; } - assert (!is_coro); - - for (int i = 0; i < code->co_nlocalsplus; i++) { - Py_XDECREF(localsarray[i]); - } _PyThreadState_PopLocals(tstate, localsarray); return retval; } @@ -5793,7 +5796,7 @@ _PyEval_GetBuiltins(PyThreadState *tstate) { PyFrameObject *frame = tstate->frame; if (frame != NULL) { - return frame->f_builtins; + return _PyFrame_GetBuiltins(frame); } return tstate->interp->builtins; } @@ -5846,9 +5849,7 @@ PyEval_GetGlobals(void) if (current_frame == NULL) { return NULL; } - - assert(current_frame->f_globals != NULL); - return current_frame->f_globals; + return _PyFrame_GetGlobals(current_frame); } int @@ -6099,14 +6100,13 @@ import_name(PyThreadState *tstate, PyFrameObject *f, PyObject *import_func, *res; PyObject* stack[5]; - import_func = _PyDict_GetItemIdWithError(f->f_builtins, &PyId___import__); + import_func = _PyDict_GetItemIdWithError(_PyFrame_GetBuiltins(f), &PyId___import__); if (import_func == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_SetString(tstate, PyExc_ImportError, "__import__ not found"); } return NULL; } - /* Fast path for not overloaded __import__. */ if (import_func == tstate->interp->import_func) { int ilevel = _PyLong_AsInt(level); @@ -6115,7 +6115,7 @@ import_name(PyThreadState *tstate, PyFrameObject *f, } res = PyImport_ImportModuleLevelObject( name, - f->f_globals, + _PyFrame_GetGlobals(f), f->f_locals == NULL ? 
Py_None : f->f_locals, fromlist, ilevel); @@ -6125,7 +6125,7 @@ import_name(PyThreadState *tstate, PyFrameObject *f, Py_INCREF(import_func); stack[0] = name; - stack[1] = f->f_globals; + stack[1] = _PyFrame_GetGlobals(f); stack[2] = f->f_locals == NULL ? Py_None : f->f_locals; stack[3] = fromlist; stack[4] = level; diff --git a/Python/suggestions.c b/Python/suggestions.c index 6fb01f10cd37c9..6a75e2bab847c4 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -208,9 +208,10 @@ offer_suggestions_for_name_error(PyNameErrorObject *exc) PyFrameObject *frame = traceback->tb_frame; assert(frame != NULL); - PyCodeObject *code = frame->f_code; + PyCodeObject *code = PyFrame_GetCode(frame); assert(code != NULL && code->co_varnames != NULL); PyObject *dir = PySequence_List(code->co_varnames); + Py_DECREF(code); if (dir == NULL) { return NULL; } @@ -221,7 +222,7 @@ offer_suggestions_for_name_error(PyNameErrorObject *exc) return suggestions; } - dir = PySequence_List(frame->f_globals); + dir = PySequence_List(_PyFrame_GetGlobals(frame)); if (dir == NULL) { return NULL; } @@ -231,7 +232,7 @@ offer_suggestions_for_name_error(PyNameErrorObject *exc) return suggestions; } - dir = PySequence_List(frame->f_builtins); + dir = PySequence_List(_PyFrame_GetBuiltins(frame)); if (dir == NULL) { return NULL; } diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index ae2c1799eb4423..b726b353b77ab8 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -854,6 +854,8 @@ class PyNoneStructPtr(PyObjectPtr): def proxyval(self, visited): return None +FRAME_SPECIALS_GLOBAL_OFFSET = 0 +FRAME_SPECIALS_BUILTINS_OFFSET = 1 class PyFrameObjectPtr(PyObjectPtr): _typename = 'PyFrameObject' @@ -886,6 +888,12 @@ def iter_locals(self): pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) yield (pyop_name, pyop_value) + def _f_globals(self): + f_localsplus = self.field('f_localsptr') + nlocalsplus = int_from_int(self.co.field('co_nlocalsplus')) + index = 
nlocalsplus + FRAME_SPECIALS_GLOBAL_OFFSET + return PyObjectPtr.from_pyobject_ptr(f_localsplus[index]) + def iter_globals(self): ''' Yield a sequence of (name,value) pairs of PyObjectPtr instances, for @@ -894,9 +902,15 @@ def iter_globals(self): if self.is_optimized_out(): return () - pyop_globals = self.pyop_field('f_globals') + pyop_globals = self._f_globals() return pyop_globals.iteritems() + def _f_builtins(self): + f_localsplus = self.field('f_localsptr') + nlocalsplus = int_from_int(self.co.field('co_nlocalsplus')) + index = nlocalsplus + FRAME_SPECIALS_BUILTINS_OFFSET + return PyObjectPtr.from_pyobject_ptr(f_localsplus[index]) + def iter_builtins(self): ''' Yield a sequence of (name,value) pairs of PyObjectPtr instances, for @@ -905,7 +919,7 @@ def iter_builtins(self): if self.is_optimized_out(): return () - pyop_builtins = self.pyop_field('f_builtins') + pyop_builtins = self._f_builtins() return pyop_builtins.iteritems() def get_var_by_name(self, name): From a5553931ca7904258d59f91232a8212449b0901f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 7 May 2021 11:40:33 +0100 Subject: [PATCH 10/21] Move (slow) locals frame object to per-thread stack. 
--- Include/cpython/frameobject.h | 4 ++-- Lib/test/test_sys.py | 2 +- Objects/frameobject.c | 27 ++++++++++++------------- Python/ceval.c | 38 ++++++++++++++++++++--------------- 4 files changed, 38 insertions(+), 33 deletions(-) diff --git a/Include/cpython/frameobject.h b/Include/cpython/frameobject.h index 69e57e23c99d58..4f2d407b8907ee 100644 --- a/Include/cpython/frameobject.h +++ b/Include/cpython/frameobject.h @@ -22,14 +22,14 @@ typedef signed char PyFrameState; enum { FRAME_SPECIALS_GLOBALS_OFFSET = 0, FRAME_SPECIALS_BUILTINS_OFFSET = 1, - FRAME_SPECIALS_SIZE = 2 + FRAME_SPECIALS_LOCALS_OFFSET = 2, + FRAME_SPECIALS_SIZE = 3 }; struct _frame { PyObject_HEAD struct _frame *f_back; /* previous frame, or NULL */ PyCodeObject *f_code; /* code segment */ - PyObject *f_locals; /* local symbol table (any mapping) */ PyObject **f_valuestack; /* points after the last local */ PyObject *f_trace; /* Trace function */ /* Borrowed reference to a generator, or NULL */ diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 5a9d4c8f0b5f00..6574c4f9b70273 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1274,7 +1274,7 @@ class C(object): pass # frame import inspect x = inspect.currentframe() - check(x, size('6P3i4cP')) + check(x, size('5P3i4cP')) # function def func(): pass check(func, size('14P')) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index a0a453091e1808..1f3e18a1b47c0a 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -25,19 +25,20 @@ get_frame_state(void) return &interp->frame; } +static inline PyObject ** +_PyFrame_Specials(PyFrameObject *f) { + return &f->f_valuestack[-FRAME_SPECIALS_SIZE]; +} + static PyObject * frame_getlocals(PyFrameObject *f, void *closure) { if (PyFrame_FastToLocalsWithError(f) < 0) return NULL; - Py_INCREF(f->f_locals); - return f->f_locals; -} - -static inline PyObject ** -_PyFrame_Specials(PyFrameObject *f) { - return &f->f_valuestack[-FRAME_SPECIALS_SIZE]; + PyObject *locals = 
_PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET]; + Py_INCREF(locals); + return locals; } int @@ -642,7 +643,6 @@ frame_dealloc(PyFrameObject *f) } f->f_stackdepth = 0; Py_XDECREF(f->f_back); - Py_CLEAR(f->f_locals); Py_CLEAR(f->f_trace); struct _Py_frame_state *state = get_frame_state(); #ifdef Py_DEBUG @@ -672,7 +672,6 @@ static int frame_traverse(PyFrameObject *f, visitproc visit, void *arg) { Py_VISIT(f->f_back); - Py_VISIT(f->f_locals); Py_VISIT(f->f_trace); /* locals */ @@ -894,7 +893,7 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *l f->f_code = (PyCodeObject *)Py_NewRef(con->fc_code); _PyFrame_Specials(f)[FRAME_SPECIALS_BUILTINS_OFFSET] = Py_NewRef(con->fc_builtins); _PyFrame_Specials(f)[FRAME_SPECIALS_GLOBALS_OFFSET] = Py_NewRef(con->fc_globals); - f->f_locals = Py_XNewRef(locals); + _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET] = Py_XNewRef(locals); // f_valuestack initialized by frame_alloc() f->f_trace = NULL; f->f_stackdepth = 0; @@ -1046,9 +1045,9 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f) PyErr_BadInternalCall(); return -1; } - locals = f->f_locals; + locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET]; if (locals == NULL) { - locals = f->f_locals = PyDict_New(); + locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET] = PyDict_New(); if (locals == NULL) return -1; } @@ -1107,7 +1106,7 @@ PyFrame_FastToLocals(PyFrameObject *f) void PyFrame_LocalsToFast(PyFrameObject *f, int clear) { - /* Merge f->f_locals into fast locals */ + /* Merge locals into fast locals */ PyObject *locals, *map; PyObject **fast; PyObject *error_type, *error_value, *error_traceback; @@ -1116,7 +1115,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear) Py_ssize_t ncells, nfreevars; if (f == NULL) return; - locals = f->f_locals; + locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET]; co = f->f_code; map = co->co_varnames; if (locals == NULL) diff --git a/Python/ceval.c b/Python/ceval.c index 
09748e4bf0a3e0..171f868e539b5f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1549,6 +1549,7 @@ eval_frame_handle_pending(PyThreadState *tstate) #define GLOBALS() specials[FRAME_SPECIALS_GLOBALS_OFFSET] #define BUILTINS() specials[FRAME_SPECIALS_BUILTINS_OFFSET] +#define LOCALS() specials[FRAME_SPECIALS_LOCALS_OFFSET] PyObject* _Py_HOT_FUNCTION _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) @@ -2759,7 +2760,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(STORE_NAME): { PyObject *name = GETITEM(names, oparg); PyObject *v = POP(); - PyObject *ns = f->f_locals; + PyObject *ns = LOCALS(); int err; if (ns == NULL) { _PyErr_Format(tstate, PyExc_SystemError, @@ -2779,7 +2780,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(DELETE_NAME): { PyObject *name = GETITEM(names, oparg); - PyObject *ns = f->f_locals; + PyObject *ns = LOCALS(); int err; if (ns == NULL) { _PyErr_Format(tstate, PyExc_SystemError, @@ -2894,7 +2895,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(LOAD_NAME): { PyObject *name = GETITEM(names, oparg); - PyObject *locals = f->f_locals; + PyObject *locals = LOCALS(); PyObject *v; if (locals == NULL) { _PyErr_Format(tstate, PyExc_SystemError, @@ -3076,7 +3077,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } case TARGET(LOAD_CLASSDEREF): { - PyObject *name, *value, *locals = f->f_locals; + PyObject *name, *value, *locals = LOCALS(); Py_ssize_t idx; assert(locals); assert(oparg >= PyTuple_GET_SIZE(co->co_cellvars)); @@ -3269,14 +3270,14 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) _Py_IDENTIFIER(__annotations__); int err; PyObject *ann_dict; - if (f->f_locals == NULL) { + if (LOCALS() == NULL) { _PyErr_Format(tstate, PyExc_SystemError, "no locals found when setting up annotations"); goto error; } /* 
check if __annotations__ in locals()... */ - if (PyDict_CheckExact(f->f_locals)) { - ann_dict = _PyDict_GetItemIdWithError(f->f_locals, + if (PyDict_CheckExact(LOCALS())) { + ann_dict = _PyDict_GetItemIdWithError(LOCALS(), &PyId___annotations__); if (ann_dict == NULL) { if (_PyErr_Occurred(tstate)) { @@ -3287,7 +3288,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) if (ann_dict == NULL) { goto error; } - err = _PyDict_SetItemId(f->f_locals, + err = _PyDict_SetItemId(LOCALS(), &PyId___annotations__, ann_dict); Py_DECREF(ann_dict); if (err != 0) { @@ -3301,7 +3302,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) if (ann_str == NULL) { goto error; } - ann_dict = PyObject_GetItem(f->f_locals, ann_str); + ann_dict = PyObject_GetItem(LOCALS(), ann_str); if (ann_dict == NULL) { if (!_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { goto error; @@ -3311,7 +3312,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) if (ann_dict == NULL) { goto error; } - err = PyObject_SetItem(f->f_locals, ann_str, ann_dict); + err = PyObject_SetItem(LOCALS(), ann_str, ann_dict); Py_DECREF(ann_dict); if (err != 0) { goto error; @@ -3710,7 +3711,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) goto error; } - locals = f->f_locals; + locals = LOCALS(); if (locals == NULL) { _PyErr_SetString(tstate, PyExc_SystemError, "no locals found during 'import *'"); @@ -5837,8 +5838,10 @@ PyEval_GetLocals(void) return NULL; } - assert(current_frame->f_locals != NULL); - return current_frame->f_locals; + PyObject *locals = current_frame->f_valuestack[ + FRAME_SPECIALS_LOCALS_OFFSET-FRAME_SPECIALS_SIZE]; + assert(locals != NULL); + return locals; } PyObject * @@ -6107,6 +6110,8 @@ import_name(PyThreadState *tstate, PyFrameObject *f, } return NULL; } + PyObject *locals = f->f_valuestack[ + FRAME_SPECIALS_LOCALS_OFFSET-FRAME_SPECIALS_SIZE]; /* Fast path for not 
overloaded __import__. */ if (import_func == tstate->interp->import_func) { int ilevel = _PyLong_AsInt(level); @@ -6116,7 +6121,7 @@ import_name(PyThreadState *tstate, PyFrameObject *f, res = PyImport_ImportModuleLevelObject( name, _PyFrame_GetGlobals(f), - f->f_locals == NULL ? Py_None : f->f_locals, + locals == NULL ? Py_None :locals, fromlist, ilevel); return res; @@ -6126,7 +6131,7 @@ import_name(PyThreadState *tstate, PyFrameObject *f, stack[0] = name; stack[1] = _PyFrame_GetGlobals(f); - stack[2] = f->f_locals == NULL ? Py_None : f->f_locals; + stack[2] = locals == NULL ? Py_None : locals; stack[3] = fromlist; stack[4] = level; res = _PyObject_FastCall(import_func, stack, 5); @@ -6471,7 +6476,8 @@ unicode_concatenate(PyThreadState *tstate, PyObject *v, PyObject *w, { PyObject *names = f->f_code->co_names; PyObject *name = GETITEM(names, oparg); - PyObject *locals = f->f_locals; + PyObject *locals = f->f_valuestack[ + FRAME_SPECIALS_LOCALS_OFFSET-FRAME_SPECIALS_SIZE]; if (locals && PyDict_CheckExact(locals)) { PyObject *w = PyDict_GetItemWithError(locals, name); if ((w == v && PyDict_DelItem(locals, name) != 0) || From f238598336d137696cf832e2f896e5c00bd7bf9b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 12 May 2021 11:56:26 +0100 Subject: [PATCH 11/21] Add back comment. --- Python/ceval.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 171f868e539b5f..55928b6f7482a8 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4775,6 +4775,10 @@ positional_only_passed_as_keyword(PyThreadState *tstate, PyCodeObject *co, } +/* Exception table parsing code. + * See Objects/exception_table_notes.txt for details. + */ + static inline unsigned char * parse_varint(unsigned char *p, int *result) { int val = p[0] & 63; From 6cab045c1357f9e08eb3327a36d0ad9f7aed4bdd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 12 May 2021 16:07:43 +0100 Subject: [PATCH 12/21] Fix limit when popping block from data stack. 
--- Python/pystate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index 5e83703f6105f8..5e7fcb52fca85f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2035,7 +2035,7 @@ _PyThreadState_PopLocals(PyThreadState *tstate, PyObject **locals) tstate->datastack_top = &previous->data[previous->top]; tstate->datastack_chunk = previous; _PyObject_VirtualFree(chunk, chunk->size); - tstate->datastack_limit = (PyObject **)(((char *)tstate->datastack_chunk) + DATA_STACK_CHUNK_SIZE); + tstate->datastack_limit = (PyObject **)(((char *)previous) + previous->size); } else { assert(tstate->datastack_top >= locals); From 89f9496116d384b909177e0840d5542c11973a7f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 12 May 2021 16:26:45 +0100 Subject: [PATCH 13/21] Tidy up frame creation a bit. --- Objects/frameobject.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 1f3e18a1b47c0a..6b8d1607e422a4 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -846,7 +846,6 @@ frame_alloc(PyCodeObject *code, PyObject **localsarray) } f->f_localsptr = localsarray; f->f_own_locals_memory = owns; - f->f_valuestack = f->f_localsptr + code->co_nlocalsplus + FRAME_SPECIALS_SIZE; return f; } @@ -883,18 +882,20 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *l assert(con->fc_builtins != NULL); assert(con->fc_code != NULL); assert(locals == NULL || PyMapping_Check(locals)); + PyCodeObject *code = (PyCodeObject *)con->fc_code; - PyFrameObject *f = frame_alloc((PyCodeObject *)con->fc_code, localsarray); + PyFrameObject *f = frame_alloc(code, localsarray); if (f == NULL) { return NULL; } + PyObject **specials = f->f_localsptr + code->co_nlocalsplus; + f->f_valuestack = specials + FRAME_SPECIALS_SIZE; f->f_back = (PyFrameObject*)Py_XNewRef(tstate->frame); f->f_code = (PyCodeObject *)Py_NewRef(con->fc_code); - 
_PyFrame_Specials(f)[FRAME_SPECIALS_BUILTINS_OFFSET] = Py_NewRef(con->fc_builtins); - _PyFrame_Specials(f)[FRAME_SPECIALS_GLOBALS_OFFSET] = Py_NewRef(con->fc_globals); - _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET] = Py_XNewRef(locals); - // f_valuestack initialized by frame_alloc() + specials[FRAME_SPECIALS_BUILTINS_OFFSET] = Py_NewRef(con->fc_builtins); + specials[FRAME_SPECIALS_GLOBALS_OFFSET] = Py_NewRef(con->fc_globals); + specials[FRAME_SPECIALS_LOCALS_OFFSET] = Py_XNewRef(locals); f->f_trace = NULL; f->f_stackdepth = 0; f->f_trace_lines = 1; @@ -903,7 +904,6 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyFrameConstructor *con, PyObject *l f->f_lasti = -1; f->f_lineno = 0; f->f_state = FRAME_CREATED; - // f_blockstack and f_localsplus initialized by frame_alloc() return f; } From 0091341eec5eb439152d4c0281cee40eafb65042 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 12 May 2021 16:32:09 +0100 Subject: [PATCH 14/21] Improve function name --- Include/cpython/frameobject.h | 4 ++-- Objects/frameobject.c | 2 +- Python/ceval.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/cpython/frameobject.h b/Include/cpython/frameobject.h index 4f2d407b8907ee..700d3277a909ec 100644 --- a/Include/cpython/frameobject.h +++ b/Include/cpython/frameobject.h @@ -83,7 +83,7 @@ PyAPI_FUNC(void) _PyFrame_DebugMallocStats(FILE *out); PyAPI_FUNC(PyFrameObject *) PyFrame_GetBack(PyFrameObject *frame); -int _PyFrame_StealLocals(PyFrameObject *f); - +/** Internal -- Not to be used outside of the interpreter core */ +int _PyFrame_TakeLocals(PyFrameObject *f); PyObject *_PyFrame_GetGlobals(PyFrameObject *f); PyObject *_PyFrame_GetBuiltins(PyFrameObject *f); diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 6b8d1607e422a4..6558173979e88c 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -850,7 +850,7 @@ frame_alloc(PyCodeObject *code, PyObject **localsarray) } int -_PyFrame_StealLocals(PyFrameObject *f) 
+_PyFrame_TakeLocals(PyFrameObject *f) { assert(f->f_own_locals_memory == 0); assert(f->f_stackdepth == 0); diff --git a/Python/ceval.c b/Python/ceval.c index 55928b6f7482a8..7b59e35b6996dd 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -5183,7 +5183,7 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con, if (Py_REFCNT(f) > 1) { Py_DECREF(f); _PyObject_GC_TRACK(f); - if (_PyFrame_StealLocals(f)) { + if (_PyFrame_TakeLocals(f)) { Py_CLEAR(retval); } } From 2c149399eed80bd83563e1ea01f41e2ffeb0d627 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 13 May 2021 08:43:59 +0100 Subject: [PATCH 15/21] Make sure datastack memory is freed after fork. --- Objects/frameobject.c | 8 +++++--- Python/ceval.c | 4 ++-- Python/pystate.c | 8 +++++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 6558173979e88c..36ca8fe768ac32 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -630,7 +630,7 @@ frame_dealloc(PyFrameObject *f) PyCodeObject *co = f->f_code; /* Kill all local variables */ - if (f->f_own_locals_memory) { + if (f->f_localsptr) { for (int i = 0; i < co->co_nlocalsplus+FRAME_SPECIALS_SIZE; i++) { Py_CLEAR(f->f_localsptr[i]); } @@ -638,8 +638,10 @@ frame_dealloc(PyFrameObject *f) for (int i = 0; i < f->f_stackdepth; i++) { Py_XDECREF(f->f_valuestack[i]); } - PyMem_Free(f->f_localsptr); - f->f_own_locals_memory = 0; + if (f->f_own_locals_memory) { + PyMem_Free(f->f_localsptr); + f->f_own_locals_memory = 0; + } } f->f_stackdepth = 0; Py_XDECREF(f->f_back); diff --git a/Python/ceval.c b/Python/ceval.c index 7b59e35b6996dd..16d122cc8c15a1 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -5179,7 +5179,6 @@ _PyEval_Vector(PyThreadState *tstate, PyFrameConstructor *con, */ assert (!is_coro); assert(f->f_own_locals_memory == 0); - assert(f->f_stackdepth == 0); if (Py_REFCNT(f) > 1) { Py_DECREF(f); _PyObject_GC_TRACK(f); @@ -5188,10 +5187,11 @@ _PyEval_Vector(PyThreadState 
*tstate, PyFrameConstructor *con, } } else { + ++tstate->recursion_depth; + f->f_localsptr = NULL; for (int i = 0; i < code->co_nlocalsplus + FRAME_SPECIALS_SIZE; i++) { Py_XDECREF(localsarray[i]); } - ++tstate->recursion_depth; Py_DECREF(f); --tstate->recursion_depth; } diff --git a/Python/pystate.c b/Python/pystate.c index 5e7fcb52fca85f..483c800926df17 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -897,6 +897,13 @@ PyThreadState_Clear(PyThreadState *tstate) if (tstate->on_delete != NULL) { tstate->on_delete(tstate->on_delete_data); } + _PyStackChunk *chunk = tstate->datastack_chunk; + tstate->datastack_chunk = NULL; + while (chunk != NULL) { + _PyStackChunk *prev = chunk->previous; + _PyObject_VirtualFree(chunk, chunk->size); + chunk = prev; + } } @@ -941,7 +948,6 @@ _PyThreadState_Delete(PyThreadState *tstate, int check_current) } } tstate_delete_common(tstate, gilstate); - _PyObject_VirtualFree(tstate->datastack_chunk, tstate->datastack_chunk->size); PyMem_RawFree(tstate); } From ed93a22f51eaabfa19193125eb66a40e570de649 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 14 May 2021 18:01:01 +0100 Subject: [PATCH 16/21] Fix compiler warnings. 
--- Include/internal/pycore_pystate.h | 2 +- Objects/codeobject.c | 2 +- Objects/obmalloc.c | 2 +- Python/pystate.c | 13 ++++++++----- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 58e24774e0a3e9..6601ce2f80bf7e 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -147,7 +147,7 @@ PyAPI_FUNC(int) _PyState_AddModule( PyAPI_FUNC(int) _PyOS_InterruptOccurred(PyThreadState *tstate); -PyObject **_PyThreadState_PushLocals(PyThreadState *, size_t size); +PyObject **_PyThreadState_PushLocals(PyThreadState *, int size); void _PyThreadState_PopLocals(PyThreadState *, PyObject **); #ifdef __cplusplus diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 0df1cde2ef66b8..84d887eb0ab7f8 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -240,7 +240,7 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, co->co_kwonlyargcount = kwonlyargcount; co->co_nlocals = nlocals; co->co_nlocalsplus = nlocals + - PyTuple_GET_SIZE(freevars) + PyTuple_GET_SIZE(cellvars); + (int)PyTuple_GET_SIZE(freevars) + (int)PyTuple_GET_SIZE(cellvars); co->co_stacksize = stacksize; co->co_flags = flags; Py_INCREF(code); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index bd607161d55476..903ca1c9e4b983 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -561,7 +561,7 @@ _PyObject_VirtualAlloc(size_t size) void _PyObject_VirtualFree(void *obj, size_t size) { - return _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size); + _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size); } void diff --git a/Python/pystate.c b/Python/pystate.c index 483c800926df17..5adbb6c6e6db54 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2003,19 +2003,19 @@ _Py_GetConfig(void) #define MINIMUM_OVERHEAD 1000 PyObject ** -_PyThreadState_PushLocals(PyThreadState *tstate, size_t size) +_PyThreadState_PushLocals(PyThreadState *tstate, 
int size) { + assert(((unsigned)size) < INT_MAX/sizeof(PyObject*)/2); PyObject **res = tstate->datastack_top; PyObject **top = res + size; if (top >= tstate->datastack_limit) { - size_t allocate_size = DATA_STACK_CHUNK_SIZE; - while (allocate_size < sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { + int allocate_size = DATA_STACK_CHUNK_SIZE; + while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { allocate_size *= 2; } _PyStackChunk *new = allocate_chunk(allocate_size, tstate->datastack_chunk); if (new == NULL) { - _PyErr_SetString(tstate, PyExc_MemoryError, "Out of memory"); - return NULL; + goto error; } tstate->datastack_chunk->top = tstate->datastack_top - &tstate->datastack_chunk->data[0]; tstate->datastack_chunk = new; @@ -2030,6 +2030,9 @@ _PyThreadState_PushLocals(PyThreadState *tstate, size_t size) res[i] = NULL; } return res; +error: + _PyErr_SetString(tstate, PyExc_MemoryError, "Out of memory"); + return NULL; } void From 920418970ce13a5fb10db88de068f023c680a917 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 14 May 2021 20:03:53 +0100 Subject: [PATCH 17/21] Add NEWS item --- .../Core and Builtins/2021-05-14-20-03-32.bpo-44032.OzT1ob.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-05-14-20-03-32.bpo-44032.OzT1ob.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-05-14-20-03-32.bpo-44032.OzT1ob.rst b/Misc/NEWS.d/next/Core and Builtins/2021-05-14-20-03-32.bpo-44032.OzT1ob.rst new file mode 100644 index 00000000000000..fd2dec80cddf10 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-05-14-20-03-32.bpo-44032.OzT1ob.rst @@ -0,0 +1,2 @@ +Move 'fast' locals and other variables from the frame object to a per-thread +datastack. From 4e60017f1c6064addf51f6f7f338373a08a96d13 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 19 May 2021 11:36:59 +0100 Subject: [PATCH 18/21] Move internal frame functions to internal header. 
--- Include/cpython/frameobject.h | 12 ----------- Include/internal/pycore_frame.h | 37 +++++++++++++++++++++++++++++++++ Objects/frameobject.c | 20 +----------------- Python/_warnings.c | 1 + Python/ceval.c | 1 + Python/pystate.c | 2 +- Python/suggestions.c | 1 + 7 files changed, 42 insertions(+), 32 deletions(-) create mode 100644 Include/internal/pycore_frame.h diff --git a/Include/cpython/frameobject.h b/Include/cpython/frameobject.h index 700d3277a909ec..fc20bc2ff89b0c 100644 --- a/Include/cpython/frameobject.h +++ b/Include/cpython/frameobject.h @@ -19,13 +19,6 @@ enum _framestate { typedef signed char PyFrameState; -enum { - FRAME_SPECIALS_GLOBALS_OFFSET = 0, - FRAME_SPECIALS_BUILTINS_OFFSET = 1, - FRAME_SPECIALS_LOCALS_OFFSET = 2, - FRAME_SPECIALS_SIZE = 3 -}; - struct _frame { PyObject_HEAD struct _frame *f_back; /* previous frame, or NULL */ @@ -82,8 +75,3 @@ PyAPI_FUNC(void) PyFrame_FastToLocals(PyFrameObject *); PyAPI_FUNC(void) _PyFrame_DebugMallocStats(FILE *out); PyAPI_FUNC(PyFrameObject *) PyFrame_GetBack(PyFrameObject *frame); - -/** Internal -- Not to be used outside of the interpreter core */ -int _PyFrame_TakeLocals(PyFrameObject *f); -PyObject *_PyFrame_GetGlobals(PyFrameObject *f); -PyObject *_PyFrame_GetBuiltins(PyFrameObject *f); diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h new file mode 100644 index 00000000000000..96f7b51ba905dc --- /dev/null +++ b/Include/internal/pycore_frame.h @@ -0,0 +1,37 @@ +#ifndef Py_INTERNAL_FRAME_H +#define Py_INTERNAL_FRAME_H +#ifdef __cplusplus +extern "C" { +#endif + +enum { + FRAME_SPECIALS_GLOBALS_OFFSET = 0, + FRAME_SPECIALS_BUILTINS_OFFSET = 1, + FRAME_SPECIALS_LOCALS_OFFSET = 2, + FRAME_SPECIALS_SIZE = 3 +}; + +static inline PyObject ** +_PyFrame_Specials(PyFrameObject *f) { + return &f->f_valuestack[-FRAME_SPECIALS_SIZE]; +} + +/* Returns a *borrowed* reference. 
*/ +static inline PyObject * +_PyFrame_GetGlobals(PyFrameObject *f) +{ + return _PyFrame_Specials(f)[FRAME_SPECIALS_GLOBALS_OFFSET]; +} + +static inline PyObject * +_PyFrame_GetBuiltins(PyFrameObject *f) +{ + return _PyFrame_Specials(f)[FRAME_SPECIALS_BUILTINS_OFFSET]; +} + +int _PyFrame_TakeLocals(PyFrameObject *f); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FRAME_H */ diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 36ca8fe768ac32..1781c3cff73786 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -6,6 +6,7 @@ #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "frameobject.h" // PyFrameObject +#include "pycore_frame.h" #include "opcode.h" // EXTENDED_ARG #include "structmember.h" // PyMemberDef @@ -25,11 +26,6 @@ get_frame_state(void) return &interp->frame; } -static inline PyObject ** -_PyFrame_Specials(PyFrameObject *f) { - return &f->f_valuestack[-FRAME_SPECIALS_SIZE]; -} - static PyObject * frame_getlocals(PyFrameObject *f, void *closure) @@ -74,20 +70,6 @@ frame_getlasti(PyFrameObject *f, void *closure) return PyLong_FromLong(f->f_lasti*2); } -/* Returns a *borrowed* reference. Not part of the API. */ -PyObject * -_PyFrame_GetGlobals(PyFrameObject *f) -{ - return _PyFrame_Specials(f)[FRAME_SPECIALS_GLOBALS_OFFSET]; -} - -/* Returns a *borrowed* reference. Not part of the API. 
*/ -PyObject * -_PyFrame_GetBuiltins(PyFrameObject *f) -{ - return _PyFrame_Specials(f)[FRAME_SPECIALS_BUILTINS_OFFSET]; -} - static PyObject * frame_getglobals(PyFrameObject *f, void *closure) { diff --git a/Python/_warnings.c b/Python/_warnings.c index 4d8db730b473cf..9c8815c1a3e204 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -5,6 +5,7 @@ #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() #include "frameobject.h" // PyFrame_GetBack() +#include "pycore_frame.h" #include "clinic/_warnings.c.h" #define MODULE_NAME "_warnings" diff --git a/Python/ceval.c b/Python/ceval.c index 16d122cc8c15a1..7e6b5eb3f9bb56 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -26,6 +26,7 @@ #include "code.h" #include "dictobject.h" #include "frameobject.h" +#include "pycore_frame.h" #include "opcode.h" #include "pydtrace.h" #include "setobject.h" diff --git a/Python/pystate.c b/Python/pystate.c index 5adbb6c6e6db54..9d091f12735c7d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -680,7 +680,7 @@ new_threadstate(PyInterpreterState *interp, int init) PyMem_RawFree(tstate); return NULL; } - /* If top points to entry 0, then _PyThreadState_PopLocals willl try to pop this chunk */ + /* If top points to entry 0, then _PyThreadState_PopLocals will try to pop this chunk */ tstate->datastack_top = &tstate->datastack_chunk->data[1]; tstate->datastack_limit = (PyObject **)(((char *)tstate->datastack_chunk) + DATA_STACK_CHUNK_SIZE); diff --git a/Python/suggestions.c b/Python/suggestions.c index 6a75e2bab847c4..2e76551f363ed4 100644 --- a/Python/suggestions.c +++ b/Python/suggestions.c @@ -1,5 +1,6 @@ #include "Python.h" #include "frameobject.h" +#include "pycore_frame.h" #include "pycore_pyerrors.h" From 7c53a6f539b1ca3a9335c3b10a568786b36a286b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 19 May 2021 14:05:47 +0100 Subject: [PATCH 19/21] Fix compiler warning --- Python/pystate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/Python/pystate.c b/Python/pystate.c index 9d091f12735c7d..36057ee13bded9 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2026,7 +2026,7 @@ _PyThreadState_PushLocals(PyThreadState *tstate, int size) else { tstate->datastack_top = top; } - for (size_t i=0; i < size; i++) { + for (int i=0; i < size; i++) { res[i] = NULL; } return res; From 61c3753a89a9ab0ff3a25fa3b362dfb17191e0e2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 19 May 2021 14:10:52 +0100 Subject: [PATCH 20/21] Add missing comment. --- Include/internal/pycore_frame.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 96f7b51ba905dc..44f58fb6948712 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -23,6 +23,7 @@ _PyFrame_GetGlobals(PyFrameObject *f) return _PyFrame_Specials(f)[FRAME_SPECIALS_GLOBALS_OFFSET]; } +/* Returns a *borrowed* reference. */ static inline PyObject * _PyFrame_GetBuiltins(PyFrameObject *f) { From c5ccb7d9c5052bf5a5f813a0399e1aa85f00728a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 20 May 2021 11:01:24 +0100 Subject: [PATCH 21/21] Add comment about what _PyObject_VirtualAlloc does. --- Include/internal/pycore_pymem.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index d1fa158e3a803c..d59ab490493ba4 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -94,7 +94,8 @@ struct _PyTraceMalloc_Config { PyAPI_DATA(struct _PyTraceMalloc_Config) _Py_tracemalloc_config; - +/* Allocate memory directly from the O/S virtual memory system, + * where supported. Otherwise fall back on malloc */ void *_PyObject_VirtualAlloc(size_t size); void _PyObject_VirtualFree(void *, size_t size);