From c8f0e376d472f7d0741b547ad01e7541b5c96365 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Oct 2021 17:06:40 +0900 Subject: [PATCH 1/7] Minimum implementation of global freelist. --- Include/cpython/objimpl.h | 13 ++++++ Modules/gcmodule.c | 45 +++++++++++++++--- Objects/obmalloc.c | 96 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 7 deletions(-) diff --git a/Include/cpython/objimpl.h b/Include/cpython/objimpl.h index d83700e2a4647f..6c5080fa5cbac1 100644 --- a/Include/cpython/objimpl.h +++ b/Include/cpython/objimpl.h @@ -98,3 +98,16 @@ PyAPI_FUNC(PyObject *) _PyObject_GC_Calloc(size_t size); #define PyType_SUPPORTS_WEAKREFS(t) ((t)->tp_weaklistoffset > 0) PyAPI_FUNC(PyObject **) PyObject_GET_WEAKREFS_LISTPTR(PyObject *op); + +// Freelist + +#if SIZEOF_VOID_P > 4 +#define _PY_FREELIST_ALIGNMENT (16) +#else +#define _PY_FREELIST_ALIGNMENT (8) +#endif + +void* _PyFreelist_Malloc(size_t size); +void _PyFreelist_Free(void *ptr, size_t size); +void _PyObject_GC_Recycle(void *, PyTypeObject *tp); +void _PyObject_GC_RecycleVar(void *, PyTypeObject *tp, Py_ssize_t nitems); diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 7d1a45bcaeabf8..d850b80568db22 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -2243,15 +2243,13 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize) size_t size = sizeof(PyGC_Head) + basicsize; PyGC_Head *g; - if (use_calloc) { - g = (PyGC_Head *)PyObject_Calloc(1, size); - } - else { - g = (PyGC_Head *)PyObject_Malloc(size); - } + g = _PyFreelist_Malloc(size); if (g == NULL) { return _PyErr_NoMemory(tstate); } + if (use_calloc) { + memset((void*)g, 0, size); + } assert(((uintptr_t)g & 3) == 0); // g must be aligned 4bytes boundary g->_gc_next = 0; @@ -2322,8 +2320,11 @@ _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) return (PyVarObject *)PyErr_NoMemory(); } + size_t size = sizeof(PyGC_Head) + basicsize; + // upsize for freelist + size = (size + _PY_FREELIST_ALIGNMENT - 1) & ~(_PY_FREELIST_ALIGNMENT - 1); PyGC_Head *g = AS_GC(op); - g = (PyGC_Head *)PyObject_Realloc(g, sizeof(PyGC_Head) + basicsize); + g = (PyGC_Head *)PyObject_Realloc(g, size); if (g == NULL) return (PyVarObject *)PyErr_NoMemory(); op = (PyVarObject *) FROM_GC(g); @@ -2345,6 +2346,36 @@ PyObject_GC_Del(void *op) PyObject_Free(g); } +void +_PyObject_GC_Recycle(void *op, PyTypeObject *tp) +{ + PyGC_Head *g = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op)) { + gc_list_remove(g); + } + GCState *gcstate = get_gc_state(); + if (gcstate->generations[0].count > 0) { + gcstate->generations[0].count--; + } + size_t size = sizeof(PyGC_Head) + _PyObject_SIZE(tp); + _PyFreelist_Free(g, size); +} + +void +_PyObject_GC_RecycleVar(void *op, PyTypeObject *tp, Py_ssize_t nitems) +{ + PyGC_Head *g = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op)) { + gc_list_remove(g); + } + GCState *gcstate = get_gc_state(); + if (gcstate->generations[0].count > 0) { + gcstate->generations[0].count--; + } + size_t size = sizeof(PyGC_Head) + _PyObject_VAR_SIZE(tp, nitems); + _PyFreelist_Free(g, size); +} + int PyObject_GC_IsTracked(PyObject* obj) { diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d8d6f6dea0d532..64ba86ec39e8f0 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -29,6 +29,7 @@ static void _PyObject_DebugDumpAddress(const void *p); static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p); static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain); +static void _PyFreelist_DebugMallocStats(FILE *out); #if defined(__has_feature) /* Clang */ # if __has_feature(address_sanitizer) /* is ASAN enabled? */ @@ -3079,7 +3080,102 @@ _PyObject_DebugMallocStats(FILE *out) #endif #endif + _PyFreelist_DebugMallocStats(out); return 1; } #endif /* #ifdef WITH_PYMALLOC */ + +// Freelist support. See Include/cpython/objimpl.h + +#define _PY_FREELIST_MAXSIZECLASS (256/_PY_FREELIST_ALIGNMENT) +#define _PY_FREELIST_MAXLENGTH (1000) +#define _PY_FREELIST_STAT 1 + +typedef struct { + void *ptr; + size_t nfree; +#if _PY_FREELIST_STAT + size_t reused; + size_t allocated; +#endif +} _Py_freelist_slot; + +static _Py_freelist_slot _Py_global_freelist[_PY_FREELIST_MAXSIZECLASS]; + +void* +_PyFreelist_Malloc(size_t size) +{ + assert(size > 0); + size_t sc = (size-1) / _PY_FREELIST_ALIGNMENT; + if (sc < _PY_FREELIST_MAXSIZECLASS) { + _Py_freelist_slot *slot = &_Py_global_freelist[sc]; + if (slot->nfree > 0) { + void *ret = slot->ptr; + slot->ptr = *((void**)ret); + slot->nfree--; +#if _PY_FREELIST_STAT + slot->reused++; +#endif + return ret; + } +#if _PY_FREELIST_STAT + slot->allocated++; +#endif + size = (sc+1) * _PY_FREELIST_ALIGNMENT; + } + return PyObject_Malloc(size); +} + +void +_PyFreelist_Free(void *ptr, size_t size) +{ + assert(size > 0); + size_t sc = (size-1) / _PY_FREELIST_ALIGNMENT; + if (sc < _PY_FREELIST_MAXSIZECLASS) { + _Py_freelist_slot *slot = &_Py_global_freelist[sc]; + if (slot->nfree < _PY_FREELIST_MAXLENGTH) { + *((void**)ptr) = slot->ptr; + slot->ptr = ptr; + slot->nfree++; + return; + } + } + PyObject_Free(ptr); +} + +static void +_PyFreelist_DebugMallocStats(FILE *out) +{ + char buf[128]; + +#if _PY_FREELIST_STAT + fputs("\nFreelist stats:\n\n" + "size num free total bytes reused alloc\n" + "---- -------- ----------- -------- --------\n", + out); + + for (int i = 0; i < _PY_FREELIST_MAXSIZECLASS; i++) { + int n = (int)_Py_global_freelist[i].nfree; + int s = (i + 1) * _PY_FREELIST_ALIGNMENT; + PyOS_snprintf(buf, sizeof(buf), "%4d %8d %11d %8ld %8ld\n", + s, n, s * n, + _Py_global_freelist[i].reused, + _Py_global_freelist[i].allocated); + fputs(buf, out); + } +#else + fputs("\nFreelist stats:\n\n" + "size num free total bytes\n" + "---- -------- -----------\n", + out); + + for (int i = 0; i < _PY_FREELIST_MAXSIZECLASS; i++) { + int n = (int)_Py_global_freelist[i].nfree; + int s = (i + 1) * _PY_FREELIST_ALIGNMENT; + PyOS_snprintf(buf, sizeof(buf), "%4d %8d %11d\n", + s, n, s * n); + fputs(buf, out); + } +#endif +} From 4cb792f061b5b82030c37b5145627ffaece3e109 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Oct 2021 17:19:05 +0900 Subject: [PATCH 2/7] frame uses global freelist --- Objects/frameobject.c | 43 +++++++++++-------------------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 5271790f018af5..b970959bb98c8d 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -643,14 +643,7 @@ frame_dealloc(PyFrameObject *f) // frame_dealloc() must not be called after _PyFrame_Fini() assert(state->numfree != -1); #endif - if (state->numfree < PyFrame_MAXFREELIST) { - ++state->numfree; - f->f_back = state->free_list; - state->free_list = f; - } - else { - PyObject_GC_Del(f); - } + _PyObject_GC_Recycle((void*)f, &PyFrame_Type); Py_XDECREF(co); Py_TRASHCAN_END; @@ -801,31 +794,16 @@ static inline PyFrameObject* frame_alloc(InterpreterFrame *frame, int owns) { PyFrameObject *f; - struct _Py_frame_state *state = get_frame_state(); - if (state->free_list == NULL) - { - f = PyObject_GC_New(PyFrameObject, &PyFrame_Type); - if (f == NULL) { - if (owns) { - Py_XDECREF(frame->f_code); - Py_XDECREF(frame->f_builtins); - Py_XDECREF(frame->f_globals); - Py_XDECREF(frame->f_locals); - PyMem_Free(frame); - } - return NULL; + f = PyObject_GC_New(PyFrameObject, &PyFrame_Type); + if (f == NULL) { + if (owns) { + Py_XDECREF(frame->f_code); + Py_XDECREF(frame->f_builtins); + Py_XDECREF(frame->f_globals); + Py_XDECREF(frame->f_locals); + PyMem_Free(frame); } - } - else { -#ifdef Py_DEBUG - // frame_alloc() must not be called after _PyFrame_Fini() - assert(state->numfree != -1); -#endif - assert(state->numfree > 0); - --state->numfree; - f = state->free_list; - state->free_list = state->free_list->f_back; - _Py_NewReference((PyObject *)f); + return NULL; } f->f_frame = frame; f->f_own_locals_memory = owns; @@ -1069,6 +1047,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear) void _PyFrame_ClearFreeList(PyInterpreterState *interp) { + // TODO: Remove frame freelist struct _Py_frame_state *state = &interp->frame; while (state->free_list != NULL) { PyFrameObject *f = state->free_list; From 47e114c9e36023db6f2975ef28b1532dfd30124c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Oct 2021 17:54:02 +0900 Subject: [PATCH 3/7] tuple uses global freelist --- Objects/tupleobject.c | 55 ++++++------------------------------------- 1 file changed, 7 insertions(+), 48 deletions(-) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 051683086ea2c5..3e98c0d50caec5 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -66,35 +66,12 @@ tuple_alloc(Py_ssize_t size) return NULL; } -#if PyTuple_MAXSAVESIZE > 0 - struct _Py_tuple_state *state = get_tuple_state(); -#ifdef Py_DEBUG - // tuple_alloc() must not be called after _PyTuple_Fini() - assert(state->numfree[0] != -1); -#endif - if (size < PyTuple_MAXSAVESIZE && (op = state->free_list[size]) != NULL) { - assert(size != 0); - state->free_list[size] = (PyTupleObject *) op->ob_item[0]; - state->numfree[size]--; - /* Inlined _PyObject_InitVar() without _PyType_HasFeature() test */ -#ifdef Py_TRACE_REFS - Py_SET_SIZE(op, size); - Py_SET_TYPE(op, &PyTuple_Type); -#endif - _Py_NewReference((PyObject *)op); - } - else -#endif - { - /* Check for overflow */ - if ((size_t)size > ((size_t)PY_SSIZE_T_MAX - (sizeof(PyTupleObject) - - sizeof(PyObject *))) / sizeof(PyObject *)) { - return (PyTupleObject *)PyErr_NoMemory(); - } - op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size); - if (op == NULL) - return NULL; + /* Check for overflow */ + if ((size_t)size > ((size_t)PY_SSIZE_T_MAX - (sizeof(PyTupleObject) - + sizeof(PyObject *))) / sizeof(PyObject *)) { + return (PyTupleObject *)PyErr_NoMemory(); } + op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size); return op; } @@ -271,22 +248,7 @@ tupledealloc(PyTupleObject *op) while (--i >= 0) { Py_XDECREF(op->ob_item[i]); } -#if PyTuple_MAXSAVESIZE > 0 - struct _Py_tuple_state *state = get_tuple_state(); -#ifdef Py_DEBUG - // tupledealloc() must not be called after _PyTuple_Fini() - assert(state->numfree[0] != -1); -#endif - if (len < PyTuple_MAXSAVESIZE - && state->numfree[len] < PyTuple_MAXFREELIST - && Py_IS_TYPE(op, &PyTuple_Type)) - { - op->ob_item[0] = (PyObject *) state->free_list[len]; - state->numfree[len]++; - state->free_list[len] = op; - goto done; /* return */ - } -#endif + _PyObject_GC_RecycleVar(op, &PyTuple_Type, len); } #if defined(Py_DEBUG) && PyTuple_MAXSAVESIZE > 0 else { @@ -297,13 +259,10 @@ tupledealloc(PyTupleObject *op) if (op == state->free_list[0] && state->numfree[0] != 0) { _Py_FatalRefcountError("deallocating the empty tuple singleton"); } + Py_TYPE(op)->tp_free((PyObject *)op); } #endif - Py_TYPE(op)->tp_free((PyObject *)op); -#if PyTuple_MAXSAVESIZE > 0 -done: -#endif Py_TRASHCAN_END } From d239b20e6e92b00c2acc005dae792f88d49f7562 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Oct 2021 20:19:53 +0900 Subject: [PATCH 4/7] unicode allocate memory from freelist. But not recycle it. --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 61fc34d71da3ce..7cd11718a06ade 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1451,7 +1451,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) * PyObject_New() so we are able to allocate space for the object and * it's data buffer. */ - obj = (PyObject *) PyObject_Malloc(struct_size + (size + 1) * char_size); + obj = (PyObject *) _PyFreelist_Malloc(struct_size + (size + 1) * char_size); if (obj == NULL) { return PyErr_NoMemory(); } From 4d6f7c33b604a4692a0f6e5f70810612be197abc Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Oct 2021 20:53:42 +0900 Subject: [PATCH 5/7] list uses freelist --- Objects/listobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index ed5324155f6275..93f4955ccc8455 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -349,8 +349,8 @@ list_dealloc(PyListObject *op) // list_dealloc() must not be called after _PyList_Fini() assert(state->numfree != -1); #endif - if (state->numfree < PyList_MAXFREELIST && PyList_CheckExact(op)) { - state->free_list[state->numfree++] = op; + if (PyList_CheckExact(op)) { + _PyObject_GC_Recycle(op, &PyList_Type); } else { Py_TYPE(op)->tp_free((PyObject *)op); From 38175d0ca78587699477e6c7890845d6bc4dc6dc Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Oct 2021 20:57:08 +0900 Subject: [PATCH 6/7] list uses global freelist --- Objects/listobject.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 93f4955ccc8455..ed75cf19f89e62 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -144,22 +144,14 @@ PyList_New(Py_ssize_t size) return NULL; } - struct _Py_list_state *state = get_list_state(); - PyListObject *op; #ifdef Py_DEBUG + struct _Py_list_state *state = get_list_state(); // PyList_New() must not be called after _PyList_Fini() assert(state->numfree != -1); #endif - if (state->numfree) { - state->numfree--; - op = state->free_list[state->numfree]; - _Py_NewReference((PyObject *)op); - } - else { - op = PyObject_GC_New(PyListObject, &PyList_Type); - if (op == NULL) { - return NULL; - } + PyListObject *op = PyObject_GC_New(PyListObject, &PyList_Type); + if (op == NULL) { + return NULL; } if (size <= 0) { op->ob_item = NULL; @@ -344,8 +336,8 @@ list_dealloc(PyListObject *op) } PyMem_Free(op->ob_item); } - struct _Py_list_state *state = get_list_state(); #ifdef Py_DEBUG + struct _Py_list_state *state = get_list_state(); // list_dealloc() must not be called after _PyList_Fini() assert(state->numfree != -1); #endif From b3def6fe6ef123056665c2b393af22f275bdf137 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Oct 2021 21:06:44 +0900 Subject: [PATCH 7/7] dict object uses global freelist. Note that dictkeys still use own freelist. --- Objects/dictobject.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3d6e4c1e17e1f0..ffdd6e1cb8b6c9 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -638,26 +638,18 @@ new_dict(PyDictKeysObject *keys, PyDictValues *values, Py_ssize_t used, int free { PyDictObject *mp; assert(keys != NULL); - struct _Py_dict_state *state = get_dict_state(); #ifdef Py_DEBUG + struct _Py_dict_state *state = get_dict_state(); // new_dict() must not be called after _PyDict_Fini() assert(state->numfree != -1); #endif - if (state->numfree) { - mp = state->free_list[--state->numfree]; - assert (mp != NULL); - assert (Py_IS_TYPE(mp, &PyDict_Type)); - _Py_NewReference((PyObject *)mp); - } - else { - mp = PyObject_GC_New(PyDictObject, &PyDict_Type); - if (mp == NULL) { - dictkeys_decref(keys); - if (free_values_on_failure) { - free_values(values); - } - return NULL; + mp = PyObject_GC_New(PyDictObject, &PyDict_Type); + if (mp == NULL) { + dictkeys_decref(keys); + if (free_values_on_failure) { + free_values(values); } + return NULL; } mp->ma_keys = keys; mp->ma_values = values; @@ -1987,13 +1979,13 @@ dict_dealloc(PyDictObject *mp) assert(keys->dk_refcnt == 1); dictkeys_decref(keys); } - struct _Py_dict_state *state = get_dict_state(); #ifdef Py_DEBUG + struct _Py_dict_state *state = get_dict_state(); // new_dict() must not be called after _PyDict_Fini() assert(state->numfree != -1); #endif - if (state->numfree < PyDict_MAXFREELIST && Py_IS_TYPE(mp, &PyDict_Type)) { - state->free_list[state->numfree++] = mp; + if (Py_IS_TYPE(mp, &PyDict_Type)) { + _PyObject_GC_Recycle(mp, &PyDict_Type); } else { Py_TYPE(mp)->tp_free((PyObject *)mp);