From fcbeb6daa91e64d583bb132e8ee4beed583e0e7e Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 7 Jun 2023 16:19:04 -0600
Subject: [PATCH 1/7] Add wrapper allocators with locking when needed.

---
 Include/internal/pycore_pymem.h |   4 +
 Objects/obmalloc.c              | 215 ++++++++++++++++++++++++++++++--
 2 files changed, 212 insertions(+), 7 deletions(-)

diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h
index 81a707a0a5ddf3..c6bc7f2d544727 100644
--- a/Include/internal/pycore_pymem.h
+++ b/Include/internal/pycore_pymem.h
@@ -30,6 +30,10 @@ struct _pymem_allocators {
         debug_alloc_api_t obj;
     } debug;
     PyObjectArenaAllocator obj_arena;
+    struct {
+        PyMemAllocatorEx mem;
+        PyMemAllocatorEx obj;
+    } wrapped_with_lock;
 };
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 9620a8fbb44cac..692e04c3809cbe 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -209,6 +209,8 @@ _PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
 #define _PyObject (_PyRuntime.allocators.standard.obj)
 #define _PyMem_Debug (_PyRuntime.allocators.debug)
 #define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
+#define _PyMem_Wrapped (_PyRuntime.allocators.wrapped_with_lock.mem)
+#define _PyObject_Wrapped (_PyRuntime.allocators.wrapped_with_lock.obj)
 
 
 /***************************/
@@ -530,14 +532,32 @@ PyMem_SetupDebugHooks(void)
     PyThread_release_lock(ALLOCATORS_MUTEX);
 }
 
+static int has_locking_wrapper(PyMemAllocatorEx *allocator);
+
 static void
 get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
 {
     switch(domain)
     {
-    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
-    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
-    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
+    case PYMEM_DOMAIN_RAW:
+        *allocator = _PyMem_Raw;
+        break;
+    case PYMEM_DOMAIN_MEM:
+        if (has_locking_wrapper(&_PyMem)) {
+            *allocator = *(PyMemAllocatorEx *)_PyMem.ctx;
+        }
+        else {
+            *allocator = _PyMem;
+        }
+        break;
+    case PYMEM_DOMAIN_OBJ:
+        if (has_locking_wrapper(&_PyObject)) {
+            *allocator = *(PyMemAllocatorEx *)_PyObject.ctx;
+        }
+        else {
+            *allocator = _PyObject;
+        }
+        break;
     default:
         /* unknown domain: set all attributes to NULL */
         allocator->ctx = NULL;
@@ -548,15 +568,28 @@ get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
     }
 }
 
+static void maybe_add_locking_wrapper(
+    PyMemAllocatorDomain, PyMemAllocatorEx *, PyMemAllocatorEx *);
+
 static void
 set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
 {
     switch(domain)
     {
-    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
-    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
-    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
-    /* ignore unknown domain */
+    case PYMEM_DOMAIN_RAW:
+        _PyMem_Raw = *allocator;
+        break;
+    case PYMEM_DOMAIN_MEM:
+        maybe_add_locking_wrapper(domain, allocator, &_PyMem_Wrapped);
+        _PyMem = *allocator;
+        break;
+    case PYMEM_DOMAIN_OBJ:
+        maybe_add_locking_wrapper(domain, allocator, &_PyObject_Wrapped);
+        _PyObject = *allocator;
+        break;
+    default:
+        /* ignore unknown domain */
+        return;
     }
 }
 
@@ -627,6 +660,174 @@ PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
  * around these uses of the runtime-global allocators state. */
 
 
+/************************************/
+/* locking around custom allocators */
+/************************************/
+
+static int
+should_lock(void)
+{
+    // XXX check for main interpreter, etc.
+    return 1;
+}
+
+static void
+acquire_custom_allocator_lock(void)
+{
+    // XXX acquire the main interpreter's GIL
+    return;
+}
+
+static void
+release_custom_allocator_lock(void)
+{
+    // XXX release the main interpreter's GIL
+    return;
+}
+
+static void *
+_PyMem_MallocLocked(void *ctx, size_t size)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    void *ptr;
+    if (should_lock()) {
+        acquire_custom_allocator_lock();
+        ptr = wrapped->malloc(wrapped->ctx, size);
+        release_custom_allocator_lock();
+    }
+    else {
+        ptr = wrapped->malloc(wrapped->ctx, size);
+    }
+    return ptr;
+}
+
+static void *
+_PyMem_CallocLocked(void *ctx, size_t nelem, size_t elsize)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    void *ptr;
+    if (should_lock()) {
+        acquire_custom_allocator_lock();
+        ptr = wrapped->calloc(wrapped->ctx, nelem, elsize);
+        release_custom_allocator_lock();
+    }
+    else {
+        ptr = wrapped->calloc(wrapped->ctx, nelem, elsize);
+    }
+    return ptr;
+}
+
+static void *
+_PyMem_ReallocLocked(void *ctx, void *ptr, size_t new_size)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (should_lock()) {
+        acquire_custom_allocator_lock();
+        ptr = wrapped->realloc(wrapped->ctx, ptr, new_size);
+        release_custom_allocator_lock();
+    }
+    else {
+        ptr = wrapped->realloc(wrapped->ctx, ptr, new_size);
+    }
+    return ptr;
+}
+
+static void
+_PyMem_FreeLocked(void *ctx, void *ptr)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (should_lock()) {
+        acquire_custom_allocator_lock();
+        wrapped->free(wrapped->ctx, ptr);
+        release_custom_allocator_lock();
+    }
+    else {
+        wrapped->free(wrapped->ctx, ptr);
+    }
+}
+
+static int
+has_locking_wrapper(PyMemAllocatorEx *allocator)
+{
+    if (allocator->ctx == NULL) {
+        return 0;
+    }
+    return (allocator->malloc == _PyMem_MallocLocked
+            || allocator->calloc == _PyMem_CallocLocked
+            || allocator->realloc == _PyMem_ReallocLocked
+            || allocator->free == _PyMem_FreeLocked);
+}
+
+static void
+maybe_add_locking_wrapper(PyMemAllocatorDomain domain,
+                          PyMemAllocatorEx *allocator,
+                          PyMemAllocatorEx *wrapped)
+{
+    assert(domain == PYMEM_DOMAIN_MEM || domain == PYMEM_DOMAIN_OBJ);
+
+    *wrapped = (PyMemAllocatorEx){0};
+
+    if (allocator->malloc == _PyMem_DebugMalloc) {
+        /* The debug allocator only wraps an already set allocator,
+         * which would have gone through this function already. */
+        return;
+    }
+
+    void *ctx = allocator->ctx;
+
+    /* What is the likelihood of reentrancy with the wrapper funcs?
+     * For now we assume it is effectively zero. */
+
+    if (allocator->malloc != _PyMem_RawMalloc
+#ifdef WITH_PYMALLOC
+        && allocator->malloc != _PyObject_Malloc
+#endif
+        && allocator->malloc != _PyMem_MallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->malloc = allocator->malloc;
+        allocator->ctx = wrapped;
+        allocator->malloc = _PyMem_MallocLocked;
+    }
+
+    if (allocator->calloc != _PyMem_RawCalloc
+#ifdef WITH_PYMALLOC
+        && allocator->calloc != _PyObject_Calloc
+#endif
+        && allocator->calloc != _PyMem_CallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->calloc = allocator->calloc;
+        allocator->ctx = wrapped;
+        allocator->calloc = _PyMem_CallocLocked;
+    }
+
+    if (allocator->realloc != _PyMem_RawRealloc
+#ifdef WITH_PYMALLOC
+        && allocator->realloc != _PyObject_Realloc
+#endif
+        && allocator->realloc != _PyMem_ReallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->realloc = allocator->realloc;
+        allocator->ctx = wrapped;
+        allocator->realloc = _PyMem_ReallocLocked;
+    }
+
+    if (allocator->free != _PyMem_RawFree
+#ifdef WITH_PYMALLOC
+        && allocator->free != _PyObject_Free
+#endif
+        && allocator->free != _PyMem_FreeLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->free = allocator->free;
+        allocator->ctx = wrapped;
+        allocator->free = _PyMem_FreeLocked;
+    }
+}
+
+
 /*************************/
 /* the "arena" allocator */
 /*************************/
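Patch 1 above establishes the core trick: when a custom "mem" or "obj" allocator is installed, the runtime saves a copy of it in _PyMem_Wrapped / _PyObject_Wrapped, repoints the live allocator's function pointers at the *Locked entry points, and repoints its ctx at that saved copy, so each wrapper can forward to the original. The standalone sketch below reproduces just that shape; it is illustrative only, not CPython code: all names are hypothetical, a plain pthread mutex stands in for the main interpreter's GIL, and only malloc/free are wrapped.

    /* sketch.c -- the wrap-and-forward pattern from PATCH 1, reduced.
     * Hypothetical names; a mutex stands in for the main GIL. */
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct {
        void *ctx;
        void *(*malloc)(void *ctx, size_t size);
        void (*free)(void *ctx, void *ptr);
    } allocator_t;

    static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Stands in for _PyMem_Wrapped: storage the wrapper reaches via ctx. */
    static allocator_t wrapped_storage;

    static void *
    malloc_locked(void *ctx, size_t size)
    {
        allocator_t *wrapped = (allocator_t *)ctx;
        pthread_mutex_lock(&alloc_lock);
        void *ptr = wrapped->malloc(wrapped->ctx, size);
        pthread_mutex_unlock(&alloc_lock);
        return ptr;
    }

    static void
    free_locked(void *ctx, void *ptr)
    {
        allocator_t *wrapped = (allocator_t *)ctx;
        pthread_mutex_lock(&alloc_lock);
        wrapped->free(wrapped->ctx, ptr);
        pthread_mutex_unlock(&alloc_lock);
    }

    static void
    add_locking_wrapper(allocator_t *allocator)
    {
        /* Save the original, then swing the live pointers over to the
         * locked entry points, with ctx leading back to the saved copy. */
        wrapped_storage = *allocator;
        allocator->ctx = &wrapped_storage;
        allocator->malloc = malloc_locked;
        allocator->free = free_locked;
    }

    static void *my_malloc(void *ctx, size_t size) { (void)ctx; return malloc(size); }
    static void my_free(void *ctx, void *ptr) { (void)ctx; free(ptr); }

    int
    main(void)
    {
        allocator_t a = {NULL, my_malloc, my_free};
        add_locking_wrapper(&a);
        void *p = a.malloc(a.ctx, 16);   /* serialized around my_malloc */
        a.free(a.ctx, p);                /* serialized around my_free */
        printf("ok\n");
        return 0;
    }

Note also the guards in maybe_add_locking_wrapper(): the built-in allocator functions (_PyMem_RawMalloc, _PyObject_Malloc, and friends) are deliberately left unwrapped, presumably because they are safe without the extra lock, and a function that is already one of the *Locked wrappers is never wrapped a second time.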
From fbe26ebccf37a7f4175cbd253a9b64941de86105 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Fri, 9 Jun 2023 18:22:03 -0600
Subject: [PATCH 2/7] Fix set_allocator_unlocked.

---
 Objects/obmalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 692e04c3809cbe..caac6436d53fae 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -580,12 +580,12 @@ set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
         _PyMem_Raw = *allocator;
         break;
     case PYMEM_DOMAIN_MEM:
-        maybe_add_locking_wrapper(domain, allocator, &_PyMem_Wrapped);
         _PyMem = *allocator;
+        maybe_add_locking_wrapper(domain, &_PyMem, &_PyMem_Wrapped);
         break;
     case PYMEM_DOMAIN_OBJ:
-        maybe_add_locking_wrapper(domain, allocator, &_PyObject_Wrapped);
         _PyObject = *allocator;
+        maybe_add_locking_wrapper(domain, &_PyObject, &_PyObject_Wrapped);
         break;
     default:
         /* ignore unknown domain */
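Patch 2 fixes an ordering bug in set_allocator_unlocked(): the wrapping must be applied to the runtime's own copy (_PyMem / _PyObject) after the assignment, not to the caller's struct before it, otherwise the caller's PyMemAllocatorEx is mutated behind its back. With patches 1 and 2 together, the observable contract of the public API is preserved. The hypothetical embedder program below (not part of the PR; assumes a build against this branch) illustrates that contract:

    /* embed.c -- what PATCHES 1-2 preserve for embedders (hypothetical
     * example). A Get round-trip hands back the embedder's functions,
     * not the internal locking wrapper. */
    #include <Python.h>
    #include <assert.h>
    #include <stdlib.h>

    static void *my_malloc(void *ctx, size_t n) { (void)ctx; return malloc(n ? n : 1); }
    static void *my_calloc(void *ctx, size_t ne, size_t sz) { (void)ctx; return calloc(ne ? ne : 1, sz ? sz : 1); }
    static void *my_realloc(void *ctx, void *p, size_t n) { (void)ctx; return realloc(p, n ? n : 1); }
    static void my_free(void *ctx, void *p) { (void)ctx; free(p); }

    int
    main(void)
    {
        PyMemAllocatorEx alloc = {NULL, my_malloc, my_calloc, my_realloc, my_free};
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);

        /* Internally, _PyMem now routes through _PyMem_MallocLocked and
         * friends, but has_locking_wrapper() plus the unwrapping in
         * get_allocator_unlocked() hide that, and (thanks to PATCH 2)
         * `alloc` itself was not mutated. */
        PyMemAllocatorEx got;
        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &got);
        assert(got.malloc == my_malloc);
        assert(alloc.malloc == my_malloc);

        Py_Initialize();
        Py_Finalize();
        return 0;
    }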
From 71916298ec746f53c8790f64d0f9e8c86ceb3679 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 7 Jun 2023 16:41:00 -0600
Subject: [PATCH 3/7] Implement should_lock().

---
 Include/internal/pycore_pymem.h | 1 +
 Objects/obmalloc.c              | 9 ++++++++-
 Python/pylifecycle.c            | 9 +++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h
index c6bc7f2d544727..736681e98dfcf5 100644
--- a/Include/internal/pycore_pymem.h
+++ b/Include/internal/pycore_pymem.h
@@ -30,6 +30,7 @@ struct _pymem_allocators {
         debug_alloc_api_t obj;
     } debug;
     PyObjectArenaAllocator obj_arena;
+    int num_gils;
     struct {
         PyMemAllocatorEx mem;
         PyMemAllocatorEx obj;
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index caac6436d53fae..0f874713d5fe1e 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -667,7 +667,14 @@ PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
 static int
 should_lock(void)
 {
-    // XXX check for main interpreter, etc.
+    if (_PyRuntime.allocators.num_gils <= 1) {
+        return 0;
+    }
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    PyInterpreterState *main_interp = _PyInterpreterState_Main();
+    if (interp->ceval.gil == main_interp->ceval.gil) {
+        return 0;
+    }
     return 1;
 }
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 9ac5630959f8f5..912737e30d1a7c 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -586,6 +586,7 @@ static PyStatus
 init_interp_create_gil(PyThreadState *tstate, int own_gil)
 {
     PyStatus status;
+    _PyRuntimeState *runtime = tstate->interp->runtime;
 
     /* finalize_interp_delete() comment explains why _PyEval_FiniGIL() is
        only called here. */
@@ -603,6 +604,9 @@ init_interp_create_gil(PyThreadState *tstate, int own_gil)
     if (_PyStatus_EXCEPTION(status)) {
         return status;
     }
+    HEAD_LOCK(runtime);
+    runtime->allocators.num_gils++;
+    HEAD_UNLOCK(runtime);
     return _PyStatus_OK();
 }
 
@@ -1730,6 +1734,11 @@ finalize_interp_delete(PyInterpreterState *interp)
     /* Cleanup auto-thread-state */
     _PyGILState_Fini(interp);
 
+    _PyRuntimeState *runtime = interp->runtime;
+    HEAD_LOCK(runtime);
+    runtime->allocators.num_gils--;
+    HEAD_UNLOCK(runtime);
+
     /* We can't call _PyEval_FiniGIL() here because destroying the GIL lock
        can fail when it is being awaited by another running daemon thread (see
        bpo-9901). Instead pycore_create_interpreter() destroys the previously
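Patch 3 makes should_lock() a two-step test: no locking while only one GIL exists (num_gils counts them), and no locking for any interpreter that shares the main GIL, since that GIL already serializes those allocations. The counter only exceeds one under per-interpreter GILs (PEP 684). The following hypothetical embedding sequence reaches that state; it uses the real 3.12 API (Py_NewInterpreterFromConfig and PyInterpreterConfig_OWN_GIL), but the comments mapping it onto this PR's bookkeeping are this note's interpretation, not something the PR itself demonstrates:

    /* subinterp.c -- creating the second GIL that makes should_lock()
     * matter (hypothetical embedder code, not part of the PR). */
    #include <Python.h>

    int
    main(void)
    {
        Py_Initialize();                  /* num_gils == 1: wrappers stay passive */
        PyThreadState *main_tstate = PyThreadState_Get();

        PyInterpreterConfig config = {
            .check_multi_interp_extensions = 1,
            .gil = PyInterpreterConfig_OWN_GIL,   /* PEP 684 */
        };
        PyThreadState *tstate = NULL;
        PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config);
        if (PyStatus_Exception(status)) {
            Py_ExitStatusException(status);
        }
        /* num_gils == 2 now. While this interpreter is current, a custom
         * PYMEM_DOMAIN_MEM/OBJ allocator runs under the main GIL via the
         * acquire/release pair implemented in PATCH 4 below. */
        Py_EndInterpreter(tstate);        /* num_gils drops back to 1 */

        PyThreadState_Swap(main_tstate);
        Py_FinalizeEx();
        return 0;
    }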
From dd609899265a349b778c2b8c060022e7f5ff44b5 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 7 Jun 2023 17:05:58 -0600
Subject: [PATCH 4/7] Implement acquire_custom_allocator_lock() and
 release_custom_allocator_lock().

---
 Objects/obmalloc.c | 114 +++++++++++++++++++++++++--------------------
 1 file changed, 63 insertions(+), 51 deletions(-)

diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 0f874713d5fe1e..d9e36fa9c66f4b 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -3,6 +3,7 @@
 #include "Python.h"
 #include "pycore_code.h"          // stats
 #include "pycore_pystate.h"       // _PyInterpreterState_GET
+#include "pycore_ceval.h"         // _PyEval_AcquireLock()
 
 #include "pycore_obmalloc.h"
 #include "pycore_pymem.h"
@@ -664,93 +665,104 @@ PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
 /* locking around custom allocators */
 /************************************/
 
-static int
-should_lock(void)
+static inline int
+should_lock(PyInterpreterState *interp)
 {
-    if (_PyRuntime.allocators.num_gils <= 1) {
-        return 0;
-    }
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    PyInterpreterState *main_interp = _PyInterpreterState_Main();
-    if (interp->ceval.gil == main_interp->ceval.gil) {
-        return 0;
+    return interp->ceval.gil != _PyInterpreterState_Main()->ceval.gil;
+}
+
+static PyThreadState *
+get_tstate_for_main_gil(void)
+{
+    PyThreadState *tstate = _PyRuntimeState_GetFinalizing(&_PyRuntime);
+    if (tstate == NULL) {
+        /* To use its GIL, we only need the pointer and one field. */
+        static const PyThreadState _main_tstate = {
+            .interp = &_PyRuntime._main_interpreter,
+        };
+        tstate = (PyThreadState *)&_main_tstate;
     }
-    return 1;
+    return tstate;
 }
 
-static void
-acquire_custom_allocator_lock(void)
+static inline void
+acquire_custom_allocator_lock(PyThreadState *tstate)
 {
-    // XXX acquire the main interpreter's GIL
-    return;
+    _PyEval_AcquireLock(tstate);
 }
 
-static void
-release_custom_allocator_lock(void)
+static inline void
+release_custom_allocator_lock(PyThreadState *tstate)
 {
-    // XXX release the main interpreter's GIL
-    return;
+    _PyEval_ReleaseLock(tstate->interp, tstate);
 }
 
 static void *
 _PyMem_MallocLocked(void *ctx, size_t size)
 {
     PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
-    void *ptr;
-    if (should_lock()) {
-        acquire_custom_allocator_lock();
-        ptr = wrapped->malloc(wrapped->ctx, size);
-        release_custom_allocator_lock();
-    }
-    else {
-        ptr = wrapped->malloc(wrapped->ctx, size);
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            void *ptr = wrapped->malloc(wrapped->ctx, size);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
     }
-    return ptr;
+    return wrapped->malloc(wrapped->ctx, size);
 }
 
 static void *
 _PyMem_CallocLocked(void *ctx, size_t nelem, size_t elsize)
 {
     PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
-    void *ptr;
-    if (should_lock()) {
-        acquire_custom_allocator_lock();
-        ptr = wrapped->calloc(wrapped->ctx, nelem, elsize);
-        release_custom_allocator_lock();
-    }
-    else {
-        ptr = wrapped->calloc(wrapped->ctx, nelem, elsize);
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            void *ptr = wrapped->calloc(wrapped->ctx, nelem, elsize);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
     }
-    return ptr;
+    return wrapped->calloc(wrapped->ctx, nelem, elsize);
 }
 
 static void *
 _PyMem_ReallocLocked(void *ctx, void *ptr, size_t new_size)
 {
     PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
-    if (should_lock()) {
-        acquire_custom_allocator_lock();
-        ptr = wrapped->realloc(wrapped->ctx, ptr, new_size);
-        release_custom_allocator_lock();
-    }
-    else {
-        ptr = wrapped->realloc(wrapped->ctx, ptr, new_size);
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            ptr = wrapped->realloc(wrapped->ctx, ptr, new_size);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
     }
-    return ptr;
+    return wrapped->realloc(wrapped->ctx, ptr, new_size);
 }
 
 static void
 _PyMem_FreeLocked(void *ctx, void *ptr)
 {
     PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
-    if (should_lock()) {
-        acquire_custom_allocator_lock();
-        wrapped->free(wrapped->ctx, ptr);
-        release_custom_allocator_lock();
-    }
-    else {
-        wrapped->free(wrapped->ctx, ptr);
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            wrapped->free(wrapped->ctx, ptr);
+            release_custom_allocator_lock(tstate);
+            return;
+        }
     }
+    wrapped->free(wrapped->ctx, ptr);
 }
 
 static int
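Two details in patch 4 are worth calling out. First, the per-call fast path: reading num_gils is a single unsynchronized load, so in the common single-GIL case a wrapped allocator costs one extra compare before the plain call, with no thread-state lookup at all. Second, get_tstate_for_main_gil() conjures a static, mostly empty PyThreadState whose only live field is interp, which is apparently all _PyEval_AcquireLock() needs in order to take the main interpreter's GIL. The control flow, reduced to a standalone program (illustrative only; a mutex and a thread-local flag stand in for GILs and interpreter state):

    /* fastpath.c -- shape of the PATCH 4 fast path (standalone sketch;
     * the mutex and flags stand in for GILs and thread states). */
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int num_gils = 1;              /* written under a runtime-wide lock */
    static pthread_mutex_t main_gil = PTHREAD_MUTEX_INITIALIZER;
    static _Thread_local int shares_main_gil = 1;   /* per-"interpreter" property */

    static void *
    malloc_locked(size_t size)
    {
        if (num_gils > 1) {               /* cheap global gate */
            if (!shares_main_gil) {       /* the should_lock() test */
                pthread_mutex_lock(&main_gil);
                void *ptr = malloc(size); /* stands in for wrapped->malloc */
                pthread_mutex_unlock(&main_gil);
                return ptr;
            }
        }
        return malloc(size);              /* common, unserialized path */
    }

    int
    main(void)
    {
        void *p = malloc_locked(32);      /* single GIL: no locking at all */
        free(p);

        num_gils = 2;                     /* a subinterpreter got its own GIL */
        shares_main_gil = 0;              /* ...and we are running in it */
        p = malloc_locked(32);            /* now serialized on main_gil */
        free(p);
        printf("ok\n");
        return 0;
    }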
From 321d1c2efa0f09f98e8837d3d97f459e5c2bb23d Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Thu, 27 Jul 2023 14:49:52 -0600
Subject: [PATCH 5/7] Check for overflow/underflow.

---
 Python/pylifecycle.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index ab71c3ff662a2a..67f27c70bc7006 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -619,11 +619,18 @@ init_interp_create_gil(PyThreadState *tstate, int gil)
     if (_PyStatus_EXCEPTION(status)) {
         return status;
     }
+
     HEAD_LOCK(runtime);
-    runtime->allocators.num_gils++;
+    if (runtime->allocators.num_gils == INT_MAX) {
+        status = _PyStatus_ERR("allocators GIL overflow");
+    }
+    else {
+        status = _PyStatus_OK();
+        runtime->allocators.num_gils++;
+    }
     HEAD_UNLOCK(runtime);
-    return _PyStatus_OK();
+
+    return status;
 }
 
@@ -1748,6 +1755,7 @@ finalize_interp_delete(PyInterpreterState *interp)
 
     _PyRuntimeState *runtime = interp->runtime;
     HEAD_LOCK(runtime);
+    assert(runtime->allocators.num_gils > 0);
     runtime->allocators.num_gils--;
     HEAD_UNLOCK(runtime);
 

From 52c01ea5f04c7e0efc852cb8df9da929a1074bfe Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Thu, 27 Jul 2023 14:50:04 -0600
Subject: [PATCH 6/7] Use an unsigned int.

---
 Include/internal/pycore_pymem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h
index 3c93007c6dac9b..56832960af6a14 100644
--- a/Include/internal/pycore_pymem.h
+++ b/Include/internal/pycore_pymem.h
@@ -42,7 +42,7 @@ struct _pymem_allocators {
         debug_alloc_api_t obj;
     } debug;
     PyObjectArenaAllocator obj_arena;
-    int num_gils;
+    unsigned int num_gils;
     struct {
         PyMemAllocatorEx mem;
         PyMemAllocatorEx obj;

From a2135973fd0bab33c73c546cf87785892956300f Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Fri, 28 Jul 2023 11:05:52 -0600
Subject: [PATCH 7/7] Only modify num_gils if the interpreter has its own GIL.

---
 Python/pylifecycle.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 67f27c70bc7006..bca533dd44fc86 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -620,15 +620,17 @@ init_interp_create_gil(PyThreadState *tstate, int gil)
         return status;
     }
 
-    HEAD_LOCK(runtime);
-    if (runtime->allocators.num_gils == INT_MAX) {
-        status = _PyStatus_ERR("allocators GIL overflow");
-    }
-    else {
-        status = _PyStatus_OK();
-        runtime->allocators.num_gils++;
+    if (own_gil) {
+        HEAD_LOCK(runtime);
+        if (runtime->allocators.num_gils == INT_MAX) {
+            status = _PyStatus_ERR("allocators GIL overflow");
+        }
+        else {
+            status = _PyStatus_OK();
+            runtime->allocators.num_gils++;
+        }
+        HEAD_UNLOCK(runtime);
     }
-    HEAD_UNLOCK(runtime);
 
     return status;
 }
 
@@ -1754,10 +1756,12 @@ finalize_interp_delete(PyInterpreterState *interp)
     _PyGILState_Fini(interp);
 
     _PyRuntimeState *runtime = interp->runtime;
-    HEAD_LOCK(runtime);
-    assert(runtime->allocators.num_gils > 0);
-    runtime->allocators.num_gils--;
-    HEAD_UNLOCK(runtime);
+    if (interp->ceval.own_gil) {
+        HEAD_LOCK(runtime);
+        assert(runtime->allocators.num_gils > 0);
+        runtime->allocators.num_gils--;
+        HEAD_UNLOCK(runtime);
+    }
 
     /* We can't call _PyEval_FiniGIL() here because destroying the GIL lock
        can fail when it is being awaited by another running daemon thread (see
        bpo-9901). Instead pycore_create_interpreter() destroys the previously
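Patches 5 through 7 harden the bookkeeping: the increment is bounds-checked, the decrement is assert-guarded, and (patch 7) only interpreters that actually own a GIL touch the counter, so shared-GIL subinterpreters never inflate num_gils. (The guard still compares against INT_MAX even though the field became unsigned in patch 6; that only makes the cap conservative.) The whole counting protocol, condensed into a runnable standalone sketch in which a pthread mutex stands in for HEAD_LOCK:

    /* numgils.c -- the num_gils protocol from PATCHES 5-7, condensed
     * (standalone sketch, not CPython code; error handling simplified). */
    #include <assert.h>
    #include <limits.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t head_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned int num_gils = 0;

    static int
    gil_created(int own_gil)
    {
        int ok = 1;
        if (own_gil) {                   /* shared-GIL interpreters don't count */
            pthread_mutex_lock(&head_lock);
            if (num_gils == INT_MAX) {
                ok = 0;                  /* "allocators GIL overflow" */
            }
            else {
                num_gils++;
            }
            pthread_mutex_unlock(&head_lock);
        }
        return ok;
    }

    static void
    gil_deleted(int own_gil)
    {
        if (own_gil) {
            pthread_mutex_lock(&head_lock);
            assert(num_gils > 0);        /* the underflow check from PATCH 5 */
            num_gils--;
            pthread_mutex_unlock(&head_lock);
        }
    }

    int
    main(void)
    {
        gil_created(1);    /* main interpreter */
        gil_created(0);    /* subinterpreter sharing the main GIL: uncounted */
        gil_created(1);    /* subinterpreter with its own GIL: counted */
        gil_deleted(1);
        gil_deleted(0);
        gil_deleted(1);
        printf("num_gils = %u\n", num_gils);   /* back to 0 */
        return 0;
    }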