From 6bf34f54d997b0557bf4d7bdf4a99cafb23c80d4 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 4 Nov 2021 14:32:26 -0700 Subject: [PATCH 1/5] gh-111569: Implement Python critical section API Critical sections are helpers to replace the global interpreter lock with finer grained locking. They provide similar guarantees to the GIL and avoid the deadlock risk that plain locking involves. Critical sections are implicitly ended whenever the GIL would be released. They are resumed when the GIL would be acquired. Nested critical sections behave as if the sections were interleaved. --- Include/cpython/pystate.h | 7 + Include/internal/pycore_critical_section.h | 204 ++++++++++++++++++ Include/internal/pycore_lock.h | 17 +- Include/object.h | 6 +- Makefile.pre.in | 2 + ...-10-31-14-58-17.gh-issue-111569._V8iu4.rst | 3 + Modules/Setup.stdlib.in | 2 +- Modules/_testinternalcapi.c | 3 + Modules/_testinternalcapi/parts.h | 1 + .../test_critical_sections.c | 204 ++++++++++++++++++ Objects/object.c | 2 +- PCbuild/_freeze_module.vcxproj | 3 + PCbuild/_freeze_module.vcxproj.filters | 9 + PCbuild/_testinternalcapi.vcxproj | 1 + PCbuild/_testinternalcapi.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/critical_section.c | 100 +++++++++ Python/pystate.c | 10 + 19 files changed, 578 insertions(+), 7 deletions(-) create mode 100644 Include/internal/pycore_critical_section.h create mode 100644 Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst create mode 100644 Modules/_testinternalcapi/test_critical_sections.c create mode 100644 Python/critical_section.c diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index ec99f90d669d12..4fad4c60abbafe 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -149,6 +149,13 @@ struct _ts { struct _py_trashcan trash; + /* Tagged pointer to top-most critical section, or zero if there is no + * active critical section. Critical sections are only used in + * `--disable-gil` builds (i.e., when Py_NOGIL is defined to 1). In the + * default build, this field is always zero. + */ + uintptr_t critical_section; + /* Called when a thread state is deleted normally, but not when it * is destroyed after fork(). * Pain: to prevent rare but fatal shutdown errors (issue 18808), diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h new file mode 100644 index 00000000000000..c2875b38f55a0d --- /dev/null +++ b/Include/internal/pycore_critical_section.h @@ -0,0 +1,204 @@ +#ifndef Py_INTERNAL_CRITICAL_SECTION_H +#define Py_INTERNAL_CRITICAL_SECTION_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_lock.h" // PyMutex +#include "pycore_pystate.h" // _PyThreadState_GET() +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Implementation of Python critical sections: helpers to replace the global +// interpreter lock with fine grained locking. +// +// A Python critical section is a region of code that can only be executed by +// a single thread at at time. The regions begin with a call to +// Py_BEGIN_CRITICAL_SECTION and ends with Py_END_CRITICAL_SECTION. Active +// critical sections are *implicitly* suspended whenever the thread calls +// `_PyThreadState_Detach()` (i.e., at tiems when the interpreter would have +// released the GIL). +// +// The most recent critical section is resumed when `_PyThreadState_Attach()` +// is called. See `_PyCriticalSection_Resume()`. 
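+//
+// A hypothetical timeline (illustrative only; the suspend/resume behavior
+// applies to `--disable-gil` builds):
+//
+//   Py_BEGIN_CRITICAL_SECTION(op);  // locks op->ob_mutex
+//   Py_BEGIN_ALLOW_THREADS          // detach: section suspended, op unlocked
+//   ... blocking I/O ...
+//   Py_END_ALLOW_THREADS            // attach: section resumed, op relocked
+//   Py_END_CRITICAL_SECTION;        // unlocks op->ob_mutex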
+// +// The purpose of implicitly ending critical sections is to avoid deadlocks +// when locking multiple objects. Any time a thread would have released the +// GIL, it releases all locks from active critical sections. This includes +// times when a thread blocks while attempting to acquire a lock. +// +// Note that the macros Py_BEGIN_CRITICAL_SECTION and Py_END_CRITICAL_SECTION +// are no-ops in non-free-threaded builds. +// +// Example usage: +// Py_BEGIN_CRITICAL_SECTION(op); +// ... +// Py_END_CRITICAL_SECTION; +// +// To lock two objects at once: +// Py_BEGIN_CRITICAL_SECTION2(op1, op2); +// ... +// Py_END_CRITICAL_SECTION2; + + +// Tagged pointers to critical sections use the two least significant bits to +// mark if the pointed-to critical section is inactive and whether it is a +// _PyCriticalSection2 object. +#define _Py_CRITICAL_SECTION_INACTIVE 0x1 +#define _Py_CRITICAL_SECTION_TWO_MUTEXES 0x2 +#define _Py_CRITICAL_SECTION_MASK 0x3 + +#ifdef Py_NOGIL +#define Py_BEGIN_CRITICAL_SECTION(op) { \ + _PyCriticalSection _cs; \ + _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex) + +#define Py_END_CRITICAL_SECTION \ + _PyCriticalSection_End(&_cs); \ +} + +#define Py_BEGIN_CRITICAL_SECTION2(a, b) { \ + _PyCriticalSection2 _cs2; \ + _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex) + +#define Py_END_CRITICAL_SECTION2 \ + _PyCriticalSection2_End(&_cs2); \ +} +#else +// The critical section APIs are no-ops with the GIL. +#define Py_BEGIN_CRITICAL_SECTION(op) +#define Py_END_CRITICAL_SECTION +#define Py_BEGIN_CRITICAL_SECTION2(a, b) +#define Py_END_CRITICAL_SECTION2 +#endif + +typedef struct { + // Tagged pointer to an outer active critical section (or 0). + // The two least-significant-bits indicate whether the pointed-to critical + // section is inactive and whether it is a _PyCriticalSection2 object. + uintptr_t prev; + + // Mutex used to protect critical section + PyMutex *mutex; +} _PyCriticalSection; + +// A critical section protected by two mutexes. Use +// _PyCriticalSection2_Begin and _PyCriticalSection2_End. +typedef struct { + _PyCriticalSection base; + + PyMutex *mutex2; +} _PyCriticalSection2; + +static inline int +_PyCriticalSection_IsActive(uintptr_t tag) +{ + return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0; +} + +// Resumes the top-most critical section. +PyAPI_FUNC(void) +_PyCriticalSection_Resume(PyThreadState *tstate); + +// (private) slow path for locking the mutex +PyAPI_FUNC(void) +_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m); + +PyAPI_FUNC(void) +_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, + int is_m1_locked); + +static inline void +_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m) +{ + if (PyMutex_LockFast(&m->v)) { + PyThreadState *tstate = _PyThreadState_GET(); + c->mutex = m; + c->prev = tstate->critical_section; + tstate->critical_section = (uintptr_t)c; + } + else { + _PyCriticalSection_BeginSlow(c, m); + } +} + +// Removes the top-most critical section from the thread's of critical +// sections. If the new top-most critical section is inactive, then it is +// resumed. 
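+//
+// ("Inactive" means the section was suspended by
+// `_PyCriticalSection_SuspendAll()`: its tagged pointer has the
+// `_Py_CRITICAL_SECTION_INACTIVE` bit set, and resuming it reacquires its
+// lock(s) and clears that bit.)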
+static inline void +_PyCriticalSection_Pop(_PyCriticalSection *c) +{ + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t prev = c->prev; + tstate->critical_section = prev; + + if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) { + _PyCriticalSection_Resume(tstate); + } +} + +static inline void +_PyCriticalSection_End(_PyCriticalSection *c) +{ + PyMutex_Unlock(c->mutex); + _PyCriticalSection_Pop(c); +} + +static inline void +_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) +{ + if (m1 == m2) { + // If the two mutex arguments are the same, treat this as a critical + // section with a single mutex. + c->mutex2 = NULL; + _PyCriticalSection_Begin(&c->base, m1); + return; + } + + if ((uintptr_t)m2 < (uintptr_t)m1) { + // Sort the mutexes so that the lower address is locked first. + PyMutex *tmp = m1; + m1 = m2; + m2 = tmp; + } + + if (PyMutex_LockFast(&m1->v)) { + if (PyMutex_LockFast(&m2->v)) { + PyThreadState *tstate = _PyThreadState_GET(); + c->base.mutex = m1; + c->mutex2 = m2; + c->base.prev = tstate->critical_section; + + uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES; + tstate->critical_section = p; + } + else { + _PyCriticalSection2_BeginSlow(c, m1, m2, 1); + } + } + else { + _PyCriticalSection2_BeginSlow(c, m1, m2, 0); + } +} + +static inline void +_PyCriticalSection2_End(_PyCriticalSection2 *c) +{ + if (c->mutex2) { + PyMutex_Unlock(c->mutex2); + } + PyMutex_Unlock(c->base.mutex); + _PyCriticalSection_Pop(&c->base); +} + +PyAPI_FUNC(void) +_PyCriticalSection_SuspendAll(PyThreadState *tstate); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CRITICAL_SECTION_H */ diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index c4bb76a40e7b12..d92b5632662ca1 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -32,9 +32,13 @@ extern "C" { // PyMutex_Lock(&m); // ... // PyMutex_Unlock(&m); -typedef struct _PyMutex { - uint8_t v; -} PyMutex; + +// NOTE: In Py_NOGIL builds, struct _PyMutex is defined in Include/object.h +#ifndef Py_NOGIL +struct _PyMutex { uint8_t v; }; +#endif + +typedef struct _PyMutex PyMutex; #define _Py_UNLOCKED 0 #define _Py_LOCKED 1 @@ -46,6 +50,13 @@ PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m); // (private) slow path for unlocking the mutex PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m); +static inline int +PyMutex_LockFast(uint8_t *lock_bits) +{ + uint8_t expected = _Py_UNLOCKED; + return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED); +} + // Locks the mutex. // // If the mutex is currently locked, the calling thread will be parked until diff --git a/Include/object.h b/Include/object.h index 1c7d7f407fe23e..54d4d5b9909afb 100644 --- a/Include/object.h +++ b/Include/object.h @@ -119,7 +119,7 @@ check by comparing the reference count field to the immortality reference count. 
{ \ 0, \ 0, \ - 0, \ + { 0 }, \ 0, \ _Py_IMMORTAL_REFCNT_LOCAL, \ 0, \ @@ -204,10 +204,12 @@ struct _object { // Create a shared field from a refcnt and desired flags #define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags)) +struct _PyMutex { uint8_t v; }; + struct _object { uintptr_t ob_tid; // thread id (or zero) uint16_t _padding; - uint8_t ob_mutex; // per-object lock + struct _PyMutex ob_mutex; // per-object lock uint8_t ob_gc_bits; // gc-related state uint32_t ob_ref_local; // local reference count Py_ssize_t ob_ref_shared; // shared (atomic) reference count diff --git a/Makefile.pre.in b/Makefile.pre.in index f2b252cce9775c..c66a873abfbdf5 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -409,6 +409,7 @@ PYTHON_OBJS= \ Python/codecs.o \ Python/compile.o \ Python/context.o \ + Python/critical_section.o \ Python/crossinterp.o \ Python/dynamic_annotations.o \ Python/errors.o \ @@ -1801,6 +1802,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_complexobject.h \ $(srcdir)/Include/internal/pycore_condvar.h \ $(srcdir)/Include/internal/pycore_context.h \ + $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ $(srcdir)/Include/internal/pycore_dict.h \ $(srcdir)/Include/internal/pycore_dict_state.h \ diff --git a/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst b/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst new file mode 100644 index 00000000000000..c2bd3ae36e6439 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-10-31-14-58-17.gh-issue-111569._V8iu4.rst @@ -0,0 +1,3 @@ +Implement "Python Critical Sections" from :pep:`703`. These are macros to +help replace the GIL with per-object locks in the ``--disable-gil`` build of +CPython. The macros are no-ops in the default build. 
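For context, a minimal sketch of how extension code might use these macros (hypothetical; `locked_dict_size` is an illustrative name, not part of this patch, and in the default build the macros compile to nothing):

    /* Hypothetical usage sketch, not part of this patch. Locking only has
     * an effect in --disable-gil builds; elsewhere the macros are no-ops. */
    static Py_ssize_t
    locked_dict_size(PyObject *dict)
    {
        Py_ssize_t n;
        Py_BEGIN_CRITICAL_SECTION(dict);  // acquires dict's per-object lock
        n = PyDict_GET_SIZE(dict);        // no other thread can mutate dict here
        Py_END_CRITICAL_SECTION;          // releases the lock
        return n;
    }

Note that the begin/end macros open and close a C block, so a critical section must not be exited with `return` or `goto` in between.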
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index c73522b8ecf426..24b17d2ae47493 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -158,7 +158,7 @@ @MODULE_XXSUBTYPE_TRUE@xxsubtype xxsubtype.c @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c -@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c +@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c @MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/pyos.c _testcapi/immortal.c _testcapi/heaptype_relative.c _testcapi/gc.c _testcapi/sys.c @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index a71e7e1dcc1256..f5c77de3566ef0 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1719,6 +1719,9 @@ module_exec(PyObject *module) if (_PyTestInternalCapi_Init_Set(module) < 0) { return 1; } + if (_PyTestInternalCapi_Init_CriticalSection(module) < 0) { + return 1; + } if (PyModule_Add(module, "SIZEOF_PYGC_HEAD", PyLong_FromSsize_t(sizeof(PyGC_Head))) < 0) { diff --git a/Modules/_testinternalcapi/parts.h b/Modules/_testinternalcapi/parts.h index 3d2774e3f1b404..49a1395f499fc3 100644 --- a/Modules/_testinternalcapi/parts.h +++ b/Modules/_testinternalcapi/parts.h @@ -13,5 +13,6 @@ int _PyTestInternalCapi_Init_Lock(PyObject *module); int _PyTestInternalCapi_Init_PyTime(PyObject *module); int _PyTestInternalCapi_Init_Set(PyObject *module); +int _PyTestInternalCapi_Init_CriticalSection(PyObject *module); #endif // Py_TESTINTERNALCAPI_PARTS_H diff --git a/Modules/_testinternalcapi/test_critical_sections.c b/Modules/_testinternalcapi/test_critical_sections.c new file mode 100644 index 00000000000000..0d6a6bd534f9dd --- /dev/null +++ b/Modules/_testinternalcapi/test_critical_sections.c @@ -0,0 +1,204 @@ +/* + * C Extension module to test pycore_critical_section.h API. + */ + +#include "parts.h" + +#include "pycore_critical_section.h" + +#ifdef Py_NOGIL +#define assert_nogil assert +#define assert_gil(x) +#else +#define assert_gil assert +#define assert_nogil(x) +#endif + + +static PyObject * +test_critical_sections(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyObject *d1 = PyDict_New(); + assert(d1 != NULL); + + PyObject *d2 = PyDict_New(); + assert(d2 != NULL); + + // Beginning a critical section should lock the associated object and + // push the critical section onto the thread's stack (in Py_NOGIL builds). 
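+    // (assert_nogil and assert_gil, defined at the top of this file, select
+    // which assertion applies for the current build.)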
+ Py_BEGIN_CRITICAL_SECTION(d1); + assert_nogil(PyMutex_IsLocked(&d1->ob_mutex)); + assert_nogil(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert_gil(PyThreadState_GET()->critical_section == 0); + Py_END_CRITICAL_SECTION; + assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex)); + + Py_BEGIN_CRITICAL_SECTION2(d1, d2); + assert_nogil(PyMutex_IsLocked(&d1->ob_mutex)); + assert_nogil(PyMutex_IsLocked(&d2->ob_mutex)); + Py_END_CRITICAL_SECTION2; + + // Passing the same object twice should work (and not deadlock). + Py_BEGIN_CRITICAL_SECTION2(d2, d2); + assert_nogil(PyMutex_IsLocked(&d2->ob_mutex)); + Py_END_CRITICAL_SECTION2; + + Py_DECREF(d2); + Py_DECREF(d1); + Py_RETURN_NONE; +} + +static void +lock_unlock_object(PyObject *obj, int recurse_depth) +{ + Py_BEGIN_CRITICAL_SECTION(obj); + if (recurse_depth > 0) { + lock_unlock_object(obj, recurse_depth - 1); + } + Py_END_CRITICAL_SECTION; +} + +static void +lock_unlock_two_objects(PyObject *a, PyObject *b, int recurse_depth) +{ + Py_BEGIN_CRITICAL_SECTION2(a, b); + if (recurse_depth > 0) { + lock_unlock_two_objects(a, b, recurse_depth - 1); + } + Py_END_CRITICAL_SECTION2; +} + + +// Test that nested critical sections do not deadlock if they attempt to lock +// the same object. +static PyObject * +test_critical_sections_nest(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyObject *a = PyDict_New(); + assert(a != NULL); + PyObject *b = PyDict_New(); + assert(b != NULL); + + // Locking an object recursively with this API should not deadlock. + Py_BEGIN_CRITICAL_SECTION(a); + lock_unlock_object(a, 10); + assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); + Py_END_CRITICAL_SECTION; + + // Same test but with two objects. + Py_BEGIN_CRITICAL_SECTION2(b, a); + lock_unlock_two_objects(a, b, 10); + assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); + assert_nogil(PyMutex_IsLocked(&b->ob_mutex)); + Py_END_CRITICAL_SECTION2; + + Py_DECREF(b); + Py_DECREF(a); + Py_RETURN_NONE; +} + +// Test that a critical section is suspened by a Py_BEGIN_ALLOW_THREADS and +// resumed by a Py_END_ALLOW_THREADS. +static PyObject * +test_critical_sections_suspend(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyObject *a = PyDict_New(); + assert(a != NULL); + + Py_BEGIN_CRITICAL_SECTION(a); + assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); + + // Py_BEGIN_ALLOW_THREADS should suspend the active critical section + Py_BEGIN_ALLOW_THREADS + assert_nogil(!PyMutex_IsLocked(&a->ob_mutex)); + Py_END_ALLOW_THREADS; + + // After Py_END_ALLOW_THREADS the critical section should be resumed. 
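+    // (Resumption happens in _PyThreadState_Attach(), which calls
+    // _PyCriticalSection_Resume() to reacquire the lock of the top-most
+    // critical section.)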
+ assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); + Py_END_CRITICAL_SECTION; + + Py_DECREF(a); + Py_RETURN_NONE; +} + +struct test_data { + PyObject *obj1; + PyObject *obj2; + PyObject *obj3; + Py_ssize_t countdown; + PyEvent done_event; +}; + +static void +thread_critical_sections(void *arg) +{ + const Py_ssize_t NUM_ITERS = 200; + struct test_data *test_data = arg; + PyGILState_STATE gil = PyGILState_Ensure(); + + for (Py_ssize_t i = 0; i < NUM_ITERS; i++) { + Py_BEGIN_CRITICAL_SECTION(test_data->obj1); + Py_END_CRITICAL_SECTION; + + Py_BEGIN_CRITICAL_SECTION(test_data->obj2); + lock_unlock_object(test_data->obj1, 1); + Py_END_CRITICAL_SECTION; + + Py_BEGIN_CRITICAL_SECTION2(test_data->obj3, test_data->obj1); + lock_unlock_object(test_data->obj2, 2); + Py_END_CRITICAL_SECTION2; + + Py_BEGIN_CRITICAL_SECTION(test_data->obj3); + Py_BEGIN_ALLOW_THREADS + Py_END_ALLOW_THREADS + Py_END_CRITICAL_SECTION; + } + + PyGILState_Release(gil); + if (_Py_atomic_add_ssize(&test_data->countdown, -1) == 1) { + // last thread to finish sets done_event + _PyEvent_Notify(&test_data->done_event); + } +} + +static PyObject * +test_critical_sections_threads(PyObject *self, PyObject *Py_UNUSED(args)) +{ + const Py_ssize_t NUM_THREADS = 4; + struct test_data test_data = { + .obj1 = PyDict_New(), + .obj2 = PyDict_New(), + .obj3 = PyDict_New(), + .countdown = NUM_THREADS, + }; + assert(test_data.obj1 != NULL); + assert(test_data.obj2 != NULL); + assert(test_data.obj3 != NULL); + + for (int i = 0; i < NUM_THREADS; i++) { + PyThread_start_new_thread(&thread_critical_sections, &test_data); + } + PyEvent_Wait(&test_data.done_event); + + Py_DECREF(test_data.obj3); + Py_DECREF(test_data.obj2); + Py_DECREF(test_data.obj1); + Py_RETURN_NONE; +} + +static PyMethodDef test_methods[] = { + {"test_critical_sections", test_critical_sections, METH_NOARGS}, + {"test_critical_sections_nest", test_critical_sections_nest, METH_NOARGS}, + {"test_critical_sections_suspend", test_critical_sections_suspend, METH_NOARGS}, + {"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS}, + {NULL, NULL} /* sentinel */ +}; + +int +_PyTestInternalCapi_Init_CriticalSection(PyObject *mod) +{ + if (PyModule_AddFunctions(mod, test_methods) < 0) { + return -1; + } + return 0; +} diff --git a/Objects/object.c b/Objects/object.c index 35c7e7bf33b135..d7414579376d5b 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2365,7 +2365,7 @@ new_reference(PyObject *op) #else op->ob_tid = _Py_ThreadId(); op->_padding = 0; - op->ob_mutex = 0; + op->ob_mutex = (struct _PyMutex){ 0 }; op->ob_gc_bits = 0; op->ob_ref_local = 1; op->ob_ref_shared = 0; diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 05b8bfdc38a99c..b4d97c9f0774da 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -209,6 +209,7 @@ + @@ -235,12 +236,14 @@ + + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index d6cbd2d3d47361..2a9ec0ef2d8475 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -103,6 +103,9 @@ Source Files + + Source Files + Source Files @@ -226,6 +229,9 @@ Source Files + + Source Files + Source Files @@ -334,6 +340,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/_testinternalcapi.vcxproj b/PCbuild/_testinternalcapi.vcxproj index a729ab3877d91f..558f66ca95cd33 100644 --- a/PCbuild/_testinternalcapi.vcxproj +++ b/PCbuild/_testinternalcapi.vcxproj @@ -95,6 +95,7 @@ + diff --git 
a/PCbuild/_testinternalcapi.vcxproj.filters b/PCbuild/_testinternalcapi.vcxproj.filters index 9c8a5d793ee0f4..abfeeb39630daf 100644 --- a/PCbuild/_testinternalcapi.vcxproj.filters +++ b/PCbuild/_testinternalcapi.vcxproj.filters @@ -15,6 +15,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 954a59a0bc7019..e60a6ca0f43cad 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -216,6 +216,7 @@ + @@ -562,6 +563,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 2f8b206f973f34..34c0060f194000 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -579,6 +579,9 @@ Include\internal + + Include\internal + Include\internal @@ -1283,6 +1286,9 @@ Python + + Python + Source Files diff --git a/Python/critical_section.c b/Python/critical_section.c new file mode 100644 index 00000000000000..2214d80eeb297b --- /dev/null +++ b/Python/critical_section.c @@ -0,0 +1,100 @@ +#include "Python.h" + +#include "pycore_lock.h" +#include "pycore_critical_section.h" + +static_assert(_Alignof(_PyCriticalSection) >= 4, + "critical section must be aligned to at least 4 bytes"); + +void +_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m) +{ + PyThreadState *tstate = _PyThreadState_GET(); + c->mutex = NULL; + c->prev = (uintptr_t)tstate->critical_section; + tstate->critical_section = (uintptr_t)c; + + _PyMutex_LockSlow(m); + c->mutex = m; +} + +void +_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, + int is_m1_locked) +{ + PyThreadState *tstate = _PyThreadState_GET(); + c->base.mutex = NULL; + c->mutex2 = NULL; + c->base.prev = tstate->critical_section; + tstate->critical_section = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES; + + if (!is_m1_locked) { + PyMutex_Lock(m1); + } + PyMutex_Lock(m2); + c->base.mutex = m1; + c->mutex2 = m2; +} + +static _PyCriticalSection * +untag_critical_section(uintptr_t tag) +{ + return (_PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK); +} + +// Release all locks held by critical sections. This is called by +// _PyThreadState_Detach. 
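+//
+// Walks the stack of critical sections rooted at tstate->critical_section,
+// unlocking each section's mutex(es) and setting the
+// _Py_CRITICAL_SECTION_INACTIVE bit on its tagged pointer so that
+// _PyCriticalSection_Resume() knows which locks to reacquire later.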
+void +_PyCriticalSection_SuspendAll(PyThreadState *tstate) +{ + uintptr_t *tagptr = &tstate->critical_section; + while (_PyCriticalSection_IsActive(*tagptr)) { + _PyCriticalSection *c = untag_critical_section(*tagptr); + + if (c->mutex) { + PyMutex_Unlock(c->mutex); + if ((*tagptr & _Py_CRITICAL_SECTION_TWO_MUTEXES)) { + _PyCriticalSection2 *c2 = (_PyCriticalSection2 *)c; + if (c2->mutex2) { + PyMutex_Unlock(c2->mutex2); + } + } + } + + *tagptr |= _Py_CRITICAL_SECTION_INACTIVE; + tagptr = &c->prev; + } +} + +void +_PyCriticalSection_Resume(PyThreadState *tstate) +{ + uintptr_t p = tstate->critical_section; + _PyCriticalSection *c = untag_critical_section(p); + assert(!_PyCriticalSection_IsActive(p)); + + PyMutex *m1 = c->mutex; + c->mutex = NULL; + + PyMutex *m2 = NULL; + _PyCriticalSection2 *c2 = NULL; + if ((p & _Py_CRITICAL_SECTION_TWO_MUTEXES)) { + c2 = (_PyCriticalSection2 *)c; + m2 = c2->mutex2; + c2->mutex2 = NULL; + } + + if (m1) { + PyMutex_Lock(m1); + } + if (m2) { + PyMutex_Lock(m2); + } + + c->mutex = m1; + if (m2) { + c2->mutex2 = m2; + } + + tstate->critical_section &= ~_Py_CRITICAL_SECTION_INACTIVE; +} diff --git a/Python/pystate.c b/Python/pystate.c index d97a03caf491c4..fba586404d1466 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_ceval.h" #include "pycore_code.h" // stats +#include "pycore_critical_section.h" // _PyCriticalSection_Resume() #include "pycore_dtoa.h" // _dtoa_state_INIT() #include "pycore_emscripten_trampoline.h" // _Py_EmscriptenTrampoline_Init() #include "pycore_frame.h" @@ -1930,6 +1931,12 @@ _PyThreadState_Attach(PyThreadState *tstate) Py_FatalError("thread attach failed"); } + // Resume previous critical section. This acquires the lock(s) from the + // top-most critical section. + if (tstate->critical_section != 0) { + _PyCriticalSection_Resume(tstate); + } + #if defined(Py_DEBUG) errno = err; #endif @@ -1941,6 +1948,9 @@ _PyThreadState_Detach(PyThreadState *tstate) // XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate)); assert(tstate->state == _Py_THREAD_ATTACHED); assert(tstate == current_fast_get(&_PyRuntime)); + if (tstate->critical_section != 0) { + _PyCriticalSection_SuspendAll(tstate); + } tstate_set_detached(tstate); tstate_deactivate(tstate); current_fast_clear(&_PyRuntime); From da3c6c361184d8258f2b33e606171c716241281f Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 6 Nov 2023 18:46:03 -0500 Subject: [PATCH 2/5] Changes from review. --- Include/internal/pycore_critical_section.h | 105 ++++++++++++------ .../test_critical_sections.c | 35 +++--- 2 files changed, 91 insertions(+), 49 deletions(-) diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index c2875b38f55a0d..668a251f331dbd 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -14,35 +14,64 @@ extern "C" { #endif // Implementation of Python critical sections: helpers to replace the global -// interpreter lock with fine grained locking. +// interpreter lock with per-object locks, while avoiding deadlocks. // -// A Python critical section is a region of code that can only be executed by -// a single thread at at time. The regions begin with a call to -// Py_BEGIN_CRITICAL_SECTION and ends with Py_END_CRITICAL_SECTION. Active -// critical sections are *implicitly* suspended whenever the thread calls -// `_PyThreadState_Detach()` (i.e., at tiems when the interpreter would have -// released the GIL). 
+// NOTE: These APIs are no-ops in non-free-threaded builds. // -// The most recent critical section is resumed when `_PyThreadState_Attach()` -// is called. See `_PyCriticalSection_Resume()`. +// Straightforward per-object locking could introduce deadlocks that were not +// present when running with the GIL. Threads may hold locks for multiple +// objects simultaneously because Python operations can nest. If threads were +// to acquire the same locks in different orders, they would deadlock. // -// The purpose of implicitly ending critical sections is to avoid deadlocks -// when locking multiple objects. Any time a thread would have released the -// GIL, it releases all locks from active critical sections. This includes -// times when a thread blocks while attempting to acquire a lock. +// One way to avoid deadlocks is to allow threads to hold only the lock (or +// locks) for a single operation at a time (typically a single lock, but some +// operations involve two locks). When a thread begins a nested operation it +// could suspend the locks for any outer operation: before beginning the nested +// operation, the locks for the outer operation are released and when the +// nested operation completes, the locks for the outer operation are +// reacquired. // -// Note that the macros Py_BEGIN_CRITICAL_SECTION and Py_END_CRITICAL_SECTION -// are no-ops in non-free-threaded builds. +// To improve performance, this API uses a variation of the above scheme. +// Instead of immediately suspending locks any time a nested operation begins, +// locks are only suspended if the thread would block. This reduces the number +// of lock acquisitions and releases for nested operations, while avoiding +// deadlocks. +// +// Additionally, the locks for any active operation are suspended around +// other potentially blocking operations, such as I/O. This is because the +// interaction between locks and blocking operations can lead to deadlocks in +// the same way as the interaction between multiple locks. +// +// Each thread's critical sections and their corresponding locks are tracked in +// a stack in `PyThreadState.critical_section`. When a thread calls +// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when +// waiting to acquiring a lock, the thread suspends all of it's active critical +// sections, temporarily releasing the associated locks. When the thread calls +// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent) +// critical section by reacquiring the associated lock or locks. See +// `_PyCriticalSection_Resume()`. +// +// NOTE: Only the top-most critical section is guaranteed to be active. +// Operations that need to lock two objects at once must use +// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections +// to lock more than objects at once, because the inner critical section may +// suspend the outer critical sections. This API does not provide a way to +// lock more than two objects at once. +// +// NOTE: Critical sections implicitly behave like reentrant locks because +// attempting to acquire the same lock will suspend any outer (earlier) +// critical sections. However, they are less efficient for this use case than +// purposefully designed reentrant locks. // // Example usage: // Py_BEGIN_CRITICAL_SECTION(op); // ... -// Py_END_CRITICAL_SECTION; +// Py_END_CRITICAL_SECTION(); // // To lock two objects at once: // Py_BEGIN_CRITICAL_SECTION2(op1, op2); // ... 
-// Py_END_CRITICAL_SECTION2; +// Py_END_CRITICAL_SECTION2(); // Tagged pointers to critical sections use the two least significant bits to @@ -53,28 +82,30 @@ extern "C" { #define _Py_CRITICAL_SECTION_MASK 0x3 #ifdef Py_NOGIL -#define Py_BEGIN_CRITICAL_SECTION(op) { \ - _PyCriticalSection _cs; \ - _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex) +# define Py_BEGIN_CRITICAL_SECTION(op) \ + { \ + _PyCriticalSection _cs; \ + _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex) -#define Py_END_CRITICAL_SECTION \ - _PyCriticalSection_End(&_cs); \ -} +# define Py_END_CRITICAL_SECTION() \ + _PyCriticalSection_End(&_cs); \ + } -#define Py_BEGIN_CRITICAL_SECTION2(a, b) { \ - _PyCriticalSection2 _cs2; \ - _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex) +# define Py_BEGIN_CRITICAL_SECTION2(a, b) \ + { \ + _PyCriticalSection2 _cs2; \ + _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex) -#define Py_END_CRITICAL_SECTION2 \ - _PyCriticalSection2_End(&_cs2); \ -} -#else +# define Py_END_CRITICAL_SECTION2() \ + _PyCriticalSection2_End(&_cs2); \ + } +#else /* !Py_NOGIL */ // The critical section APIs are no-ops with the GIL. -#define Py_BEGIN_CRITICAL_SECTION(op) -#define Py_END_CRITICAL_SECTION -#define Py_BEGIN_CRITICAL_SECTION2(a, b) -#define Py_END_CRITICAL_SECTION2 -#endif +# define Py_BEGIN_CRITICAL_SECTION(op) +# define Py_END_CRITICAL_SECTION() +# define Py_BEGIN_CRITICAL_SECTION2(a, b) +# define Py_END_CRITICAL_SECTION2() +#endif /* !Py_NOGIL */ typedef struct { // Tagged pointer to an outer active critical section (or 0). @@ -126,7 +157,7 @@ _PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m) } } -// Removes the top-most critical section from the thread's of critical +// Removes the top-most critical section from the thread's stack of critical // sections. If the new top-most critical section is inactive, then it is // resumed. static inline void @@ -161,6 +192,8 @@ _PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) if ((uintptr_t)m2 < (uintptr_t)m1) { // Sort the mutexes so that the lower address is locked first. + // We need to acquire the mutexes in a consistent order to avoid + // lock ordering deadlocks. PyMutex *tmp = m1; m1 = m2; m2 = tmp; diff --git a/Modules/_testinternalcapi/test_critical_sections.c b/Modules/_testinternalcapi/test_critical_sections.c index 0d6a6bd534f9dd..7813ce7561dea5 100644 --- a/Modules/_testinternalcapi/test_critical_sections.c +++ b/Modules/_testinternalcapi/test_critical_sections.c @@ -30,18 +30,24 @@ test_critical_sections(PyObject *self, PyObject *Py_UNUSED(args)) assert_nogil(PyMutex_IsLocked(&d1->ob_mutex)); assert_nogil(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); assert_gil(PyThreadState_GET()->critical_section == 0); - Py_END_CRITICAL_SECTION; + Py_END_CRITICAL_SECTION(); assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex)); + assert(!PyMutex_IsLocked(&d1->ob_mutex)); + assert(!PyMutex_IsLocked(&d2->ob_mutex)); Py_BEGIN_CRITICAL_SECTION2(d1, d2); assert_nogil(PyMutex_IsLocked(&d1->ob_mutex)); assert_nogil(PyMutex_IsLocked(&d2->ob_mutex)); - Py_END_CRITICAL_SECTION2; + Py_END_CRITICAL_SECTION2(); + assert(!PyMutex_IsLocked(&d1->ob_mutex)); + assert(!PyMutex_IsLocked(&d2->ob_mutex)); // Passing the same object twice should work (and not deadlock). 
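+    // (_PyCriticalSection2_Begin detects m1 == m2 and degrades to a
+    // single-mutex critical section, so no double-lock is attempted.)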
+ assert(!PyMutex_IsLocked(&d2->ob_mutex)); Py_BEGIN_CRITICAL_SECTION2(d2, d2); assert_nogil(PyMutex_IsLocked(&d2->ob_mutex)); - Py_END_CRITICAL_SECTION2; + Py_END_CRITICAL_SECTION2(); + assert(!PyMutex_IsLocked(&d2->ob_mutex)); Py_DECREF(d2); Py_DECREF(d1); @@ -55,7 +61,7 @@ lock_unlock_object(PyObject *obj, int recurse_depth) if (recurse_depth > 0) { lock_unlock_object(obj, recurse_depth - 1); } - Py_END_CRITICAL_SECTION; + Py_END_CRITICAL_SECTION(); } static void @@ -65,7 +71,7 @@ lock_unlock_two_objects(PyObject *a, PyObject *b, int recurse_depth) if (recurse_depth > 0) { lock_unlock_two_objects(a, b, recurse_depth - 1); } - Py_END_CRITICAL_SECTION2; + Py_END_CRITICAL_SECTION2(); } @@ -80,24 +86,27 @@ test_critical_sections_nest(PyObject *self, PyObject *Py_UNUSED(args)) assert(b != NULL); // Locking an object recursively with this API should not deadlock. + assert(!PyMutex_IsLocked(&a->ob_mutex)); Py_BEGIN_CRITICAL_SECTION(a); + assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); lock_unlock_object(a, 10); assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); - Py_END_CRITICAL_SECTION; + Py_END_CRITICAL_SECTION(); + assert(!PyMutex_IsLocked(&a->ob_mutex)); // Same test but with two objects. Py_BEGIN_CRITICAL_SECTION2(b, a); lock_unlock_two_objects(a, b, 10); assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); assert_nogil(PyMutex_IsLocked(&b->ob_mutex)); - Py_END_CRITICAL_SECTION2; + Py_END_CRITICAL_SECTION2(); Py_DECREF(b); Py_DECREF(a); Py_RETURN_NONE; } -// Test that a critical section is suspened by a Py_BEGIN_ALLOW_THREADS and +// Test that a critical section is suspended by a Py_BEGIN_ALLOW_THREADS and // resumed by a Py_END_ALLOW_THREADS. static PyObject * test_critical_sections_suspend(PyObject *self, PyObject *Py_UNUSED(args)) @@ -115,7 +124,7 @@ test_critical_sections_suspend(PyObject *self, PyObject *Py_UNUSED(args)) // After Py_END_ALLOW_THREADS the critical section should be resumed. assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); - Py_END_CRITICAL_SECTION; + Py_END_CRITICAL_SECTION(); Py_DECREF(a); Py_RETURN_NONE; @@ -138,20 +147,20 @@ thread_critical_sections(void *arg) for (Py_ssize_t i = 0; i < NUM_ITERS; i++) { Py_BEGIN_CRITICAL_SECTION(test_data->obj1); - Py_END_CRITICAL_SECTION; + Py_END_CRITICAL_SECTION(); Py_BEGIN_CRITICAL_SECTION(test_data->obj2); lock_unlock_object(test_data->obj1, 1); - Py_END_CRITICAL_SECTION; + Py_END_CRITICAL_SECTION(); Py_BEGIN_CRITICAL_SECTION2(test_data->obj3, test_data->obj1); lock_unlock_object(test_data->obj2, 2); - Py_END_CRITICAL_SECTION2; + Py_END_CRITICAL_SECTION2(); Py_BEGIN_CRITICAL_SECTION(test_data->obj3); Py_BEGIN_ALLOW_THREADS Py_END_ALLOW_THREADS - Py_END_CRITICAL_SECTION; + Py_END_CRITICAL_SECTION(); } PyGILState_Release(gil); From 3fbc42dfca26570d19ee7f9737931758e10471a1 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 7 Nov 2023 13:41:02 -0500 Subject: [PATCH 3/5] Update comments --- Include/internal/pycore_critical_section.h | 4 ++-- Include/internal/pycore_lock.h | 5 ++++- Include/object.h | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 668a251f331dbd..3360196c8a47fd 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -192,8 +192,8 @@ _PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) if ((uintptr_t)m2 < (uintptr_t)m1) { // Sort the mutexes so that the lower address is locked first. 
- // We need to acquire the mutexes in a consistent order to avoid - // lock ordering deadlocks. + // The exact order does not matter, but we need to acquire the mutexes + // in a consistent order to avoid lock ordering deadlocks. PyMutex *tmp = m1; m1 = m2; m2 = tmp; diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index d92b5632662ca1..fe5e21fad221e4 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -33,7 +33,10 @@ extern "C" { // ... // PyMutex_Unlock(&m); -// NOTE: In Py_NOGIL builds, struct _PyMutex is defined in Include/object.h +// NOTE: In Py_NOGIL builds, `struct _PyMutex` is defined in Include/object.h. +// The Py_NOGIL builds need the definition in Include/object.h for the +// `ob_mutex` field in PyObject. For the default (non-free-threaded) build, +// we define the struct here to avoid exposing it in the public API. #ifndef Py_NOGIL struct _PyMutex { uint8_t v; }; #endif diff --git a/Include/object.h b/Include/object.h index 34162f81240957..f6693562d211c5 100644 --- a/Include/object.h +++ b/Include/object.h @@ -204,6 +204,8 @@ struct _object { // Create a shared field from a refcnt and desired flags #define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags)) +// NOTE: In non-free-threaded builds, `struct _PyMutex` is defined in +// pycore_lock.h. See pycore_lock.h for more details. struct _PyMutex { uint8_t v; }; struct _object { From d4349048dff01f38586cd210065eba893eeb8189 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 7 Nov 2023 13:46:23 -0500 Subject: [PATCH 4/5] Fix default build of test_critical_sections.c --- .../_testinternalcapi/test_critical_sections.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Modules/_testinternalcapi/test_critical_sections.c b/Modules/_testinternalcapi/test_critical_sections.c index 7813ce7561dea5..238f29c3c62e64 100644 --- a/Modules/_testinternalcapi/test_critical_sections.c +++ b/Modules/_testinternalcapi/test_critical_sections.c @@ -33,21 +33,21 @@ test_critical_sections(PyObject *self, PyObject *Py_UNUSED(args)) Py_END_CRITICAL_SECTION(); assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex)); - assert(!PyMutex_IsLocked(&d1->ob_mutex)); - assert(!PyMutex_IsLocked(&d2->ob_mutex)); + assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex)); + assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex)); Py_BEGIN_CRITICAL_SECTION2(d1, d2); assert_nogil(PyMutex_IsLocked(&d1->ob_mutex)); assert_nogil(PyMutex_IsLocked(&d2->ob_mutex)); Py_END_CRITICAL_SECTION2(); - assert(!PyMutex_IsLocked(&d1->ob_mutex)); - assert(!PyMutex_IsLocked(&d2->ob_mutex)); + assert_nogil(!PyMutex_IsLocked(&d1->ob_mutex)); + assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex)); // Passing the same object twice should work (and not deadlock). - assert(!PyMutex_IsLocked(&d2->ob_mutex)); + assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex)); Py_BEGIN_CRITICAL_SECTION2(d2, d2); assert_nogil(PyMutex_IsLocked(&d2->ob_mutex)); Py_END_CRITICAL_SECTION2(); - assert(!PyMutex_IsLocked(&d2->ob_mutex)); + assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex)); Py_DECREF(d2); Py_DECREF(d1); @@ -86,13 +86,13 @@ test_critical_sections_nest(PyObject *self, PyObject *Py_UNUSED(args)) assert(b != NULL); // Locking an object recursively with this API should not deadlock. 
- assert(!PyMutex_IsLocked(&a->ob_mutex)); + assert_nogil(!PyMutex_IsLocked(&a->ob_mutex)); Py_BEGIN_CRITICAL_SECTION(a); assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); lock_unlock_object(a, 10); assert_nogil(PyMutex_IsLocked(&a->ob_mutex)); Py_END_CRITICAL_SECTION(); - assert(!PyMutex_IsLocked(&a->ob_mutex)); + assert_nogil(!PyMutex_IsLocked(&a->ob_mutex)); // Same test but with two objects. Py_BEGIN_CRITICAL_SECTION2(b, a); From 4c83a1a52b0468fbdee8b4f08f143a043c27d7b1 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 8 Nov 2023 10:57:59 -0500 Subject: [PATCH 5/5] Update pycore_critical_section.h comments from review --- Include/internal/pycore_critical_section.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 3360196c8a47fd..73c2e243f20bcc 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -13,8 +13,12 @@ extern "C" { #endif -// Implementation of Python critical sections: helpers to replace the global -// interpreter lock with per-object locks, while avoiding deadlocks. +// Implementation of Python critical sections +// +// Conceptually, critical sections are a deadlock avoidance layer on top of +// per-object locks. These helpers, in combination with those locks, replace +// our usage of the global interpreter lock to provide thread-safety for +// otherwise thread-unsafe objects, such as dict. // // NOTE: These APIs are no-ops in non-free-threaded builds. // @@ -34,8 +38,8 @@ extern "C" { // To improve performance, this API uses a variation of the above scheme. // Instead of immediately suspending locks any time a nested operation begins, // locks are only suspended if the thread would block. This reduces the number -// of lock acquisitions and releases for nested operations, while avoiding -// deadlocks. +// of lock acquisitions and releases for nested operations, while still +// avoiding deadlocks. // // Additionally, the locks for any active operation are suspended around // other potentially blocking operations, such as I/O. This is because the @@ -45,7 +49,7 @@ extern "C" { // Each thread's critical sections and their corresponding locks are tracked in // a stack in `PyThreadState.critical_section`. When a thread calls // `_PyThreadState_Detach()`, such as before a blocking I/O operation or when -// waiting to acquiring a lock, the thread suspends all of it's active critical +// waiting to acquire a lock, the thread suspends all of its active critical // sections, temporarily releasing the associated locks. When the thread calls // `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent) // critical section by reacquiring the associated lock or locks. See @@ -54,9 +58,10 @@ extern "C" { // NOTE: Only the top-most critical section is guaranteed to be active. // Operations that need to lock two objects at once must use // `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections -// to lock more than objects at once, because the inner critical section may -// suspend the outer critical sections. This API does not provide a way to -// lock more than two objects at once. +// to lock more than one object at once, because the inner critical section +// may suspend the outer critical sections. This API does not provide a way +// to lock more than two objects at once (though it could be added later +// if actually needed). 
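+//
+// For example (hypothetical code illustrating the pitfall):
+//
+//   Py_BEGIN_CRITICAL_SECTION(a);
+//   Py_BEGIN_CRITICAL_SECTION(b);  // may suspend the section holding `a`,
+//                                  // so `a` is not guaranteed to be locked
+//   ...                            // here; use Py_BEGIN_CRITICAL_SECTION2()
+//   Py_END_CRITICAL_SECTION();     // when both locks must be held at once
+//   Py_END_CRITICAL_SECTION();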
//
// NOTE: Critical sections implicitly behave like reentrant locks because
// attempting to acquire the same lock will suspend any outer (earlier)
// critical sections. However, they are less efficient for this use case than
// purposefully designed reentrant locks.
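For illustration, a sketch of that reentrant behavior (hypothetical code; assumes a free-threaded build, where the macros actually lock):

    Py_BEGIN_CRITICAL_SECTION(op);  // locks op->ob_mutex
    Py_BEGIN_CRITICAL_SECTION(op);  // fast path fails; the outer section is
                                    // suspended (op unlocked), then the inner
                                    // section locks op again
    Py_END_CRITICAL_SECTION();      // unlocks op, resumes the outer section,
                                    // which reacquires op->ob_mutex
    Py_END_CRITICAL_SECTION();      // unlocks op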