Commit 31c90d5
gh-111569: Implement Python critical section API (gh-111571)
Critical sections are helpers to replace the global interpreter lock with finer-grained locking. They provide guarantees similar to the GIL's while avoiding the deadlock risk that plain per-object locking would introduce. Critical sections are implicitly ended whenever the GIL would be released, and resumed when the GIL would be reacquired. Nested critical sections behave as if the sections were interleaved.
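
For illustration, a minimal sketch of the usage pattern this enables (the functions are hypothetical; the macros come from the new pycore_critical_section.h header added below):

    /* Illustrative sketch only, not part of this commit. In the default
     * (GIL) build the macros expand to nothing; in --disable-gil builds
     * they lock the objects' per-object mutexes. */
    static void
    inner(PyObject *b)
    {
        Py_BEGIN_CRITICAL_SECTION(b);   // may suspend outer sections if it must block
        // ... operate on b; only this top-most section is guaranteed active
        Py_END_CRITICAL_SECTION();
    }

    static void
    outer(PyObject *a, PyObject *b)
    {
        Py_BEGIN_CRITICAL_SECTION(a);   // locks a's per-object mutex
        inner(b);                       // nested sections behave as if interleaved
        Py_END_CRITICAL_SECTION();
    }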
1 parent 0b718e6 commit 31c90d5

19 files changed (+630, -7 lines)

Include/cpython/pystate.h (+7)
@@ -149,6 +149,13 @@ struct _ts {
 
     struct _py_trashcan trash;
 
+    /* Tagged pointer to top-most critical section, or zero if there is no
+     * active critical section. Critical sections are only used in
+     * `--disable-gil` builds (i.e., when Py_NOGIL is defined to 1). In the
+     * default build, this field is always zero.
+     */
+    uintptr_t critical_section;
+
     /* Called when a thread state is deleted normally, but not when it
      * is destroyed after fork().
      * Pain: to prevent rare but fatal shutdown errors (issue 18808),
Include/internal/pycore_critical_section.h (new file, +242)

@@ -0,0 +1,242 @@
+#ifndef Py_INTERNAL_CRITICAL_SECTION_H
+#define Py_INTERNAL_CRITICAL_SECTION_H
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_lock.h"        // PyMutex
+#include "pycore_pystate.h"     // _PyThreadState_GET()
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Implementation of Python critical sections
+//
+// Conceptually, critical sections are a deadlock avoidance layer on top of
+// per-object locks. These helpers, in combination with those locks, replace
+// our usage of the global interpreter lock to provide thread-safety for
+// otherwise thread-unsafe objects, such as dict.
+//
+// NOTE: These APIs are no-ops in non-free-threaded builds.
+//
+// Straightforward per-object locking could introduce deadlocks that were not
+// present when running with the GIL. Threads may hold locks for multiple
+// objects simultaneously because Python operations can nest. If threads were
+// to acquire the same locks in different orders, they would deadlock.
+//
+// One way to avoid deadlocks is to allow threads to hold only the lock (or
+// locks) for a single operation at a time (typically a single lock, but some
+// operations involve two locks). When a thread begins a nested operation it
+// could suspend the locks for any outer operation: before beginning the nested
+// operation, the locks for the outer operation are released and when the
+// nested operation completes, the locks for the outer operation are
+// reacquired.
+//
+// To improve performance, this API uses a variation of the above scheme.
+// Instead of immediately suspending locks any time a nested operation begins,
+// locks are only suspended if the thread would block. This reduces the number
+// of lock acquisitions and releases for nested operations, while still
+// avoiding deadlocks.
+//
+// Additionally, the locks for any active operation are suspended around
+// other potentially blocking operations, such as I/O. This is because the
+// interaction between locks and blocking operations can lead to deadlocks in
+// the same way as the interaction between multiple locks.
+//
+// Each thread's critical sections and their corresponding locks are tracked in
+// a stack in `PyThreadState.critical_section`. When a thread calls
+// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when
+// waiting to acquire a lock, the thread suspends all of its active critical
+// sections, temporarily releasing the associated locks. When the thread calls
+// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent)
+// critical section by reacquiring the associated lock or locks. See
+// `_PyCriticalSection_Resume()`.
+//
+// NOTE: Only the top-most critical section is guaranteed to be active.
+// Operations that need to lock two objects at once must use
+// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections
+// to lock more than one object at once, because the inner critical section
+// may suspend the outer critical sections. This API does not provide a way
+// to lock more than two objects at once (though it could be added later
+// if actually needed).
+//
+// NOTE: Critical sections implicitly behave like reentrant locks because
+// attempting to acquire the same lock will suspend any outer (earlier)
+// critical sections. However, they are less efficient for this use case than
+// purposefully designed reentrant locks.
+//
+// Example usage:
+//  Py_BEGIN_CRITICAL_SECTION(op);
+//  ...
+//  Py_END_CRITICAL_SECTION();
+//
+// To lock two objects at once:
+//  Py_BEGIN_CRITICAL_SECTION2(op1, op2);
+//  ...
+//  Py_END_CRITICAL_SECTION2();
+
+
+// Tagged pointers to critical sections use the two least significant bits to
+// mark if the pointed-to critical section is inactive and whether it is a
+// _PyCriticalSection2 object.
+#define _Py_CRITICAL_SECTION_INACTIVE       0x1
+#define _Py_CRITICAL_SECTION_TWO_MUTEXES    0x2
+#define _Py_CRITICAL_SECTION_MASK           0x3
+
+#ifdef Py_NOGIL
+# define Py_BEGIN_CRITICAL_SECTION(op)                                  \
+    {                                                                   \
+        _PyCriticalSection _cs;                                         \
+        _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex)
+
+# define Py_END_CRITICAL_SECTION()                                      \
+        _PyCriticalSection_End(&_cs);                                   \
+    }
+
+# define Py_BEGIN_CRITICAL_SECTION2(a, b)                               \
+    {                                                                   \
+        _PyCriticalSection2 _cs2;                                       \
+        _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex,  \
+                                  &_PyObject_CAST(b)->ob_mutex)
+
+# define Py_END_CRITICAL_SECTION2()                                     \
+        _PyCriticalSection2_End(&_cs2);                                 \
+    }
+#else  /* !Py_NOGIL */
+// The critical section APIs are no-ops with the GIL.
+# define Py_BEGIN_CRITICAL_SECTION(op)
+# define Py_END_CRITICAL_SECTION()
+# define Py_BEGIN_CRITICAL_SECTION2(a, b)
+# define Py_END_CRITICAL_SECTION2()
+#endif  /* !Py_NOGIL */
+
+typedef struct {
+    // Tagged pointer to an outer active critical section (or 0).
+    // The two least-significant-bits indicate whether the pointed-to critical
+    // section is inactive and whether it is a _PyCriticalSection2 object.
+    uintptr_t prev;
+
+    // Mutex used to protect critical section
+    PyMutex *mutex;
+} _PyCriticalSection;
+
+// A critical section protected by two mutexes. Use
+// _PyCriticalSection2_Begin and _PyCriticalSection2_End.
+typedef struct {
+    _PyCriticalSection base;
+
+    PyMutex *mutex2;
+} _PyCriticalSection2;
+
+static inline int
+_PyCriticalSection_IsActive(uintptr_t tag)
+{
+    return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
+}
+
+// Resumes the top-most critical section.
+PyAPI_FUNC(void)
+_PyCriticalSection_Resume(PyThreadState *tstate);
+
+// (private) slow path for locking the mutex
+PyAPI_FUNC(void)
+_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m);
+
+PyAPI_FUNC(void)
+_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
+                              int is_m1_locked);
+
+static inline void
+_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m)
+{
+    if (PyMutex_LockFast(&m->v)) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        c->mutex = m;
+        c->prev = tstate->critical_section;
+        tstate->critical_section = (uintptr_t)c;
+    }
+    else {
+        _PyCriticalSection_BeginSlow(c, m);
+    }
+}
+
+// Removes the top-most critical section from the thread's stack of critical
+// sections. If the new top-most critical section is inactive, then it is
+// resumed.
+static inline void
+_PyCriticalSection_Pop(_PyCriticalSection *c)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    uintptr_t prev = c->prev;
+    tstate->critical_section = prev;
+
+    if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) {
+        _PyCriticalSection_Resume(tstate);
+    }
+}
+
+static inline void
+_PyCriticalSection_End(_PyCriticalSection *c)
+{
+    PyMutex_Unlock(c->mutex);
+    _PyCriticalSection_Pop(c);
+}
+
+static inline void
+_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
+{
+    if (m1 == m2) {
+        // If the two mutex arguments are the same, treat this as a critical
+        // section with a single mutex.
+        c->mutex2 = NULL;
+        _PyCriticalSection_Begin(&c->base, m1);
+        return;
+    }
+
+    if ((uintptr_t)m2 < (uintptr_t)m1) {
+        // Sort the mutexes so that the lower address is locked first.
+        // The exact order does not matter, but we need to acquire the mutexes
+        // in a consistent order to avoid lock ordering deadlocks.
+        PyMutex *tmp = m1;
+        m1 = m2;
+        m2 = tmp;
+    }
+
+    if (PyMutex_LockFast(&m1->v)) {
+        if (PyMutex_LockFast(&m2->v)) {
+            PyThreadState *tstate = _PyThreadState_GET();
+            c->base.mutex = m1;
+            c->mutex2 = m2;
+            c->base.prev = tstate->critical_section;
+
+            uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
+            tstate->critical_section = p;
+        }
+        else {
+            _PyCriticalSection2_BeginSlow(c, m1, m2, 1);
+        }
+    }
+    else {
+        _PyCriticalSection2_BeginSlow(c, m1, m2, 0);
+    }
+}
+
+static inline void
+_PyCriticalSection2_End(_PyCriticalSection2 *c)
+{
+    if (c->mutex2) {
+        PyMutex_Unlock(c->mutex2);
+    }
+    PyMutex_Unlock(c->base.mutex);
+    _PyCriticalSection_Pop(&c->base);
+}
+
+PyAPI_FUNC(void)
+_PyCriticalSection_SuspendAll(PyThreadState *tstate);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_CRITICAL_SECTION_H */
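
As a reading aid (not part of the commit): `PyThreadState.critical_section` is a tagged pointer, so masking off the two flag bits recovers the section's address. The helper name below is hypothetical:

    // Hypothetical helper, for illustration only: recover the critical
    // section pointer from the tagged value in tstate->critical_section.
    // When _Py_CRITICAL_SECTION_TWO_MUTEXES is set, the pointed-to struct
    // is a _PyCriticalSection2, whose first member is its _PyCriticalSection
    // base, so the cast below still yields a usable pointer.
    static inline _PyCriticalSection *
    untag_critical_section(uintptr_t tag)
    {
        return (_PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK);
    }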

Include/internal/pycore_lock.h (+17, -3)
@@ -32,9 +32,16 @@ extern "C" {
 // PyMutex_Lock(&m);
 // ...
 // PyMutex_Unlock(&m);
-typedef struct _PyMutex {
-    uint8_t v;
-} PyMutex;
+
+// NOTE: In Py_NOGIL builds, `struct _PyMutex` is defined in Include/object.h.
+// The Py_NOGIL builds need the definition in Include/object.h for the
+// `ob_mutex` field in PyObject. For the default (non-free-threaded) build,
+// we define the struct here to avoid exposing it in the public API.
+#ifndef Py_NOGIL
+struct _PyMutex { uint8_t v; };
+#endif
+
+typedef struct _PyMutex PyMutex;
 
 #define _Py_UNLOCKED 0
 #define _Py_LOCKED 1
@@ -46,6 +53,13 @@ PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);
 // (private) slow path for unlocking the mutex
 PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);
 
+static inline int
+PyMutex_LockFast(uint8_t *lock_bits)
+{
+    uint8_t expected = _Py_UNLOCKED;
+    return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED);
+}
+
 // Locks the mutex.
 //
 // If the mutex is currently locked, the calling thread will be parked until
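
PyMutex_LockFast is the uncontended fast path: a single atomic compare-exchange from _Py_UNLOCKED to _Py_LOCKED. A minimal sketch of how a lock operation can combine it with the slow path (illustrative only; the function name is hypothetical):

    // Illustrative sketch, not part of this commit: try the one-CAS fast
    // path first, then fall back to the slow path, which parks the thread.
    static void
    lock_mutex_example(PyMutex *m)
    {
        if (!PyMutex_LockFast(&m->v)) {
            _PyMutex_LockSlow(m);
        }
    }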

Include/object.h (+6, -2)
@@ -119,7 +119,7 @@ check by comparing the reference count field to the immortality reference count.
     { \
         0, \
         0, \
-        0, \
+        { 0 }, \
         0, \
         _Py_IMMORTAL_REFCNT_LOCAL, \
         0, \
@@ -204,10 +204,14 @@ struct _object {
 // Create a shared field from a refcnt and desired flags
 #define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags))
 
+// NOTE: In non-free-threaded builds, `struct _PyMutex` is defined in
+// pycore_lock.h. See pycore_lock.h for more details.
+struct _PyMutex { uint8_t v; };
+
 struct _object {
     uintptr_t ob_tid;           // thread id (or zero)
     uint16_t _padding;
-    uint8_t ob_mutex;           // per-object lock
+    struct _PyMutex ob_mutex;   // per-object lock
     uint8_t ob_gc_bits;         // gc-related state
     uint32_t ob_ref_local;      // local reference count
     Py_ssize_t ob_ref_shared;   // shared (atomic) reference count

Makefile.pre.in (+2)
@@ -409,6 +409,7 @@ PYTHON_OBJS= \
 		Python/codecs.o \
 		Python/compile.o \
 		Python/context.o \
+		Python/critical_section.o \
 		Python/crossinterp.o \
 		Python/dynamic_annotations.o \
 		Python/errors.o \
@@ -1802,6 +1803,7 @@ PYTHON_HEADERS= \
 		$(srcdir)/Include/internal/pycore_complexobject.h \
 		$(srcdir)/Include/internal/pycore_condvar.h \
 		$(srcdir)/Include/internal/pycore_context.h \
+		$(srcdir)/Include/internal/pycore_critical_section.h \
 		$(srcdir)/Include/internal/pycore_crossinterp.h \
 		$(srcdir)/Include/internal/pycore_dict.h \
 		$(srcdir)/Include/internal/pycore_dict_state.h \
Misc/NEWS.d entry (new file, +3)

@@ -0,0 +1,3 @@
+Implement "Python Critical Sections" from :pep:`703`. These are macros to
+help replace the GIL with per-object locks in the ``--disable-gil`` build of
+CPython. The macros are no-ops in the default build.

Modules/Setup.stdlib.in (+1, -1)
@@ -158,7 +158,7 @@
 @MODULE_XXSUBTYPE_TRUE@xxsubtype xxsubtype.c
 @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c
 @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
-@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c
+@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c
 @MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/bytearray.c _testcapi/bytes.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/pyos.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/heaptype_relative.c _testcapi/gc.c _testcapi/sys.c
 @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
 @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c

Modules/_testinternalcapi.c (+3)
@@ -1687,6 +1687,9 @@ module_exec(PyObject *module)
     if (_PyTestInternalCapi_Init_Set(module) < 0) {
         return 1;
     }
+    if (_PyTestInternalCapi_Init_CriticalSection(module) < 0) {
+        return 1;
+    }
 
     if (PyModule_Add(module, "SIZEOF_PYGC_HEAD",
                      PyLong_FromSsize_t(sizeof(PyGC_Head))) < 0) {

Modules/_testinternalcapi/parts.h (+1)
@@ -13,5 +13,6 @@
 int _PyTestInternalCapi_Init_Lock(PyObject *module);
 int _PyTestInternalCapi_Init_PyTime(PyObject *module);
 int _PyTestInternalCapi_Init_Set(PyObject *module);
+int _PyTestInternalCapi_Init_CriticalSection(PyObject *module);
 
 #endif // Py_TESTINTERNALCAPI_PARTS_H
