#ifndef Py_INTERNAL_CRITICAL_SECTION_H
#define Py_INTERNAL_CRITICAL_SECTION_H

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_lock.h"        // PyMutex
#include "pycore_pystate.h"     // _PyThreadState_GET()
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Implementation of Python critical sections
//
// Conceptually, critical sections are a deadlock avoidance layer on top of
// per-object locks. These helpers, in combination with those locks, replace
// our usage of the global interpreter lock to provide thread-safety for
// otherwise thread-unsafe objects, such as dict.
//
// NOTE: These APIs are no-ops in non-free-threaded builds.
//
// Straightforward per-object locking could introduce deadlocks that were not
// present when running with the GIL. Threads may hold locks for multiple
// objects simultaneously because Python operations can nest. If threads were
// to acquire the same locks in different orders, they would deadlock.
//
// One way to avoid deadlocks is to allow threads to hold only the lock (or
// locks) for a single operation at a time (typically a single lock, but some
// operations involve two locks). When a thread begins a nested operation, it
// could suspend the locks for any outer operation: before the nested
// operation begins, the locks for the outer operation are released, and when
// the nested operation completes, the locks for the outer operation are
// reacquired.
//
// To improve performance, this API uses a variation of the above scheme.
// Instead of immediately suspending locks any time a nested operation begins,
// locks are only suspended if the thread would block. This reduces the number
// of lock acquisitions and releases for nested operations, while still
// avoiding deadlocks.
//
// Additionally, the locks for any active operation are suspended around
// other potentially blocking operations, such as I/O. This is because the
// interaction between locks and blocking operations can lead to deadlocks in
// the same way as the interaction between multiple locks.
//
// Each thread's critical sections and their corresponding locks are tracked
// in a stack in `PyThreadState.critical_section`. When a thread calls
// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when
// waiting to acquire a lock, the thread suspends all of its active critical
// sections, temporarily releasing the associated locks. When the thread calls
// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent)
// critical section by reacquiring the associated lock or locks. See
// `_PyCriticalSection_Resume()`.
//
// NOTE: Only the top-most critical section is guaranteed to be active.
// Operations that need to lock two objects at once must use
// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections
// to lock more than one object at once, because the inner critical section
// may suspend the outer critical sections. This API does not provide a way
// to lock more than two objects at once (though it could be added later
// if actually needed).
//
// NOTE: Critical sections implicitly behave like reentrant locks because
// attempting to acquire the same lock will suspend any outer (earlier)
// critical sections. However, they are less efficient for this use case than
// purposefully designed reentrant locks.
//
// Example usage:
//      Py_BEGIN_CRITICAL_SECTION(op);
//      ...
//      Py_END_CRITICAL_SECTION();
//
// To lock two objects at once:
//      Py_BEGIN_CRITICAL_SECTION2(op1, op2);
//      ...
//      Py_END_CRITICAL_SECTION2();

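// As a concrete illustration of the NOTE above (a hypothetical sketch, not
// code found elsewhere in CPython), the following nested form is NOT a safe
// way to hold two locks at once, because the inner critical section may
// suspend the outer one if it would block:
//
//      Py_BEGIN_CRITICAL_SECTION(op1);
//      Py_BEGIN_CRITICAL_SECTION(op2);   // may suspend op1's lock!
//      ...                               // op1 is not necessarily locked here
//      Py_END_CRITICAL_SECTION();
//      Py_END_CRITICAL_SECTION();
//
// Use Py_BEGIN_CRITICAL_SECTION2(op1, op2) instead, which acquires both locks
// together and keeps them active as a single critical section.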

// Tagged pointers to critical sections use the two least significant bits to
// mark whether the pointed-to critical section is inactive and whether it is
// a _PyCriticalSection2 object.
#define _Py_CRITICAL_SECTION_INACTIVE       0x1
#define _Py_CRITICAL_SECTION_TWO_MUTEXES    0x2
#define _Py_CRITICAL_SECTION_MASK           0x3
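
// For illustration, given a tagged value `tag` (e.g., the value stored in
// `PyThreadState.critical_section` or in the `prev` field below), the pointer
// and flag bits can be recovered as follows. This is a sketch of the encoding
// only; these variable names are not used elsewhere in this header:
//
//      _PyCriticalSection *cs =
//          (_PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK);
//      int inactive = (tag & _Py_CRITICAL_SECTION_INACTIVE) != 0;
//      int two_mutexes = (tag & _Py_CRITICAL_SECTION_TWO_MUTEXES) != 0;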

#ifdef Py_NOGIL
# define Py_BEGIN_CRITICAL_SECTION(op)                                  \
    {                                                                   \
        _PyCriticalSection _cs;                                         \
        _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex)

# define Py_END_CRITICAL_SECTION()                                      \
        _PyCriticalSection_End(&_cs);                                   \
    }

# define Py_BEGIN_CRITICAL_SECTION2(a, b)                               \
    {                                                                   \
        _PyCriticalSection2 _cs2;                                       \
        _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex,  \
                                  &_PyObject_CAST(b)->ob_mutex)

# define Py_END_CRITICAL_SECTION2()                                     \
        _PyCriticalSection2_End(&_cs2);                                 \
    }
#else /* !Py_NOGIL */
// The critical section APIs are no-ops with the GIL.
# define Py_BEGIN_CRITICAL_SECTION(op)
# define Py_END_CRITICAL_SECTION()
# define Py_BEGIN_CRITICAL_SECTION2(a, b)
# define Py_END_CRITICAL_SECTION2()
#endif /* !Py_NOGIL */
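
// For example, in free-threaded (Py_NOGIL) builds the pair
//
//      Py_BEGIN_CRITICAL_SECTION(op);
//      ... use op ...
//      Py_END_CRITICAL_SECTION();
//
// expands to roughly:
//
//      {
//          _PyCriticalSection _cs;
//          _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex);
//          ... use op ...
//          _PyCriticalSection_End(&_cs);
//      }
//
// The unbalanced braces in the BEGIN/END macros force them to appear as a
// matched pair within the same function, with `_cs` scoped to the enclosed
// block.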

typedef struct {
    // Tagged pointer to an outer active critical section (or 0).
    // The two least-significant bits indicate whether the pointed-to critical
    // section is inactive and whether it is a _PyCriticalSection2 object.
    uintptr_t prev;

    // Mutex used to protect the critical section.
    PyMutex *mutex;
} _PyCriticalSection;

// A critical section protected by two mutexes. Use
// _PyCriticalSection2_Begin() and _PyCriticalSection2_End().
typedef struct {
    _PyCriticalSection base;

    PyMutex *mutex2;
} _PyCriticalSection2;

// Returns whether the tagged pointer refers to a critical section that is
// currently active (i.e., its locks are held).
static inline int
_PyCriticalSection_IsActive(uintptr_t tag)
{
    return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
}

// Resumes the top-most critical section.
PyAPI_FUNC(void)
_PyCriticalSection_Resume(PyThreadState *tstate);

// (private) Slow path for locking the mutex. The thread may detach while it
// waits for the lock, which suspends its active critical sections.
PyAPI_FUNC(void)
_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m);

PyAPI_FUNC(void)
_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
                              int is_m1_locked);

static inline void
_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m)
{
    if (PyMutex_LockFast(&m->v)) {
        // Fast path: the lock was uncontended. Push this critical section
        // onto the thread's stack; no tag bits are set (active, one mutex).
        PyThreadState *tstate = _PyThreadState_GET();
        c->mutex = m;
        c->prev = tstate->critical_section;
        tstate->critical_section = (uintptr_t)c;
    }
    else {
        _PyCriticalSection_BeginSlow(c, m);
    }
}

// Removes the top-most critical section from the thread's stack of critical
// sections. If the new top-most critical section is inactive, then it is
// resumed.
static inline void
_PyCriticalSection_Pop(_PyCriticalSection *c)
{
    PyThreadState *tstate = _PyThreadState_GET();
    uintptr_t prev = c->prev;
    tstate->critical_section = prev;

    if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) {
        // The previous critical section was suspended; reacquire its lock(s).
        _PyCriticalSection_Resume(tstate);
    }
}

static inline void
_PyCriticalSection_End(_PyCriticalSection *c)
{
    PyMutex_Unlock(c->mutex);
    _PyCriticalSection_Pop(c);
}

static inline void
_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
{
    if (m1 == m2) {
        // If the two mutex arguments are the same, treat this as a critical
        // section with a single mutex.
        c->mutex2 = NULL;
        _PyCriticalSection_Begin(&c->base, m1);
        return;
    }

    if ((uintptr_t)m2 < (uintptr_t)m1) {
        // Sort the mutexes so that the lower address is locked first.
        // The exact order does not matter, but we need to acquire the mutexes
        // in a consistent order to avoid lock-ordering deadlocks.
        PyMutex *tmp = m1;
        m1 = m2;
        m2 = tmp;
    }

    if (PyMutex_LockFast(&m1->v)) {
        if (PyMutex_LockFast(&m2->v)) {
            // Fast path: both locks were uncontended. Push this critical
            // section onto the thread's stack, tagged as a two-mutex section.
            PyThreadState *tstate = _PyThreadState_GET();
            c->base.mutex = m1;
            c->mutex2 = m2;
            c->base.prev = tstate->critical_section;

            uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
            tstate->critical_section = p;
        }
        else {
            _PyCriticalSection2_BeginSlow(c, m1, m2, 1);
        }
    }
    else {
        _PyCriticalSection2_BeginSlow(c, m1, m2, 0);
    }
}

static inline void
_PyCriticalSection2_End(_PyCriticalSection2 *c)
{
    // `mutex2` is NULL if both mutex arguments to _PyCriticalSection2_Begin()
    // were the same object.
    if (c->mutex2) {
        PyMutex_Unlock(c->mutex2);
    }
    PyMutex_Unlock(c->base.mutex);
    _PyCriticalSection_Pop(&c->base);
}

// Suspends all active critical sections for the given thread, releasing the
// associated locks; see `_PyThreadState_Detach()`.
PyAPI_FUNC(void)
_PyCriticalSection_SuspendAll(PyThreadState *tstate);
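
// For illustration, the expected interaction with thread-state transitions,
// as a sketch based on the comments at the top of this file (the actual
// callers are the _PyThreadState_Detach()/_PyThreadState_Attach() machinery):
//
//      _PyCriticalSection_SuspendAll(tstate);  // on detach: release locks,
//                                              // mark sections inactive
//      ... blocking operation (I/O, waiting on a lock) ...
//      _PyCriticalSection_Resume(tstate);      // on attach: reacquire the
//                                              // top-most section's lock(s)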

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_CRITICAL_SECTION_H */