Skip to content

Commit a6aaa89

Browse files
committed
Implement biased reference counting
1 parent 01d3598 commit a6aaa89

File tree

12 files changed

+433
-8
lines changed

12 files changed

+433
-8
lines changed

Include/internal/pycore_object.h

+80-2
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,19 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *);
5454
Furthermore, we can't use designated initializers in Extensions since these
5555
are not supported pre-C++20. Thus, keeping an internal copy here is the most
5656
backwards compatible solution */
57+
#if defined(Py_NOGIL)
58+
// Py_NOGIL variant of the internal static-object header initializer.
// Only ob_ref_local and ob_type are named; every other member of the header is
// zero-initialized, as C does for members omitted from a designated
// initializer list. Setting ob_ref_local to _Py_IMMORTAL_REFCNT_LOCAL makes
// the object satisfy the Py_NOGIL _Py_IsImmortal() test.
#define _PyObject_HEAD_INIT(type) \
59+
{ \
60+
.ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL, \
61+
.ob_type = (type) \
62+
}
63+
#else
5764
#define _PyObject_HEAD_INIT(type) \
5865
{ \
5966
.ob_refcnt = _Py_IMMORTAL_REFCNT, \
6067
.ob_type = (type) \
6168
}
69+
#endif
6270
#define _PyVarObject_HEAD_INIT(type, size) \
6371
{ \
6472
.ob_base = _PyObject_HEAD_INIT(type), \
@@ -95,24 +103,63 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
95103
#ifdef Py_REF_DEBUG
96104
_Py_AddRefTotal(_PyInterpreterState_GET(), n);
97105
#endif
106+
#if !defined(Py_NOGIL)
98107
op->ob_refcnt += n;
108+
#else
109+
if (_Py_ThreadLocal(op)) {
110+
uint32_t local = op->ob_ref_local;
111+
Py_ssize_t refcnt = (Py_ssize_t)local + n;
112+
# if PY_SSIZE_T_MAX > UINT32_MAX
113+
if (refcnt > (Py_ssize_t)UINT32_MAX) {
114+
// Make the object immortal if the 32-bit local reference count
115+
// would overflow.
116+
refcnt = _Py_IMMORTAL_REFCNT_LOCAL;
117+
}
118+
# endif
119+
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, (uint32_t)refcnt);
120+
}
121+
else {
122+
_Py_atomic_add_ssize(&op->ob_ref_shared, (n << _Py_REF_SHARED_SHIFT));
123+
}
124+
#endif
99125
}
100126
#define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n)
101127

102128
// Mark `op` as immortal. A NULL `op` is ignored.
//
// Py_NOGIL builds: the immortal marker lives in ob_ref_local; ob_tid and
// ob_ref_shared are reset to zero at the same time.
// Other builds: ob_refcnt is set to _Py_IMMORTAL_REFCNT.
//
// The stores are plain (non-atomic) assignments.
static inline void _Py_SetImmortal(PyObject *op)
103129
{
104130
if (op) {
131+
#ifdef Py_NOGIL
132+
// Zero thread id: no thread will match _Py_ThreadLocal(op).
op->ob_tid = 0;
133+
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
134+
op->ob_ref_shared = 0;
135+
#else
105136
op->ob_refcnt = _Py_IMMORTAL_REFCNT;
137+
#endif
106138
}
107139
}
108140
#define _Py_SetImmortal(op) _Py_SetImmortal(_PyObject_CAST(op))
109141

142+
// Makes an immortal object mortal again with the specified refcnt. Should only
143+
// be used during runtime finalization.
144+
// Parameters:
//   op     - object to make mortal; NULL is ignored. Must currently be
//            immortal (checked with assert()).
//   refcnt - reference count the object should have afterwards.
//
// Py_NOGIL builds: the whole count is placed in the shared field, tagged with
// the _Py_REF_MERGED flag; the local count and thread id are cleared.
// Other builds: ob_refcnt is simply overwritten with `refcnt`.
static inline void _Py_SetMortal(PyObject *op, Py_ssize_t refcnt)
145+
{
146+
if (op) {
147+
assert(_Py_IsImmortal(op));
148+
#ifdef Py_NOGIL
149+
op->ob_tid = 0;
150+
// Clearing ob_ref_local also removes the immortal marker.
op->ob_ref_local = 0;
151+
// _Py_REF_SHARED() shifts refcnt above the two flag bits and adds the flag.
op->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED);
152+
#else
153+
op->ob_refcnt = refcnt;
154+
#endif
155+
}
156+
}
157+
110158
/* _Py_ClearImmortal() should only be used during runtime finalization. */
111159
static inline void _Py_ClearImmortal(PyObject *op)
112160
{
113161
if (op) {
114-
assert(op->ob_refcnt == _Py_IMMORTAL_REFCNT);
115-
op->ob_refcnt = 1;
162+
_Py_SetMortal(op, 1);
116163
Py_DECREF(op);
117164
}
118165
}
@@ -122,6 +169,7 @@ static inline void _Py_ClearImmortal(PyObject *op)
122169
op = NULL; \
123170
} while (0)
124171

172+
#if !defined(Py_NOGIL)
125173
static inline void
126174
_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
127175
{
@@ -161,6 +209,36 @@ _Py_DECREF_NO_DEALLOC(PyObject *op)
161209
#endif
162210
}
163211

212+
#else
213+
// Py_NOGIL variant of _Py_DECREF_SPECIALIZED(): the `destruct` argument is
// accepted for signature compatibility but not used; the call is forwarded to
// the generic Py_DECREF().
static inline void
214+
_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
215+
{
216+
Py_DECREF(op);
217+
}
218+
219+
// Py_NOGIL variant of _Py_DECREF_NO_DEALLOC(): forwards to the generic
// Py_DECREF().
// NOTE(review): unlike what the name suggests, nothing in this variant
// prevents Py_DECREF() from taking its zero-refcount path — presumably callers
// guarantee the count cannot reach zero; confirm against call sites.
static inline void
220+
_Py_DECREF_NO_DEALLOC(PyObject *op)
221+
{
222+
Py_DECREF(op);
223+
}
224+
225+
// Return nonzero if the flag bits (the two least-significant bits) of an
// ob_ref_shared value equal _Py_REF_MERGED. Takes the raw field value, not an
// object pointer.
static inline int
226+
_Py_REF_IS_MERGED(Py_ssize_t ob_ref_shared)
227+
{
228+
return (ob_ref_shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_MERGED;
229+
}
230+
231+
// Return nonzero if the flag bits (the two least-significant bits) of an
// ob_ref_shared value equal _Py_REF_QUEUED. Takes the raw field value, not an
// object pointer.
static inline int
232+
_Py_REF_IS_QUEUED(Py_ssize_t ob_ref_shared)
233+
{
234+
return (ob_ref_shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_QUEUED;
235+
}
236+
237+
// Merge the local and shared reference count fields and add `extra` to the
238+
// refcount when merging.
239+
Py_ssize_t _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra);
240+
#endif // !defined(Py_NOGIL)
241+
164242
#ifdef Py_REF_DEBUG
165243
# undef _Py_DEC_REFTOTAL
166244
#endif

Include/object.h

+183-3
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,26 @@ check by comparing the reference count field to the immortality reference count.
106106
#define _Py_IMMORTAL_REFCNT (UINT_MAX >> 2)
107107
#endif
108108

109+
// Py_NOGIL builds indicate immortal objects using `ob_ref_local`, which is
110+
// always 32-bits.
111+
#ifdef Py_NOGIL
112+
#define _Py_IMMORTAL_REFCNT_LOCAL UINT32_MAX
113+
#endif
114+
109115
// Make all internal uses of PyObject_HEAD_INIT immortal while preserving the
110116
// C-API expectation that the refcnt will be set to 1.
111-
#ifdef Py_BUILD_CORE
117+
#if defined(Py_NOGIL)
118+
// Py_NOGIL variant. The initializer is positional, so the values must stay in
// the same order as the members of the Py_NOGIL `struct _object`:
//   ob_tid, _padding, ob_mutex, ob_gc_bits, ob_ref_local, ob_ref_shared,
//   ob_type.
// ob_ref_local receives the immortal marker; all other counters/flags are 0.
// The trailing comma after the closing brace is part of the expansion.
#define PyObject_HEAD_INIT(type) \
119+
{ \
120+
0, \
121+
0, \
122+
0, \
123+
0, \
124+
_Py_IMMORTAL_REFCNT_LOCAL, \
125+
0, \
126+
(type), \
127+
},
128+
#elif defined(Py_BUILD_CORE)
112129
#define PyObject_HEAD_INIT(type) \
113130
{ \
114131
{ _Py_IMMORTAL_REFCNT }, \
@@ -142,6 +159,7 @@ check by comparing the reference count field to the immortality reference count.
142159
* by hand. Similarly every pointer to a variable-size Python object can,
143160
* in addition, be cast to PyVarObject*.
144161
*/
162+
#ifndef Py_NOGIL
145163
struct _object {
146164
#if (defined(__GNUC__) || defined(__clang__)) \
147165
&& !(defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L)
@@ -166,6 +184,31 @@ struct _object {
166184

167185
PyTypeObject *ob_type;
168186
};
187+
#else
188+
// The shared reference count uses the two least-significant bits to store
189+
// flags. The remaining bits are used to store the reference count.
190+
#define _Py_REF_SHARED_SHIFT 2
191+
#define _Py_REF_SHARED_FLAG_MASK 0x3
192+
193+
// The shared flags are initialized to zero.
194+
#define _Py_REF_SHARED_INIT 0x0
195+
#define _Py_REF_MAYBE_WEAKREF 0x1
196+
#define _Py_REF_QUEUED 0x2
197+
#define _Py_REF_MERGED 0x3
198+
199+
// Create a shared field from a refcnt and desired flags
200+
#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags))
201+
202+
// Object header used in Py_NOGIL builds. The reference count is split into a
// 32-bit local part and a pointer-sized shared part. Member order matters:
// the Py_NOGIL PyObject_HEAD_INIT initializes these members positionally.
struct _object {
203+
uintptr_t ob_tid; // thread id (or zero); compared against _Py_ThreadId()
204+
uint16_t _padding;
205+
uint8_t ob_mutex; // per-object lock
206+
uint8_t ob_gc_bits; // gc-related state
207+
uint32_t ob_ref_local; // local reference count; UINT32_MAX marks immortal
208+
Py_ssize_t ob_ref_shared; // shared (atomic) reference count, shifted left by
                          // _Py_REF_SHARED_SHIFT; low two bits hold flags
209+
PyTypeObject *ob_type; // pointer to the object's type
210+
};
211+
#endif
169212

170213
/* Cast argument to PyObject* type. */
171214
#define _PyObject_CAST(op) _Py_CAST(PyObject*, (op))
@@ -183,9 +226,56 @@ typedef struct {
183226
PyAPI_FUNC(int) Py_Is(PyObject *x, PyObject *y);
184227
#define Py_Is(x, y) ((x) == (y))
185228

229+
#ifndef Py_LIMITED_API
230+
// Return a pointer-sized value identifying the calling thread. The value is
// read directly from a platform-specific location: segment-relative memory on
// x86/x86-64, a system or coprocessor register on ARM/AArch64. It is compared
// against PyObject.ob_tid by _Py_ThreadLocal().
//
// NOTE(review): presumably each branch yields the address of the thread's
// control block (TEB / TCB) — confirm against the platform ABI documentation.
//
// NOTE(review): this function is compiled whenever Py_LIMITED_API is not
// defined, regardless of Py_NOGIL, so the #error below triggers on any
// platform not listed here even in builds that never call it. The only caller
// visible here, _Py_ThreadLocal(), additionally requires Py_NOGIL; consider
// using the same guard for this definition.
static inline uintptr_t
231+
_Py_ThreadId(void)
232+
{
233+
uintptr_t tid;
234+
#if defined(_MSC_VER) && defined(_M_X64)
235+
// MSVC intrinsic: 64-bit read at offset 48 from the GS segment base.
tid = __readgsqword(48);
236+
#elif defined(_MSC_VER) && defined(_M_IX86)
237+
// MSVC intrinsic: 32-bit read at offset 24 from the FS segment base.
tid = __readfsdword(24);
238+
#elif defined(_MSC_VER) && defined(_M_ARM64)
239+
// MSVC intrinsic: read general-purpose register 18.
tid = __getReg(18);
240+
#elif defined(__i386__)
241+
__asm__("movl %%gs:0, %0" : "=r" (tid)); // 32-bit always uses GS
242+
#elif defined(__MACH__) && defined(__x86_64__)
243+
__asm__("movq %%gs:0, %0" : "=r" (tid)); // x86_64 macOSX uses GS
244+
#elif defined(__x86_64__)
245+
__asm__("movq %%fs:0, %0" : "=r" (tid)); // x86_64 Linux, BSD uses FS
246+
#elif defined(__arm__)
247+
// Read a CP15 c13 register, then clear the two low bits of the result.
__asm__ ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tid));
248+
#elif defined(__aarch64__) && defined(__APPLE__)
249+
__asm__ ("mrs %0, tpidrro_el0" : "=r" (tid));
250+
#elif defined(__aarch64__)
251+
__asm__ ("mrs %0, tpidr_el0" : "=r" (tid));
252+
#else
253+
# error "define _Py_ThreadId for this platform"
254+
#endif
255+
return tid;
256+
}
257+
#endif
258+
259+
#if defined(Py_NOGIL) && !defined(Py_LIMITED_API)
260+
// Return nonzero if `ob` is owned by the calling thread, i.e. its ob_tid
// equals _Py_ThreadId(). The reference-counting code uses this to choose
// between updating ob_ref_local (owner) and ob_ref_shared (any other thread).
// Objects whose ob_tid has been reset to zero are not expected to match.
// ob_tid is read with a plain (non-atomic) load.
static inline Py_ALWAYS_INLINE int
261+
_Py_ThreadLocal(PyObject *ob)
262+
{
263+
return ob->ob_tid == _Py_ThreadId();
264+
}
265+
#endif
186266

187267
// Return the reference count of `ob`.
//
// Without Py_NOGIL this is the ob_refcnt field.
// With Py_NOGIL:
//   - an immortal object (ob_ref_local == _Py_IMMORTAL_REFCNT_LOCAL) reports
//     _Py_IMMORTAL_REFCNT;
//   - otherwise the result is the local count plus the shared count, where
//     the shared field is shifted right arithmetically to drop its flag bits.
// The two fields are read with separate relaxed atomic loads, so the sum is
// not a single atomic snapshot of the object's state.
static inline Py_ssize_t Py_REFCNT(PyObject *ob) {
268+
#if !defined(Py_NOGIL)
188269
return ob->ob_refcnt;
270+
#else
271+
uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local);
272+
if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
273+
return _Py_IMMORTAL_REFCNT;
274+
}
275+
Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared);
276+
return _Py_STATIC_CAST(Py_ssize_t, local) +
277+
Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT);
278+
#endif
189279
}
190280
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
191281
# define Py_REFCNT(ob) Py_REFCNT(_PyObject_CAST(ob))
@@ -216,7 +306,9 @@ static inline Py_ssize_t Py_SIZE(PyObject *ob) {
216306

217307
static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op)
218308
{
219-
#if SIZEOF_VOID_P > 4
309+
#if defined(Py_NOGIL)
310+
return op->ob_ref_local == _Py_IMMORTAL_REFCNT_LOCAL;
311+
#elif SIZEOF_VOID_P > 4
220312
return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0;
221313
#else
222314
return op->ob_refcnt == _Py_IMMORTAL_REFCNT;
@@ -240,7 +332,24 @@ static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) {
240332
if (_Py_IsImmortal(ob)) {
241333
return;
242334
}
335+
#if !defined(Py_NOGIL)
243336
ob->ob_refcnt = refcnt;
337+
#else
338+
if (_Py_ThreadLocal(ob)) {
339+
// Set local refcount to desired refcount and shared refcount to zero,
340+
// but preserve the shared refcount flags.
341+
assert(refcnt < UINT32_MAX);
342+
ob->ob_ref_local = _Py_STATIC_CAST(uint32_t, refcnt);
343+
ob->ob_ref_shared &= _Py_REF_SHARED_FLAG_MASK;
344+
}
345+
else {
346+
// Set local refcount to zero and shared refcount to desired refcount.
347+
// Mark the object as merged.
348+
ob->ob_tid = 0;
349+
ob->ob_ref_local = 0;
350+
ob->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED);
351+
}
352+
#endif
244353
}
245354
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
246355
# define Py_SET_REFCNT(ob, refcnt) Py_SET_REFCNT(_PyObject_CAST(ob), (refcnt))
@@ -618,7 +727,19 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op)
618727
#else
619728
// Non-limited C API and limited C API for Python 3.9 and older access
620729
// directly PyObject.ob_refcnt.
621-
#if SIZEOF_VOID_P > 4
730+
#if defined(Py_NOGIL)
731+
uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
732+
uint32_t new_local = local + 1;
733+
if (new_local == 0) {
734+
return;
735+
}
736+
if (_Py_ThreadLocal(op)) {
737+
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local);
738+
}
739+
else {
740+
_Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT));
741+
}
742+
#elif SIZEOF_VOID_P > 4
622743
// Portable saturated add, branching on the carry flag and set low bits
623744
PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN];
624745
PY_UINT32_T new_refcnt = cur_refcnt + 1;
@@ -643,6 +764,19 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op)
643764
# define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op))
644765
#endif
645766

767+
768+
#if !defined(Py_LIMITED_API) && defined(Py_NOGIL)
769+
// Implements Py_DECREF on objects not owned by the current thread.
770+
PyAPI_FUNC(void) _Py_DecRefShared(PyObject *);
771+
PyAPI_FUNC(void) _Py_DecRefSharedDebug(PyObject *, const char *, int);
772+
773+
// Called from Py_DECREF by the owning thread when the local refcount reaches
774+
// zero. The call will deallocate the object if the shared refcount is also
775+
// zero. Otherwise, the thread gives up ownership and merges the reference
776+
// count fields.
777+
PyAPI_FUNC(void) _Py_MergeZeroRefcount(PyObject *);
778+
#endif
779+
646780
#if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG))
647781
// Stable ABI implements Py_DECREF() as a function call on limited C API
648782
// version 3.12 and newer, and on Python built in debug mode. _Py_DecRef() was
@@ -657,6 +791,52 @@ static inline void Py_DECREF(PyObject *op) {
657791
}
658792
#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op))
659793

794+
#elif defined(Py_NOGIL) && defined(Py_REF_DEBUG)
795+
// Py_DECREF for Py_NOGIL builds with Py_REF_DEBUG.
//
// Parameters:
//   filename, lineno - call site, supplied by the Py_DECREF() macro via
//                      __FILE__ / __LINE__ and passed on to the debug helpers.
//   op               - object whose reference count is decremented.
//
// Immortal objects are left untouched. If the calling thread owns the object,
// the local count is decremented and _Py_MergeZeroRefcount() is called when it
// reaches zero; otherwise the decrement is delegated to
// _Py_DecRefSharedDebug().
static inline void Py_DECREF(const char *filename, int lineno, PyObject *op)
796+
{
797+
uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
798+
if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
799+
return;
800+
}
801+
_Py_DECREF_STAT_INC();
802+
_Py_DECREF_DecRefTotal();
803+
if (_Py_ThreadLocal(op)) {
804+
// A local count that is already zero cannot be decremented: report it.
if (local == 0) {
805+
_Py_NegativeRefcount(filename, lineno, op);
806+
}
807+
// NOTE(review): if _Py_NegativeRefcount() returns, this unsigned decrement
// wraps 0 to UINT32_MAX, which equals _Py_IMMORTAL_REFCNT_LOCAL — confirm the
// helper does not return.
local--;
808+
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local);
809+
if (local == 0) {
810+
_Py_MergeZeroRefcount(op);
811+
}
812+
}
813+
else {
814+
_Py_DecRefSharedDebug(op, filename, lineno);
815+
}
816+
}
817+
#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op))
818+
819+
#elif defined(Py_NOGIL)
820+
// Py_DECREF for Py_NOGIL builds without Py_REF_DEBUG.
//
// Immortal objects are left untouched. If the calling thread owns the object,
// the local count is decremented with a relaxed load/store pair (no atomic
// read-modify-write) and _Py_MergeZeroRefcount() is called when it reaches
// zero; otherwise the decrement is delegated to _Py_DecRefShared().
// Unlike the Py_REF_DEBUG variant, a local count of zero is not checked before
// the decrement.
static inline void Py_DECREF(PyObject *op)
821+
{
822+
uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
823+
if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
824+
return;
825+
}
826+
_Py_DECREF_STAT_INC();
827+
if (_Py_ThreadLocal(op)) {
828+
local--;
829+
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local);
830+
if (local == 0) {
831+
_Py_MergeZeroRefcount(op);
832+
}
833+
}
834+
else {
835+
_Py_DecRefShared(op);
836+
}
837+
}
838+
#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op))
839+
660840
#elif defined(Py_REF_DEBUG)
661841
static inline void Py_DECREF(const char *filename, int lineno, PyObject *op)
662842
{

Lib/test/support/__init__.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,10 @@ def check_cflags_pgo():
799799
return any(option in cflags_nodist for option in pgo_options)
800800

801801

802-
_header = 'nP'
802+
# struct-module format of the PyObject header, selected by build type.
if sysconfig.get_config_var('Py_NOGIL'):
803+
# P=ob_tid, H=_padding, B=ob_mutex, B=ob_gc_bits, I=ob_ref_local,
# n=ob_ref_shared, P=ob_type (mirrors the Py_NOGIL `struct _object`).
_header = 'PHBBInP'
804+
else:
805+
# n=ob_refcnt, P=ob_type
_header = 'nP'
803806
_align = '0n'
804807
_vheader = _header + 'n'
805808

0 commit comments

Comments
 (0)