@@ -106,9 +106,26 @@ check by comparing the reference count field to the immortality reference count.
 #define _Py_IMMORTAL_REFCNT (UINT_MAX >> 2)
 #endif
 
+// Py_NOGIL builds indicate immortal objects using `ob_ref_local`, which is
+// always 32-bits.
+#ifdef Py_NOGIL
+#define _Py_IMMORTAL_REFCNT_LOCAL UINT32_MAX
+#endif
+
 // Make all internal uses of PyObject_HEAD_INIT immortal while preserving the
 // C-API expectation that the refcnt will be set to 1.
-#ifdef Py_BUILD_CORE
+#if defined(Py_NOGIL)
+#define PyObject_HEAD_INIT(type)        \
+    {                                   \
+        0,                              \
+        0,                              \
+        0,                              \
+        0,                              \
+        _Py_IMMORTAL_REFCNT_LOCAL,      \
+        0,                              \
+        (type),                         \
+    },
+#elif defined(Py_BUILD_CORE)
 #define PyObject_HEAD_INIT(type)        \
     {                                   \
         { _Py_IMMORTAL_REFCNT },        \
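
As a reading aid (not part of the patch), a sketch of how the positional initializers in the Py_NOGIL variant of PyObject_HEAD_INIT line up with the nogil `struct _object` fields introduced further down in this diff; the type name in the usage snippet is hypothetical:

// ob_tid        = 0                           (no owning thread yet)
// _padding      = 0
// ob_mutex      = 0                           (per-object lock, unlocked)
// ob_gc_bits    = 0
// ob_ref_local  = _Py_IMMORTAL_REFCNT_LOCAL   (statically allocated heads are immortal)
// ob_ref_shared = 0
// ob_type       = (type)
//
// Hypothetical use site, unchanged from default builds:
static PyTypeObject Example_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "example.Example",                  /* tp_name */
    sizeof(PyObject),                   /* tp_basicsize */
    0,                                  /* tp_itemsize */
};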
@@ -142,6 +159,7 @@ check by comparing the reference count field to the immortality reference count.
  * by hand. Similarly every pointer to a variable-size Python object can,
  * in addition, be cast to PyVarObject*.
  */
+#ifndef Py_NOGIL
 struct _object {
 #if (defined(__GNUC__) || defined(__clang__)) \
         && !(defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L)
@@ -166,6 +184,31 @@ struct _object {
 
     PyTypeObject *ob_type;
 };
+#else
+// The shared reference count uses the two least-significant bits to store
+// flags. The remaining bits are used to store the reference count.
+#define _Py_REF_SHARED_SHIFT        2
+#define _Py_REF_SHARED_FLAG_MASK    0x3
+
+// The shared flags are initialized to zero.
+#define _Py_REF_SHARED_INIT         0x0
+#define _Py_REF_MAYBE_WEAKREF       0x1
+#define _Py_REF_QUEUED              0x2
+#define _Py_REF_MERGED              0x3
+
+// Create a shared field from a refcnt and desired flags
+#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags))
+
+struct _object {
+    uintptr_t ob_tid;           // thread id (or zero)
+    uint16_t _padding;
+    uint8_t ob_mutex;           // per-object lock
+    uint8_t ob_gc_bits;         // gc-related state
+    uint32_t ob_ref_local;      // local reference count
+    Py_ssize_t ob_ref_shared;   // shared (atomic) reference count
+    PyTypeObject *ob_type;
+};
+#endif
 
 /* Cast argument to PyObject* type. */
 #define _PyObject_CAST(op) _Py_CAST(PyObject*, (op))
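
To make the ob_ref_shared bit layout concrete, a small sketch assuming the macros above are in scope; the function name is hypothetical and this is not part of the patch:

#include <assert.h>

static void
example_shared_field(void)
{
    // Pack a shared count of 3 together with the "merged" flag:
    // (3 << _Py_REF_SHARED_SHIFT) + _Py_REF_MERGED == 0xF.
    Py_ssize_t shared = _Py_REF_SHARED(3, _Py_REF_MERGED);

    // Unpack: the count lives above the two flag bits.
    assert((shared >> _Py_REF_SHARED_SHIFT) == 3);
    assert((shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_MERGED);
}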
@@ -183,9 +226,56 @@ typedef struct {
 PyAPI_FUNC(int) Py_Is(PyObject *x, PyObject *y);
 #define Py_Is(x, y) ((x) == (y))
 
+#ifndef Py_LIMITED_API
+static inline uintptr_t
+_Py_ThreadId(void)
+{
+    uintptr_t tid;
+#if defined(_MSC_VER) && defined(_M_X64)
+    tid = __readgsqword(48);
+#elif defined(_MSC_VER) && defined(_M_IX86)
+    tid = __readfsdword(24);
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+    tid = __getReg(18);
+#elif defined(__i386__)
+    __asm__("movl %%gs:0, %0" : "=r" (tid));  // 32-bit always uses GS
+#elif defined(__MACH__) && defined(__x86_64__)
+    __asm__("movq %%gs:0, %0" : "=r" (tid));  // x86_64 macOSX uses GS
+#elif defined(__x86_64__)
+    __asm__("movq %%fs:0, %0" : "=r" (tid));  // x86_64 Linux, BSD uses FS
+#elif defined(__arm__)
+    __asm__ ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tid));
+#elif defined(__aarch64__) && defined(__APPLE__)
+    __asm__ ("mrs %0, tpidrro_el0" : "=r" (tid));
+#elif defined(__aarch64__)
+    __asm__ ("mrs %0, tpidr_el0" : "=r" (tid));
+#else
+  # error "define _Py_ThreadId for this platform"
+#endif
+    return tid;
+}
+#endif
+
+#if defined(Py_NOGIL) && !defined(Py_LIMITED_API)
+static inline Py_ALWAYS_INLINE int
+_Py_ThreadLocal(PyObject *ob)
+{
+    return ob->ob_tid == _Py_ThreadId();
+}
+#endif
 
 static inline Py_ssize_t Py_REFCNT(PyObject *ob) {
+#if !defined(Py_NOGIL)
     return ob->ob_refcnt;
+#else
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local);
+    if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
+        return _Py_IMMORTAL_REFCNT;
+    }
+    Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared);
+    return _Py_STATIC_CAST(Py_ssize_t, local) +
+           Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT);
+#endif
 }
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API + 0 < 0x030b0000
 #  define Py_REFCNT(ob) Py_REFCNT(_PyObject_CAST(ob))
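
A worked example of the nogil Py_REFCNT arithmetic above, with made-up counter values; the helper name is hypothetical and not part of the patch:

static Py_ssize_t
example_total_refcount(void)
{
    // Owning thread holds 5 references; other threads have added 2 more,
    // which live in the shared field shifted above the two flag bits.
    uint32_t   local  = 5;
    Py_ssize_t shared = _Py_REF_SHARED(2, 0);   // == 2 << _Py_REF_SHARED_SHIFT

    // Same formula as Py_REFCNT: local count plus the shared count with the
    // flag bits shifted away, giving 5 + 2 == 7.
    return (Py_ssize_t)local + (shared >> _Py_REF_SHARED_SHIFT);
}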
@@ -216,7 +306,9 @@ static inline Py_ssize_t Py_SIZE(PyObject *ob) {
 
 static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op)
 {
-#if SIZEOF_VOID_P > 4
+#if defined(Py_NOGIL)
+    return op->ob_ref_local == _Py_IMMORTAL_REFCNT_LOCAL;
+#elif SIZEOF_VOID_P > 4
     return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0;
 #else
     return op->ob_refcnt == _Py_IMMORTAL_REFCNT;
@@ -240,7 +332,24 @@ static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) {
     if (_Py_IsImmortal(ob)) {
         return;
     }
+#if !defined(Py_NOGIL)
     ob->ob_refcnt = refcnt;
+#else
+    if (_Py_ThreadLocal(ob)) {
+        // Set local refcount to desired refcount and shared refcount to zero,
+        // but preserve the shared refcount flags.
+        assert(refcnt < UINT32_MAX);
+        ob->ob_ref_local = _Py_STATIC_CAST(uint32_t, refcnt);
+        ob->ob_ref_shared &= _Py_REF_SHARED_FLAG_MASK;
+    }
+    else {
+        // Set local refcount to zero and shared refcount to desired refcount.
+        // Mark the object as merged.
+        ob->ob_tid = 0;
+        ob->ob_ref_local = 0;
+        ob->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED);
+    }
+#endif
 }
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API + 0 < 0x030b0000
 #  define Py_SET_REFCNT(ob, refcnt) Py_SET_REFCNT(_PyObject_CAST(ob), (refcnt))
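
For clarity, the field values that the two Py_NOGIL branches of Py_SET_REFCNT above leave behind for a call like Py_SET_REFCNT(ob, 4); this is a summary of the code, not part of the patch:

// If the calling thread owns ob (_Py_ThreadLocal(ob) is true):
//     ob_ref_local  = 4
//     ob_ref_shared = previous flag bits only (shared count cleared)
//
// If the calling thread does not own ob:
//     ob_tid        = 0
//     ob_ref_local  = 0
//     ob_ref_shared = _Py_REF_SHARED(4, _Py_REF_MERGED)   // (4 << 2) + 0x3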
@@ -618,7 +727,19 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op)
 #else
     // Non-limited C API and limited C API for Python 3.9 and older access
     // directly PyObject.ob_refcnt.
-#if SIZEOF_VOID_P > 4
+#if defined(Py_NOGIL)
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
+    uint32_t new_local = local + 1;
+    if (new_local == 0) {
+        return;
+    }
+    if (_Py_ThreadLocal(op)) {
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local);
+    }
+    else {
+        _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT));
+    }
+#elif SIZEOF_VOID_P > 4
     // Portable saturated add, branching on the carry flag and set low bits
     PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN];
     PY_UINT32_T new_refcnt = cur_refcnt + 1;
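
A short note on the Py_NOGIL branch of Py_INCREF above (a reading of the code, not part of the patch):

// The unsigned wrap doubles as the immortality check: immortal objects store
// _Py_IMMORTAL_REFCNT_LOCAL (UINT32_MAX) in ob_ref_local, so `local + 1`
// overflows to 0 exactly for them and the increment is skipped.
//
// Otherwise the increment is biased toward the owner: the owning thread bumps
// ob_ref_local with a plain relaxed store, while any other thread adds
// 1 << _Py_REF_SHARED_SHIFT to ob_ref_shared atomically, which leaves the two
// flag bits untouched.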
@@ -643,6 +764,19 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op)
 #  define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op))
 #endif
 
+
+#if !defined(Py_LIMITED_API) && defined(Py_NOGIL)
+// Implements Py_DECREF on objects not owned by the current thread.
+PyAPI_FUNC(void) _Py_DecRefShared(PyObject *);
+PyAPI_FUNC(void) _Py_DecRefSharedDebug(PyObject *, const char *, int);
+
+// Called from Py_DECREF by the owning thread when the local refcount reaches
+// zero. The call will deallocate the object if the shared refcount is also
+// zero. Otherwise, the thread gives up ownership and merges the reference
+// count fields.
+PyAPI_FUNC(void) _Py_MergeZeroRefcount(PyObject *);
+#endif
+
 #if defined(Py_LIMITED_API) && (Py_LIMITED_API + 0 >= 0x030c0000 || defined(Py_REF_DEBUG))
 // Stable ABI implements Py_DECREF() as a function call on limited C API
 // version 3.12 and newer, and on Python built in debug mode. _Py_DecRef() was
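
A conceptual sketch of what the "merge" described above amounts to when the owner's local count reaches zero while other threads still hold references; this is an assumption about the out-of-line helpers, not their actual implementation:

// Hypothetical illustration only:
//     ob->ob_tid        = 0;                                  // give up ownership
//     ob->ob_ref_local  = 0;
//     ob->ob_ref_shared = _Py_REF_SHARED(remaining, _Py_REF_MERGED);
//
// From then on every Py_INCREF/Py_DECREF for the object goes through the
// atomic shared counter, and the object is freed when that count drops to zero.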
@@ -657,6 +791,52 @@ static inline void Py_DECREF(PyObject *op) {
 }
 #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op))
 
+#elif defined(Py_NOGIL) && defined(Py_REF_DEBUG)
+static inline void Py_DECREF(const char *filename, int lineno, PyObject *op)
+{
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
+    if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
+        return;
+    }
+    _Py_DECREF_STAT_INC();
+    _Py_DECREF_DecRefTotal();
+    if (_Py_ThreadLocal(op)) {
+        if (local == 0) {
+            _Py_NegativeRefcount(filename, lineno, op);
+        }
+        local--;
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local);
+        if (local == 0) {
+            _Py_MergeZeroRefcount(op);
+        }
+    }
+    else {
+        _Py_DecRefSharedDebug(op, filename, lineno);
+    }
+}
+#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op))
+
+#elif defined(Py_NOGIL)
+static inline void Py_DECREF(PyObject *op)
+{
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
+    if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
+        return;
+    }
+    _Py_DECREF_STAT_INC();
+    if (_Py_ThreadLocal(op)) {
+        local--;
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local);
+        if (local == 0) {
+            _Py_MergeZeroRefcount(op);
+        }
+    }
+    else {
+        _Py_DecRefShared(op);
+    }
+}
+#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op))
+
 #elif defined(Py_REF_DEBUG)
 static inline void Py_DECREF(const char *filename, int lineno, PyObject *op)
 {