Skip to content

Commit 2373401

Browse files
committed
gh-110481: Implement inter-thread queue for biased reference counting
1 parent b25b746 commit 2373401

21 files changed

+387
-11
lines changed

Include/internal/pycore_brc.h

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#ifndef Py_INTERNAL_BRC_H
2+
#define Py_INTERNAL_BRC_H
3+
4+
#include <stdint.h>
5+
#include "pycore_llist.h" // struct llist_node
6+
#include "pycore_lock.h" // PyMutex
7+
#include "pycore_object_stack.h" // _PyObjectStack
8+
9+
#ifdef __cplusplus
10+
extern "C" {
11+
#endif
12+
13+
#ifndef Py_BUILD_CORE
14+
# error "this header requires Py_BUILD_CORE define"
15+
#endif
16+
17+
#ifdef Py_GIL_DISABLED
18+
19+
// Prime number to avoid correlations with memory addresses.
20+
#define _Py_BRC_NUM_BUCKETS 257
21+
22+
// Hash table bucket
23+
struct _brc_bucket {
24+
// Mutex protects both the bucket and thread queues in this bucket.
25+
PyMutex mutex;
26+
27+
// Linked list of _PyThreadStateImpl objects hashed to this bucket.
28+
struct llist_node root;
29+
};
30+
31+
// Per-interpreter biased reference counting state
32+
struct _brc_state {
33+
// Hash table of thread states by thread-id. Threads within a bucket are
34+
// chained using a doubly-linked list.
35+
struct _brc_bucket table[_Py_BRC_NUM_BUCKETS];
36+
};
37+
38+
// Per-thread biased reference counting state
39+
struct _brc_thread_state {
40+
// Linked-list of thread states per hash bucket
41+
struct llist_node bucket_node;
42+
43+
// Thread-id as determined by _PyThread_Id()
44+
uintptr_t tid;
45+
46+
// Objects with refcounts to be merged (protected by bucket mutex)
47+
_PyObjectStack objects_to_merge;
48+
49+
// Local stack of objects to be merged (not accessed by other threads)
50+
_PyObjectStack local_objects_to_merge;
51+
};
52+
53+
// Initialize/finalize the per-thread biased reference counting state
54+
void _Py_brc_init_thread(PyThreadState *tstate);
55+
void _Py_brc_remove_thread(PyThreadState *tstate);
56+
57+
// Initialize per-interpreter state
58+
void _Py_brc_init_state(PyInterpreterState *interp);
59+
60+
void _Py_brc_after_fork(PyInterpreterState *interp);
61+
62+
// Enqueues an object to be merged by it's owning thread (tid). This
63+
// steals a reference to the object.
64+
void _Py_brc_queue_object(PyObject *ob);
65+
66+
// Merge the refcounts of queued objects for the current thread.
67+
void _Py_brc_merge_refcounts(PyThreadState *tstate);
68+
69+
#endif /* Py_GIL_DISABLED */
70+
71+
#ifdef __cplusplus
72+
}
73+
#endif
74+
#endif /* !Py_INTERNAL_BRC_H */

Include/internal/pycore_ceval.h

+1
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame)
206206
#define _PY_ASYNC_EXCEPTION_BIT 3
207207
#define _PY_GC_SCHEDULED_BIT 4
208208
#define _PY_EVAL_PLEASE_STOP_BIT 5
209+
#define _PY_EVAL_EXPLICIT_MERGE_BIT 6
209210

210211
/* Reserve a few bits for future use */
211212
#define _PY_EVAL_EVENTS_BITS 8

Include/internal/pycore_interp.h

+1
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ struct _is {
201201

202202
#if defined(Py_GIL_DISABLED)
203203
struct _mimalloc_interp_state mimalloc;
204+
struct _brc_state brc; // biased reference counting state
204205
#endif
205206

206207
// Per-interpreter state for the obmalloc allocator. For the main

Include/internal/pycore_object_stack.h

+6
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ _PyObjectStackChunk_New(void);
3232
extern void
3333
_PyObjectStackChunk_Free(_PyObjectStackChunk *);
3434

35+
typedef struct _Py_freelist_state _PyFreeListState;
36+
3537
extern void
3638
_PyObjectStackChunk_ClearFreeList(_PyFreeListState *state, int is_finalization);
3739

@@ -74,6 +76,10 @@ _PyObjectStack_Pop(_PyObjectStack *stack)
7476
return obj;
7577
}
7678

79+
// Merge src into dst, leaving src empty
80+
extern void
81+
_PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src);
82+
7783
// Remove all items from the stack
7884
extern void
7985
_PyObjectStack_Clear(_PyObjectStack *stack);

Include/internal/pycore_tstate.h

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ extern "C" {
1010

1111
#include "pycore_freelist.h" // struct _Py_freelist_state
1212
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
13+
#include "pycore_brc.h" // struct _brc_thread_state
1314

1415

1516
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
@@ -22,6 +23,7 @@ typedef struct _PyThreadStateImpl {
2223
#ifdef Py_GIL_DISABLED
2324
struct _mimalloc_thread_state mimalloc;
2425
struct _Py_freelist_state freelist_state;
26+
struct _brc_thread_state brc;
2527
#endif
2628

2729
} _PyThreadStateImpl;

Lib/test/test_code.py

+1
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,7 @@ def __init__(self, f, test):
865865
self.test = test
866866
def run(self):
867867
del self.f
868+
gc_collect()
868869
self.test.assertEqual(LAST_FREED, 500)
869870

870871
SetExtra(f.__code__, FREE_INDEX, ctypes.c_voidp(500))

Lib/test/test_concurrent_futures/executor.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import threading
22
import time
3+
import unittest
34
import weakref
45
from concurrent import futures
56
from test import support
7+
from test.support import Py_GIL_DISABLED
68

79

810
def mul(x, y):
@@ -83,10 +85,21 @@ def test_no_stale_references(self):
8385
my_object_collected = threading.Event()
8486
my_object_callback = weakref.ref(
8587
my_object, lambda obj: my_object_collected.set())
86-
# Deliberately discarding the future.
87-
self.executor.submit(my_object.my_method)
88+
fut = self.executor.submit(my_object.my_method)
8889
del my_object
8990

91+
if Py_GIL_DISABLED:
92+
# Due to biased reference counting, my_object might only be
93+
# deallocated while the thread that created it runs -- if the
94+
# thread is paused waiting on an event, it may not merge the
95+
# refcount of the queued object. For that reason, we wait for the
96+
# task to finish (so that it's no longer referenced) and force a
97+
# GC to ensure that it is collected.
98+
fut.result() # Wait for the task to finish.
99+
support.gc_collect()
100+
else:
101+
del fut # Deliberately discard the future.
102+
90103
collected = my_object_collected.wait(timeout=support.SHORT_TIMEOUT)
91104
self.assertTrue(collected,
92105
"Stale reference not collected within timeout.")

Lib/test/test_concurrent_futures/test_process_pool.py

+1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def test_ressources_gced_in_workers(self):
9898

9999
# explicitly destroy the object to ensure that EventfulGCObj.__del__()
100100
# is called while manager is still running.
101+
support.gc_collect()
101102
obj = None
102103
support.gc_collect()
103104

Makefile.pre.in

+2
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@ PYTHON_OBJS= \
405405
Python/ast_opt.o \
406406
Python/ast_unparse.o \
407407
Python/bltinmodule.o \
408+
Python/brc.o \
408409
Python/ceval.o \
409410
Python/codecs.o \
410411
Python/compile.o \
@@ -1808,6 +1809,7 @@ PYTHON_HEADERS= \
18081809
$(srcdir)/Include/internal/pycore_ast_state.h \
18091810
$(srcdir)/Include/internal/pycore_atexit.h \
18101811
$(srcdir)/Include/internal/pycore_bitutils.h \
1812+
$(srcdir)/Include/internal/pycore_brc.h \
18111813
$(srcdir)/Include/internal/pycore_bytes_methods.h \
18121814
$(srcdir)/Include/internal/pycore_bytesobject.h \
18131815
$(srcdir)/Include/internal/pycore_call.h \

Modules/posixmodule.c

+4
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,10 @@ PyOS_AfterFork_Child(void)
647647
goto fatal_error;
648648
}
649649

650+
#ifdef Py_GIL_DISABLED
651+
_Py_brc_after_fork(tstate->interp);
652+
#endif
653+
650654
_PySignal_AfterFork();
651655

652656
status = _PyInterpreterState_DeleteExceptMain(runtime);

Objects/dictobject.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -5581,7 +5581,8 @@ _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv)
55815581
// Don't try this at home, kids:
55825582
dict->ma_keys = NULL;
55835583
dict->ma_values = NULL;
5584-
Py_DECREF(dict);
5584+
Py_SET_REFCNT(dict, 0);
5585+
_Py_Dealloc((PyObject *)dict);
55855586
return true;
55865587
}
55875588

Objects/object.c

+2-6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/* Generic object operations; and implementation of None */
33

44
#include "Python.h"
5+
#include "pycore_brc.h" // _Py_brc_queue_object()
56
#include "pycore_call.h" // _PyObject_CallNoArgs()
67
#include "pycore_ceval.h" // _Py_EnterRecursiveCallTstate()
78
#include "pycore_context.h" // _PyContextTokenMissing_Type
@@ -344,12 +345,7 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno)
344345
&shared, new_shared));
345346

346347
if (should_queue) {
347-
// TODO: the inter-thread queue is not yet implemented. For now,
348-
// we just merge the refcount here.
349-
Py_ssize_t refcount = _Py_ExplicitMergeRefcount(o, -1);
350-
if (refcount == 0) {
351-
_Py_Dealloc(o);
352-
}
348+
_Py_brc_queue_object(o);
353349
}
354350
else if (new_shared == _Py_REF_MERGED) {
355351
// refcount is zero AND merged

PCbuild/_freeze_module.vcxproj

+1
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@
191191
<ClCompile Include="..\Python\ast_opt.c" />
192192
<ClCompile Include="..\Python\ast_unparse.c" />
193193
<ClCompile Include="..\Python\bltinmodule.c" />
194+
<ClCompile Include="..\Python\brc.c" />
194195
<ClCompile Include="..\Python\bootstrap_hash.c" />
195196
<ClCompile Include="..\Python\ceval.c" />
196197
<ClCompile Include="..\Python\codecs.c" />

PCbuild/_freeze_module.vcxproj.filters

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@
4646
<ClCompile Include="..\Python\bltinmodule.c">
4747
<Filter>Source Files</Filter>
4848
</ClCompile>
49+
<ClCompile Include="..\Python\brc.c">
50+
<Filter>Python</Filter>
51+
</ClCompile>
4952
<ClCompile Include="..\Objects\boolobject.c">
5053
<Filter>Source Files</Filter>
5154
</ClCompile>

PCbuild/pythoncore.vcxproj

+2
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@
206206
<ClInclude Include="..\Include\internal\pycore_ast_state.h" />
207207
<ClInclude Include="..\Include\internal\pycore_atexit.h" />
208208
<ClInclude Include="..\Include\internal\pycore_bitutils.h" />
209+
<ClInclude Include="..\Include\internal\pycore_brc.h" />
209210
<ClInclude Include="..\Include\internal\pycore_bytes_methods.h" />
210211
<ClInclude Include="..\Include\internal\pycore_bytesobject.h" />
211212
<ClInclude Include="..\Include\internal\pycore_call.h" />
@@ -553,6 +554,7 @@
553554
<ClCompile Include="..\Python\ast_unparse.c" />
554555
<ClCompile Include="..\Python\bltinmodule.c" />
555556
<ClCompile Include="..\Python\bootstrap_hash.c" />
557+
<ClCompile Include="..\Python\brc.c" />
556558
<ClCompile Include="..\Python\ceval.c" />
557559
<ClCompile Include="..\Python\codecs.c" />
558560
<ClCompile Include="..\Python\compile.c" />

PCbuild/pythoncore.vcxproj.filters

+6
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,9 @@
546546
<ClInclude Include="..\Include\internal\pycore_bitutils.h">
547547
<Filter>Include\internal</Filter>
548548
</ClInclude>
549+
<ClInclude Include="..\Include\internal\pycore_brc.h">
550+
<Filter>Include\internal</Filter>
551+
</ClInclude>
549552
<ClInclude Include="..\Include\internal\pycore_bytes_methods.h">
550553
<Filter>Include\internal</Filter>
551554
</ClInclude>
@@ -1253,6 +1256,9 @@
12531256
<ClCompile Include="..\Python\bltinmodule.c">
12541257
<Filter>Python</Filter>
12551258
</ClCompile>
1259+
<ClCompile Include="..\Python\brc.c">
1260+
<Filter>Python</Filter>
1261+
</ClCompile>
12561262
<ClCompile Include="..\Python\ceval.c">
12571263
<Filter>Python</Filter>
12581264
</ClCompile>

0 commit comments

Comments
 (0)