Skip to content

Commit e5470eb

Browse files
colesbury authored and henryiii committed
Support free-threaded CPython (PEP 703)
Some additional locking is added in the free-threaded build when `Py_GIL_DISABLED` is defined:
- Most accesses to internals are protected by a single mutex.
- The registered_instances map uses a striped lock to improve concurrency.

Pybind11 modules can indicate support for running with the GIL disabled by calling `set_gil_not_used()`.
1 parent 1a0ff40 commit e5470eb

14 files changed

+338
-136
lines changed

include/pybind11/detail/class.h

Lines changed: 63 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -205,39 +205,40 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P
205205

206206
/// Cleanup the type-info for a pybind11-registered type.
207207
extern "C" inline void pybind11_meta_dealloc(PyObject *obj) {
    // All registry bookkeeping runs inside with_internals(), so it is covered
    // by whatever locking with_internals() applies to `internals`.
    with_internals([obj](internals &internals) {
        auto *type = (PyTypeObject *) obj;

        // A pybind11-registered type will:
        // 1) be found in internals.registered_types_py
        // 2) have exactly one associated `detail::type_info`
        auto found_type = internals.registered_types_py.find(type);
        if (found_type != internals.registered_types_py.end() && found_type->second.size() == 1
            && found_type->second[0]->type == type) {

            auto *tinfo = found_type->second[0];
            auto tindex = std::type_index(*tinfo->cpptype);
            internals.direct_conversions.erase(tindex);

            // Module-local types are recorded in the local registry, all
            // others in the shared one; erase from whichever applies.
            if (tinfo->module_local) {
                get_local_internals().registered_types_cpp.erase(tindex);
            } else {
                internals.registered_types_cpp.erase(tindex);
            }
            internals.registered_types_py.erase(tinfo->type);

            // Actually just `std::erase_if`, but that's only available in C++20
            auto &cache = internals.inactive_override_cache;
            for (auto it = cache.begin(), last = cache.end(); it != last;) {
                if (it->first == (PyObject *) tinfo->type) {
                    it = cache.erase(it);
                } else {
                    ++it;
                }
            }

            // Every registry entry that referred to tinfo has been erased
            // above, so it can be released now.
            delete tinfo;
        }
    });

    // Hand the object to the base `type` deallocator once the bookkeeping is done.
    PyType_Type.tp_dealloc(obj);
}
@@ -310,19 +311,20 @@ inline void traverse_offset_bases(void *valueptr,
310311
}
311312

312313
/// Record the mapping `ptr -> self` in the instance map selected by
/// `with_instance_map(ptr, ...)`.
///
/// Always returns true; the value exists only so that this function has the
/// same signature as `deregister_instance_impl`.
inline bool register_instance_impl(void *ptr, instance *self) {
    with_instance_map(ptr, [&](instance_map &instances) { instances.emplace(ptr, self); });
    return true; // unused, but gives the same signature as the deregister func
}
316317
inline bool deregister_instance_impl(void *ptr, instance *self) {
317-
auto &registered_instances = get_internals().registered_instances;
318-
auto range = registered_instances.equal_range(ptr);
319-
for (auto it = range.first; it != range.second; ++it) {
320-
if (self == it->second) {
321-
registered_instances.erase(it);
322-
return true;
318+
return with_instance_map(ptr, [&](instance_map &instances) {
319+
auto range = instances.equal_range(ptr);
320+
for (auto it = range.first; it != range.second; ++it) {
321+
if (self == it->second) {
322+
instances.erase(it);
323+
return true;
324+
}
323325
}
324-
}
325-
return false;
326+
return false;
327+
});
326328
}
327329

328330
inline void register_instance(instance *self, void *valptr, const type_info *tinfo) {
@@ -377,27 +379,32 @@ extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject
377379
}
378380

379381
/// Append `patient` to the list stored under `nurse` in `internals.patients`.
///
/// A new reference to `patient` is taken (`Py_INCREF`) and `has_patients` is
/// set on the nurse instance before the list is updated.
inline void add_patient(PyObject *nurse, PyObject *patient) {
    auto *instance = reinterpret_cast<detail::instance *>(nurse);
    instance->has_patients = true;
    Py_INCREF(patient);

    // Only the update of the patients table itself goes through with_internals().
    with_internals([&](internals &internals) { internals.patients[nurse].push_back(patient); });
}
386388

387389
inline void clear_patients(PyObject *self) {
388390
auto *instance = reinterpret_cast<detail::instance *>(self);
389-
auto &internals = get_internals();
390-
auto pos = internals.patients.find(self);
391+
std::vector<PyObject *> patients;
391392

392-
if (pos == internals.patients.end()) {
393-
pybind11_fail("FATAL: Internal consistency check failed: Invalid clear_patients() call.");
394-
}
393+
with_internals([&](internals &internals) {
394+
auto pos = internals.patients.find(self);
395+
396+
if (pos == internals.patients.end()) {
397+
pybind11_fail(
398+
"FATAL: Internal consistency check failed: Invalid clear_patients() call.");
399+
}
400+
401+
// Clearing the patients can cause more Python code to run, which
402+
// can invalidate the iterator. Extract the vector of patients
403+
// from the unordered_map first.
404+
patients = std::move(pos->second);
405+
internals.patients.erase(pos);
406+
});
395407

396-
// Clearing the patients can cause more Python code to run, which
397-
// can invalidate the iterator. Extract the vector of patients
398-
// from the unordered_map first.
399-
auto patients = std::move(pos->second);
400-
internals.patients.erase(pos);
401408
instance->has_patients = false;
402409
for (PyObject *&patient : patients) {
403410
Py_CLEAR(patient);
@@ -662,10 +669,13 @@ inline PyObject *make_new_python_type(const type_record &rec) {
662669

663670
char *tp_doc = nullptr;
664671
if (rec.doc && options::show_user_defined_docstrings()) {
665-
/* Allocate memory for docstring (using PyObject_MALLOC, since
666-
Python will free this later on) */
672+
/* Allocate memory for docstring (Python will free this later on) */
667673
size_t size = std::strlen(rec.doc) + 1;
674+
#if PY_VERSION_HEX >= 0x030D0000
675+
tp_doc = (char *) PyMem_MALLOC(size);
676+
#else
668677
tp_doc = (char *) PyObject_MALLOC(size);
678+
#endif
669679
std::memcpy((void *) tp_doc, rec.doc, size);
670680
}
671681

include/pybind11/detail/internals.h

Lines changed: 133 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include "../pytypes.h"
1919

2020
#include <exception>
21+
#include <mutex>
22+
#include <thread>
2123

2224
/// Tracks the `internals` and `type_info` ABI version independent of the main library version.
2325
///
@@ -34,7 +36,10 @@
3436
/// further ABI-incompatible changes may be made before the ABI is officially
3537
/// changed to the new version.
3638
#ifndef PYBIND11_INTERNALS_VERSION
37-
# if PY_VERSION_HEX >= 0x030C0000 || defined(_MSC_VER)
39+
# if PY_VERSION_HEX >= 0x030D0000
40+
// Version bump for Python 3.13+.
41+
# define PYBIND11_INTERNALS_VERSION 6
42+
# elif PY_VERSION_HEX >= 0x030C0000 || defined(_MSC_VER)
3843
// Version bump for Python 3.12+, before first 3.12 beta release.
3944
// Version bump for MSVC piggy-backed on PR #4779. See comments there.
4045
# define PYBIND11_INTERNALS_VERSION 5
@@ -168,15 +173,31 @@ struct override_hash {
168173
}
169174
};
170175

176+
using instance_map = std::unordered_multimap<const void *, instance *>;
177+
178+
struct instance_map_shard {
179+
std::mutex mutex;
180+
instance_map registered_instances;
181+
char padding[64 - (sizeof(std::mutex) + sizeof(instance_map)) % 64];
182+
};
183+
171184
/// Internal data structure used to track registered instances and types.
172185
/// Whenever binary incompatible changes are made to this structure,
173186
/// `PYBIND11_INTERNALS_VERSION` must be incremented.
174187
struct internals {
188+
#if PYBIND11_INTERNALS_VERSION >= 6
189+
std::mutex mutex;
190+
#endif
175191
// std::type_index -> pybind11's type information
176192
type_map<type_info *> registered_types_cpp;
177193
// PyTypeObject* -> base type_info(s)
178194
std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py;
179-
std::unordered_multimap<const void *, instance *> registered_instances; // void * -> instance*
195+
#if PYBIND11_INTERNALS_VERSION >= 6
196+
std::unique_ptr<instance_map_shard[]> instance_shards; // void * -> instance*
197+
size_t instance_shards_mask;
198+
#else
199+
instance_map registered_instances; // void * -> instance*
200+
#endif
180201
std::unordered_set<std::pair<const PyObject *, const char *>, override_hash>
181202
inactive_override_cache;
182203
type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
@@ -462,7 +483,8 @@ inline object get_python_state_dict() {
462483
}
463484

464485
/// Look up the entry stored under `PYBIND11_INTERNALS_ID` in `state_dict` and
/// wrap it in an `object`.
///
/// The result of `dict_getitemstringref` is adopted with `reinterpret_steal`,
/// i.e. without taking an additional reference.
/// NOTE(review): this presumes `dict_getitemstringref` hands back a new
/// (owned) reference, or null when the key is absent -- confirm against its
/// definition.
inline object get_internals_obj_from_state_dict(handle state_dict) {
    return reinterpret_steal<object>(
        dict_getitemstringref(state_dict.ptr(), PYBIND11_INTERNALS_ID));
}
467489

468490
inline internals **get_internals_pp_from_capsule(handle obj) {
@@ -474,6 +496,20 @@ inline internals **get_internals_pp_from_capsule(handle obj) {
474496
return static_cast<internals **>(raw_ptr);
475497
}
476498

499+
/// Round `x` up to the nearest power of two.
///
/// A value that already is a power of two is returned unchanged, and
/// `next_pow2(0)` returns 1. Inputs greater than 2^63 have no representable
/// answer in 64 bits and wrap around to 0.
inline uint64_t next_pow2(uint64_t x) {
    if (x == 0) {
        // Without this guard the decrement below wraps to UINT64_MAX and the
        // final increment wraps back to 0, which is not a power of two.
        return 1;
    }
    // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
    // Copy the highest set bit of (x - 1) into every lower bit position, then
    // add one to carry into the next power of two.
    x--;
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    x |= (x >> 32);
    x++;
    return x;
}
512+
477513
/// Return a reference to the current `internals` data
478514
PYBIND11_NOINLINE internals &get_internals() {
479515
auto **&internals_pp = get_internals_pp();
@@ -542,6 +578,18 @@ PYBIND11_NOINLINE internals &get_internals() {
542578
internals_ptr->static_property_type = make_static_property_type();
543579
internals_ptr->default_metaclass = make_default_metaclass();
544580
internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass);
581+
#if PYBIND11_INTERNALS_VERSION >= 6
582+
# if defined(Py_GIL_DISABLED)
583+
size_t num_shards = (size_t) next_pow2(2 * std::thread::hardware_concurrency());
584+
if (num_shards == 0) {
585+
num_shards = 1;
586+
}
587+
# else
588+
size_t num_shards = 1;
589+
# endif
590+
internals_ptr->instance_shards.reset(new instance_map_shard[num_shards]);
591+
internals_ptr->instance_shards_mask = num_shards - 1;
592+
#endif // PYBIND11_INTERNALS_VERSION >= 6
545593
}
546594
return **internals_pp;
547595
}
@@ -602,13 +650,77 @@ inline local_internals &get_local_internals() {
602650
return *locals;
603651
}
604652

653+
// PYBIND11_LOCK_INTERNALS(internals) declares a local `std::unique_lock` named
// `lock` on `(internals).mutex`; the lock is released when the enclosing scope
// ends. The macro expands to nothing unless PYBIND11_INTERNALS_VERSION >= 6
// and Py_GIL_DISABLED is defined.
#if PYBIND11_INTERNALS_VERSION >= 6 && defined(Py_GIL_DISABLED)
#    define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock<std::mutex> lock((internals).mutex)
#else
#    define PYBIND11_LOCK_INTERNALS(internals)
#endif
658+
659+
template <typename F>
660+
inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) {
661+
auto &internals = get_internals();
662+
PYBIND11_LOCK_INTERNALS(internals);
663+
return cb(internals);
664+
}
665+
666+
/// Scramble a 64-bit value: two rounds of "xor with a right-shifted copy, then
/// multiply by an odd constant", followed by a final xor-shift.
///
/// Each step is invertible on 64-bit values, so distinct inputs give distinct
/// outputs; in particular 0 maps to 0.
inline uint64_t splitmix64(uint64_t z) {
    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
    z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
    return z ^ (z >> 31);
}
671+
672+
template <typename F>
673+
inline auto with_instance_map(const void *ptr,
674+
const F &cb) -> decltype(cb(std::declval<instance_map &>())) {
675+
auto &internals = get_internals();
676+
677+
#if PYBIND11_INTERNALS_VERSION >= 6
678+
// Hash address to compute shard, but ignore low bits. We'd like allocations
679+
// from the same thread/core to map to the same shard and allocations from
680+
// other threads/cores to map to other shards. Using the high bits is a good
681+
// heuristic because memory allocators often have a per-thread
682+
// arena/superblock/segment from which smaller allocations are served.
683+
auto addr = reinterpret_cast<uintptr_t>(ptr);
684+
uint64_t hash = splitmix64((uint64_t) (addr >> 20));
685+
size_t idx = (size_t) hash & internals.instance_shards_mask;
686+
687+
auto &shard = internals.instance_shards[idx];
688+
# if defined(Py_GIL_DISABLED)
689+
std::unique_lock<std::mutex> lock(shard.mutex);
690+
# endif
691+
return cb(shard.registered_instances);
692+
#else
693+
(void) ptr;
694+
return cb(internals.registered_instances);
695+
#endif
696+
}
697+
698+
inline size_t num_registered_instances() {
699+
auto &internals = get_internals();
700+
#if PYBIND11_INTERNALS_VERSION >= 6
701+
size_t count = 0;
702+
for (size_t i = 0; i <= internals.instance_shards_mask; ++i) {
703+
auto &shard = internals.instance_shards[i];
704+
std::unique_lock<std::mutex> lock(shard.mutex);
705+
count += shard.registered_instances.size();
706+
}
707+
return count;
708+
#else
709+
return internals.registered_instances.size();
710+
#endif
711+
}
712+
605713
/// Constructs a std::string with the given arguments, stores it in `internals`, and returns its
606714
/// `c_str()`. Such strings objects have a long storage duration -- the internal strings are only
607715
/// cleared when the program exits or after interpreter shutdown (when embedding), and so are
608716
/// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name).
609717
template <typename... Args>
610718
const char *c_str(Args &&...args) {
611-
auto &strings = get_internals().static_strings;
719+
// GCC 4.8 doesn't like parameter unpack within lambda capture, so use
720+
// PYBIND11_LOCK_INTERNALS.
721+
auto &internals = get_internals();
722+
PYBIND11_LOCK_INTERNALS(internals);
723+
auto &strings = internals.static_strings;
612724
strings.emplace_front(std::forward<Args>(args)...);
613725
return strings.front().c_str();
614726
}
@@ -638,30 +750,34 @@ PYBIND11_NAMESPACE_END(detail)
638750
/// pybind11 version) running in the current interpreter. Names starting with underscores
639751
/// are reserved for internal usage. Returns `nullptr` if no matching entry was found.
640752
PYBIND11_NOINLINE void *get_shared_data(const std::string &name) {
    // The lookup runs inside with_internals(), so it is covered by whatever
    // locking with_internals() applies to `internals`.
    return detail::with_internals([&](detail::internals &internals) {
        auto it = internals.shared_data.find(name);
        return it != internals.shared_data.end() ? it->second : nullptr;
    });
}
645758

646759
/// Set the shared data that can be later recovered by `get_shared_data()`.
647760
PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) {
    // Stores `data` under `name`, replacing any pointer already stored there,
    // and returns `data` unchanged. The store runs inside with_internals().
    return detail::with_internals([&](detail::internals &internals) {
        internals.shared_data[name] = data;
        return data;
    });
}
651766

652767
/// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if
653768
/// such entry exists. Otherwise, a new object of default-constructible type `T` is
654769
/// added to the shared data under the given name and a reference to it is returned.
655770
template <typename T>
656771
T &get_or_create_shared_data(const std::string &name) {
657-
auto &internals = detail::get_internals();
658-
auto it = internals.shared_data.find(name);
659-
T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr);
660-
if (!ptr) {
661-
ptr = new T();
662-
internals.shared_data[name] = ptr;
663-
}
664-
return *ptr;
772+
return *detail::with_internals([&](detail::internals &internals) {
773+
auto it = internals.shared_data.find(name);
774+
T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr);
775+
if (!ptr) {
776+
ptr = new T();
777+
internals.shared_data[name] = ptr;
778+
}
779+
return ptr;
780+
});
665781
}
666782

667783
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)

0 commit comments

Comments
 (0)