18
18
#include " ../pytypes.h"
19
19
20
20
#include < exception>
21
+ #include < mutex>
22
+ #include < thread>
21
23
22
24
// / Tracks the `internals` and `type_info` ABI version independent of the main library version.
23
25
// /
34
36
// / further ABI-incompatible changes may be made before the ABI is officially
35
37
// / changed to the new version.
36
38
#ifndef PYBIND11_INTERNALS_VERSION
37
- # if PY_VERSION_HEX >= 0x030C0000 || defined(_MSC_VER)
39
+ # if PY_VERSION_HEX >= 0x030D0000
40
+ // Version bump for Python 3.13+.
41
+ # define PYBIND11_INTERNALS_VERSION 6
42
+ # elif PY_VERSION_HEX >= 0x030C0000 || defined(_MSC_VER)
38
43
// Version bump for Python 3.12+, before first 3.12 beta release.
39
44
// Version bump for MSVC piggy-backed on PR #4779. See comments there.
40
45
# define PYBIND11_INTERNALS_VERSION 5
@@ -168,15 +173,31 @@ struct override_hash {
168
173
}
169
174
};
170
175
176
+ using instance_map = std::unordered_multimap<const void *, instance *>;
177
+
178
+ struct instance_map_shard {
179
+ std::mutex mutex;
180
+ instance_map registered_instances;
181
+ char padding[64 - (sizeof (std::mutex) + sizeof (instance_map)) % 64 ];
182
+ };
183
+
171
184
/// Internal data structure used to track registered instances and types.
/// Whenever binary incompatible changes are made to this structure,
/// `PYBIND11_INTERNALS_VERSION` must be incremented.
174
187
struct internals {
188
+ #if PYBIND11_INTERNALS_VERSION >= 6
189
+ std::mutex mutex;
190
+ #endif
175
191
// std::type_index -> pybind11's type information
176
192
type_map<type_info *> registered_types_cpp;
177
193
// PyTypeObject* -> base type_info(s)
178
194
std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py;
179
- std::unordered_multimap<const void *, instance *> registered_instances; // void * -> instance*
195
+ #if PYBIND11_INTERNALS_VERSION >= 6
196
+ std::unique_ptr<instance_map_shard[]> instance_shards; // void * -> instance*
197
+ size_t instance_shards_mask;
198
+ #else
199
+ instance_map registered_instances; // void * -> instance*
200
+ #endif
180
201
std::unordered_set<std::pair<const PyObject *, const char *>, override_hash>
181
202
inactive_override_cache;
182
203
type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
@@ -462,7 +483,8 @@ inline object get_python_state_dict() {
462
483
}
463
484
464
485
/// Fetch the entry stored under `PYBIND11_INTERNALS_ID` in `state_dict` and
/// return it wrapped in an `object`.
inline object get_internals_obj_from_state_dict(handle state_dict) {
    // NOTE(review): dict_getitemstringref presumably hands back a new (owned)
    // reference, which is why the result is adopted with reinterpret_steal
    // rather than reinterpret_borrow -- confirm against its definition.
    handle found = dict_getitemstringref(state_dict.ptr(), PYBIND11_INTERNALS_ID);
    return reinterpret_steal<object>(found);
}
467
489
468
490
inline internals **get_internals_pp_from_capsule (handle obj) {
@@ -474,6 +496,20 @@ inline internals **get_internals_pp_from_capsule(handle obj) {
474
496
return static_cast <internals **>(raw_ptr);
475
497
}
476
498
499
/// Round `x` up to the nearest power of two (values that already are a power
/// of two are returned unchanged).
///
/// `next_pow2(0)` returns 1: the plain bit-smearing trick wraps around and
/// yields 0 for that input, which is not a power of two.
/// Inputs greater than 2^63 have no representable result and wrap to 0.
///
/// See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
inline uint64_t next_pow2(uint64_t x) {
    if (x == 0) {
        return 1;
    }
    --x;
    // Copy the highest set bit into every lower position (shifts 1, 2, 4, ..., 32).
    for (unsigned shift = 1; shift < 64; shift <<= 1) {
        x |= x >> shift;
    }
    // All bits below the (former) top bit are now set; adding one carries into
    // the next power of two.
    return x + 1;
}
512
+
477
513
/// Return a reference to the current `internals` data
478
514
PYBIND11_NOINLINE internals &get_internals () {
479
515
auto **&internals_pp = get_internals_pp ();
@@ -542,6 +578,18 @@ PYBIND11_NOINLINE internals &get_internals() {
542
578
internals_ptr->static_property_type = make_static_property_type ();
543
579
internals_ptr->default_metaclass = make_default_metaclass ();
544
580
internals_ptr->instance_base = make_object_base_type (internals_ptr->default_metaclass );
581
+ #if PYBIND11_INTERNALS_VERSION >= 6
582
+ # if defined(Py_GIL_DISABLED)
583
+ size_t num_shards = (size_t ) next_pow2 (2 * std::thread::hardware_concurrency ());
584
+ if (num_shards == 0 ) {
585
+ num_shards = 1 ;
586
+ }
587
+ # else
588
+ size_t num_shards = 1 ;
589
+ # endif
590
+ internals_ptr->instance_shards .reset (new instance_map_shard[num_shards]);
591
+ internals_ptr->instance_shards_mask = num_shards - 1 ;
592
+ #endif // PYBIND11_INTERNALS_VERSION >= 6
545
593
}
546
594
return **internals_pp;
547
595
}
@@ -602,13 +650,77 @@ inline local_internals &get_local_internals() {
602
650
return *locals;
603
651
}
604
652
653
// PYBIND11_LOCK_INTERNALS(x): with internals version >= 6 and Py_GIL_DISABLED
// defined, declares a `std::unique_lock` named `lock` holding `x.mutex` until
// the end of the enclosing scope. In every other configuration it expands to
// nothing.
#if !(PYBIND11_INTERNALS_VERSION >= 6 && defined(Py_GIL_DISABLED))
#    define PYBIND11_LOCK_INTERNALS(internals)
#else
#    define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock<std::mutex> lock((internals).mutex)
#endif
658
+
659
+ template <typename F>
660
+ inline auto with_internals (const F &cb) -> decltype(cb(get_internals())) {
661
+ auto &internals = get_internals ();
662
+ PYBIND11_LOCK_INTERNALS (internals);
663
+ return cb (internals);
664
+ }
665
+
666
/// 64-bit mixing function: three xor-shift steps (30, 27, 31) interleaved with
/// two multiplications. Note that there is no additive step, so an input of 0
/// yields 0.
inline uint64_t splitmix64(uint64_t z) {
    z ^= z >> 30;
    z *= 0xbf58476d1ce4e5b9;
    z ^= z >> 27;
    z *= 0x94d049bb133111eb;
    z ^= z >> 31;
    return z;
}
671
+
672
+ template <typename F>
673
+ inline auto with_instance_map (const void *ptr,
674
+ const F &cb) -> decltype(cb(std::declval<instance_map &>())) {
675
+ auto &internals = get_internals ();
676
+
677
+ #if PYBIND11_INTERNALS_VERSION >= 6
678
+ // Hash address to compute shard, but ignore low bits. We'd like allocations
679
+ // from the same thread/core to map to the same shard and allocations from
680
+ // other threads/cores to map to other shards. Using the high bits is a good
681
+ // heuristic because memory allocators often have a per-thread
682
+ // arena/superblock/segment from which smaller allocations are served.
683
+ auto addr = reinterpret_cast <uintptr_t >(ptr);
684
+ uint64_t hash = splitmix64 ((uint64_t ) (addr >> 20 ));
685
+ size_t idx = (size_t ) hash & internals.instance_shards_mask ;
686
+
687
+ auto &shard = internals.instance_shards [idx];
688
+ # if defined(Py_GIL_DISABLED)
689
+ std::unique_lock<std::mutex> lock (shard.mutex );
690
+ # endif
691
+ return cb (shard.registered_instances );
692
+ #else
693
+ (void ) ptr;
694
+ return cb (internals.registered_instances );
695
+ #endif
696
+ }
697
+
698
+ inline size_t num_registered_instances () {
699
+ auto &internals = get_internals ();
700
+ #if PYBIND11_INTERNALS_VERSION >= 6
701
+ size_t count = 0 ;
702
+ for (size_t i = 0 ; i <= internals.instance_shards_mask ; ++i) {
703
+ auto &shard = internals.instance_shards [i];
704
+ std::unique_lock<std::mutex> lock (shard.mutex );
705
+ count += shard.registered_instances .size ();
706
+ }
707
+ return count;
708
+ #else
709
+ return internals.registered_instances .size ();
710
+ #endif
711
+ }
712
+
605
713
/// Constructs a std::string with the given arguments, stores it in `internals`, and returns its
/// `c_str()`.  Such string objects have a long storage duration -- the internal strings are only
/// cleared when the program exits or after interpreter shutdown (when embedding), and so are
/// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name).
609
717
template <typename ... Args>
610
718
const char *c_str (Args &&...args) {
611
- auto &strings = get_internals ().static_strings ;
719
+ // GCC 4.8 doesn't like parameter unpack within lambda capture, so use
720
+ // PYBIND11_LOCK_INTERNALS.
721
+ auto &internals = get_internals ();
722
+ PYBIND11_LOCK_INTERNALS (internals);
723
+ auto &strings = internals.static_strings ;
612
724
strings.emplace_front (std::forward<Args>(args)...);
613
725
return strings.front ().c_str ();
614
726
}
@@ -638,30 +750,34 @@ PYBIND11_NAMESPACE_END(detail)
638
750
// / pybind11 version) running in the current interpreter. Names starting with underscores
639
751
// / are reserved for internal usage. Returns `nullptr` if no matching entry was found.
640
752
PYBIND11_NOINLINE void *get_shared_data(const std::string &name) {
    // The lookup runs inside with_internals() so it happens under whatever
    // lock that helper applies.
    return detail::with_internals([&](detail::internals &state) -> void * {
        auto &table = state.shared_data;
        auto entry = table.find(name);
        if (entry == table.end()) {
            return nullptr;
        }
        return entry->second;
    });
}
645
758
646
759
/// Set the shared data that can be later recovered by `get_shared_data()`.
647
760
PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) {
    // Store (or overwrite) the entry inside with_internals(), then return the
    // pointer that was passed in.
    detail::with_internals(
        [&](detail::internals &state) { state.shared_data[name] = data; });
    return data;
}
651
766
652
767
/// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if
/// such an entry exists. Otherwise, a new object of default-constructible type `T` is
/// added to the shared data under the given name and a reference to it is returned.
655
770
template <typename T>
656
771
T &get_or_create_shared_data (const std::string &name) {
657
- auto &internals = detail::get_internals ();
658
- auto it = internals.shared_data .find (name);
659
- T *ptr = (T *) (it != internals.shared_data .end () ? it->second : nullptr );
660
- if (!ptr) {
661
- ptr = new T ();
662
- internals.shared_data [name] = ptr;
663
- }
664
- return *ptr;
772
+ return *detail::with_internals ([&](detail::internals &internals) {
773
+ auto it = internals.shared_data .find (name);
774
+ T *ptr = (T *) (it != internals.shared_data .end () ? it->second : nullptr );
775
+ if (!ptr) {
776
+ ptr = new T ();
777
+ internals.shared_data [name] = ptr;
778
+ }
779
+ return ptr;
780
+ });
665
781
}
666
782
667
783
PYBIND11_NAMESPACE_END (PYBIND11_NAMESPACE)
0 commit comments