15
15
16
16
#include " common.h"
17
17
18
+ #include < atomic>
18
19
#include < exception>
19
20
#include < mutex>
20
21
#include < thread>
@@ -53,6 +54,7 @@ constexpr const char *internals_function_record_capsule_name = "pybind11_functio
53
54
inline PyTypeObject *make_static_property_type ();
54
55
inline PyTypeObject *make_default_metaclass ();
55
56
inline PyObject *make_object_base_type (PyTypeObject *metaclass);
57
+ inline void translate_exception (std::exception_ptr p);
56
58
57
59
// The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new
58
60
// Thread Specific Storage (TSS) API.
@@ -149,6 +151,20 @@ struct instance_map_shard {
149
151
150
152
static_assert (sizeof (instance_map_shard) % 64 == 0,
151
153
"instance_map_shard size is not a multiple of 64 bytes");
154
+
155
/// Smallest power of two that is greater than or equal to `x`.
///
/// Uses the bit-smearing technique described at
/// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
/// Because the arithmetic is unsigned and wraps, the result is 0 for `x == 0` and for any `x`
/// above 2^63.
inline uint64_t round_up_to_next_pow2(uint64_t x) {
    uint64_t smeared = x - 1;
    // Copy the highest set bit into every lower position; shifts are 1, 2, 4, 8, 16, 32.
    for (unsigned shift = 1; shift < 64; shift *= 2) {
        smeared |= smeared >> shift;
    }
    // Every bit below the top one is now set, so adding one carries into the next power of two.
    return smeared + 1;
}
152
168
#endif
153
169
154
170
// / Internal data structure used to track registered instances and types.
@@ -178,9 +194,9 @@ struct internals {
178
194
// extensions
179
195
std::forward_list<std::string> static_strings; // Stores the std::strings backing
180
196
// detail::c_str()
181
- PyTypeObject *static_property_type;
182
- PyTypeObject *default_metaclass;
183
- PyObject *instance_base;
197
+ PyTypeObject *static_property_type = nullptr ;
198
+ PyTypeObject *default_metaclass = nullptr ;
199
+ PyObject *instance_base = nullptr ;
184
200
// Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
185
201
PYBIND11_TLS_KEY_INIT (tstate)
186
202
PYBIND11_TLS_KEY_INIT (loader_life_support_tls_key)
@@ -189,7 +205,36 @@ struct internals {
189
205
190
206
type_map<PyObject *> native_enum_type_map;
191
207
192
- internals () = default ;
208
// Constructor: creates the two TSS keys, records the current thread state and its interpreter,
// registers translate_exception as the first exception translator, and creates the static
// property type and the default metaclass.
// instance_base is not assigned here and keeps its in-class default; get_internals() assigns
// it after construction.
+ internals () {
209
+ PyThreadState *cur_tstate = PyThreadState_Get ();
210
+ // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
211
+ if (!PYBIND11_TLS_KEY_CREATE (tstate)) {
212
+ pybind11_fail (
213
+ " internals constructor: could not successfully initialize the tstate TSS key!" );
214
+ }
// Store the constructing thread's state under the freshly created tstate key.
215
+ PYBIND11_TLS_REPLACE_VALUE (tstate, cur_tstate);
216
+
217
+ // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
218
+ if (!PYBIND11_TLS_KEY_CREATE (loader_life_support_tls_key)) {
219
+ pybind11_fail (" internals constructor: could not successfully initialize the "
220
+ " loader_life_support TSS key!" );
221
+ }
222
+
223
+ istate = cur_tstate->interp ;
224
+ registered_exception_translators.push_front (&translate_exception);
225
+ static_property_type = make_static_property_type ();
226
+ default_metaclass = make_default_metaclass ();
227
+ #ifdef Py_GIL_DISABLED
228
+ // Scale proportional to the number of cores. 2x is a heuristic to reduce contention.
229
+ auto num_shards
230
+ = static_cast <size_t >(round_up_to_next_pow2 (2 * std::thread::hardware_concurrency ()));
// round_up_to_next_pow2 returns 0 for an input of 0, hence the fallback to a single shard.
231
+ if (num_shards == 0 ) {
232
+ num_shards = 1 ;
233
+ }
234
+ instance_shards.reset (new instance_map_shard[num_shards]);
// num_shards is a power of two at this point, so num_shards - 1 has all lower bits set.
235
+ instance_shards_mask = num_shards - 1 ;
236
+ #endif
237
+ }
193
238
internals (const internals &other) = delete ;
194
239
internals &operator =(const internals &other) = delete ;
195
240
~internals () {
@@ -206,6 +251,17 @@ struct internals {
206
251
}
207
252
};
208
253
254
+ // the internals struct (above) is shared between all the modules. local_internals are only
255
+ // for a single module. Any changes made to internals may require an update to
256
+ // PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
257
+ // restricted to a single module. Whether a module has local internals or not should not
258
+ // impact any other modules, because the only thing accessing the local internals is the
259
+ // module that contains them.
260
// Module-local counterpart of `internals`; get_local_internals() returns a reference to one.
+ struct local_internals {
261
// type_info pointers stored in a type_map (presumably keyed by C++ type; type_map is defined
// elsewhere -- confirm).
+ type_map<type_info *> registered_types_cpp;
262
// Forward list of ExceptionTranslator entries.
+ std::forward_list<ExceptionTranslator> registered_exception_translators;
263
+ };
264
+
209
265
enum class holder_enum_t : uint8_t {
210
266
undefined,
211
267
std_unique_ptr, // Default, lacking interop with std::shared_ptr.
@@ -249,15 +305,49 @@ struct type_info {
249
305
" __pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) \
250
306
PYBIND11_COMPILER_TYPE_LEADING_UNDERSCORE PYBIND11_PLATFORM_ABI_ID " __"
251
307
252
- // / Each module locally stores a pointer to the `internals` data. The data
253
- // / itself is shared among modules with the same `PYBIND11_INTERNALS_ID`.
254
- inline internals **&get_internals_pp () {
255
- static internals **internals_pp = nullptr ;
256
- return internals_pp;
308
+ inline PyThreadState *get_thread_state_unchecked () {
309
+ #if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON)
310
+ return PyThreadState_GET ();
311
+ #elif PY_VERSION_HEX < 0x030D0000
312
+ return _PyThreadState_UncheckedGet ();
313
+ #else
314
+ return PyThreadState_GetUnchecked ();
315
+ #endif
257
316
}
258
317
259
- // forward decl
260
- inline void translate_exception (std::exception_ptr);
318
/// Counter used to tell whether more than one subinterpreter is, or ever has been, active.
/// It must never decrease while any interpreter may be running in any thread!
inline std::atomic<int> &get_num_interpreters_seen() {
    // Function-local static: starts at zero and every call hands back the same object.
    static std::atomic<int> seen_count{0};
    return seen_count;
}
324
+
325
/// Returns a reference to the cached pointer-to-`std::unique_ptr<InternalsType>` slot.
///
/// A null slot tells the caller that it has to fetch the pointer from the interpreter's state
/// dict, or create a new one. Each `InternalsType` instantiation has its own slot(s).
template <typename InternalsType>
inline std::unique_ptr<InternalsType> *&get_internals_pp() {
#ifdef PYBIND11_SUBINTERPRETER_SUPPORT
    if (get_num_interpreters_seen() > 1) {
        // Internals are per interpreter. Once several interpreters may be alive in different
        // threads, each thread needs its own slot, hence the thread_local storage.
        static thread_local std::unique_ptr<InternalsType> *per_thread_pp = nullptr;
        static thread_local PyInterpreterState *last_interp = nullptr;

        // Looking the pointer up in the state dict is slow, so the slot is only invalidated
        // when this thread has no thread state or its interpreter differs from the one seen
        // on the previous call.
        PyThreadState *const ts = get_thread_state_unchecked();
        PyInterpreterState *const current_interp = ts ? ts->interp : nullptr;
        if (!ts || current_interp != last_interp) {
            last_interp = current_interp;
            per_thread_pp = nullptr;
        }
        return per_thread_pp;
    }
#endif
    // At most one interpreter seen (or no subinterpreter support): one slot shared by all threads.
    static std::unique_ptr<InternalsType> *shared_pp = nullptr;
    return shared_pp;
}
261
351
262
352
template <class T ,
263
353
enable_if_t <std::is_same<std::nested_exception, remove_cvref_t <T>>::value, int > = 0 >
@@ -384,49 +474,46 @@ inline object get_python_state_dict() {
384
474
return state_dict;
385
475
}
386
476
387
- inline object get_internals_obj_from_state_dict (handle state_dict) {
388
- return reinterpret_steal<object>(
389
- dict_getitemstringref (state_dict.ptr (), PYBIND11_INTERNALS_ID));
390
- }
391
-
392
- inline internals **get_internals_pp_from_capsule (handle obj) {
393
- void *raw_ptr = PyCapsule_GetPointer (obj.ptr (), /* name=*/ nullptr );
394
- if (raw_ptr == nullptr ) {
395
- raise_from (PyExc_SystemError, " pybind11::detail::get_internals_pp_from_capsule() FAILED" );
396
- throw error_already_set ();
477
// Looks up `state_dict_key` in `state_dict`. If the lookup yields an object, returns the pointer
// held by that capsule, cast to `std::unique_ptr<InternalsType> *`. If the lookup yields no
// object (presumably: key not present), returns nullptr.
// If the capsule hands back a null pointer, a SystemError is raised via raise_from and
// error_already_set is thrown.
+ template <typename InternalsType>
478
+ inline std::unique_ptr<InternalsType> *
479
+ get_internals_pp_from_capsule_in_state_dict (dict &state_dict, char const *state_dict_key) {
480
+ auto internals_obj
481
+ = reinterpret_steal<object>(dict_getitemstringref (state_dict.ptr (), state_dict_key));
482
+ if (internals_obj) {
483
+ void *raw_ptr = PyCapsule_GetPointer (internals_obj.ptr (), /* name=*/ nullptr );
484
+ if (!raw_ptr) {
485
+ raise_from (PyExc_SystemError,
486
+ " pybind11::detail::get_internals_pp_from_capsule_in_state_dict() FAILED" );
487
+ throw error_already_set ();
488
+ }
489
+ return reinterpret_cast <std::unique_ptr<InternalsType> *>(raw_ptr);
397
490
}
398
- return static_cast <internals **>(raw_ptr);
399
- }
400
-
401
- inline uint64_t round_up_to_next_pow2 (uint64_t x) {
402
- // Round-up to the next power of two.
403
- // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
404
- x--;
405
- x |= (x >> 1 );
406
- x |= (x >> 2 );
407
- x |= (x >> 4 );
408
- x |= (x >> 8 );
409
- x |= (x >> 16 );
410
- x |= (x >> 32 );
411
- x++;
412
- return x;
// No object was found under `state_dict_key`; the caller decides whether to create one.
491
+ return nullptr ;
413
492
}
414
493
415
494
// / Return a reference to the current `internals` data
416
495
PYBIND11_NOINLINE internals &get_internals () {
417
- auto ** &internals_pp = get_internals_pp ();
496
+ auto *&internals_pp = get_internals_pp<internals> ();
418
497
if (internals_pp && *internals_pp) {
498
+ // This is the fast path, everything is already setup, just return it
419
499
return **internals_pp;
420
500
}
421
501
502
+ // Slow path, something needs to be fetched from the state dict or created
503
+
504
+ // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals.
422
505
gil_scoped_acquire_simple gil;
423
506
error_scope err_scope;
424
507
425
508
dict state_dict = get_python_state_dict ();
426
- if (object internals_obj = get_internals_obj_from_state_dict (state_dict)) {
427
- internals_pp = get_internals_pp_from_capsule (internals_obj);
509
+ internals_pp = get_internals_pp_from_capsule_in_state_dict<internals>(state_dict,
510
+ PYBIND11_INTERNALS_ID);
511
+ if (!internals_pp) {
512
+ internals_pp = new std::unique_ptr<internals>;
513
+ state_dict[PYBIND11_INTERNALS_ID] = capsule (reinterpret_cast <void *>(internals_pp));
428
514
}
429
- if (internals_pp && *internals_pp) {
515
+
516
+ if (*internals_pp) {
430
517
// We loaded the internals through `state_dict`, which means that our `error_already_set`
431
518
// and `builtin_exception` may be different local classes than the ones set up in the
432
519
// initial exception translator, below, so add another for our local exception classes.
@@ -435,68 +522,61 @@ PYBIND11_NOINLINE internals &get_internals() {
435
522
// libc++ with CPython doesn't require this (types are explicitly exported)
436
523
// libc++ with PyPy still need it, awaiting further investigation
437
524
#if !defined(__GLIBCXX__)
438
- (*internals_pp)->registered_exception_translators .push_front (&translate_local_exception);
525
+ if ((*internals_pp)->registered_exception_translators .empty ()
526
+ || (*internals_pp)->registered_exception_translators .front ()
527
+ != &translate_local_exception) {
528
+ (*internals_pp)
529
+ ->registered_exception_translators .push_front (&translate_local_exception);
530
+ }
439
531
#endif
440
532
} else {
441
- if (!internals_pp) {
442
- internals_pp = new internals *();
443
- }
444
- auto *&internals_ptr = *internals_pp;
445
- internals_ptr = new internals ();
533
+ auto &internals_ptr = *internals_pp;
534
+ internals_ptr.reset (new internals ());
446
535
447
- PyThreadState *tstate = PyThreadState_Get ();
448
- // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
449
- if (!PYBIND11_TLS_KEY_CREATE (internals_ptr->tstate )) {
450
- pybind11_fail (" get_internals: could not successfully initialize the tstate TSS key!" );
451
- }
452
- PYBIND11_TLS_REPLACE_VALUE (internals_ptr->tstate , tstate);
453
-
454
- // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
455
- if (!PYBIND11_TLS_KEY_CREATE (internals_ptr->loader_life_support_tls_key )) {
456
- pybind11_fail (" get_internals: could not successfully initialize the "
457
- " loader_life_support TSS key!" );
458
- }
459
-
460
- internals_ptr->istate = tstate->interp ;
461
- state_dict[PYBIND11_INTERNALS_ID] = capsule (reinterpret_cast <void *>(internals_pp));
462
- internals_ptr->registered_exception_translators .push_front (&translate_exception);
463
- internals_ptr->static_property_type = make_static_property_type ();
464
- internals_ptr->default_metaclass = make_default_metaclass ();
465
- internals_ptr->instance_base = make_object_base_type (internals_ptr->default_metaclass );
466
- #ifdef Py_GIL_DISABLED
467
- // Scale proportional to the number of cores. 2x is a heuristic to reduce contention.
468
- auto num_shards
469
- = static_cast <size_t >(round_up_to_next_pow2 (2 * std::thread::hardware_concurrency ()));
470
- if (num_shards == 0 ) {
471
- num_shards = 1 ;
536
+ if (!internals_ptr->instance_base ) {
537
+ // This calls get_internals, so cannot be called from within the internals constructor
538
+ // called above because internals_ptr must be set before get_internals is called again
539
+ internals_ptr->instance_base = make_object_base_type (internals_ptr->default_metaclass );
472
540
}
473
- internals_ptr->instance_shards .reset (new instance_map_shard[num_shards]);
474
- internals_ptr->instance_shards_mask = num_shards - 1 ;
475
- #endif // Py_GIL_DISABLED
476
541
}
542
+
477
543
return **internals_pp;
478
544
}
479
545
480
- // the internals struct (above) is shared between all the modules. local_internals are only
481
- // for a single module. Any changes made to internals may require an update to
482
- // PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
483
- // restricted to a single module. Whether a module has local internals or not should not
484
- // impact any other modules, because the only things accessing the local internals is the
485
- // module that contains them.
486
- struct local_internals {
487
- type_map<type_info *> registered_types_cpp;
488
- std::forward_list<ExceptionTranslator> registered_exception_translators;
489
- };
546
+ // / A string key uniquely describing this module
547
+ inline char const *get_local_internals_id () {
548
+ // Use the address of this static itself as part of the key, so that the value is uniquely tied
549
+ // to where the module is loaded in memory
550
+ static const std::string this_module_idstr
551
+ = PYBIND11_MODULE_LOCAL_ID
552
+ + std::to_string (reinterpret_cast <uintptr_t >(&this_module_idstr));
553
+ return this_module_idstr.c_str ();
554
+ }
490
555
491
556
// / Works like `get_internals`, but for things which are locally registered.
492
557
inline local_internals &get_local_internals() {
    // Fast path: the cached slot already points at a populated unique_ptr.
    auto *&slot = get_internals_pp<local_internals>();
    if (slot != nullptr && *slot != nullptr) {
        return **slot;
    }

    // Slow path. The GIL is taken with gil_scoped_acquire_simple because
    // py::gil_scoped_acquire itself calls get_internals.
    gil_scoped_acquire_simple gil;
    error_scope err_scope;

    dict state_dict = get_python_state_dict();
    char const *const module_key = get_local_internals_id();

    // Reuse the capsule stored under this module's key if there is one; otherwise allocate a
    // fresh (empty) unique_ptr and store a capsule holding its address under that key.
    slot = get_internals_pp_from_capsule_in_state_dict<local_internals>(state_dict, module_key);
    if (slot == nullptr) {
        slot = new std::unique_ptr<local_internals>;
        state_dict[module_key] = capsule(reinterpret_cast<void *>(slot));
    }

    // Create the local_internals object itself if the unique_ptr is still empty.
    std::unique_ptr<local_internals> &owner = *slot;
    if (owner == nullptr) {
        owner.reset(new local_internals());
    }
    return *owner;
}
501
581
502
582
#ifdef Py_GIL_DISABLED
0 commit comments