@@ -489,25 +489,56 @@ gc_maybe_untrack(PyObject *op)
489
489
#define BUFFER_HI 16
490
490
#define BUFFER_LO 8
491
491
492
+ // Prefetch instructions will fetch the line of data from memory that
493
+ // contains the byte specified with the source operand to a location in
494
+ // the cache hierarchy specified by a locality hint. The instruction
495
+ // is only a hint and the CPU is free to ignore it. Instructions and
496
+ // behaviour are CPU specific but the definitions of locality hints
497
+ // below are mostly consistent.
498
+ //
499
+ // * T0 (temporal data) prefetch data into all levels of the cache hierarchy.
500
+ //
501
+ // * T1 (temporal data with respect to first level cache) prefetch data into
502
+ // level 2 cache and higher.
503
+ //
504
+ // * T2 (temporal data with respect to second level cache) prefetch data into
505
+ // level 3 cache and higher, or an implementation-specific choice.
506
+ //
507
+ // * NTA (non-temporal data with respect to all cache levels) prefetch data into
508
+ // non-temporal cache structure and into a location close to the processor,
509
+ // minimizing cache pollution.
510
+
492
511
#if defined(__GNUC__ ) || defined(__clang__ )
493
- #define PREFETCH_L1 (ptr ) __builtin_prefetch(ptr, 0, 3)
494
- #define PREFETCH_L2 (ptr ) __builtin_prefetch(ptr, 0, 2)
512
+ #define PREFETCH_T0 (ptr ) __builtin_prefetch(ptr, 0, 3)
513
+ #define PREFETCH_T1 (ptr ) __builtin_prefetch(ptr, 0, 2)
514
+ #define PREFETCH_T2 (ptr ) __builtin_prefetch(ptr, 0, 1)
515
+ #define PREFETCH_NTA (ptr ) __builtin_prefetch(ptr, 0, 0)
495
516
#elif defined(_MSC_VER ) && (defined(_M_X64 ) || defined(_M_I86 )) && !defined(_M_ARM64EC )
496
517
#include <mmintrin.h>
497
- #define PREFETCH_L1 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
498
- #define PREFETCH_L2 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
499
- #elif defined(__aarch64__ )
500
- #define PREFETCH_L1 (ptr ) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
501
- #define PREFETCH_L2 (ptr ) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
518
+ #define PREFETCH_T0 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
519
+ #define PREFETCH_T1 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
520
+ #define PREFETCH_T2 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T2)
521
+ #define PREFETCH_NTA (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_NTA)
522
+ #elif defined (__aarch64__ )
523
+ #define PREFETCH_T0 (ptr ) \
524
+ do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
525
+ #define PREFETCH_T1 (ptr ) \
526
+ do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
527
+ #define PREFETCH_T2 (ptr ) \
528
+ do { __asm__ __volatile__("prfm pldl3keep, %0" ::"Q"(*(ptr))); } while (0)
529
+ #define PREFETCH_NTA (ptr ) \
530
+ do { __asm__ __volatile__("prfm pldl1strm, %0" ::"Q"(*(ptr))); } while (0)
502
531
#else
503
- #define PREFETCH_L1 (ptr ) do { (void)(ptr); } while (0) /* disabled */
504
- #define PREFETCH_L2 (ptr ) do { (void)(ptr); } while (0) /* disabled */
532
+ #define PREFETCH_T0 (ptr ) do { (void)(ptr); } while (0) /* disabled */
533
+ #define PREFETCH_T1 (ptr ) do { (void)(ptr); } while (0) /* disabled */
534
+ #define PREFETCH_T2 (ptr ) do { (void)(ptr); } while (0) /* disabled */
535
+ #define PREFETCH_NTA (ptr ) do { (void)(ptr); } while (0) /* disabled */
505
536
#endif
506
537
507
538
#ifdef GC_ENABLE_PREFETCH_INSTRUCTIONS
508
- #define prefetch (ptr ) PREFETCH_L1 (ptr)
539
+ #define prefetch (ptr ) PREFETCH_T1 (ptr)
509
540
#else
510
- #define prefetch (ptr )
541
+ #define prefetch (ptr )
511
542
#endif
512
543
513
544
// a contiguous sequence of PyObject pointers, can contain NULLs
0 commit comments