|
6 | 6 |
|
7 | 7 | #if defined(IS_X86)
|
8 | 8 | #if defined(_MSC_VER)
|
| 9 | +#include <Windows.h> |
9 | 10 | #include <intrin.h>
|
10 | 11 | #elif defined(__GNUC__)
|
11 | 12 | #include <immintrin.h>
|
|
14 | 15 | #endif
|
15 | 16 | #endif
|
16 | 17 |
|
/*
 * BLAKE3_ATOMICS selects how the shared feature word is accessed.
 * A build may define it explicitly (0 or 1). Otherwise it defaults to 1 only
 * when the preprocessor supports __has_include, <stdatomic.h> is present, and
 * the compiler is not MSVC; in every other case it defaults to 0.
 */
#if !defined(BLAKE3_ATOMICS)
#if defined(__has_include)
#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
#define BLAKE3_ATOMICS 1
#else
#define BLAKE3_ATOMICS 0
#endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
#else
#define BLAKE3_ATOMICS 0
#endif /* defined(__has_include) */
#endif /* BLAKE3_ATOMICS */

/*
 * ATOMIC_INT         type used to declare the shared word.
 * ATOMIC_LOAD(x)     read the current value of lvalue x.
 * ATOMIC_STORE(x, y) write y into lvalue x.
 *
 * Every argument and every expansion is parenthesized so the macros keep
 * their meaning when passed an expression or used inside a larger one.
 */
#if BLAKE3_ATOMICS
/* C11 _Atomic object: plain reads and assignments are themselves atomic. */
#define ATOMIC_INT _Atomic int
#define ATOMIC_LOAD(x) (x)
#define ATOMIC_STORE(x, y) ((x) = (y))
#elif defined(_MSC_VER)
/* Win32 interlocked intrinsics; OR-ing with 0 reads without changing x. */
#define ATOMIC_INT LONG
#define ATOMIC_LOAD(x) InterlockedOr(&(x), 0)
#define ATOMIC_STORE(x, y) InterlockedExchange(&(x), (y))
#else
/* Fallback: an ordinary int with ordinary accesses (no atomicity). */
#define ATOMIC_INT int
#define ATOMIC_LOAD(x) (x)
#define ATOMIC_STORE(x, y) ((x) = (y))
#endif
| 43 | + |
17 | 44 | #define MAYBE_UNUSED(x) (void)((x))
|
18 | 45 |
|
19 | 46 | #if defined(IS_X86)
|
@@ -76,22 +103,24 @@ enum cpu_feature {
|
76 | 103 | #if !defined(BLAKE3_TESTING)
|
77 | 104 | static /* Allow the variable to be controlled manually for testing */
|
78 | 105 | #endif
|
79 |
| - enum cpu_feature g_cpu_features = UNDEFINED; |
| 106 | + ATOMIC_INT g_cpu_features = UNDEFINED; |
80 | 107 |
|
81 | 108 | #if !defined(BLAKE3_TESTING)
|
82 | 109 | static
|
83 | 110 | #endif
|
84 | 111 | enum cpu_feature
|
85 | 112 | get_cpu_features(void) {
|
86 | 113 |
|
87 |
| - if (g_cpu_features != UNDEFINED) { |
88 |
| - return g_cpu_features; |
| 114 | + /* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */ |
| 115 | + enum cpu_feature features = ATOMIC_LOAD(g_cpu_features); |
| 116 | + if (features != UNDEFINED) { |
| 117 | + return features; |
89 | 118 | } else {
|
90 | 119 | #if defined(IS_X86)
|
91 | 120 | uint32_t regs[4] = {0};
|
92 | 121 | uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
|
93 | 122 | (void)edx;
|
94 |
| - enum cpu_feature features = 0; |
| 123 | + features = 0; |
95 | 124 | cpuid(regs, 0);
|
96 | 125 | const int max_id = *eax;
|
97 | 126 | cpuid(regs, 1);
|
@@ -124,7 +153,7 @@ static
|
124 | 153 | }
|
125 | 154 | }
|
126 | 155 | }
|
127 |
| - g_cpu_features = features; |
| 156 | + ATOMIC_STORE(g_cpu_features, features); |
128 | 157 | return features;
|
129 | 158 | #else
|
130 | 159 | /* How to detect NEON? */
|
|
0 commit comments