|
| 1 | +#define WIN32_LEAN_AND_MEAN |
| 2 | +#include <windows.h> |
| 3 | +#include <processthreadsapi.h> |
| 4 | +#include <stdint.h> |
| 5 | + |
// Fallback ids for the IsProcessorFeaturePresent() feature constants used
// below. Each one is only defined when the included Windows headers have not
// already provided it (presumably SDK versions that predate the constant), so
// a header-supplied definition always takes precedence.
#if !defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
#endif

#if !defined(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44
#endif

#if !defined(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
#endif

#if !defined(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46
#endif

#if !defined(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
#endif

#if !defined(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
#endif

#if !defined(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
#endif

#if !defined(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
#endif

#if !defined(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
#endif

#if !defined(PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
#endif

#if !defined(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
#endif
| 39 | + |
| 40 | +void __init_cpu_features_resolver(unsigned long hwcap, |
| 41 | + const __ifunc_arg_t *arg) {} |
| 42 | + |
| 43 | +void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { |
| 44 | + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) |
| 45 | + return; |
| 46 | + |
| 47 | +#define setCPUFeature(F) features |= 1ULL << F |
| 48 | + |
| 49 | + uint64_t features = 0; |
| 50 | + |
| 51 | + setCPUFeature(FEAT_INIT); |
| 52 | + setCPUFeature(FEAT_FP); |
| 53 | + |
| 54 | + // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent |
| 55 | + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { |
| 56 | + setCPUFeature(FEAT_SHA2); |
| 57 | + setCPUFeature(FEAT_PMULL); |
| 58 | + } |
| 59 | + |
| 60 | + static const struct ProcessFeatureToFeatMap_t { |
| 61 | + int WinApiFeature; |
| 62 | + enum CPUFeatures CPUFeature; |
| 63 | + } FeatMap[] = { |
| 64 | + {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC}, |
| 65 | + {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE}, |
| 66 | + {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD}, |
| 67 | + {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT}, |
| 68 | + {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC}, |
| 69 | + {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE}, |
| 70 | + {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2}, |
| 71 | + {PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE, FEAT_SVE_PMULL128}, |
| 72 | + {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3}, |
| 73 | + {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4}, |
| 74 | + {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM}, |
| 75 | + {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM}, |
| 76 | + // There is no I8MM flag, but when SVE_I8MM is available, I8MM is too. |
| 77 | + {PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM}, |
| 78 | + }; |
| 79 | + |
| 80 | + for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I) |
| 81 | + if (IsProcessorFeaturePresent(FeatMap[I].WinApiFeature)) |
| 82 | + setCPUFeature(FeatMap[I].CPUFeature); |
| 83 | + |
| 84 | + __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED); |
| 85 | +} |
0 commit comments