Skip to content

[FMV][compiler-rt] Fix cpu features initialization. #95149

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "../cpu_model/aarch64.h"

struct FEATURES {
long long features;
unsigned long long features;
};

extern struct FEATURES __aarch64_cpu_features;
Expand All @@ -23,14 +23,18 @@ extern bool __aarch64_has_sme_and_tpidr2_el0;
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
__attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {
if (!__aarch64_cpu_features.features)
__init_cpu_features();
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

__init_cpu_features();
}

__attribute__((target("sve"))) long
__arm_get_current_vg(void) __arm_streaming_compatible {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @aemerson @sdesmalen-arm

Can `__arm_get_current_vg()` ever be called before `__init_cpu_features()` has run?

struct SME_STATE State = __arm_sme_state();
bool HasSVE = __aarch64_cpu_features.features & (1ULL << FEAT_SVE);
unsigned long long features =
__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
bool HasSVE = features & (1ULL << FEAT_SVE);

if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
return 0;
Expand Down
4 changes: 2 additions & 2 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/android.inc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

// ifunc resolvers don't have hwcaps in arguments on Android API lower
Expand All @@ -17,7 +17,7 @@ void __init_cpu_features_resolver(unsigned long hwcap,

void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

// Don't set any CPU features,
Expand Down
4 changes: 2 additions & 2 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/freebsd.inc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

__init_cpu_features_constructor(hwcap, arg);
Expand All @@ -10,7 +10,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
unsigned long hwcap = 0;
unsigned long hwcap2 = 0;
// CPU features already initialized.
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

int res = 0;
Expand Down
8 changes: 5 additions & 3 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/fuchsia.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <zircon/syscalls.h>

void __init_cpu_features_resolver() {
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

// This ensures the vDSO is a direct link-time dependency of anything that
Expand All @@ -13,8 +13,8 @@ void __init_cpu_features_resolver() {
if (status != ZX_OK)
return;

#define setCPUFeature(cpu_feature) \
__aarch64_cpu_features.features |= 1ULL << cpu_feature
unsigned long long feat = 0;
#define setCPUFeature(cpu_feature) feat |= 1ULL << cpu_feature

if (features & ZX_ARM64_FEATURE_ISA_FP)
setCPUFeature(FEAT_FP);
Expand Down Expand Up @@ -48,4 +48,6 @@ void __init_cpu_features_resolver() {
setCPUFeature(FEAT_SVE);

setCPUFeature(FEAT_INIT);

__atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
}
135 changes: 43 additions & 92 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
#define HAVE_SYS_AUXV_H
#endif



static void __init_cpu_features_constructor(unsigned long hwcap,
const __ifunc_arg_t *arg) {
#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
unsigned long long feat = 0;
#define setCPUFeature(F) feat |= 1ULL << F
#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
#define extractBits(val, start, number) \
(val & ((1ULL << number) - 1ULL) << start) >> start
Expand All @@ -20,26 +19,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_PMULL);
if (hwcap & HWCAP_FLAGM)
setCPUFeature(FEAT_FLAGM);
if (hwcap2 & HWCAP2_FLAGM2) {
setCPUFeature(FEAT_FLAGM);
if (hwcap2 & HWCAP2_FLAGM2)
setCPUFeature(FEAT_FLAGM2);
}
if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
if (hwcap & HWCAP_SM4)
setCPUFeature(FEAT_SM4);
if (hwcap & HWCAP_ASIMDDP)
setCPUFeature(FEAT_DOTPROD);
if (hwcap & HWCAP_ASIMDFHM)
setCPUFeature(FEAT_FP16FML);
if (hwcap & HWCAP_FPHP) {
if (hwcap & HWCAP_FPHP)
setCPUFeature(FEAT_FP16);
setCPUFeature(FEAT_FP);
}
if (hwcap & HWCAP_DIT)
setCPUFeature(FEAT_DIT);
if (hwcap & HWCAP_ASIMDRDM)
setCPUFeature(FEAT_RDM);
if (hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC2);
if (hwcap & HWCAP_AES)
setCPUFeature(FEAT_AES);
if (hwcap & HWCAP_SHA1)
Expand All @@ -52,23 +45,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_FCMA);
if (hwcap & HWCAP_SB)
setCPUFeature(FEAT_SB);
if (hwcap & HWCAP_SSBS)
if (hwcap & HWCAP_SSBS) {
setCPUFeature(FEAT_SSBS);
setCPUFeature(FEAT_SSBS2);
}
if (hwcap2 & HWCAP2_MTE) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
}
if (hwcap2 & HWCAP2_MTE3) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
if (hwcap2 & HWCAP2_MTE3)
setCPUFeature(FEAT_MEMTAG3);
}
if (hwcap2 & HWCAP2_SVEAES)
setCPUFeature(FEAT_SVE_AES);
if (hwcap2 & HWCAP2_SVEPMULL) {
setCPUFeature(FEAT_SVE_AES);
if (hwcap2 & HWCAP2_SVEPMULL)
setCPUFeature(FEAT_SVE_PMULL128);
}
if (hwcap2 & HWCAP2_SVEBITPERM)
setCPUFeature(FEAT_SVE_BITPERM);
if (hwcap2 & HWCAP2_SVESHA3)
Expand Down Expand Up @@ -105,6 +95,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_WFXT);
if (hwcap2 & HWCAP2_SME)
setCPUFeature(FEAT_SME);
if (hwcap2 & HWCAP2_SME2)
setCPUFeature(FEAT_SME2);
if (hwcap2 & HWCAP2_SME_I16I64)
setCPUFeature(FEAT_SME_I64);
if (hwcap2 & HWCAP2_SME_F64F64)
Expand All @@ -113,86 +105,45 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_MOPS);
if (hwcap & HWCAP_CPUID) {
unsigned long ftr;
getCPUFeature(ID_AA64PFR1_EL1, ftr);
// ID_AA64PFR1_EL1.MTE >= 0b0001
if (extractBits(ftr, 8, 4) >= 0x1)
setCPUFeature(FEAT_MEMTAG);
// ID_AA64PFR1_EL1.SSBS == 0b0001
if (extractBits(ftr, 4, 4) == 0x1)
setCPUFeature(FEAT_SSBS);
// ID_AA64PFR1_EL1.SME == 0b0010
if (extractBits(ftr, 24, 4) == 0x2)
setCPUFeature(FEAT_SME2);
getCPUFeature(ID_AA64PFR0_EL1, ftr);
// ID_AA64PFR0_EL1.FP != 0b1111
if (extractBits(ftr, 16, 4) != 0xF) {
setCPUFeature(FEAT_FP);
// ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
setCPUFeature(FEAT_SIMD);
}
// ID_AA64PFR0_EL1.SVE != 0b0000
if (extractBits(ftr, 32, 4) != 0x0) {
// get ID_AA64ZFR0_EL1, that name supported
// if sve enabled only
getCPUFeature(S3_0_C0_C4_4, ftr);
// ID_AA64ZFR0_EL1.SVEver == 0b0000
if (extractBits(ftr, 0, 4) == 0x0)
setCPUFeature(FEAT_SVE);
// ID_AA64ZFR0_EL1.SVEver == 0b0001
if (extractBits(ftr, 0, 4) == 0x1)
setCPUFeature(FEAT_SVE2);
// ID_AA64ZFR0_EL1.BF16 != 0b0000
if (extractBits(ftr, 20, 4) != 0x0)
setCPUFeature(FEAT_SVE_BF16);
}
getCPUFeature(ID_AA64ISAR0_EL1, ftr);
// ID_AA64ISAR0_EL1.SHA3 != 0b0000
if (extractBits(ftr, 32, 4) != 0x0)
setCPUFeature(FEAT_SHA3);

getCPUFeature(ID_AA64ISAR1_EL1, ftr);
// ID_AA64ISAR1_EL1.DPB >= 0b0001
if (extractBits(ftr, 0, 4) >= 0x1)
setCPUFeature(FEAT_DPB);
// ID_AA64ISAR1_EL1.LRCPC != 0b0000
if (extractBits(ftr, 20, 4) != 0x0)
setCPUFeature(FEAT_RCPC);
// ID_AA64ISAR1_EL1.LRCPC == 0b0011
if (extractBits(ftr, 20, 4) == 0x3)
setCPUFeature(FEAT_RCPC3);
// ID_AA64ISAR1_EL1.SPECRES == 0b0001
if (extractBits(ftr, 40, 4) == 0x2)
/* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */
if (extractBits(ftr, 40, 4) >= 0x1)
setCPUFeature(FEAT_PREDRES);
// ID_AA64ISAR1_EL1.BF16 != 0b0000
if (extractBits(ftr, 44, 4) != 0x0)
setCPUFeature(FEAT_BF16);
// ID_AA64ISAR1_EL1.LS64 >= 0b0001
/* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */
if (extractBits(ftr, 60, 4) >= 0x1)
setCPUFeature(FEAT_LS64);
// ID_AA64ISAR1_EL1.LS64 >= 0b0010
/* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */
if (extractBits(ftr, 60, 4) >= 0x2)
setCPUFeature(FEAT_LS64_V);
// ID_AA64ISAR1_EL1.LS64 >= 0b0011
/* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */
if (extractBits(ftr, 60, 4) >= 0x3)
setCPUFeature(FEAT_LS64_ACCDATA);
} else {
// Set some features in case of no CPUID support
if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
setCPUFeature(FEAT_FP);
// FP and AdvSIMD fields have the same value
setCPUFeature(FEAT_SIMD);
}
if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
setCPUFeature(FEAT_DPB);
if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC);
if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
setCPUFeature(FEAT_BF16);
if (hwcap2 & HWCAP2_SVEBF16)
setCPUFeature(FEAT_SVE_BF16);
if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
setCPUFeature(FEAT_SVE2);
if (hwcap & HWCAP_SHA3)
setCPUFeature(FEAT_SHA3);
}
if (hwcap & HWCAP_FP) {
setCPUFeature(FEAT_FP);
// FP and AdvSIMD fields have the same value
setCPUFeature(FEAT_SIMD);
}
if (hwcap & HWCAP_DCPOP)
setCPUFeature(FEAT_DPB);
if (hwcap & HWCAP_LRCPC)
setCPUFeature(FEAT_RCPC);
if (hwcap & HWCAP_ILRCPC)
setCPUFeature(FEAT_RCPC2);
if (hwcap2 & HWCAP2_LRCPC3)
setCPUFeature(FEAT_RCPC3);
if (hwcap2 & HWCAP2_BF16)
setCPUFeature(FEAT_BF16);
if (hwcap2 & HWCAP2_SVEBF16)
setCPUFeature(FEAT_SVE_BF16);
if (hwcap & HWCAP_SVE)
setCPUFeature(FEAT_SVE);
if (hwcap2 & HWCAP2_SVE2)
setCPUFeature(FEAT_SVE2);
if (hwcap & HWCAP_SHA3)
setCPUFeature(FEAT_SHA3);
setCPUFeature(FEAT_INIT);

__atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
}
4 changes: 2 additions & 2 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/sysauxv.inc
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
__init_cpu_features_constructor(hwcap, arg);
}

void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
if (__aarch64_cpu_features.features)
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;

unsigned long hwcap = getauxval(AT_HWCAP);
Expand Down
6 changes: 6 additions & 0 deletions compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@
// Each HWCAP2 mask below is defined only when the macro is not already
// defined. All of them use a 1ULL base so every constant is at least 64 bits
// wide: shifting 1UL by 37 or 46 is undefined wherever `unsigned long` is
// only 32 bits, and the neighbouring definitions already use 1ULL.
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1ULL << 33)
#endif
#ifndef HWCAP2_SME2
#define HWCAP2_SME2 (1ULL << 37)
#endif
#ifndef HWCAP2_MOPS
#define HWCAP2_MOPS (1ULL << 43)
#endif
#ifndef HWCAP2_LRCPC3
#define HWCAP2_LRCPC3 (1ULL << 46)
#endif