Skip to content

Commit d185bd9

Browse files
davemgreentstellar
authored andcommitted
[AArch64] Enable vscale_range with +sme (#124466)
If we have +sme but not +sve, we would not set vscale_range on functions. It should be valid to apply it with the same range with just +sme, which can help mitigate some performance regressions in cases such as scalable vector bitcasts (https://godbolt.org/z/exhe4jd8d). (cherry picked from commit 9f1c825)
1 parent dc50bb0 commit d185bd9

File tree

11 files changed

+30
-22
lines changed

11 files changed

+30
-22
lines changed

clang/include/clang/Basic/TargetInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,8 @@ class TargetInfo : public TransferrableTargetInfo,
10231023

10241024
/// Returns target-specific min and max values VScale_Range.
10251025
virtual std::optional<std::pair<unsigned, unsigned>>
1026-
getVScaleRange(const LangOptions &LangOpts) const {
1026+
getVScaleRange(const LangOptions &LangOpts,
1027+
bool IsArmStreamingFunction) const {
10271028
return std::nullopt;
10281029
}
10291030
/// The __builtin_clz* and __builtin_ctz* built-in

clang/lib/AST/ASTContext.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10363,7 +10363,8 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
1036310363
/// getRVVTypeSize - Return RVV vector register size.
1036410364
static uint64_t getRVVTypeSize(ASTContext &Context, const BuiltinType *Ty) {
1036510365
assert(Ty->isRVVVLSBuiltinType() && "Invalid RVV Type");
10366-
auto VScale = Context.getTargetInfo().getVScaleRange(Context.getLangOpts());
10366+
auto VScale =
10367+
Context.getTargetInfo().getVScaleRange(Context.getLangOpts(), false);
1036710368
if (!VScale)
1036810369
return 0;
1036910370

clang/lib/AST/ItaniumMangle.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4198,7 +4198,7 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
41984198

41994199
// Apend the LMUL suffix.
42004200
auto VScale = getASTContext().getTargetInfo().getVScaleRange(
4201-
getASTContext().getLangOpts());
4201+
getASTContext().getLangOpts(), false);
42024202
unsigned VLen = VScale->first * llvm::RISCV::RVVBitsPerBlock;
42034203

42044204
if (T->getVectorKind() == VectorKind::RVVFixedLengthData) {

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -703,12 +703,13 @@ ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
703703
}
704704

705705
std::optional<std::pair<unsigned, unsigned>>
706-
AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
706+
AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
707+
bool IsArmStreamingFunction) const {
707708
if (LangOpts.VScaleMin || LangOpts.VScaleMax)
708709
return std::pair<unsigned, unsigned>(
709710
LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax);
710711

711-
if (hasFeature("sve"))
712+
if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")))
712713
return std::pair<unsigned, unsigned>(1, 16);
713714

714715
return std::nullopt;

clang/lib/Basic/Targets/AArch64.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
184184
ArrayRef<Builtin::Info> getTargetBuiltins() const override;
185185

186186
std::optional<std::pair<unsigned, unsigned>>
187-
getVScaleRange(const LangOptions &LangOpts) const override;
187+
getVScaleRange(const LangOptions &LangOpts,
188+
bool IsArmStreamingFunction) const override;
188189
bool doesFeatureAffectCodeGen(StringRef Name) const override;
189190
bool validateCpuSupports(StringRef FeatureStr) const override;
190191
bool hasFeature(StringRef Feature) const override;

clang/lib/Basic/Targets/RISCV.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
222222
// Currently we support the v1.0 RISC-V V intrinsics.
223223
Builder.defineMacro("__riscv_v_intrinsic", Twine(getVersionValue(1, 0)));
224224

225-
auto VScale = getVScaleRange(Opts);
225+
auto VScale = getVScaleRange(Opts, false);
226226
if (VScale && VScale->first && VScale->first == VScale->second)
227227
Builder.defineMacro("__riscv_v_fixed_vlen",
228228
Twine(VScale->first * llvm::RISCV::RVVBitsPerBlock));
@@ -289,7 +289,8 @@ bool RISCVTargetInfo::initFeatureMap(
289289
}
290290

291291
std::optional<std::pair<unsigned, unsigned>>
292-
RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
292+
RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts,
293+
bool IsArmStreamingFunction) const {
293294
// RISCV::RVVBitsPerBlock is 64.
294295
unsigned VScaleMin = ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock;
295296

clang/lib/Basic/Targets/RISCV.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ class RISCVTargetInfo : public TargetInfo {
9999
const std::vector<std::string> &FeaturesVec) const override;
100100

101101
std::optional<std::pair<unsigned, unsigned>>
102-
getVScaleRange(const LangOptions &LangOpts) const override;
102+
getVScaleRange(const LangOptions &LangOpts,
103+
bool IsArmStreamingFunction) const override;
103104

104105
bool hasFeature(StringRef Feature) const override;
105106

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -551,14 +551,6 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
551551
CurFn->addFnAttr("min-legal-vector-width",
552552
llvm::utostr(LargestVectorWidth));
553553

554-
// Add vscale_range attribute if appropriate.
555-
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
556-
getContext().getTargetInfo().getVScaleRange(getLangOpts());
557-
if (VScaleRange) {
558-
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
559-
getLLVMContext(), VScaleRange->first, VScaleRange->second));
560-
}
561-
562554
// If we generated an unreachable return block, delete it now.
563555
if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) {
564556
Builder.ClearInsertionPoint();
@@ -1110,6 +1102,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
11101102
if (FD && FD->isMain())
11111103
Fn->removeFnAttr("zero-call-used-regs");
11121104

1105+
// Add vscale_range attribute if appropriate.
1106+
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
1107+
getContext().getTargetInfo().getVScaleRange(
1108+
getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false);
1109+
if (VScaleRange) {
1110+
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
1111+
getLLVMContext(), VScaleRange->first, VScaleRange->second));
1112+
}
1113+
11131114
llvm::BasicBlock *EntryBB = createBasicBlock("entry", CurFn);
11141115

11151116
// Create a marker to make it easy to insert allocas into the entryblock

clang/lib/CodeGen/Targets/RISCV.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,8 +367,8 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const {
367367
const auto *VT = Ty->castAs<VectorType>();
368368
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
369369

370-
auto VScale =
371-
getContext().getTargetInfo().getVScaleRange(getContext().getLangOpts());
370+
auto VScale = getContext().getTargetInfo().getVScaleRange(
371+
getContext().getLangOpts(), false);
372372

373373
unsigned NumElts = VT->getNumElements();
374374
llvm::Type *EltType = llvm::Type::getInt1Ty(getVMContext());

clang/lib/Sema/SemaType.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8491,7 +8491,8 @@ static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType,
84918491
return;
84928492
}
84938493

8494-
auto VScale = S.Context.getTargetInfo().getVScaleRange(S.getLangOpts());
8494+
auto VScale =
8495+
S.Context.getTargetInfo().getVScaleRange(S.getLangOpts(), false);
84958496
if (!VScale || !VScale->first || VScale->first != VScale->second) {
84968497
S.Diag(Attr.getLoc(), diag::err_attribute_riscv_rvv_bits_unsupported)
84978498
<< Attr;

clang/test/CodeGen/AArch64/sme-intrinsics/aarch64-sme-attrs.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,12 +300,12 @@ int test_variadic_template() __arm_inout("za") {
300300
preserves_za_decl);
301301
}
302302

303-
// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
303+
// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
304304
// CHECK: attributes #[[NORMAL_DECL]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
305305
// CHECK: attributes #[[SM_ENABLED_DECL]] = { "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
306306
// CHECK: attributes #[[SM_COMPATIBLE]] = { mustprogress noinline nounwind "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
307307
// CHECK: attributes #[[SM_COMPATIBLE_DECL]] = { "aarch64_pstate_sm_compatible" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
308-
// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
308+
// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind vscale_range(1,16) "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
309309
// CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
310310
// CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_inout_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }
311311
// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_preserves_za" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" }

0 commit comments

Comments
 (0)