-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Clang][SME] Detect always_inline used with mismatched streaming attributes #77936
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-support @llvm/pr-subscribers-clang Author: Sam Tebbs (SamTebbs33) ChangesThis patch adds an error that is emitted when a streaming function is marked as always_inline and is called from a non-streaming function. Full diff: https://github.com/llvm/llvm-project/pull/77936.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 568000106a84dc..dbd92b600a936e 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -279,6 +279,8 @@ def err_builtin_needs_feature : Error<"%0 needs target feature %1">;
def err_function_needs_feature : Error<
"always_inline function %1 requires target feature '%2', but would "
"be inlined into function %0 that is compiled without support for '%2'">;
+def err_function_alwaysinline_attribute_mismatch : Error<
+ "always_inline function %1 and its caller %0 have mismatched %2 attributes">;
def warn_avx_calling_convention
: Warning<"AVX vector %select{return|argument}0 of type %1 without '%2' "
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 4c464a1ae4c67f..0fed60103c9a2c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13803,8 +13803,17 @@ class Sema final {
FormatArgumentPassingKind ArgPassingKind;
};
+enum ArmStreamingType {
+ ArmNonStreaming,
+ ArmStreaming,
+ ArmStreamingCompatible,
+ ArmStreamingOrSVE2p1
+};
+
+
static bool getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
bool IsVariadic, FormatStringInfo *FSI);
+ static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD);
private:
void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index 52216d93a302bb..03a6f2f1d7a9d2 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -151,4 +151,5 @@ add_clang_library(clangCodeGen
clangFrontend
clangLex
clangSerialization
+ clangSema
)
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 7102d190fe008b..ea3d5a97605f1c 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -8,6 +8,8 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
+#include "clang/Basic/DiagnosticFrontend.h"
+#include "clang/Sema/Sema.h"
using namespace clang;
using namespace clang::CodeGen;
@@ -153,6 +155,11 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
}
return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
}
+
+ void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
+ const FunctionDecl *Caller,
+ const FunctionDecl *Callee,
+ const CallArgList &Args) const override;
};
class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
@@ -812,6 +819,19 @@ Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
/*allowHigherAlign*/ false);
}
+void AArch64TargetCodeGenInfo::checkFunctionCallABI(
+ CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+ const FunctionDecl *Callee, const CallArgList &Args) const {
+ if (!Callee->hasAttr<AlwaysInlineAttr>())
+ return;
+
+ auto CalleeIsStreaming = Sema::getArmStreamingFnType(Callee) == Sema::ArmStreaming;
+ auto CallerIsStreaming = Sema::getArmStreamingFnType(Caller) == Sema::ArmStreaming;
+
+ if (CalleeIsStreaming && !CallerIsStreaming)
+ CGM.getDiags().Report(CallLoc, diag::err_function_alwaysinline_attribute_mismatch) << Caller->getDeclName() << Callee->getDeclName() << "streaming";
+}
+
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
AArch64ABIKind Kind) {
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 74f8f626fb1637..160637dde448e4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2998,13 +2998,6 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
llvm_unreachable("Invalid NeonTypeFlag!");
}
-enum ArmStreamingType {
- ArmNonStreaming,
- ArmStreaming,
- ArmStreamingCompatible,
- ArmStreamingOrSVE2p1
-};
-
bool Sema::ParseSVEImmChecks(
CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
// Perform all the immediate checks for this builtin call.
@@ -3145,7 +3138,7 @@ bool Sema::ParseSVEImmChecks(
return HasError;
}
-static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
+Sema::ArmStreamingType Sema::getArmStreamingFnType(const FunctionDecl *FD) {
if (FD->hasAttr<ArmLocallyStreamingAttr>())
return ArmStreaming;
if (const auto *T = FD->getType()->getAs<FunctionProtoType>()) {
@@ -3159,31 +3152,31 @@ static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
const FunctionDecl *FD,
- ArmStreamingType BuiltinType) {
- ArmStreamingType FnType = getArmStreamingFnType(FD);
- if (BuiltinType == ArmStreamingOrSVE2p1) {
+ Sema::ArmStreamingType BuiltinType) {
+ Sema::ArmStreamingType FnType = Sema::getArmStreamingFnType(FD);
+ if (BuiltinType == Sema::ArmStreamingOrSVE2p1) {
// Check intrinsics that are available in [sve2p1 or sme/sme2].
llvm::StringMap<bool> CallerFeatureMap;
S.Context.getFunctionFeatureMap(CallerFeatureMap, FD);
if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap))
- BuiltinType = ArmStreamingCompatible;
+ BuiltinType = Sema::ArmStreamingCompatible;
else
- BuiltinType = ArmStreaming;
+ BuiltinType = Sema::ArmStreaming;
}
- if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) {
+ if (FnType == Sema::ArmStreaming && BuiltinType == Sema::ArmNonStreaming) {
S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
<< TheCall->getSourceRange() << "streaming";
}
- if (FnType == ArmStreamingCompatible &&
- BuiltinType != ArmStreamingCompatible) {
+ if (FnType == Sema::ArmStreamingCompatible &&
+ BuiltinType != Sema::ArmStreamingCompatible) {
S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
<< TheCall->getSourceRange() << "streaming compatible";
return;
}
- if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+ if (FnType == Sema::ArmNonStreaming && BuiltinType == Sema::ArmStreaming) {
S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
<< TheCall->getSourceRange() << "non-streaming";
}
diff --git a/clang/test/CodeGen/aarch64-sme-func-attrs-inline-locally-streaming.c b/clang/test/CodeGen/aarch64-sme-func-attrs-inline-locally-streaming.c
new file mode 100644
index 00000000000000..4aa9fbf4a8fa18
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-func-attrs-inline-locally-streaming.c
@@ -0,0 +1,12 @@
+// RUN: %clang --target=aarch64-none-linux-gnu -march=armv9-a+sme -O3 -S -Xclang -verify %s
+
+// Conflicting attributes when using always_inline
+__attribute__((always_inline)) __arm_locally_streaming
+int inlined_fn_local(void) {
+ return 42;
+}
+// expected-error@+1 {{always_inline function 'inlined_fn_local' and its caller 'inlined_fn_caller' have mismatched streaming attributes}}
+int inlined_fn_caller(void) { return inlined_fn_local(); }
+__arm_locally_streaming
+int inlined_fn_caller_local(void) { return inlined_fn_local(); }
+int inlined_fn_caller_streaming(void) __arm_streaming { return inlined_fn_local(); }
diff --git a/clang/test/CodeGen/aarch64-sme-func-attrs-inline-streaming.c b/clang/test/CodeGen/aarch64-sme-func-attrs-inline-streaming.c
new file mode 100644
index 00000000000000..7268a49bb2491d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-func-attrs-inline-streaming.c
@@ -0,0 +1,12 @@
+// RUN: %clang --target=aarch64-none-linux-gnu -march=armv9-a+sme -O3 -S -Xclang -verify %s
+
+// Conflicting attributes when using always_inline
+__attribute__((always_inline))
+int inlined_fn_streaming(void) __arm_streaming {
+ return 42;
+}
+// expected-error@+1 {{always_inline function 'inlined_fn_streaming' and its caller 'inlined_fn_caller' have mismatched streaming attributes}}
+int inlined_fn_caller(void) { return inlined_fn_streaming(); }
+__arm_locally_streaming
+int inlined_fn_caller_local(void) { return inlined_fn_streaming(); }
+int inlined_fn_caller_streaming(void) __arm_streaming { return inlined_fn_streaming(); }
|
@llvm/pr-subscribers-backend-aarch64 Author: Sam Tebbs (SamTebbs33) ChangesThis patch adds an error that is emitted when a streaming function is marked as always_inline and is called from a non-streaming function. Full diff: https://github.com/llvm/llvm-project/pull/77936.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 568000106a84dc..dbd92b600a936e 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -279,6 +279,8 @@ def err_builtin_needs_feature : Error<"%0 needs target feature %1">;
def err_function_needs_feature : Error<
"always_inline function %1 requires target feature '%2', but would "
"be inlined into function %0 that is compiled without support for '%2'">;
+def err_function_alwaysinline_attribute_mismatch : Error<
+ "always_inline function %1 and its caller %0 have mismatched %2 attributes">;
def warn_avx_calling_convention
: Warning<"AVX vector %select{return|argument}0 of type %1 without '%2' "
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 4c464a1ae4c67f..0fed60103c9a2c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13803,8 +13803,17 @@ class Sema final {
FormatArgumentPassingKind ArgPassingKind;
};
+enum ArmStreamingType {
+ ArmNonStreaming,
+ ArmStreaming,
+ ArmStreamingCompatible,
+ ArmStreamingOrSVE2p1
+};
+
+
static bool getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
bool IsVariadic, FormatStringInfo *FSI);
+ static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD);
private:
void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index 52216d93a302bb..03a6f2f1d7a9d2 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -151,4 +151,5 @@ add_clang_library(clangCodeGen
clangFrontend
clangLex
clangSerialization
+ clangSema
)
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 7102d190fe008b..ea3d5a97605f1c 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -8,6 +8,8 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
+#include "clang/Basic/DiagnosticFrontend.h"
+#include "clang/Sema/Sema.h"
using namespace clang;
using namespace clang::CodeGen;
@@ -153,6 +155,11 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
}
return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
}
+
+ void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
+ const FunctionDecl *Caller,
+ const FunctionDecl *Callee,
+ const CallArgList &Args) const override;
};
class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
@@ -812,6 +819,19 @@ Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
/*allowHigherAlign*/ false);
}
+void AArch64TargetCodeGenInfo::checkFunctionCallABI(
+ CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+ const FunctionDecl *Callee, const CallArgList &Args) const {
+ if (!Callee->hasAttr<AlwaysInlineAttr>())
+ return;
+
+ auto CalleeIsStreaming = Sema::getArmStreamingFnType(Callee) == Sema::ArmStreaming;
+ auto CallerIsStreaming = Sema::getArmStreamingFnType(Caller) == Sema::ArmStreaming;
+
+ if (CalleeIsStreaming && !CallerIsStreaming)
+ CGM.getDiags().Report(CallLoc, diag::err_function_alwaysinline_attribute_mismatch) << Caller->getDeclName() << Callee->getDeclName() << "streaming";
+}
+
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
AArch64ABIKind Kind) {
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 74f8f626fb1637..160637dde448e4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2998,13 +2998,6 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
llvm_unreachable("Invalid NeonTypeFlag!");
}
-enum ArmStreamingType {
- ArmNonStreaming,
- ArmStreaming,
- ArmStreamingCompatible,
- ArmStreamingOrSVE2p1
-};
-
bool Sema::ParseSVEImmChecks(
CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
// Perform all the immediate checks for this builtin call.
@@ -3145,7 +3138,7 @@ bool Sema::ParseSVEImmChecks(
return HasError;
}
-static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
+Sema::ArmStreamingType Sema::getArmStreamingFnType(const FunctionDecl *FD) {
if (FD->hasAttr<ArmLocallyStreamingAttr>())
return ArmStreaming;
if (const auto *T = FD->getType()->getAs<FunctionProtoType>()) {
@@ -3159,31 +3152,31 @@ static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
const FunctionDecl *FD,
- ArmStreamingType BuiltinType) {
- ArmStreamingType FnType = getArmStreamingFnType(FD);
- if (BuiltinType == ArmStreamingOrSVE2p1) {
+ Sema::ArmStreamingType BuiltinType) {
+ Sema::ArmStreamingType FnType = Sema::getArmStreamingFnType(FD);
+ if (BuiltinType == Sema::ArmStreamingOrSVE2p1) {
// Check intrinsics that are available in [sve2p1 or sme/sme2].
llvm::StringMap<bool> CallerFeatureMap;
S.Context.getFunctionFeatureMap(CallerFeatureMap, FD);
if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap))
- BuiltinType = ArmStreamingCompatible;
+ BuiltinType = Sema::ArmStreamingCompatible;
else
- BuiltinType = ArmStreaming;
+ BuiltinType = Sema::ArmStreaming;
}
- if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) {
+ if (FnType == Sema::ArmStreaming && BuiltinType == Sema::ArmNonStreaming) {
S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
<< TheCall->getSourceRange() << "streaming";
}
- if (FnType == ArmStreamingCompatible &&
- BuiltinType != ArmStreamingCompatible) {
+ if (FnType == Sema::ArmStreamingCompatible &&
+ BuiltinType != Sema::ArmStreamingCompatible) {
S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
<< TheCall->getSourceRange() << "streaming compatible";
return;
}
- if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+ if (FnType == Sema::ArmNonStreaming && BuiltinType == Sema::ArmStreaming) {
S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
<< TheCall->getSourceRange() << "non-streaming";
}
diff --git a/clang/test/CodeGen/aarch64-sme-func-attrs-inline-locally-streaming.c b/clang/test/CodeGen/aarch64-sme-func-attrs-inline-locally-streaming.c
new file mode 100644
index 00000000000000..4aa9fbf4a8fa18
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-func-attrs-inline-locally-streaming.c
@@ -0,0 +1,12 @@
+// RUN: %clang --target=aarch64-none-linux-gnu -march=armv9-a+sme -O3 -S -Xclang -verify %s
+
+// Conflicting attributes when using always_inline
+__attribute__((always_inline)) __arm_locally_streaming
+int inlined_fn_local(void) {
+ return 42;
+}
+// expected-error@+1 {{always_inline function 'inlined_fn_local' and its caller 'inlined_fn_caller' have mismatched streaming attributes}}
+int inlined_fn_caller(void) { return inlined_fn_local(); }
+__arm_locally_streaming
+int inlined_fn_caller_local(void) { return inlined_fn_local(); }
+int inlined_fn_caller_streaming(void) __arm_streaming { return inlined_fn_local(); }
diff --git a/clang/test/CodeGen/aarch64-sme-func-attrs-inline-streaming.c b/clang/test/CodeGen/aarch64-sme-func-attrs-inline-streaming.c
new file mode 100644
index 00000000000000..7268a49bb2491d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-func-attrs-inline-streaming.c
@@ -0,0 +1,12 @@
+// RUN: %clang --target=aarch64-none-linux-gnu -march=armv9-a+sme -O3 -S -Xclang -verify %s
+
+// Conflicting attributes when using always_inline
+__attribute__((always_inline))
+int inlined_fn_streaming(void) __arm_streaming {
+ return 42;
+}
+// expected-error@+1 {{always_inline function 'inlined_fn_streaming' and its caller 'inlined_fn_caller' have mismatched streaming attributes}}
+int inlined_fn_caller(void) { return inlined_fn_streaming(); }
+__arm_locally_streaming
+int inlined_fn_caller_local(void) { return inlined_fn_streaming(); }
+int inlined_fn_caller_streaming(void) __arm_streaming { return inlined_fn_streaming(); }
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
clang/test/CodeGen/aarch64-sme-func-attrs-inline-locally-streaming.c
Outdated
Show resolved
Hide resolved
|
||
// The caller can inline the callee if their streaming modes match or the | ||
// callee is streaming compatible | ||
if (CalleeStreamingMode != CallerStreamingMode && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you use the llvm::AArch64::SMEAttrs class and reuse some of the same functionality as we have in AArch64TTIImpl::areInlineCompatible
(ignoring the hasPossibleIncompatibleOps
part).
That way, we can also check for ZA, e.g.
__attribute__((always_inline)) __arm_new("za")
void bar(void) { }
void foo() { bar(); }
here bar
cannot be inlined because that would require making foo
__arm_new("za")
as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks for the idea.
clang/lib/Sema/SemaChecking.cpp
Outdated
@@ -3145,7 +3138,7 @@ bool Sema::ParseSVEImmChecks( | |||
return HasError; | |||
} | |||
|
|||
static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { | |||
Sema::ArmStreamingType Sema::getArmStreamingFnType(const FunctionDecl *FD) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this function is worth changing the build configuration to add a dependence of clang::Sema to clang::CodeGen :)
You can just inline (duplicate) the functionality here directly into your function in AArch64TargetCodeGenInfo::checkFunctionCallABI
and map the attributes directly to the llvm::AArch64::SMEAttrs
class.
That way you can also avoid needing the ArmStreamingType
enum.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
|
||
// The caller can inline the callee if their streaming modes match or the | ||
// callee is streaming compatible | ||
if (CalleeStreamingMode != CallerStreamingMode && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is valid to inline Callees with ArmStreaming or ArmStreamingCompatible into Callers with the __arm_locally_streaming keyword? What happens currently when you try this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is indeed valid to inline a streaming function into a locally streaming function since at the inline point streaming is guaranteed to be on.
There's no general rule that forbids taking the address of an always_inline function. So if a user really wants to, they can call a mismatched always_inline function anyway. Given that, making this a hard error seems a bit dubious; it should probably be a warning instead. |
…ibutes This patch adds an error that is emitted when a streaming function is marked as always_inline and is called from a non-streaming function.
90a6654
to
b38b7d5
Compare
FWIW, the GNU documentation on
Clang and GCC emit an error for e.g. mismatching target attributes: https://godbolt.org/z/ddThn67sa |
clang has never emitted diagnostics for failure to inline an always_inline function. But if gcc is doing it, maybe defaulting to an error isn't such a big deal. Separately, it's probably worth ensuring that the LLVM inlining passes don't actually perform illegal inlining for functions marked always_inline; looking at the code, it looks like we might end up skipping the relevant checks. |
clang does actually emit an error when it finds always_inline on a callee with different target attributes to the caller, as Sander said above.
The |
if (CalleeAttrs.hasNewZABody()) | ||
CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za) | ||
<< Callee->getDeclName(); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is missing a check to see if the call requires the lazy-save mechanism as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is, thanks!
if (!Callee->hasAttr<AlwaysInlineAttr>()) | ||
return; | ||
|
||
auto GetSMEAttrs = [](const FunctionDecl *F) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
auto GetSMEAttrs = [](const FunctionDecl *F) { | |
auto GetSMEAttrs = [](const FunctionDecl *F) -> llvm::SMEAttrs { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems wrong to move the header files to llvm/include/llvm/Support, but to leave the implementation in llvm/lib/AArch64 (which ends up in a different library). If you move the corresponding .cpp file to llvm/lib/Support then you don't need to link with AArch64Utils
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed. I tried moving it to Support but that introduced a dependency cycle, so I re-implemented some of the SMEAttributes logic in-place instead.
I think that Eli's point is that at the moment |
SMEAttributes CalleeAttrs(Callee); | ||
SMEAttributes CallerAttrs(Caller); | ||
|
||
if (CallerAttrs.requiresSMChange(CalleeAttrs, true)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can write this code without mimicking the (LLVM) SMEAttributes class, which makes the code a lot simpler.
For example, for the streaming-mode diagnostic you could do something like this:
bool CallerIsStreaming = Caller->hasAttr<ArmLocallyStreamingAttr>() || (CallerAttrs & SME_PStateSMEnabledMask);
bool CalleeIsStreaming = ...
bool CallerIsStreamingCompatible = !CallerIsStreaming && (CallerAttrs & SME_PStateSMCompatibleMask);
bool CalleeIsStreamingCompatible = ...
if (!CalleeIsStreamingCompatible)
if (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
// emit diagnostic
#include "llvm/CodeGen/CallingConvLower.h" | ||
#include "llvm/CodeGen/MachineFunction.h" | ||
#include "llvm/CodeGen/SelectionDAG.h" | ||
#include "llvm/CodeGen/TargetLowering.h" | ||
#include "llvm/IR/CallingConv.h" | ||
#include "llvm/IR/Instruction.h" | ||
#include "llvm/Support/AArch64SMEAttributes.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change is no longer necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These changes are no longer necessary?
clang/lib/CodeGen/CMakeLists.txt
Outdated
@@ -31,6 +31,7 @@ set(LLVM_LINK_COMPONENTS | |||
Target | |||
TargetParser | |||
TransformUtils | |||
AArch64Utils |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change is no longer necessary?
@@ -279,6 +279,12 @@ def err_builtin_needs_feature : Error<"%0 needs target feature %1">; | |||
def err_function_needs_feature : Error< | |||
"always_inline function %1 requires target feature '%2', but would " | |||
"be inlined into function %0 that is compiled without support for '%2'">; | |||
def err_function_always_inline_attribute_mismatch : Error< | |||
"always_inline function %1 and its caller %0 have mismatched %2 attributes">; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"always_inline function %1 and its caller %0 have mismatched %2 attributes">; | |
"always_inline function %1 and its caller %0 have mismatching %2 attributes">; |
} | ||
|
||
bool hasNewZABody() { return HasNewZA; } | ||
bool requiresLazySave() const { return HasNewZA; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That doesn't look right. A lazy-save is required when the caller has ZA state and the callee does not share ZA state and is a Private-ZA function.
That said, I think that we might be too restrictive (here and in LLVM) to not allow inlining of functions that would otherwise require a lazy-save.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I've now removed the lazy save checking to loosen that restriction.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
return; | ||
|
||
SMEAttributes CalleeAttrs(Callee); | ||
SMEAttributes CallerAttrs(Caller); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Caller or Callee might be Null?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Caller
could actually be null here but based on where this function is called in CGCall
, Callee
can't. I've added a null check on Caller
. Thanks!
// RUN: %clang --target=aarch64-none-linux-gnu -march=armv9-a+sme -O3 -S -Xclang -verify %s | ||
|
||
// Conflicting attributes when using always_inline | ||
__attribute__((always_inline)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: To simplify the test, could you do something like #define __ai __attribute__((always_inline))
and then add that in front of the return type?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
@@ -814,6 +820,49 @@ Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, | |||
/*allowHigherAlign*/ false); | |||
} | |||
|
|||
class SMEAttributes { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd like to avoid adding another SMEAttributes class
. Could you write some specific functions instead? i.e.
static bool isStreaming(const FunctionDecl *F) { ... }
static bool isStreamingCompatible(const FunctionDecl *F) { ... }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
if (auto *NewAttr = F->getAttr<ArmNewAttr>()) { | ||
if (NewAttr->isNewZA()) | ||
HasNewZA = true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please inline this directly where it's used on line 861.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
@@ -0,0 +1,6 @@ | |||
// RUN: %clang --target=aarch64-none-linux-gnu -march=armv9-a+sme -O3 -S -Xclang -verify %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you merge all these tests into a single file?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. I originally didn't as the frontend bails out once it's printed all the errors for one function (-ferror-limit doesn't help here), but using macros to only test one caller function at a time mitigated this.
@@ -0,0 +1,7 @@ | |||
// RUN: %clang --target=aarch64-none-linux-gnu -march=armv9-a+sme -O3 -S -Xclang -verify %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can remove -O3
because clang doesn't do any codegen when it has -verify
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
def err_function_always_inline_lazy_save : Error< | ||
"inlining always_inline function %0 into %1 would require a lazy za save">; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one seems unused, please remove it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for spotting that still being there.
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -target-feature +sme -verify -DTEST_NONE -x c %s | ||
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -target-feature +sme -verify -DTEST_COMPATIBLE -x c %s | ||
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -target-feature +sme -verify -DTEST_STREAMING -x c %s | ||
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -target-feature +sme -verify -DTEST_LOCALLY -x c %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you use -fsyntax-only
then you don't need to use the #define
's.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately the part of the code that checks this isn't run under -fsyntax-only. It's the same for the existing target feature checks with always_inline: https://godbolt.org/z/frrbeah7d
void AArch64TargetCodeGenInfo::checkFunctionCallABI( | ||
CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, | ||
const FunctionDecl *Callee, const CallArgList &Args) const { | ||
if (!Caller || !Callee->hasAttr<AlwaysInlineAttr>()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: is it worth adding || !Callee
here as well? (without knowing the context of how this function is called, I guess you could argue that Callee
could also be nullptr
)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From looking at where checkFunctionCallABI
is called, it's not possible for Callee
to be null, but I'm happy to add a check just in case that changes.
llvm-project/clang/lib/CodeGen/CGCall.cpp
Line 5022 in f7c2e5f
CGM.getTargetCodeGenInfo().checkFunctionCallABI( |
return T->getAArch64SMEAttributes() & | ||
FunctionType::SME_PStateSMCompatibleMask; | ||
return false; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@sdesmalen-arm WDYT about making these two free functions into members of FunctionDecl? Seems like copies of these helpers are proliferating.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it makes sense to create a single interface for these and remove any duplicates!
I think we'll want to have two interfaces:
- One for QualType for the type attributes (declared in AST/Type.h?)
- One for FunctionDecl that also checks the declaration attributes (declared in AST/Decl.h?)
I'm happy for it to be done as part of this patch or as a follow-up to keep this patch simpler.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good to me! I'd prefer to do it in a follow-up patch.
return T->getAArch64SMEAttributes() & | ||
FunctionType::SME_PStateSMCompatibleMask; | ||
return false; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it makes sense to create a single interface for these and remove any duplicates!
I think we'll want to have two interfaces:
- One for QualType for the type attributes (declared in AST/Type.h?)
- One for FunctionDecl that also checks the declaration attributes (declared in AST/Decl.h?)
I'm happy for it to be done as part of this patch or as a follow-up to keep this patch simpler.
PR llvm#77936 introduced a diagnostic to avoid calls being inlined into functions with a different streaming mode, because inlining those functions may result in different runtime behaviour. This was necessary because LLVM doesn't check whether inlining is possible and thus blindly inlines the function without checking feasibility. In practice however, this introduces an artificial restriction that the user may not be able to work around. Calling an `always_inline` function from some header file that is out of the control of the user would result in an error that the user cannot remedy. Therefore, this patch demotes the error into a warning (for calls from streaming[-compatible] -> non-streaming), but the proper fix would be to fix the AlwaysInliner in LLVM to avoid inlining when it has analyzed the callee and has determined that inlining is not possible. Calling an always_inline function for calls from non-streaming -> streaming will remain an error, because there is little pre-existing code for SME, so it is expected that the header file can be modified by the user (e.g. by using `__arm_streaming_compatible` if the code is claimed to be compatible).
…rs (#100740) PR #77936 introduced a diagnostic to avoid calls being inlined into functions with a different streaming mode, because inlining those functions may result in different runtime behaviour. This was necessary because LLVM doesn't check whether inlining is possible and thus blindly inlines the function without checking feasibility. In practice however, this introduces an artificial restriction that the user may not be able to work around. Calling an `always_inline` function from some header file that is out of the control of the user would result in an error that the user cannot remedy. Therefore, this patch demotes the error into a warning (for calls from streaming[-compatible] -> non-streaming), but the proper fix would be to fix the AlwaysInliner in LLVM to avoid inlining when it has analyzed the callee and has determined that inlining is not possible. Calling an always_inline function for calls from non-streaming -> streaming will remain an error, because there is little pre-existing code for SME, so it is expected that the header file can be modified by the user (e.g. by using `__arm_streaming_compatible` if the code is claimed to be compatible).
…rs (llvm#100740) PR llvm#77936 introduced a diagnostic to avoid calls being inlined into functions with a different streaming mode, because inlining those functions may result in different runtime behaviour. This was necessary because LLVM doesn't check whether inlining is possible and thus blindly inlines the function without checking feasibility. In practice however, this introduces an artificial restriction that the user may not be able to work around. Calling an `always_inline` function from some header file that is out of the control of the user would result in an error that the user cannot remedy. Therefore, this patch demotes the error into a warning (for calls from streaming[-compatible] -> non-streaming), but the proper fix would be to fix the AlwaysInliner in LLVM to avoid inlining when it has analyzed the callee and has determined that inlining is not possible. Calling an always_inline function for calls from non-streaming -> streaming will remain an error, because there is little pre-existing code for SME, so it is expected that the header file can be modified by the user (e.g. by using `__arm_streaming_compatible` if the code is claimed to be compatible). (cherry picked from commit 5430f73)
This patch adds an error that is emitted when a streaming function is marked as always_inline and is called from a non-streaming function.