-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[Clang][RISCV] Handle RVV tuple types correctly as InputOperands for inline asm #67109
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang Changes: This PR is based on #67018. This PR fixes a compilation issue for RVV tuple types used as InputOperands for inline asm. Currently the compiler generates https://godbolt.org/z/djebPfqxf for a tuple type used as an inline asm input, which cannot be code generated successfully (https://godbolt.org/z/na7T19Krc). This PR fixes Clang by generating https://godbolt.org/z/MsovoxbY9 instead, which can be successfully handled by the back-end. A follow-up PR will handle interactions of RVV tuple type InputOperands and OutputOperands correctly. Full diff: https://github.com/llvm/llvm-project/pull/67109.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6674aa2409a5947..4a2bdde56c5704e 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -19,6 +19,7 @@
#include "clang/AST/Expr.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/PrettyStackTrace.h"
@@ -29,10 +30,13 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include <optional>
@@ -2392,6 +2396,26 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
} else if (TruncTy->isVectorTy()) {
Tmp = Builder.CreateBitCast(Tmp, TruncTy);
+ } else if (TruncTy->isStructTy() && ResultRegQualTys[i]->isRVVType()) {
+ auto *STy = cast<llvm::StructType>(TruncTy);
+ auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ "Must be dealing with RVV tuple type");
+
+ unsigned MinElts = VTy->getElementCount().getKnownMinValue();
+ llvm::Value *StructValue = llvm::PoisonValue::get(STy);
+
+ for (unsigned Idx = 0, TupleSize = STy->getNumElements();
+ Idx != TupleSize; ++Idx) {
+ llvm::Value *IdxValue =
+ llvm::ConstantInt::get(CGM.Int64Ty, Idx * MinElts);
+ llvm::Value *SubVec = Builder.CreateExtractVector(VTy, Tmp, IdxValue);
+
+ StructValue = Builder.CreateInsertValue(StructValue, SubVec, Idx);
+ }
+
+ Tmp = StructValue;
}
}
@@ -2399,7 +2423,13 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
// ResultTypeRequiresCast elements correspond to the first
// ResultTypeRequiresCast.size() elements of RegResults.
if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
- unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
+ unsigned Size;
+ if (ResultRegQualTys[i]->isRVVType() && TruncTy->isStructTy()) {
+ Size = cast<llvm::ScalableVectorType>(
+ cast<llvm::StructType>(TruncTy)->getElementType(0))
+ ->getScalarSizeInBits();
+ } else
+ Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
Builder.CreateStore(Tmp, A);
@@ -2524,11 +2554,32 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
ResultRegIsFlagReg.push_back(IsFlagReg);
llvm::Type *Ty = ConvertTypeForMem(QTy);
+ ResultTruncRegTypes.push_back(Ty);
+
+ // Expressing the type as a structure in inline asm calls will complicate
+ // the current code case, so instead, the return type is set to be a
+ // single scalable vector, then reconstructed with `vector.extract` and
+ // `insertvalue`. The type is derived here, and the reconstruction is done
+ // under EmitAsmStores.
+ if (QTy->isRVVType() && isa<llvm::StructType>(Ty)) {
+ // Flatten the structure into a single ScalableVectorType
+ auto *STy = cast<llvm::StructType>(Ty);
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ isa<llvm::ScalableVectorType>(STy->getElementType(0)) &&
+ "Dealing with RVV tuple (aggregate with homogeneous scalable "
+ "vectors");
+
+ auto *VecTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+ Ty = llvm::ScalableVectorType::get(VecTy->getScalarType(),
+ STy->getNumElements() *
+ VecTy->getMinNumElements());
+ }
+
const bool RequiresCast = Info.allowsRegister() &&
(getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
Ty->isAggregateType());
- ResultTruncRegTypes.push_back(Ty);
ResultTypeRequiresCast.push_back(RequiresCast);
if (RequiresCast) {
@@ -2551,6 +2602,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
QualType InputTy = S.getInputExpr(InputNo)->getType();
QualType OutputType = OutExpr->getType();
+ if ((InputTy->isRVVType() &&
+ isa<llvm::StructType>(ConvertType(InputTy))) ||
+ (OutputType->isRVVType() &&
+ isa<llvm::StructType>(ConvertType(OutputType)))) {
+ llvm_unreachable("FIXME: Deal with RVV type matching.");
+ }
+
uint64_t InputSize = getContext().getTypeSize(InputTy);
if (getContext().getTypeSize(OutputType) < InputSize) {
// Form the asm to return the value as a larger integer or fp type.
@@ -2671,6 +2729,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
QualType OutputType = S.getOutputExpr(Output)->getType();
QualType InputTy = InputExpr->getType();
+ if ((InputTy->isRVVType() &&
+ isa<llvm::StructType>(ConvertType(InputTy))) ||
+ (OutputType->isRVVType() &&
+ isa<llvm::StructType>(ConvertType(OutputType)))) {
+ llvm_unreachable("FIXME: Deal with RVV type matching.");
+ }
+
if (getContext().getTypeSize(OutputType) >
getContext().getTypeSize(InputTy)) {
// Use ptrtoint as appropriate so that we can do our extension.
@@ -2701,10 +2766,40 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
std::max((uint64_t)LargestVectorWidth,
VT->getPrimitiveSizeInBits().getKnownMinValue());
- ArgTypes.push_back(Arg->getType());
- ArgElemTypes.push_back(ArgElemType);
- Args.push_back(Arg);
- Constraints += InputConstraint;
+ // Expand RVV tuple type input operands.
+ if (InputExpr->getType()->isRVVType() && Arg->getType()->isStructTy()) {
+ std::string ExpandedInputContraint;
+
+ auto *STy = cast<llvm::StructType>(Arg->getType());
+
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ isa<llvm::ScalableVectorType>(STy->getElementType(0)) &&
+ "Only aggregate type of homogeneous scalable vectors is handled "
+ "here");
+
+ auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+ for (unsigned Idx = 0, TupleSize = STy->getNumElements();
+ Idx != TupleSize; ++Idx) {
+ if (ExpandedInputContraint.size())
+ ExpandedInputContraint += ",";
+
+ ExpandedInputContraint += InputConstraint;
+ ArgTypes.push_back(VTy);
+ ArgElemTypes.push_back(ArgElemType);
+
+ llvm::Value *SubVec = Builder.CreateExtractValue(Arg, {Idx});
+
+ Args.push_back(SubVec);
+ }
+
+ Constraints += ExpandedInputContraint;
+ } else {
+ ArgTypes.push_back(Arg->getType());
+ ArgElemTypes.push_back(ArgElemType);
+ Args.push_back(Arg);
+ Constraints += InputConstraint;
+ }
}
// Append the "input" part of inout constraints.
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
new file mode 100644
index 000000000000000..24f403c6625d0aa
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
@@ -0,0 +1,54 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
+#include <riscv_vector.h>
+
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @foo(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> asm "#NOP", "=^vr"() #[[ATTR2:[0-9]+]], !srcloc !4
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 2)
+// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1
+// CHECK-NEXT: ret void
+//
+void foo() {
+ vint32m1x2_t v0;
+ asm ("#NOP" : "=vr" (v0));
+}
+
+// CHECK-LABEL: define dso_local void @bar(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } asm "#NOP", "=^vr,=^vr"() #[[ATTR2]], !srcloc !5
+// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 0)
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 2)
+// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP5]], 0
+// CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 2)
+// CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP6]], <vscale x 2 x i32> [[TMP7]], 1
+// CHECK-NEXT: ret void
+//
+void bar() {
+ vint32m1x2_t v0, v2;
+ asm ("#NOP" : "=vr" (v0), "=vr" (v2));
+}
+
+// CHECK-LABEL: define dso_local void @baz(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, 0
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, 1
+// CHECK-NEXT: call void asm sideeffect "#NOP", "^vr,^vr"(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i32> [[TMP1]]) #[[ATTR3:[0-9]+]], !srcloc !6
+// CHECK-NEXT: ret void
+//
+void baz() {
+ vint32m1x2_t v2;
+ asm ("#NOP" :: "vr" (v2));
+}
|
38de145
to
bce63fa
Compare
…inline asm The RVV tuple type maps to an aggregate type with homogeneous scalable vectors. EmitAsmStmt does not handle this correctly, and this commit attempts to fix it. Get past the validation check for homogeneous scalable vector types in InlineAsm::verify. Handle RVV tuple types correctly in CGStmt.cpp:EmitAsmStores, since we can allow a direct store for the tuple types. A follow-up commit will deal with the details when associated with InputOperands.
…V tuple type The generated LLVM IR cannot be handled successfully because a function argument of tuple type is an aggregate of scalable vectors. It needs to be flattened into separate arguments.
…nline asm This commit flattens the input operand from an aggregate structure into separate arguments for RVV tuple types.
bce63fa
to
46dbc84
Compare
Change: Rebase upon change of parent PR. |
This PR is based on #67018. This PR fixes a compilation issue for RVV tuple types used as InputOperands for inline asm.
Currently the compiler generates https://godbolt.org/z/djebPfqxf for a tuple type used as an inline asm input, which cannot be code generated successfully (https://godbolt.org/z/na7T19Krc). This PR fixes Clang by generating https://godbolt.org/z/MsovoxbY9 instead, which can be successfully handled by the back-end.
Tied operands of RVV tuple type InputOperands and OutputOperands are not handled yet.