[Clang][RISCV] Handle RVV tuple types correctly as OutputOperands for inline asm #67018

eopXD · 2023-09-21T14:14:36Z

This PR fixes compilation issue for RVV tuple types as OutputOperands for inline asm.

The RVV tuple type maps to an aggregate type with homogeneous scalable
vectors. EmitAsmStmt does not handle this correctly and this commit
attempts to fix it.

Expressing the type as a structure in inline asm calls will complicate
the current code base, so instead, the return type is set to be
a single scalable vector, then reconstructed with vector.extract and
insertvalue.

A follow-up PR will handle InputOperands correctly.

llvmbot · 2023-09-21T14:15:40Z

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Changes

This commit fixes compilation issue for RVV tuple types as OutputOperand for inline asm. The LLVM IR generated by the test case added is currently not handled successfully in the backend, which also needs a fix too.

Full diff: https://github.com/llvm/llvm-project/pull/67018.diff

2 Files Affected:

(modified) clang/lib/CodeGen/CGStmt.cpp (+68-2)
(added) clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c (+41)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6674aa2409a5947..0dd79903802f328 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -19,6 +19,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/DiagnosticSema.h"
 #include "clang/Basic/PrettyStackTrace.h"
@@ -29,10 +30,13 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include <optional>
 
@@ -2392,6 +2396,27 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
         Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
       } else if (TruncTy->isVectorTy()) {
         Tmp = Builder.CreateBitCast(Tmp, TruncTy);
+      } else if (TruncTy->isStructTy()) {
+        auto *STy = cast<llvm::StructType>(TruncTy);
+        auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+        assert(ResultRegQualTys[i]->isRVVType() &&
+               STy->containsHomogeneousScalableVectorTypes() &&
+               "Must be dealing with RVV tuple type");
+
+        unsigned MinElts = VTy->getElementCount().getKnownMinValue();
+        llvm::Value *StructValue = llvm::PoisonValue::get(STy);
+
+        for (unsigned Idx = 0, TupleSize = STy->getNumElements();
+             Idx != TupleSize; ++Idx) {
+          llvm::Value *IdxValue =
+              llvm::ConstantInt::get(CGM.Int64Ty, Idx * MinElts);
+          llvm::Value *SubVec = Builder.CreateExtractVector(VTy, Tmp, IdxValue);
+
+          StructValue = Builder.CreateInsertValue(StructValue, SubVec, Idx);
+        }
+
+        Tmp = StructValue;
       }
     }
 
@@ -2399,7 +2424,13 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
     // ResultTypeRequiresCast elements correspond to the first
     // ResultTypeRequiresCast.size() elements of RegResults.
     if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
-      unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
+      unsigned Size;
+      if (ResultRegQualTys[i]->isRVVType() && TruncTy->isStructTy()) {
+        Size = cast<llvm::ScalableVectorType>(
+                   cast<llvm::StructType>(TruncTy)->getElementType(0))
+                   ->getScalarSizeInBits();
+      } else
+        Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
       Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
       if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
         Builder.CreateStore(Tmp, A);
@@ -2524,11 +2555,32 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
       ResultRegIsFlagReg.push_back(IsFlagReg);
 
       llvm::Type *Ty = ConvertTypeForMem(QTy);
+      ResultTruncRegTypes.push_back(Ty);
+
+      // Expressing the type as a structure in inline asm calls will complicate
+      // the current code case, so instead, the return type is set to be a
+      // single scalable vector, then reconstructed with `vector.extract` and
+      // `insertvalue`. The type is derived here, and the reconstruction is done
+      // under EmitAsmStores.
+      if (QTy->isRVVType() && isa<llvm::StructType>(Ty)) {
+        // Flatten the structure into a single ScalableVectorType
+        auto *STy = cast<llvm::StructType>(Ty);
+        assert(STy->containsHomogeneousScalableVectorTypes() &&
+               isa<llvm::ScalableVectorType>(STy->getElementType(0)) &&
+               "Dealing with RVV tuple (aggregate with homogeneous scalable "
+               "vectors");
+
+        auto *VecTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+        Ty = llvm::ScalableVectorType::get(VecTy->getScalarType(),
+                                           STy->getNumElements() *
+                                               VecTy->getMinNumElements());
+      }
+
       const bool RequiresCast = Info.allowsRegister() &&
           (getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
            Ty->isAggregateType());
 
-      ResultTruncRegTypes.push_back(Ty);
       ResultTypeRequiresCast.push_back(RequiresCast);
 
       if (RequiresCast) {
@@ -2551,6 +2603,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
         QualType InputTy = S.getInputExpr(InputNo)->getType();
         QualType OutputType = OutExpr->getType();
 
+        if ((InputTy->isRVVType() &&
+             isa<llvm::StructType>(ConvertType(InputTy))) ||
+            (OutputType->isRVVType() &&
+             isa<llvm::StructType>(ConvertType(OutputType)))) {
+          llvm_unreachable("FIXME: Deal with RVV type matching.");
+        }
+
         uint64_t InputSize = getContext().getTypeSize(InputTy);
         if (getContext().getTypeSize(OutputType) < InputSize) {
           // Form the asm to return the value as a larger integer or fp type.
@@ -2671,6 +2730,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
       QualType OutputType = S.getOutputExpr(Output)->getType();
       QualType InputTy = InputExpr->getType();
 
+      if ((InputTy->isRVVType() &&
+           isa<llvm::StructType>(ConvertType(InputTy))) ||
+          (OutputType->isRVVType() &&
+           isa<llvm::StructType>(ConvertType(OutputType)))) {
+        llvm_unreachable("FIXME: Deal with RVV type matching.");
+      }
+
       if (getContext().getTypeSize(OutputType) >
           getContext().getTypeSize(InputTy)) {
         // Use ptrtoint as appropriate so that we can do our extension.
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
new file mode 100644
index 000000000000000..cad4f8ed5dcbd48
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
@@ -0,0 +1,41 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
+#include <riscv_vector.h>
+
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @foo(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> asm "#NOP", "=^vr"() #[[ATTR2:[0-9]+]], !srcloc !4
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1
+// CHECK-NEXT:    ret void
+//
+void foo() {
+  vint32m1x2_t v0;
+  asm ("#NOP" : "=vr" (v0));
+}
+
+// CHECK-LABEL: define dso_local void @bar(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } asm "#NOP", "=^vr,=^vr"() #[[ATTR2]], !srcloc !5
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 0)
+// CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP5]], 0
+// CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 2)
+// CHECK-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP6]], <vscale x 2 x i32> [[TMP7]], 1
+// CHECK-NEXT:    ret void
+//
+void bar() {
+  vint32m1x2_t v0, v2;
+  asm ("#NOP" : "=vr" (v0), "=vr" (v2));
+}

topperc · 2023-09-23T04:15:55Z

Does this mean the backend register allocation will pick a large LMUL register the same size as the whole tuple and force the register to be overly aligned? For example an lmul=1 seg2 tuple can use v0+v1, or v1+v2, or v2+v3, etc. But lmul=2 can only use v0+v1, v2+v3, v4+v5, etc.

eopXD · 2023-09-23T14:26:25Z

Does this mean the backend register allocation will pick a large LMUL register the same size as the whole tuple and force the register to be overly aligned? For example an lmul=1 seg2 tuple can use v0+v1, or v1+v2, or v2+v3, etc. But lmul=2 can only use v0+v1, v2+v3, v4+v5, etc.

Yes you are correct. The current approach will set restrictions since we will allocate the registers to be the multiplier of the LMUL in the back end. Let me try and work through how I can break them up.

clang/lib/CodeGen/CGStmt.cpp

…inline asm The RVV tuple type maps to an aggregate type with homogeneous scalable vectors. EmitAsmStmt does not handle this correctly and this commit attempts to fix it. Get pass validation check for homogeneous scalable vector types in InlineAsm::verify. Handle RVV tuple types correctly under CGStmt.cpp:EmitAsmStores, since we can allow direct store for the tuple types. A follow-up commit will deal with details when associated with InputOperands.

eopXD · 2023-09-25T06:35:17Z

Change: Updated the approach to get pass inline asm verifications and handle them correctly under EmitAsmStores.

eopXD requested review from nikic, topperc, paulwalker-arm and a team September 21, 2023 14:14

llvmbot added clang Clang issues not falling into any other category clang:codegen IR generation bugs: mangling, exceptions, etc. labels Sep 21, 2023

eopXD force-pushed the clang-rvv-tuple-inline-output-operand branch from 1e02945 to 565785d Compare September 21, 2023 14:15

eopXD force-pushed the clang-rvv-tuple-inline-output-operand branch from 565785d to a26eb9f Compare September 21, 2023 15:46

eopXD changed the title ~~[Clang][RISCV] Handle RVV tuple types correctly as OutputOperand for inline asm~~ [Clang][RISCV] Handle RVV tuple types correctly as OutputOperands for inline asm Sep 22, 2023

eopXD mentioned this pull request Sep 22, 2023

[Clang][RISCV] Handle RVV tuple types correctly as InputOperands for inline asm #67109

Open

eopXD force-pushed the clang-rvv-tuple-inline-output-operand branch from a26eb9f to dc77e5c Compare September 22, 2023 10:23

kito-cheng reviewed Sep 24, 2023

View reviewed changes

clang/lib/CodeGen/CGStmt.cpp Outdated Show resolved Hide resolved

eopXD force-pushed the clang-rvv-tuple-inline-output-operand branch from dc77e5c to 6f232cc Compare September 25, 2023 06:30

llvmbot added the llvm:ir label Sep 25, 2023

nikic removed their request for review September 25, 2023 19:11

eopXD requested a review from kito-cheng September 26, 2023 00:38

4vtomat mentioned this pull request Apr 24, 2024

[Clang][RISCV] Handle RVV tuple types correctly as InputOperand/OutputOperand for inline asm #89883

Closed

paulwalker-arm removed their request for review September 5, 2024 15:41

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Clang][RISCV] Handle RVV tuple types correctly as OutputOperands for inline asm #67018

[Clang][RISCV] Handle RVV tuple types correctly as OutputOperands for inline asm #67018

Uh oh!

eopXD commented Sep 21, 2023 •

edited

Loading

Uh oh!

llvmbot commented Sep 21, 2023 •

edited

Loading

Uh oh!

topperc commented Sep 23, 2023

Uh oh!

eopXD commented Sep 23, 2023

Uh oh!

Uh oh!

eopXD commented Sep 25, 2023

Uh oh!

Uh oh!

[Clang][RISCV] Handle RVV tuple types correctly as OutputOperands for inline asm #67018

Are you sure you want to change the base?

[Clang][RISCV] Handle RVV tuple types correctly as OutputOperands for inline asm #67018

Uh oh!

Conversation

eopXD commented Sep 21, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 21, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

topperc commented Sep 23, 2023

Uh oh!

eopXD commented Sep 23, 2023

Uh oh!

Uh oh!

eopXD commented Sep 25, 2023

Uh oh!

Uh oh!

eopXD commented Sep 21, 2023 •

edited

Loading

llvmbot commented Sep 21, 2023 •

edited

Loading