Skip to content

Commit 6f232cc

Browse files
committed
[Clang][RISCV] Handle RVV tuple types correctly as OutputOperand for inline asm
The RVV tuple type maps to an aggregate type with homogeneous scalable vectors. EmitAsmStmt does not handle this correctly, and this commit attempts to fix it. Let struct types made of homogeneous scalable vectors pass the validation check in InlineAsm::verify. Handle RVV tuple types correctly in CGStmt.cpp:EmitAsmStores, since the tuple types can be stored directly. A follow-up commit will deal with the details of RVV tuple types used as InputOperands.
1 parent 7b8130c commit 6f232cc

File tree

3 files changed

+61
-17
lines changed

3 files changed

+61
-17
lines changed

clang/lib/CodeGen/CGStmt.cpp

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/ADT/SmallSet.h"
3030
#include "llvm/ADT/StringExtras.h"
3131
#include "llvm/IR/Assumptions.h"
32+
#include "llvm/IR/Constants.h"
3233
#include "llvm/IR/DataLayout.h"
3334
#include "llvm/IR/InlineAsm.h"
3435
#include "llvm/IR/Intrinsics.h"
@@ -2399,22 +2400,27 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
23992400
// ResultTypeRequiresCast elements correspond to the first
24002401
// ResultTypeRequiresCast.size() elements of RegResults.
24012402
if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
2402-
unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
2403-
Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
2404-
if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
2405-
Builder.CreateStore(Tmp, A);
2406-
continue;
2407-
}
2403+
if (ResultRegQualTys[i]->isRVVType() && Tmp->getType()->isStructTy()) {
2404+
Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
2405+
Dest = CGF.MakeAddrLValue(A, ResultRegQualTys[i]);
2406+
} else {
2407+
unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
2408+
Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
2409+
if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
2410+
Builder.CreateStore(Tmp, A);
2411+
continue;
2412+
}
24082413

2409-
QualType Ty =
2410-
CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false);
2411-
if (Ty.isNull()) {
2412-
const Expr *OutExpr = S.getOutputExpr(i);
2413-
CGM.getDiags().Report(OutExpr->getExprLoc(),
2414-
diag::err_store_value_to_reg);
2415-
return;
2414+
QualType Ty =
2415+
CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false);
2416+
if (Ty.isNull()) {
2417+
const Expr *OutExpr = S.getOutputExpr(i);
2418+
CGM.getDiags().Report(OutExpr->getExprLoc(),
2419+
diag::err_store_value_to_reg);
2420+
return;
2421+
}
2422+
Dest = CGF.MakeAddrLValue(A, Ty);
24162423
}
2417-
Dest = CGF.MakeAddrLValue(A, Ty);
24182424
}
24192425
CGF.EmitStoreThroughLValue(RValue::get(Tmp), Dest);
24202426
}
@@ -2531,7 +2537,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
25312537
ResultTruncRegTypes.push_back(Ty);
25322538
ResultTypeRequiresCast.push_back(RequiresCast);
25332539

2534-
if (RequiresCast) {
2540+
// Allow RVV tuple type (aggregate of homogeneous scalable vector) to be
2541+
// pushed into return type of inline asm call.
2542+
if (RequiresCast && !(QTy->isRVVType() && Ty->isStructTy())) {
25352543
unsigned Size = getContext().getTypeSize(QTy);
25362544
Ty = llvm::IntegerType::get(getLLVMContext(), Size);
25372545
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
2+
#include <riscv_vector.h>
3+
4+
// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
5+
// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
6+
7+
// CHECK-LABEL: define dso_local void @foo(
8+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
9+
// CHECK-NEXT: entry:
10+
// CHECK-NEXT: [[TMP0:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } asm "#NOP", "=^vr"() #[[ATTR1:[0-9]+]], !srcloc !4
11+
// CHECK-NEXT: ret void
12+
//
13+
void foo() {
14+
vint32m1x2_t v0;
15+
asm ("#NOP" : "=vr" (v0));
16+
}
17+
18+
// CHECK-LABEL: define dso_local void @bar(
19+
// CHECK-SAME: ) #[[ATTR0]] {
20+
// CHECK-NEXT: entry:
21+
// CHECK-NEXT: [[TMP0:%.*]] = call { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } asm "#NOP", "=^vr,=^vr"() #[[ATTR1]], !srcloc !5
22+
// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } [[TMP0]], 0
23+
// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } [[TMP0]], 1
24+
// CHECK-NEXT: ret void
25+
//
26+
void bar() {
27+
vint32m1x2_t v0, v2;
28+
asm ("#NOP" : "=vr" (v0), "=vr" (v2));
29+
}

llvm/lib/IR/InlineAsm.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,15 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) {
321321
return makeStringError("inline asm without outputs must return void");
322322
break;
323323
case 1:
324-
if (Ty->getReturnType()->isStructTy())
325-
return makeStringError("inline asm with one output cannot return struct");
324+
if (Ty->getReturnType()->isStructTy()) {
325+
// The return type may be a structure if the output operand is from RVV
326+
// tuple types. If so the structure must be a structure with homogeneous
327+
// scalable vector types.
328+
if (!cast<StructType>(Ty->getReturnType())
329+
->containsHomogeneousScalableVectorTypes())
330+
return makeStringError(
331+
"inline asm with one output cannot return struct");
332+
}
326333
break;
327334
default:
328335
StructType *STy = dyn_cast<StructType>(Ty->getReturnType());

0 commit comments

Comments
 (0)