Skip to content

Commit e12940b

Browse files
eopXD and 4vtomat committed
[Clang][RISCV] Handle RVV tuple types correctly as OutputOperand for inline asm
RVV tuple types map to an aggregate type made up of homogeneous scalable vectors. EmitAsmStmt did not handle this correctly, and this commit attempts to fix it. Allow structs of homogeneous scalable vector types to pass the single-output validation check in InlineAsm::verify. Handle RVV tuple types correctly in CGStmt.cpp:EmitAsmStores, since tuple types can be stored directly. A follow-up commit will deal with the details of using them as InputOperands. Co-authored-by: Brandon Wu <[email protected]>
1 parent cc73c5c commit e12940b

File tree

3 files changed

+63
-17
lines changed

3 files changed

+63
-17
lines changed

clang/lib/CodeGen/CGStmt.cpp

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/ADT/SmallSet.h"
3030
#include "llvm/ADT/StringExtras.h"
3131
#include "llvm/IR/Assumptions.h"
32+
#include "llvm/IR/Constants.h"
3233
#include "llvm/IR/DataLayout.h"
3334
#include "llvm/IR/InlineAsm.h"
3435
#include "llvm/IR/Intrinsics.h"
@@ -2487,22 +2488,28 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
24872488
// ResultTypeRequiresCast elements correspond to the first
24882489
// ResultTypeRequiresCast.size() elements of RegResults.
24892490
if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
2490-
unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
2491-
Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
2492-
if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
2493-
Builder.CreateStore(Tmp, A);
2494-
continue;
2495-
}
2491+
if (ResultRegQualTys[i]->isRVVSizelessBuiltinType() &&
2492+
Tmp->getType()->isStructTy()) {
2493+
Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
2494+
Dest = CGF.MakeAddrLValue(A, ResultRegQualTys[i]);
2495+
} else {
2496+
unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
2497+
Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
2498+
if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
2499+
Builder.CreateStore(Tmp, A);
2500+
continue;
2501+
}
24962502

2497-
QualType Ty =
2498-
CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false);
2499-
if (Ty.isNull()) {
2500-
const Expr *OutExpr = S.getOutputExpr(i);
2501-
CGM.getDiags().Report(OutExpr->getExprLoc(),
2502-
diag::err_store_value_to_reg);
2503-
return;
2503+
QualType Ty =
2504+
CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false);
2505+
if (Ty.isNull()) {
2506+
const Expr *OutExpr = S.getOutputExpr(i);
2507+
CGM.getDiags().Report(OutExpr->getExprLoc(),
2508+
diag::err_store_value_to_reg);
2509+
return;
2510+
}
2511+
Dest = CGF.MakeAddrLValue(A, Ty);
25042512
}
2505-
Dest = CGF.MakeAddrLValue(A, Ty);
25062513
}
25072514
CGF.EmitStoreThroughLValue(RValue::get(Tmp), Dest);
25082515
}
@@ -2648,7 +2655,10 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
26482655
ResultTruncRegTypes.push_back(Ty);
26492656
ResultTypeRequiresCast.push_back(RequiresCast);
26502657

2651-
if (RequiresCast) {
2658+
// Allow RVV tuple type (aggregate of homogeneous scalable vector) to be
2659+
// pushed into return type of inline asm call.
2660+
if (RequiresCast &&
2661+
!(QTy->isRVVSizelessBuiltinType() && Ty->isStructTy())) {
26522662
unsigned Size = getContext().getTypeSize(QTy);
26532663
Ty = llvm::IntegerType::get(getLLVMContext(), Size);
26542664
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
2+
#include <riscv_vector.h>
3+
4+
// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
5+
// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
6+
7+
// CHECK-LABEL: define dso_local void @foo(
8+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
9+
// CHECK-NEXT: entry:
10+
// CHECK-NEXT: [[TMP0:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } asm "#NOP", "=^vr"() #[[ATTR1:[0-9]+]], !srcloc [[META6:![0-9]+]]
11+
// CHECK-NEXT: ret void
12+
//
13+
void foo() {
14+
vint32m1x2_t v0;
15+
asm ("#NOP" : "=vr" (v0));
16+
}
17+
18+
// CHECK-LABEL: define dso_local void @bar(
19+
// CHECK-SAME: ) #[[ATTR0]] {
20+
// CHECK-NEXT: entry:
21+
// CHECK-NEXT: [[TMP0:%.*]] = call { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } asm "#NOP", "=^vr,=^vr"() #[[ATTR1]], !srcloc [[META7:![0-9]+]]
22+
// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } [[TMP0]], 0
23+
// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } [[TMP0]], 1
24+
// CHECK-NEXT: ret void
25+
//
26+
void bar() {
27+
vint32m1x2_t v0, v2;
28+
asm ("#NOP" : "=vr" (v0), "=vr" (v2));
29+
}

llvm/lib/IR/InlineAsm.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,15 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) {
321321
return makeStringError("inline asm without outputs must return void");
322322
break;
323323
case 1:
324-
if (Ty->getReturnType()->isStructTy())
325-
return makeStringError("inline asm with one output cannot return struct");
324+
if (Ty->getReturnType()->isStructTy()) {
325+
// The return type may be a structure if the output operand is from RVV
326+
// tuple types. If so the structure must be a structure with homogeneous
327+
// scalable vector types.
328+
if (!cast<StructType>(Ty->getReturnType())
329+
->containsHomogeneousScalableVectorTypes())
330+
return makeStringError(
331+
"inline asm with one output cannot return struct");
332+
}
326333
break;
327334
default:
328335
StructType *STy = dyn_cast<StructType>(Ty->getReturnType());

0 commit comments

Comments
 (0)