diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 6674aa2409a59..a275c458f70b0 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -2399,22 +2400,27 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S, // ResultTypeRequiresCast elements correspond to the first // ResultTypeRequiresCast.size() elements of RegResults. if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { - unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); - Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]); - if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) { - Builder.CreateStore(Tmp, A); - continue; - } + if (ResultRegQualTys[i]->isRVVType() && Tmp->getType()->isStructTy()) { + Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]); + Dest = CGF.MakeAddrLValue(A, ResultRegQualTys[i]); + } else { + unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); + Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]); + if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) { + Builder.CreateStore(Tmp, A); + continue; + } - QualType Ty = - CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false); - if (Ty.isNull()) { - const Expr *OutExpr = S.getOutputExpr(i); - CGM.getDiags().Report(OutExpr->getExprLoc(), - diag::err_store_value_to_reg); - return; + QualType Ty = + CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false); + if (Ty.isNull()) { + const Expr *OutExpr = S.getOutputExpr(i); + CGM.getDiags().Report(OutExpr->getExprLoc(), + diag::err_store_value_to_reg); + return; + } + Dest = CGF.MakeAddrLValue(A, Ty); } - Dest = CGF.MakeAddrLValue(A, Ty); } CGF.EmitStoreThroughLValue(RValue::get(Tmp), Dest); } @@ -2531,7 +2537,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { ResultTruncRegTypes.push_back(Ty); ResultTypeRequiresCast.push_back(RequiresCast); - if (RequiresCast) { + // Allow RVV tuple type (aggregate of homogeneous scalable vector) to be + // pushed into return type of inline asm call. + if (RequiresCast && !(QTy->isRVVType() && Ty->isStructTy())) { unsigned Size = getContext().getTypeSize(QTy); Ty = llvm::IntegerType::get(getLLVMContext(), Size); } @@ -2701,10 +2709,40 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::max((uint64_t)LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue()); - ArgTypes.push_back(Arg->getType()); - ArgElemTypes.push_back(ArgElemType); - Args.push_back(Arg); - Constraints += InputConstraint; + // Expand RVV tuple type input operands. + if (InputExpr->getType()->isRVVType() && Arg->getType()->isStructTy()) { + std::string ExpandedInputContraint; + + auto *STy = cast(Arg->getType()); + + assert(STy->containsHomogeneousScalableVectorTypes() && + isa(STy->getElementType(0)) && + "Only aggregate type of homogeneous scalable vectors is handled " + "here"); + + auto *VTy = cast(STy->getElementType(0)); + + for (unsigned Idx = 0, TupleSize = STy->getNumElements(); + Idx != TupleSize; ++Idx) { + if (ExpandedInputContraint.size()) + ExpandedInputContraint += ","; + + ExpandedInputContraint += InputConstraint; + ArgTypes.push_back(VTy); + ArgElemTypes.push_back(ArgElemType); + + llvm::Value *SubVec = Builder.CreateExtractValue(Arg, {Idx}); + + Args.push_back(SubVec); + } + + Constraints += ExpandedInputContraint; + } else { + ArgTypes.push_back(Arg->getType()); + ArgElemTypes.push_back(ArgElemType); + Args.push_back(Arg); + Constraints += InputConstraint; + } } // Append the "input" part of inout constraints. diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c new file mode 100644 index 0000000000000..9dc6fb27de543 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c @@ -0,0 +1,42 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 +#include + +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s + +// CHECK-LABEL: define dso_local void @foo( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { , } asm "#NOP", "=^vr"() #[[ATTR1:[0-9]+]], !srcloc !4 +// CHECK-NEXT: ret void +// +void foo() { + vint32m1x2_t v0; + asm ("#NOP" : "=vr" (v0)); +} + +// CHECK-LABEL: define dso_local void @bar( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { { , }, { , } } asm "#NOP", "=^vr,=^vr"() #[[ATTR1]], !srcloc !5 +// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { { , }, { , } } [[TMP0]], 0 +// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { { , }, { , } } [[TMP0]], 1 +// CHECK-NEXT: ret void +// +void bar() { + vint32m1x2_t v0, v2; + asm ("#NOP" : "=vr" (v0), "=vr" (v2)); +} + +// CHECK-LABEL: define dso_local void @baz( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = extractvalue { , } undef, 0 +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } undef, 1 +// CHECK-NEXT: call void asm sideeffect "#NOP", "^vr,^vr"( [[TMP0]], [[TMP1]]) #[[ATTR2:[0-9]+]], !srcloc !6 +// CHECK-NEXT: ret void +// +void baz() { + vint32m1x2_t v2; + asm ("#NOP" :: "vr" (v2)); +} diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp index aeaa6a3741b94..319678cbb8fe1 100644 --- a/llvm/lib/IR/InlineAsm.cpp +++ b/llvm/lib/IR/InlineAsm.cpp @@ -321,8 +321,15 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) { return makeStringError("inline asm without outputs must return void"); break; case 1: - if (Ty->getReturnType()->isStructTy()) - return makeStringError("inline asm with one output cannot return struct"); + if (Ty->getReturnType()->isStructTy()) { + // The return type may be a structure if the output operand is from RVV + // tuple types. If so the structure must be a structure with homogeneous + // scalable vector types. + if (!cast(Ty->getReturnType()) + ->containsHomogeneousScalableVectorTypes()) + return makeStringError( + "inline asm with one output cannot return struct"); + } break; default: StructType *STy = dyn_cast(Ty->getReturnType());