Skip to content

Commit 195c500

Browse files
committed
[clang][RISCV] Enable struct of homogeneous scalable vector as function argument
LLVM currently supports structs as function inputs, so RISC-V tuple types can simply use a struct of homogeneous scalable vectors instead of flattening them.
1 parent 19cab7e commit 195c500

File tree

510 files changed

+242838
-378199
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

510 files changed

+242838
-378199
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 79 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,7 +1531,8 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context,
15311531
case ABIArgInfo::Direct: {
15321532
// FIXME: handle sseregparm someday...
15331533
llvm::StructType *STy = dyn_cast<llvm::StructType>(AI.getCoerceToType());
1534-
if (AI.isDirect() && AI.getCanBeFlattened() && STy) {
1534+
if (AI.isDirect() && AI.getCanBeFlattened() && STy &&
1535+
!STy->containsHomogeneousScalableVectorTypes()) {
15351536
IRArgs.NumberOfArgs = STy->getNumElements();
15361537
} else {
15371538
IRArgs.NumberOfArgs = 1;
@@ -1713,7 +1714,8 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
17131714
// FCAs, so we flatten them if this is safe to do for this argument.
17141715
llvm::Type *argType = ArgInfo.getCoerceToType();
17151716
llvm::StructType *st = dyn_cast<llvm::StructType>(argType);
1716-
if (st && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
1717+
if (st && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened() &&
1718+
!st->containsHomogeneousScalableVectorTypes()) {
17171719
assert(NumIRArgs == st->getNumElements());
17181720
for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
17191721
ArgTypes[FirstIRArg + i] = st->getElementType(i);
@@ -3206,6 +3208,25 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
32063208
}
32073209
}
32083210

3211+
llvm::StructType *STy =
3212+
dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
3213+
llvm::TypeSize StructSize;
3214+
llvm::TypeSize PtrElementSize;
3215+
if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
3216+
STy->getNumElements() > 1) {
3217+
StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
3218+
PtrElementSize =
3219+
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(Ty));
3220+
if (STy->containsHomogeneousScalableVectorTypes()) {
3221+
assert(StructSize == PtrElementSize &&
3222+
"Only allow non-fractional movement of structure with"
3223+
"homogeneous scalable vector type");
3224+
3225+
ArgVals.push_back(ParamValue::forDirect(AI));
3226+
break;
3227+
}
3228+
}
3229+
32093230
Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg),
32103231
Arg->getName());
32113232

@@ -3214,53 +3235,29 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
32143235

32153236
// Fast-isel and the optimizer generally like scalar values better than
32163237
// FCAs, so we flatten them if this is safe to do for this argument.
3217-
llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
32183238
if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
32193239
STy->getNumElements() > 1) {
3220-
llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
3221-
llvm::TypeSize PtrElementSize =
3222-
CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType());
3223-
if (StructSize.isScalable()) {
3224-
assert(STy->containsHomogeneousScalableVectorTypes() &&
3225-
"ABI only supports structure with homogeneous scalable vector "
3226-
"type");
3227-
assert(StructSize == PtrElementSize &&
3228-
"Only allow non-fractional movement of structure with"
3229-
"homogeneous scalable vector type");
3230-
assert(STy->getNumElements() == NumIRArgs);
3231-
3232-
llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy);
3233-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3234-
auto *AI = Fn->getArg(FirstIRArg + i);
3235-
AI->setName(Arg->getName() + ".coerce" + Twine(i));
3236-
LoadedStructValue =
3237-
Builder.CreateInsertValue(LoadedStructValue, AI, i);
3238-
}
3240+
uint64_t SrcSize = StructSize.getFixedValue();
3241+
uint64_t DstSize = PtrElementSize.getFixedValue();
32393242

3240-
Builder.CreateStore(LoadedStructValue, Ptr);
3243+
Address AddrToStoreInto = Address::invalid();
3244+
if (SrcSize <= DstSize) {
3245+
AddrToStoreInto = Ptr.withElementType(STy);
32413246
} else {
3242-
uint64_t SrcSize = StructSize.getFixedValue();
3243-
uint64_t DstSize = PtrElementSize.getFixedValue();
3244-
3245-
Address AddrToStoreInto = Address::invalid();
3246-
if (SrcSize <= DstSize) {
3247-
AddrToStoreInto = Ptr.withElementType(STy);
3248-
} else {
3249-
AddrToStoreInto =
3250-
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
3251-
}
3247+
AddrToStoreInto =
3248+
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
3249+
}
32523250

3253-
assert(STy->getNumElements() == NumIRArgs);
3254-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3255-
auto AI = Fn->getArg(FirstIRArg + i);
3256-
AI->setName(Arg->getName() + ".coerce" + Twine(i));
3257-
Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
3258-
Builder.CreateStore(AI, EltPtr);
3259-
}
3251+
assert(STy->getNumElements() == NumIRArgs);
3252+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3253+
auto AI = Fn->getArg(FirstIRArg + i);
3254+
AI->setName(Arg->getName() + ".coerce" + Twine(i));
3255+
Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
3256+
Builder.CreateStore(AI, EltPtr);
3257+
}
32603258

3261-
if (SrcSize > DstSize) {
3262-
Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
3263-
}
3259+
if (SrcSize > DstSize) {
3260+
Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
32643261
}
32653262
} else {
32663263
// Simple case, just do a coerced store of the argument into the alloca.
@@ -5277,6 +5274,24 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
52775274
break;
52785275
}
52795276

5277+
llvm::StructType *STy =
5278+
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
5279+
llvm::Type *SrcTy = ConvertTypeForMem(I->Ty);
5280+
llvm::TypeSize SrcTypeSize;
5281+
llvm::TypeSize DstTypeSize;
5282+
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
5283+
SrcTypeSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
5284+
DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
5285+
if (STy->containsHomogeneousScalableVectorTypes()) {
5286+
assert(SrcTypeSize == DstTypeSize &&
5287+
"Only allow non-fractional movement of structure with "
5288+
"homogeneous scalable vector type");
5289+
5290+
IRCallArgs[FirstIRArg] = I->getKnownRValue().getScalarVal();
5291+
break;
5292+
}
5293+
}
5294+
52805295
// FIXME: Avoid the conversion through memory if possible.
52815296
Address Src = Address::invalid();
52825297
if (!I->isAggregate()) {
@@ -5292,54 +5307,30 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
52925307

52935308
// Fast-isel and the optimizer generally like scalar values better than
52945309
// FCAs, so we flatten them if this is safe to do for this argument.
5295-
llvm::StructType *STy =
5296-
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
52975310
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
5298-
llvm::Type *SrcTy = Src.getElementType();
5299-
llvm::TypeSize SrcTypeSize =
5300-
CGM.getDataLayout().getTypeAllocSize(SrcTy);
5301-
llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
5302-
if (SrcTypeSize.isScalable()) {
5303-
assert(STy->containsHomogeneousScalableVectorTypes() &&
5304-
"ABI only supports structure with homogeneous scalable vector "
5305-
"type");
5306-
assert(SrcTypeSize == DstTypeSize &&
5307-
"Only allow non-fractional movement of structure with "
5308-
"homogeneous scalable vector type");
5309-
assert(NumIRArgs == STy->getNumElements());
5310-
5311-
llvm::Value *StoredStructValue =
5312-
Builder.CreateLoad(Src, Src.getName() + ".tuple");
5313-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5314-
llvm::Value *Extract = Builder.CreateExtractValue(
5315-
StoredStructValue, i, Src.getName() + ".extract" + Twine(i));
5316-
IRCallArgs[FirstIRArg + i] = Extract;
5317-
}
5311+
uint64_t SrcSize = SrcTypeSize.getFixedValue();
5312+
uint64_t DstSize = DstTypeSize.getFixedValue();
5313+
5314+
// If the source type is smaller than the destination type of the
5315+
// coerce-to logic, copy the source value into a temp alloca the size
5316+
// of the destination type to allow loading all of it. The bits past
5317+
// the source value are left undef.
5318+
if (SrcSize < DstSize) {
5319+
Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
5320+
Src.getName() + ".coerce");
5321+
Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
5322+
Src = TempAlloca;
53185323
} else {
5319-
uint64_t SrcSize = SrcTypeSize.getFixedValue();
5320-
uint64_t DstSize = DstTypeSize.getFixedValue();
5321-
5322-
// If the source type is smaller than the destination type of the
5323-
// coerce-to logic, copy the source value into a temp alloca the size
5324-
// of the destination type to allow loading all of it. The bits past
5325-
// the source value are left undef.
5326-
if (SrcSize < DstSize) {
5327-
Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
5328-
Src.getName() + ".coerce");
5329-
Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
5330-
Src = TempAlloca;
5331-
} else {
5332-
Src = Src.withElementType(STy);
5333-
}
5324+
Src = Src.withElementType(STy);
5325+
}
53345326

5335-
assert(NumIRArgs == STy->getNumElements());
5336-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5337-
Address EltPtr = Builder.CreateStructGEP(Src, i);
5338-
llvm::Value *LI = Builder.CreateLoad(EltPtr);
5339-
if (ArgHasMaybeUndefAttr)
5340-
LI = Builder.CreateFreeze(LI);
5341-
IRCallArgs[FirstIRArg + i] = LI;
5342-
}
5327+
assert(NumIRArgs == STy->getNumElements());
5328+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5329+
Address EltPtr = Builder.CreateStructGEP(Src, i);
5330+
llvm::Value *LI = Builder.CreateLoad(EltPtr);
5331+
if (ArgHasMaybeUndefAttr)
5332+
LI = Builder.CreateFreeze(LI);
5333+
IRCallArgs[FirstIRArg + i] = LI;
53435334
}
53445335
} else {
53455336
// In the simple case, just pass the coerced loaded value.

0 commit comments

Comments
 (0)