Skip to content

Commit 3c51051

Browse files
authored
Merge pull request #79 from sx-aurora-dev/feature/vector-to-vregs
[VE][isel] Map EVT vectors to vector registers.
2 parents e6d48f3 + 49d0c61 commit 3c51051

21 files changed

+1387
-3091
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,14 @@ class TargetLoweringBase {
10501050
return Legal;
10511051
}
10521052

1053+
// Use this to bypass the builtin legalization decisions for EVTs. The builtin
1054+
// scheme may lead to undesirable results (eg power-of-two-padding or
1055+
// scalarization) for EVT-typed nodes (eg v7f16).
1056+
virtual Optional<LegalizeKind> getCustomTypeConversion(LLVMContext &Context,
1057+
EVT VT) const {
1058+
return None;
1059+
}
1060+
10531061
/// Return how this operation should be treated: either it is legal, needs to
10541062
/// be promoted to a larger size, needs to be expanded to some other code
10551063
/// sequence, or the target has a custom expander for it.

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,27 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
422422
return Val;
423423
}
424424

425+
// Vector/Vector bitcast.
426+
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
427+
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
428+
429+
if (ValueVT.isScalableVector()) {
430+
assert(PartEVT.getVectorElementCount() ==
431+
ValueVT.getVectorElementCount());
432+
// Promote or truncate.
433+
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
434+
}
435+
436+
// Shorten and promote.
437+
assert(PartEVT.getVectorNumElements() >= ValueVT.getVectorNumElements());
438+
if (PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
439+
EVT ClippedVT =
440+
EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
441+
ValueVT.getVectorNumElements());
442+
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ClippedVT, Val,
443+
DAG.getVectorIdxConstant(0, DL));
444+
}
445+
425446
// Promoted vector extract
426447
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
427448
}
@@ -611,26 +632,42 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
611632

612633
static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
613634
const SDLoc &DL, EVT PartVT) {
635+
614636
if (!PartVT.isVector())
615637
return SDValue();
616638

617639
EVT ValueVT = Val.getValueType();
618640
ElementCount PartNumElts = PartVT.getVectorElementCount();
619641
ElementCount ValueNumElts = ValueVT.getVectorElementCount();
620642

643+
// Widening a scalable vector to another scalable vector is done by inserting
644+
// the vector into a larger undef one.
645+
if (PartVT.isFixedLengthVector() &&
646+
(PartNumElts.getFixedValue() > ValueNumElts.getFixedValue())) {
647+
// Promote first?
648+
if (PartVT.getVectorElementType() != ValueVT.getVectorElementType()) {
649+
if (PartVT.getVectorElementType().getScalarSizeInBits() <
650+
ValueVT.getVectorElementType().getScalarSizeInBits()) {
651+
return SDValue();
652+
}
653+
654+
// Promote, then extract.
655+
EVT PromotedVT =
656+
EVT::getVectorVT(*DAG.getContext(), PartVT.getVectorElementType(),
657+
ValueVT.getVectorNumElements());
658+
Val = DAG.getAnyExtOrTrunc(Val, DL, PromotedVT);
659+
}
660+
} else if (PartNumElts.isScalable())
661+
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
662+
Val, DAG.getVectorIdxConstant(0, DL));
621663
// We only support widening vectors with equivalent element types and
622664
// fixed/scalable properties. If a target needs to widen a fixed-length type
623665
// to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
624-
if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
666+
else if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
625667
PartNumElts.isScalable() != ValueNumElts.isScalable() ||
626668
PartVT.getVectorElementType() != ValueVT.getVectorElementType())
627669
return SDValue();
628670

629-
// Widening a scalable vector to another scalable vector is done by inserting
630-
// the vector into a larger undef one.
631-
if (PartNumElts.isScalable())
632-
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
633-
Val, DAG.getVectorIdxConstant(0, DL));
634671

635672
EVT ElementVT = PartVT.getVectorElementType();
636673
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,11 @@ void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
956956

957957
TargetLoweringBase::LegalizeKind
958958
TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
959+
// Fully customized legalization.
960+
Optional<LegalizeKind> CustomLK = getCustomTypeConversion(Context, VT);
961+
if (CustomLK)
962+
return *CustomLK;
963+
959964
// If this is a simple type, use the ComputeRegisterProp mechanism.
960965
if (VT.isSimple()) {
961966
MVT SVT = VT.getSimpleVT();

llvm/lib/Target/VE/VECallingConv.td

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ class CCIfNotSubtarget<string F, CCAction A>
3030
class CCIfVPU<CCAction A> : CCIfSubtarget<"enableVPU()",A>;
3131
class CCIfNotVPU<CCAction A> : CCIfNotSubtarget<"enableVPU()",A>;
3232

33+
// Wraps action A in two subtarget predicates: the "enableVPU()" check supplied
// by CCIfVPU and an inner "hasPackedMode()" check.
class CCIfPacked<CCAction A> : CCIfVPU<CCIfSubtarget<"hasPackedMode()",A>>;
34+
// Wraps action A in CCIfNotSubtarget on "hasPackedMode()" (presumably A applies
// when the subtarget lacks packed mode - confirm against CCIfNotSubtarget).
// NOTE(review): unlike CCIfPacked, this does not also test "enableVPU()".
class CCIfNotPacked<CCAction A> : CCIfNotSubtarget<"hasPackedMode()",A>;
35+
3336
def CC_VE_C_Stack: CallingConv<[
3437
// F128 are assigned to the stack in 16-byte aligned units
3538
CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>,
@@ -176,24 +179,22 @@ def RetCC_VE_C : CallingConv<[
176179
///// fastcc - fast vreg passing /////
177180
def CC_VE_Fast : CallingConv<[
178181
// Virtual packed registers.
179-
CCIfVPU<CCIfType<[v512f64, v512i64],
182+
CCIfPacked<CCIfType<[v512f64, v512i64],
180183
CCAssignToRegWithShadow<[VP0, VP1, VP2, VP3],
181184
[V0, V2, V4, V6]>>>,
182185

183186
// vector --> generic vector registers
184-
CCIfVPU<CCIfType<[v256i32, v256f32, v256i64, v256f64,
185-
v512i32, v512f32],
187+
CCIfVPU<CCIfType<[v256i32, v256f32, v256i64, v256f64],
186188
CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>>,
187-
188-
CCIfVPU<CCIfType<[v512i32, v512f32],
189+
CCIfPacked<CCIfType<[v512i32, v512f32],
189190
CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>>,
190191

191192
// vector mask --> generic vector mask registers
192193
CCIfVPU<CCIfType<[v256i1],
193194
CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>>,
194195

195196
// pair of vector mask --> generic vector mask registers
196-
CCIfVPU<CCIfType<[v512i1],
197+
CCIfPacked<CCIfType<[v512i1],
197198
CCAssignToRegWithShadow<[VMP1, VMP2, VMP3], [VM1, VM3, VM5]>>>,
198199

199200
// Default to the standard cc
@@ -202,23 +203,23 @@ def CC_VE_Fast : CallingConv<[
202203

203204
def RetCC_VE_Fast : CallingConv<[
204205
// Virtual packed registers.
205-
CCIfVPU<CCIfType<[v512f64, v512i64],
206+
CCIfPacked<CCIfType<[v512f64, v512i64],
206207
CCAssignToRegWithShadow<[VP0, VP1, VP2, VP3],
207208
[V0, V2, V4, V6]>>>,
208209

209210
// vector --> generic vector registers
210211
CCIfVPU<CCIfType<[v256i32, v256f32, v256i64, v256f64],
211212
CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>>,
212213

213-
CCIfVPU<CCIfType<[v512i32, v512f32],
214+
CCIfPacked<CCIfType<[v512i32, v512f32],
214215
CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>>,
215216

216217
// vector mask --> generic vector mask registers
217218
CCIfVPU<CCIfType<[v256i1],
218219
CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>>,
219220

220221
// pair of vector mask --> generic vector mask registers
221-
CCIfVPU<CCIfType<[v512i1],
222+
CCIfPacked<CCIfType<[v512i1],
222223
CCAssignToRegWithShadow<[VMP1, VMP2, VMP3],
223224
[VM1, VM3, VM5]>>>,
224225

llvm/lib/Target/VE/VEISelLowering.cpp

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3777,3 +3777,201 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
37773777
return lowerVAARG(Op, DAG);
37783778
}
37793779
}
3780+
3781+
/// Returns true if \p VT is a non-vector type whose scalar size is at most
/// 32 bits. Vector types are never considered packable elements.
static bool isPackableElemVT(EVT VT) {
  return !VT.isVector() && VT.getScalarSizeInBits() <= 32;
}
3786+
3787+
/// Returns true if \p VT has the shape of a vector register type.
///
/// Accepted shapes:
///  - any fixed-length vector with exactly 256 elements (regular mode), and
///  - fixed-length vectors with exactly 512 elements whose element type is
///    i1, i32 or f32 (legal as packed vectors).
/// Non-vectors, scalable vectors and all other element counts are rejected.
///
/// NOTE(review): the 256-element case does not inspect the element type at
/// all - confirm that this is intended for narrow or oversized elements.
static bool isVectorRegisterVT(EVT VT) {
  const bool IsFixedVector = VT.isVector() && !VT.isScalableVector();
  if (!IsFixedVector)
    return false;

  const unsigned Count = VT.getVectorNumElements();

  // Regular mode: every 256-element vector qualifies.
  if (Count == 256)
    return true;

  // Anything that is neither 256 nor 512 elements long is not a legal count.
  if (Count != 512)
    return false;

  // 512 elements: only the element types that are legal in packed form.
  const EVT Elem = VT.getVectorElementType();
  return Elem == MVT::i1 || Elem == MVT::i32 || Elem == MVT::f32;
}
3804+
3805+
/// Builds the legalization step that promotes the elements of a vector to a
/// 32-bit element type while keeping the element count.
///
/// \param Context  context used to construct the promoted vector type.
/// \param ElemVT   current element type; must be floating point or integer.
/// \param NumElems number of elements of the vector being legalized.
/// \returns TypePromoteFloat with an f32 vector for floating-point elements,
///          otherwise TypePromoteInteger with an i32 vector.
static TargetLoweringBase::LegalizeKind
getPromoteElementConversion(LLVMContext &Context, EVT ElemVT,
                            unsigned NumElems) {
  using LegalizeKind = TargetLoweringBase::LegalizeKind;
  using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction;

  // Floating-point elements are promoted to f32.
  if (ElemVT.isFloatingPoint())
    return LegalizeKind(LegalizeTypeAction::TypePromoteFloat,
                        EVT::getVectorVT(Context, MVT::f32, NumElems));

  // Everything else has to be an integer element and is promoted to i32.
  assert(ElemVT.isInteger());
  return LegalizeKind(LegalizeTypeAction::TypePromoteInteger,
                      EVT::getVectorVT(Context, MVT::i32, NumElems));
}
3823+
3824+
/// Builds the legalization step that widens a vector to \p LegalNumElems
/// elements without changing its element type.
///
/// \param Context       context used to construct the widened vector type.
/// \param ElemVT        element type, carried over unchanged.
/// \param LegalNumElems element count of the widened vector.
/// \returns a TypeWidenVector action paired with the widened vector type.
static TargetLoweringBase::LegalizeKind
getWidenVectorConversion(LLVMContext &Context, EVT ElemVT,
                         unsigned LegalNumElems) {
  const EVT WidenedVT = EVT::getVectorVT(Context, ElemVT, LegalNumElems);
  return TargetLoweringBase::LegalizeKind(
      TargetLoweringBase::LegalizeTypeAction::TypeWidenVector, WidenedVT);
}
3833+
3834+
/// Builds the legalization step that splits a vector in half.
///
/// \param Context  context used to construct the half-sized vector type.
/// \param ElemVT   element type, carried over unchanged.
/// \param NumElems element count of the vector being split.
/// \returns a TypeSplitVector action paired with a vector of
///          (NumElems + 1) / 2 elements, i.e. half the count rounded up.
static TargetLoweringBase::LegalizeKind
getSplitVectorConversion(LLVMContext &Context, EVT ElemVT, unsigned NumElems) {
  const unsigned HalfNumElems = (NumElems + 1) / 2;
  const EVT HalfVT = EVT::getVectorVT(Context, ElemVT, HalfNumElems);
  return TargetLoweringBase::LegalizeKind(
      TargetLoweringBase::LegalizeTypeAction::TypeSplitVector, HalfVT);
}
3842+
3843+
/// Custom type legalization for fixed-length vector EVTs when the VPU is
/// enabled.
///
/// Returns one legalization step for \p VT, or None when the builtin scheme
/// should decide. The steps are tried in this order:
///  1. non-vectors, scalable vectors, single-element vectors and subtargets
///     without VPU are not customized;
///  2. types that already have vector-register shape (and need no unavailable
///     packed register) are not customized;
///  3. elements wider than 1 bit but narrower than 32 bits are promoted to
///     i32/f32;
///  4. elements wider than 64 bits are left to the builtin expansion;
///  5. vectors shorter than the register width (256, or 512 in packed mode)
///     are widened to it;
///  6. longer vectors with an even element count are split in half.
///
/// \param Context context used to construct the resulting vector types.
/// \param VT      the type to legalize.
/// \returns the legalization step, or None if \p VT is not custom converted.
Optional<TargetLoweringBase::LegalizeKind>
VETargetLowering::getCustomTypeConversion(LLVMContext &Context, EVT VT) const {
  // Do not interfere with SPU legalization. Scalable vectors are left to the
  // builtin scheme as well: they have no fixed element count, so
  // getVectorNumElements() must not be queried for them.
  if (!VT.isVector() || VT.isScalableVector() || !Subtarget->enableVPU())
    return None;

  const unsigned NumElems = VT.getVectorNumElements();
  if (NumElems == 1)
    return None;

  const EVT ElemVT = VT.getVectorElementType();
  const auto ElemBits = ElemVT.getScalarSizeInBits();

  // Only use packed mode when surpassing the regular (256 elements) vector
  // size.
  const bool RequiresPackedRegister =
      isOverPackedType(VT) || (isPackableElemVT(ElemVT) && NumElems > 256);

  // Already a legal type.
  if (isVectorRegisterVT(VT) &&
      (!RequiresPackedRegister || Subtarget->hasPackedMode()))
    return None;

  // Promote small elements to i/f32.
  if (1 < ElemBits && ElemBits < 32)
    return getPromoteElementConversion(Context, ElemVT, NumElems);

  // Excessive element size.
  if (ElemBits > 64)
    return None; // Defer to builtin expansion for oversized vectors.

  // A packed register is only used when it is both required and available.
  const bool UsePackedRegister =
      Subtarget->hasPackedMode() && RequiresPackedRegister;

  // Widen to register width.
  const unsigned RegisterNumElems = UsePackedRegister ? 512 : 256;
  if (NumElems < RegisterNumElems)
    return getWidenVectorConversion(Context, ElemVT, RegisterNumElems);

  // Split to register width.
  // TODO: Teach isel to split non-power-of-two vectors.
  if (NumElems > RegisterNumElems && (NumElems % 2 == 0))
    return getSplitVectorConversion(Context, ElemVT, NumElems);

  // Type is either legal or not custom converted.
  return None;
}
3890+
3891+
/// Computes the register type and register count used to pass \p VT under the
/// fast calling convention.
///
/// \param Context context forwarded to the vector type breakdown.
/// \param CC      calling convention; only CallingConv::Fast is handled.
/// \param VT      the type being passed; only fixed-length vectors are handled.
/// \returns None for any other calling convention, for non-vectors and for
///          scalable vectors; otherwise the register type and count reported
///          by getVectorTypeBreakdownForCallingConv.
Optional<VETargetLowering::RegisterCountPair>
VETargetLowering::getRegistersForCallingConv(LLVMContext &Context,
                                             CallingConv::ID CC, EVT VT) const {
  using RegisterCount = VETargetLowering::RegisterCountPair;

  const bool IsFastCC = (CC == CallingConv::Fast);
  if (!IsFastCC || !VT.isVector() || VT.isScalableVector())
    return None;

  // Only the register type and count are reported to the caller; the
  // intermediate type information is computed but not used here.
  EVT UnusedIntermediateVT;
  unsigned UnusedNumIntermediates;
  MVT PartRegisterVT;
  const unsigned PartCount = getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, UnusedIntermediateVT, UnusedNumIntermediates,
      PartRegisterVT);
  return RegisterCount{PartRegisterVT, PartCount};
}
3907+
3908+
/// Breaks a vector type down into register-sized parts for a call.
///
/// For the fast calling convention, the custom type conversion rules are
/// applied repeatedly until they no longer change the type, so that vectors
/// have the same layout inside functions as across function boundaries. Every
/// other case is forwarded to the TargetLoweringBase implementation.
///
/// \param Context               context used for type construction.
/// \param CC                    calling convention of the call.
/// \param VT                    the vector type to break down.
/// \param[out] IntermediateVT   type used to copy the parts.
/// \param[out] NumIntermediates number of intermediate values.
/// \param[out] RegisterVT       register type the conversion converged to.
/// \returns the number of intermediates on the custom path, otherwise
///          whatever the TargetLoweringBase implementation returns.
unsigned VETargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Fallback: the builtin breakdown logic.
  auto UseBuiltinBreakdown = [&]() {
    return TargetLoweringBase::getVectorTypeBreakdownForCallingConv(
        Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
  };

  const bool IsScalable = VT.isScalableVector();
  const EVT ElemVT = VT.getVectorElementType();
  // Scalable vectors have no fixed element count; 0 is a placeholder that is
  // never used because they take the builtin path below.
  const unsigned NumElems = IsScalable ? 0 : VT.getVectorNumElements();
  const bool RequiresPackedRegister =
      !IsScalable &&
      (isOverPackedType(VT) || (isPackableElemVT(ElemVT) && NumElems > 256));

  // Only fastcc with fixed-length vectors is customized.
  if (CC != CallingConv::Fast || IsScalable)
    return UseBuiltinBreakdown();

  // NOTE(review): getCustomTypeConversion treats a register-shaped type as
  // legal when `!RequiresPackedRegister || hasPackedMode()`, whereas this
  // test is `!hasPackedMode() || !RequiresPackedRegister` - confirm that the
  // difference is intended.
  if (isVectorRegisterVT(VT) &&
      (!Subtarget->hasPackedMode() || !RequiresPackedRegister))
    return UseBuiltinBreakdown();

  // fastcc - map everything to vregs.
  Optional<LegalizeKind> Step = getCustomTypeConversion(Context, VT);
  // Non-custom converted type - back to builtin logic.
  if (!Step.hasValue())
    return UseBuiltinBreakdown();

  // Compute the fixed point of the custom type conversion rules.
  // IntermediateVT is the type used to copy the parts; start from the
  // original type as a single part.
  IntermediateVT = VT;
  NumIntermediates = 1;

  EVT ConvergedVT;
  while (Step.hasValue()) {
    const auto Action = Step->first;
    ConvergedVT = Step->second;

    if (Action == LegalizeTypeAction::TypeSplitVector) {
      // The last split results in the intermediate VT used for copying
      // vectors at calls; every split doubles the number of parts.
      IntermediateVT = ConvergedVT;
      NumIntermediates *= 2;
    } else if (Action == LegalizeTypeAction::TypePromoteFloat ||
               Action == LegalizeTypeAction::TypePromoteInteger) {
      // Promote elements across call boundaries.
      IntermediateVT = ConvergedVT;
    } else if (Action != LegalizeTypeAction::TypeWidenVector) {
      // Any other action is not handled here.
      return UseBuiltinBreakdown();
    }
    // TypeWidenVector: keep IntermediateVT at the original vector length to
    // retain all information about it.

    Step = getCustomTypeConversion(Context, ConvergedVT);
  }

  // Must converge in a valid RegisterVT.
  RegisterVT = ConvergedVT.getSimpleVT();

  return NumIntermediates;
}

0 commit comments

Comments
 (0)