Skip to content

[VE][isel] Map EVT vectors to vector registers. #79

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,14 @@ class TargetLoweringBase {
return Legal;
}

/// Use this hook to bypass the builtin type-legalization decisions for EVTs.
/// The builtin scheme may lead to undesirable results (e.g. power-of-two
/// padding or scalarization) for EVT-typed nodes (e.g. v7f16). Targets
/// override this to supply their own LegalizeKind for such types; returning
/// None defers to the builtin legalization logic.
virtual Optional<LegalizeKind> getCustomTypeConversion(LLVMContext &Context,
EVT VT) const {
return None;
}

/// Return how this operation should be treated: either it is legal, needs to
/// be promoted to a larger size, needs to be expanded to some other code
/// sequence, or the target has a custom expander for it.
Expand Down
49 changes: 43 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,27 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
return Val;
}

// Vector/Vector bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

if (ValueVT.isScalableVector()) {
assert(PartEVT.getVectorElementCount() ==
ValueVT.getVectorElementCount());
// Promote or truncate.
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}

// Shorten and promote.
assert(PartEVT.getVectorNumElements() >= ValueVT.getVectorNumElements());
if (PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
EVT ClippedVT =
EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
ValueVT.getVectorNumElements());
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ClippedVT, Val,
DAG.getVectorIdxConstant(0, DL));
}

// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
Expand Down Expand Up @@ -611,26 +632,42 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,

static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL, EVT PartVT) {

if (!PartVT.isVector())
return SDValue();

EVT ValueVT = Val.getValueType();
ElementCount PartNumElts = PartVT.getVectorElementCount();
ElementCount ValueNumElts = ValueVT.getVectorElementCount();

// Widening a scalable vector to another scalable vector is done by inserting
// the vector into a larger undef one.
if (PartVT.isFixedLengthVector() &&
(PartNumElts.getFixedValue() > ValueNumElts.getFixedValue())) {
// Promote first?
if (PartVT.getVectorElementType() != ValueVT.getVectorElementType()) {
if (PartVT.getVectorElementType().getScalarSizeInBits() <
ValueVT.getVectorElementType().getScalarSizeInBits()) {
return SDValue();
}

// Promote, then extract.
EVT PromotedVT =
EVT::getVectorVT(*DAG.getContext(), PartVT.getVectorElementType(),
ValueVT.getVectorNumElements());
Val = DAG.getAnyExtOrTrunc(Val, DL, PromotedVT);
}
} else if (PartNumElts.isScalable())
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));
// We only support widening vectors with equivalent element types and
// fixed/scalable properties. If a target needs to widen a fixed-length type
// to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
else if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
PartNumElts.isScalable() != ValueNumElts.isScalable() ||
PartVT.getVectorElementType() != ValueVT.getVectorElementType())
return SDValue();

// Widening a scalable vector to another scalable vector is done by inserting
// the vector into a larger undef one.
if (PartNumElts.isScalable())
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));

EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,11 @@ void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {

TargetLoweringBase::LegalizeKind
TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// Fully customized legalization.
Optional<LegalizeKind> CustomLK = getCustomTypeConversion(Context, VT);
if (CustomLK)
return *CustomLK;

// If this is a simple type, use the ComputeRegisterProp mechanism.
if (VT.isSimple()) {
MVT SVT = VT.getSimpleVT();
Expand Down
19 changes: 10 additions & 9 deletions llvm/lib/Target/VE/VECallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ class CCIfNotSubtarget<string F, CCAction A>
class CCIfVPU<CCAction A> : CCIfSubtarget<"enableVPU()",A>;
class CCIfNotVPU<CCAction A> : CCIfNotSubtarget<"enableVPU()",A>;

class CCIfPacked<CCAction A> : CCIfVPU<CCIfSubtarget<"hasPackedMode()",A>>;
class CCIfNotPacked<CCAction A> : CCIfNotSubtarget<"hasPackedMode()",A>;

def CC_VE_C_Stack: CallingConv<[
// F128 are assigned to the stack in 16-byte aligned units
CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>,
Expand Down Expand Up @@ -176,24 +179,22 @@ def RetCC_VE_C : CallingConv<[
///// fastcc - fast vreg passing /////
def CC_VE_Fast : CallingConv<[
// Virtual packed registers.
CCIfVPU<CCIfType<[v512f64, v512i64],
CCIfPacked<CCIfType<[v512f64, v512i64],
CCAssignToRegWithShadow<[VP0, VP1, VP2, VP3],
[V0, V2, V4, V6]>>>,

// vector --> generic vector registers
CCIfVPU<CCIfType<[v256i32, v256f32, v256i64, v256f64,
v512i32, v512f32],
CCIfVPU<CCIfType<[v256i32, v256f32, v256i64, v256f64],
CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>>,

CCIfVPU<CCIfType<[v512i32, v512f32],
CCIfPacked<CCIfType<[v512i32, v512f32],
CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>>,

// vector mask --> generic vector mask registers
CCIfVPU<CCIfType<[v256i1],
CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>>,

// pair of vector mask --> generic vector mask registers
CCIfVPU<CCIfType<[v512i1],
CCIfPacked<CCIfType<[v512i1],
CCAssignToRegWithShadow<[VMP1, VMP2, VMP3], [VM1, VM3, VM5]>>>,

// Default to the standard cc
Expand All @@ -202,23 +203,23 @@ def CC_VE_Fast : CallingConv<[

def RetCC_VE_Fast : CallingConv<[
// Virtual packed registers.
CCIfVPU<CCIfType<[v512f64, v512i64],
CCIfPacked<CCIfType<[v512f64, v512i64],
CCAssignToRegWithShadow<[VP0, VP1, VP2, VP3],
[V0, V2, V4, V6]>>>,

// vector --> generic vector registers
CCIfVPU<CCIfType<[v256i32, v256f32, v256i64, v256f64],
CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>>,

CCIfVPU<CCIfType<[v512i32, v512f32],
CCIfPacked<CCIfType<[v512i32, v512f32],
CCAssignToReg<[V0, V1, V2, V3, V4, V5, V6, V7]>>>,

// vector mask --> generic vector mask registers
CCIfVPU<CCIfType<[v256i1],
CCAssignToReg<[VM1, VM2, VM3, VM4, VM5, VM6, VM7]>>>,

// pair of vector mask --> generic vector mask registers
CCIfVPU<CCIfType<[v512i1],
CCIfPacked<CCIfType<[v512i1],
CCAssignToRegWithShadow<[VMP1, VMP2, VMP3],
[VM1, VM3, VM5]>>>,

Expand Down
198 changes: 198 additions & 0 deletions llvm/lib/Target/VE/VEISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3777,3 +3777,201 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerVAARG(Op, DAG);
}
}

/// Whether \p VT is a scalar type narrow enough (<= 32 bits) to occupy one
/// half of a packed-mode vector element.
static bool isPackableElemVT(EVT VT) {
  return !VT.isVector() && VT.getScalarSizeInBits() <= 32;
}

/// Whether \p VT maps directly onto a VE vector (or vector-mask) register.
static bool isVectorRegisterVT(EVT VT) {
  // Only fixed-length vectors can live in VE vector registers.
  if (!VT.isVector() || VT.isScalableVector())
    return false;

  switch (VT.getVectorNumElements()) {
  case 256:
    // Regular-mode registers hold 256 elements of any supported element type.
    return true;
  case 512: {
    // 512 elements are only representable in packed mode (i1/i32/f32).
    EVT ElemVT = VT.getVectorElementType();
    return ElemVT == MVT::i1 || ElemVT == MVT::i32 || ElemVT == MVT::f32;
  }
  default:
    // Any other element count is not a legal register type.
    return false;
  }
}

/// Build the LegalizeKind that promotes the elements of a short-element
/// vector to the 32-bit type of the same domain (f32 for FP, i32 for int),
/// keeping the element count \p NumElems unchanged.
static TargetLoweringBase::LegalizeKind
getPromoteElementConversion(LLVMContext &Context, EVT ElemVT,
                            unsigned NumElems) {
  using LegalizeKind = TargetLoweringBase::LegalizeKind;
  using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction;

  const bool IsFP = ElemVT.isFloatingPoint();
  assert(IsFP || ElemVT.isInteger());
  // FP elements promote to f32, integer elements to i32.
  const MVT WideElemVT = IsFP ? MVT::f32 : MVT::i32;
  const LegalizeTypeAction LTA = IsFP ? LegalizeTypeAction::TypePromoteFloat
                                      : LegalizeTypeAction::TypePromoteInteger;
  return LegalizeKind(LTA, EVT::getVectorVT(Context, WideElemVT, NumElems));
}

/// Build the LegalizeKind that widens a short vector of \p ElemVT elements up
/// to the legal register element count \p LegalNumElems.
static TargetLoweringBase::LegalizeKind
getWidenVectorConversion(LLVMContext &Context, EVT ElemVT,
                         unsigned LegalNumElems) {
  using LegalizeKind = TargetLoweringBase::LegalizeKind;
  using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction;

  EVT WidenedVT = EVT::getVectorVT(Context, ElemVT, LegalNumElems);
  return LegalizeKind(LegalizeTypeAction::TypeWidenVector, WidenedVT);
}

/// Build the LegalizeKind that splits an over-long vector of \p ElemVT
/// elements into two halves, rounding the half length up for odd counts.
static TargetLoweringBase::LegalizeKind
getSplitVectorConversion(LLVMContext &Context, EVT ElemVT, unsigned NumElems) {
  using LegalizeKind = TargetLoweringBase::LegalizeKind;
  using LegalizeTypeAction = TargetLoweringBase::LegalizeTypeAction;

  const unsigned HalfNumElems = (NumElems + 1) / 2;
  EVT HalfVT = EVT::getVectorVT(Context, ElemVT, HalfNumElems);
  return LegalizeKind(LegalizeTypeAction::TypeSplitVector, HalfVT);
}

/// Custom type-legalization for EVT vectors on VE: promote narrow elements to
/// 32 bits, then widen or split to the native vector register length (256
/// elements; 512 in packed mode). Returns None to defer to the builtin rules.
Optional<TargetLoweringBase::LegalizeKind>
VETargetLowering::getCustomTypeConversion(LLVMContext &Context, EVT VT) const {
  // Do not interfere with SPU legalization: non-vectors, VPU-disabled
  // subtargets, and single-element vectors go through the builtin path.
  if (!VT.isVector() || !Subtarget->enableVPU() ||
      VT.getVectorNumElements() == 1)
    return None;

  EVT ElemVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  auto ElemBits = ElemVT.getScalarSizeInBits();

  // Packed registers are needed either for over-packed element types or when
  // a packable (<=32-bit) element count exceeds the regular 256 elements.
  const bool RequiresPackedRegister =
      isOverPackedType(VT) || (isPackableElemVT(ElemVT) && NumElems > 256);

  // Already a legal type - nothing to do.
  if (isVectorRegisterVT(VT) &&
      (!RequiresPackedRegister || Subtarget->hasPackedMode()))
    return None;

  // Promote small elements to i/f32 (i1 is left alone - it maps to mask
  // registers).
  if (1 < ElemBits && ElemBits < 32)
    return getPromoteElementConversion(Context, ElemVT, NumElems);

  // Excessive element size.
  if (ElemBits > 64)
    return None; // Defer to builtin expansion for oversized vectors.

  // Only use packed mode when the subtarget supports it AND the type actually
  // surpasses the regular (256 elements) vector size.
  const bool UsePackedRegister =
      Subtarget->hasPackedMode() && RequiresPackedRegister;

  // Widen short vectors to the full register width.
  const unsigned RegisterNumElems = UsePackedRegister ? 512 : 256;
  if (NumElems < RegisterNumElems)
    return getWidenVectorConversion(Context, ElemVT, RegisterNumElems);

  // Split over-long vectors in half.
  // TODO: Teach isel to split non-power-of-two vectors.
  if (NumElems > RegisterNumElems && (NumElems % 2 == 0))
    return getSplitVectorConversion(Context, ElemVT, NumElems);

  // Type is either legal or not custom converted.
  return None;
}

/// Report the register type and count used to pass \p VT for calling
/// convention \p CC, or None to fall back to the default lowering.
Optional<VETargetLowering::RegisterCountPair>
VETargetLowering::getRegistersForCallingConv(LLVMContext &Context,
                                             CallingConv::ID CC, EVT VT) const {
  // Only fastcc with fixed-length vector types is handled here.
  if (CC != CallingConv::Fast || !VT.isVector() || VT.isScalableVector())
    return None;

  // Derive the register type/count from the vector type breakdown.
  MVT RegisterVT;
  EVT IntermediateVT;
  unsigned NumIntermediates;
  const unsigned NumRegs = getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
  return RegisterCountPair{RegisterVT, NumRegs};
}

/// Compute how a vector argument/return of type \p VT is broken into
/// registers for calling convention \p CC. For fastcc, iterates the custom
/// type-conversion rules to a fixed point so vectors have the same layout
/// inside functions and across call boundaries; everything else defers to
/// the base-class implementation.
/// NOTE(review): \p VT is dereferenced as a vector (getVectorElementType)
/// before any non-vector guard — presumably callers only pass vector types;
/// verify against the framework call sites.
unsigned VETargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Fallback to the builtin breakdown logic.
  auto DefaultImpl = [&]() {
    return TargetLoweringBase::getVectorTypeBreakdownForCallingConv(
        Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
  };

  auto ElemVT = VT.getVectorElementType();
  unsigned NumElems = VT.isScalableVector() ? 0 : VT.getVectorNumElements();
  // Whether this type can only be held in a packed-mode register.
  const bool RequiresPackedRegister =
      !VT.isScalableVector() &&
      (isOverPackedType(VT) || (isPackableElemVT(ElemVT) && NumElems > 256));

  // Use the default for anything that is not fastcc, is scalable, or is
  // already a legal register type (unless packed mode must reshape it).
  if (CC != CallingConv::Fast || VT.isScalableVector() ||
      (isVectorRegisterVT(VT) &&
       !(Subtarget->hasPackedMode() && RequiresPackedRegister)))
    return DefaultImpl();

  // fastcc - map everything to vregs.
  auto LK = getCustomTypeConversion(Context, VT);
  // Non-custom converted type - back to builtin logic.
  if (!LK.hasValue())
    return DefaultImpl();

  // Compute the fixed point of the custom type conversion rules.
  // We want to have the same vector layout inside functions as well as across
  // function boundaries.

  // IntermediateVT : used to copy the parts.
  IntermediateVT = VT;
  NumIntermediates = 1;

  EVT NextVT;
  do {
    NextVT = LK->second;
    auto LTA = LK->first;

    switch (LTA) {
    default:
      // Any other conversion step is not modeled here - use the builtin.
      return DefaultImpl();

    case LegalizeTypeAction::TypePromoteFloat:
    case LegalizeTypeAction::TypePromoteInteger:
      // Promote elements across call boundaries.
      IntermediateVT = NextVT;
      break;

    case LegalizeTypeAction::TypeWidenVector:
      // Retain all information about the original vector length.
      // That is, keep the IntermediateVT at the original vector length if
      // possible
      break;

    case LegalizeTypeAction::TypeSplitVector:
      // The last split results in the intermediate VT used for copying vectors
      // at calls. Each split doubles the number of intermediate parts.
      IntermediateVT = NextVT;
      NumIntermediates *= 2;
      break;
    }

    // Continue converting until the type is no longer custom converted.
    LK = getCustomTypeConversion(Context, NextVT);
  } while (LK.hasValue());

  RegisterVT = NextVT.getSimpleVT();

  // Must converge in a valid RegisterVT.
  return NumIntermediates;
}
Loading