[RISCV][GISEL] Legalize G_INSERT_SUBVECTOR #108859
Changes from all commits: 490594c, b9a272f, 33a06a3, 7283e05, 0c558af
@@ -615,6 +615,12 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
           all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
               typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));
 
+  getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
+      .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
+                    typeIsLegalBoolVec(1, BoolVecTys, ST)))
+      .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+                    typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
+
   getLegacyLegalizerInfo().computeTables();
 }
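The new rule marks G_INSERT_SUBVECTOR as custom whenever both the big vector (type index 0) and the subvector (type index 1) are legal RVV types, routing the instruction to legalizeCustom further down; chaining two customIf calls acts as an "or", since either predicate firing selects the custom action. For readers unfamiliar with these helpers, a rough sketch of what a predicate such as typeIsLegalBoolVec amounts to (a from-memory sketch of an existing helper elsewhere in RISCVLegalizerInfo.cpp, not part of this diff, and possibly differing in detail):

// Sketch only: combines a membership test on the queried type index with a
// subtarget check, so .customIf(all(...)) fires only for legal RVV types.
static LegalityPredicate typeIsLegalBoolVec(unsigned TypeIdx,
                                            std::initializer_list<LLT> BoolVecTys,
                                            const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };
  return all(typeInSet(TypeIdx, BoolVecTys), P);
}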
@@ -834,9 +840,7 @@ static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
 /// Gets the two common "VL" operands: an all-ones mask and the vector length.
 /// VecTy is a scalable vector type.
 static std::pair<MachineInstrBuilder, MachineInstrBuilder>
-buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
-                  MachineRegisterInfo &MRI) {
-  LLT VecTy = Dst.getLLTTy(MRI);
+buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
   assert(VecTy.isScalableVector() && "Expecting scalable container type");
   const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
   LLT XLenTy(STI.getXLenVT());
@@ -890,7 +894,7 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
   // Handle case of s64 element vectors on rv32
   if (XLenTy.getSizeInBits() == 32 &&
       VecTy.getElementType().getSizeInBits() == 64) {
-    auto [_, VL] = buildDefaultVLOps(Dst, MIB, MRI);
+    auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
     buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
                              MRI);
     MI.eraseFromParent();
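With the refactored signature, buildDefaultVLOps takes the vector LLT directly, so callers that only hold a register (as in legalizeSplatVector above) pass MRI.getType(Dst), while the new insert-subvector code below passes a type it computed itself. A minimal usage sketch, where Reg is a stand-in for any scalable-vector virtual register (not a name from this patch):

// Hypothetical call site; Reg is assumed to be a scalable-vector vreg.
LLT VecTy = MRI.getType(Reg);
// Yields the all-ones mask and the VLMAX vector-length operand for VecTy.
auto [Mask, VL] = buildDefaultVLOps(VecTy, MIB, MRI);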
@@ -1025,6 +1029,134 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
   return true;
 }
 
+bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
+                                                 LegalizerHelper &Helper,
+                                                 MachineIRBuilder &MIB) const {
+  GInsertSubvector &IS = cast<GInsertSubvector>(MI);
+
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+
+  Register Dst = IS.getReg(0);
+  Register BigVec = IS.getBigVec();
+  Register LitVec = IS.getSubVec();
+  uint64_t Idx = IS.getIndexImm();
+
+  LLT BigTy = MRI.getType(BigVec);
+  LLT LitTy = MRI.getType(LitVec);
+
+  if (Idx == 0 ||
+      MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+    return true;
+
+  // We don't have the ability to slide mask vectors up indexed by their i1
+  // elements; the smallest we can do is i8. Often we are able to bitcast to
+  // equivalent i8 vectors. Otherwise, we must zeroextend to equivalent i8
+  // vectors and truncate down after the insert.
+  if (LitTy.getElementType() == LLT::scalar(1)) {
+    auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+    auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+    if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
Comment on lines +1055 to +1058

Reviewer: Could do this in the legalization rule instead of in the custom logic.

Author: Are you sure? Right above this code is the early return for Idx == 0 or a G_IMPLICIT_DEF BigVec. This means that the legalization rule would need to check the immediate value to make sure it's not zero, which I couldn't figure out how to do, and we would need to check that the BigVec is not a G_IMPLICIT_DEF, which I also don't know how to do as part of the legalization rule.

Reviewer: Yes, if you require that, you can't do it in the rule. But do you really need this to be in the rule? I would expect folding out the undef part to only require an optimizing combine.

Author: Maybe we can do the undef check in a combine, but I still don't think it's possible to check that the index is not zero, is it? I don't think it's possible to have a legalIf that checks when the idx is zero.

Author: Also, if we promote the INSERT_SUBVECTOR to do the combine later, I am concerned that we will be left with an INSERT_SUBVECTOR that has a larger type than needed.
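For reference on the point above: a legalization-rule predicate only receives a LegalityQuery, which exposes the opcode, the LLTs at each type index, and memory-operand descriptors, but not the instruction's other operands, so neither the insert index nor the producer of BigVec can be inspected from a rule. An abbreviated paraphrase of the struct (trimmed and from memory; see llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h for the real definition):

// Abbreviated paraphrase of LegalityQuery; field list trimmed. There is no
// access to immediate operands such as the G_INSERT_SUBVECTOR index, nor to
// the defining instruction of a source register.
struct LegalityQuery {
  unsigned Opcode;              // e.g. TargetOpcode::G_INSERT_SUBVECTOR
  ArrayRef<LLT> Types;          // the LLTs at each type index
  ArrayRef<MemDesc> MMODescrs;  // memory operand descriptors (loads/stores)
};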
+      return Helper.bitcast(
+          IS, 0,
+          LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));
+
+    // We can't slide this mask vector up indexed by its i1 elements.
+    // This poses a problem when we wish to insert a scalable vector which
+    // can't be re-expressed as a larger type. Just choose the slow path and
+    // extend to a larger type, then truncate back down.
+    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+    return Helper.widenScalar(IS, 0, ExtBigTy);
+  }
+
+  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+  unsigned SubRegIdx, RemIdx;
+  std::tie(SubRegIdx, RemIdx) =
+      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+          getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
+
+  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
+  assert(isPowerOf2_64(
+      STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
+  bool ExactlyVecRegSized =
+      STI.expandVScale(LitTy.getSizeInBits())
+          .isKnownMultipleOf(STI.expandVScale(VecRegSize));
+
+  // If the Idx has been completely eliminated and this subvector's size is a
+  // vector register or a multiple thereof, or the surrounding elements are
+  // undef, then this is a subvector insert which naturally aligns to a vector
+  // register. These can easily be handled using subregister manipulation.
+  if (RemIdx == 0 && ExactlyVecRegSized)
+    return true;
+
+  // If the subvector is smaller than a vector register, then the insertion
+  // must preserve the undisturbed elements of the register. We do this by
+  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
+  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
+  // subvector within the vector register, and an INSERT_SUBVECTOR of that
+  // LMUL=1 type back into the larger vector (resolving to another subregister
+  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
+  // to avoid allocating a large register group to hold our subvector.
+
+  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
+  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
+  // (in our case undisturbed). This means we can set up a subvector insertion
+  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
+  // size of the subvector.
+  const LLT XLenTy(STI.getXLenVT());
+  LLT InterLitTy = BigTy;
+  Register AlignedExtract = BigVec;
+  unsigned AlignedIdx = Idx - RemIdx;
+  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
+                          getLMUL1Ty(BigTy).getSizeInBits())) {
+    InterLitTy = getLMUL1Ty(BigTy);
+    // Extract a subvector equal to the nearest full vector register type. This
+    // should resolve to a G_EXTRACT on a subreg.
+    AlignedExtract =
+        MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
+  }
+
+  auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
+                                         LitVec, 0);
+
+  auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
+  auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
+
+  // If we're inserting into the lowest elements, use a tail undisturbed
+  // vmv.v.v.
+  MachineInstrBuilder Inserted;
+  bool NeedInsertSubvec =
+      TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
+  Register InsertedDst =
+      NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
+  if (RemIdx == 0) {
+    Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
+                              {AlignedExtract, Insert, VL});
+  } else {
+    auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
+    // Construct the vector length corresponding to RemIdx + length(LitTy).
+    VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
+    // Use tail agnostic policy if we're inserting over InterLitTy's tail.
+    ElementCount EndIndex =
+        ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
+    uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+    if (STI.expandVScale(EndIndex) ==
+        STI.expandVScale(InterLitTy.getElementCount()))
+      Policy = RISCVII::TAIL_AGNOSTIC;
+
+    Inserted =
+        MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
+                       {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
+  }
+
+  // If required, insert this subvector back into the correct vector register.
+  // This should resolve to an INSERT_SUBREG instruction.
+  if (NeedInsertSubvec)
+    MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool RISCVLegalizerInfo::legalizeCustom(
     LegalizerHelper &Helper, MachineInstr &MI,
     LostDebugLocObserver &LocObserver) const {
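Two details of the slide-up path above are worth spelling out. First, the intermediate type comes from getLMUL1Ty, a helper already present in this file (introduced with the G_EXTRACT_SUBVECTOR legalization); the sketch below is reconstructed from memory and may differ in minor details. Second, the vslideup's VL is built as vscale x RemIdx plus vscale x LitTy's minimum element count, so only the lanes that receive the subvector are written, with the remainder either left undisturbed or treated as tail-agnostic when the insert reaches the end of InterLitTy.

// Reconstructed sketch of the existing getLMUL1Ty helper: the scalable vector
// type whose known minimum size is exactly one vector register (LMUL = 1) for
// the given element type.
static LLT getLMUL1Ty(LLT VecTy) {
  assert(VecTy.getElementType().getSizeInBits() <= 64 &&
         "Unexpected vector LLT");
  return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
                                  VecTy.getElementType().getSizeInBits(),
                              VecTy.getElementType());
}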
@@ -1092,6 +1224,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
     return legalizeSplatVector(MI, MIRBuilder);
   case TargetOpcode::G_EXTRACT_SUBVECTOR:
     return legalizeExtractSubvector(MI, MIRBuilder);
+  case TargetOpcode::G_INSERT_SUBVECTOR:
+    return legalizeInsertSubvector(MI, Helper, MIRBuilder);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE:
     return legalizeLoadStore(MI, Helper, MIRBuilder);
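Not visible in the hunks shown here: the new member function also needs a declaration in RISCVLegalizerInfo.h. A hypothetical sketch of that line, mirroring the definition above:

// Hypothetical declaration inside class RISCVLegalizerInfo (RISCVLegalizerInfo.h).
bool legalizeInsertSubvector(MachineInstr &MI, LegalizerHelper &Helper,
                             MachineIRBuilder &MIB) const;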