
[RISCV][GISEL] Legalize G_INSERT_SUBVECTOR #108859


Merged: 5 commits, Oct 21, 2024

12 changes: 12 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -811,6 +811,18 @@ class GExtractSubvector : public GenericMachineInstr {
}
};

/// Represents an insert subvector.
class GInsertSubvector : public GenericMachineInstr {
public:
Register getBigVec() const { return getOperand(1).getReg(); }
Register getSubVec() const { return getOperand(2).getReg(); }
uint64_t getIndexImm() const { return getOperand(3).getImm(); }

static bool classof(const MachineInstr *MI) {
return MI->getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR;
}
};

/// Represents a freeze.
class GFreeze : public GenericMachineInstr {
public:
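A minimal usage sketch for the new wrapper (hypothetical call site, not part of the patch; assumes a MachineInstr &MI is in scope). The accessors map 1:1 onto the G_INSERT_SUBVECTOR operands:

if (auto *IS = dyn_cast<GInsertSubvector>(&MI)) {
  Register BigVec = IS->getBigVec();  // operand 1: vector being inserted into
  Register SubVec = IS->getSubVec();  // operand 2: subvector being inserted
  uint64_t Idx = IS->getIndexImm();   // operand 3: starting element index
  // ... legalize, combine, or select based on these operands ...
}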
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -380,6 +380,8 @@ class LegalizerHelper {
LLT CastTy);
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
LLT CastTy);
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
LLT CastTy);

LegalizeResult lowerConstant(MachineInstr &MI);
LegalizeResult lowerFConstant(MachineInstr &MI);
100 changes: 100 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3276,6 +3276,33 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_INSERT_SUBVECTOR: {
if (TypeIdx != 0)
return UnableToLegalize;

GInsertSubvector &IS = cast<GInsertSubvector>(MI);
Register BigVec = IS.getBigVec();
Register SubVec = IS.getSubVec();

LLT SubVecTy = MRI.getType(SubVec);
LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());

// Widen the G_INSERT_SUBVECTOR
auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
IS.getIndexImm());

// Truncate back down
auto SplatZero = MIRBuilder.buildSplatVector(
WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
SplatZero);

MI.eraseFromParent();

return Legalized;
}
}
}
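The widening case above leans on a common GlobalISel idiom for getting back to an i1 vector: instead of emitting a vector truncate, it compares the widened insert result against a zero splat. A standalone sketch of that idiom (hypothetical helper name; the MachineIRBuilder calls are the same ones used above):

static Register truncWideVecToI1(MachineIRBuilder &MIB, LLT I1VecTy,
                                 Register WideVec, LLT WideTy) {
  // Zero-extended i1 lanes are 0 or 1, so "!= 0" recovers the original mask.
  auto Zero = MIB.buildConstant(WideTy.getElementType(), 0);
  auto SplatZero = MIB.buildSplatVector(WideTy, Zero);
  return MIB.buildICmp(CmpInst::ICMP_NE, I1VecTy, WideVec, SplatZero).getReg(0);
}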

@@ -3725,6 +3752,77 @@ LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}

/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
///
/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
/// <vscale x 8 x i1>,
/// N
///
/// ===>
///
/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
/// <vscale x 1 x i8>, N / 8
/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
LLT CastTy) {
auto ES = cast<GInsertSubvector>(&MI);

if (!CastTy.isVector())
return UnableToLegalize;

if (TypeIdx != 0)
return UnableToLegalize;

Register Dst = ES->getReg(0);
Register BigVec = ES->getBigVec();
Register SubVec = ES->getSubVec();
uint64_t Idx = ES->getIndexImm();

MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

LLT DstTy = MRI.getType(Dst);
LLT BigVecTy = MRI.getType(BigVec);
LLT SubVecTy = MRI.getType(SubVec);

if (DstTy == CastTy)
return Legalized;

if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
return UnableToLegalize;

ElementCount DstTyEC = DstTy.getElementCount();
ElementCount BigVecTyEC = BigVecTy.getElementCount();
ElementCount SubVecTyEC = SubVecTy.getElementCount();
auto DstTyMinElts = DstTyEC.getKnownMinValue();
auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();

unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
if (CastEltSize < DstEltSize)
return UnableToLegalize;

auto AdjustAmt = CastEltSize / DstEltSize;
if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
return UnableToLegalize;

Idx /= AdjustAmt;
BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
auto PromotedIS =
MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
MIRBuilder.buildBitcast(Dst, PromotedIS);

ES->eraseFromParent();
return Legalized;
}
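A worked instance of the index adjustment performed above, using the <vscale x 16 x i1> example from the function comment (values assumed for illustration): with an i1 destination element and an i8 cast element, AdjustAmt is 8, so an insertion index expressed in i1 elements must be a multiple of 8 and becomes an index in i8 elements after the bitcast.

// Sketch only (assumed values); mirrors the AdjustAmt / Idx computation above.
LLT DstEltTy = LLT::scalar(1);                    // element type before the bitcast
LLT CastEltTy = LLT::scalar(8);                   // element type after the bitcast
unsigned CastEltSize = CastEltTy.getSizeInBits(); // 8
unsigned DstEltSize = DstEltTy.getSizeInBits();   // 1
unsigned AdjustAmt = CastEltSize / DstEltSize;    // 8
uint64_t Idx = 8;                                 // insertion index in i1 elements
assert(Idx % AdjustAmt == 0 && "index must land on an i8 boundary");
uint64_t CastIdx = Idx / AdjustAmt;               // 1, in i8 elements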

LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
Register DstReg = LoadMI.getDstReg();
@@ -4033,6 +4131,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
return bitcastConcatVector(MI, TypeIdx, CastTy);
case TargetOpcode::G_EXTRACT_SUBVECTOR:
return bitcastExtractSubvector(MI, TypeIdx, CastTy);
case TargetOpcode::G_INSERT_SUBVECTOR:
return bitcastInsertSubvector(MI, TypeIdx, CastTy);
default:
return UnableToLegalize;
}
142 changes: 138 additions & 4 deletions llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -615,6 +615,12 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));

getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
.customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
typeIsLegalBoolVec(1, BoolVecTys, ST)))
.customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));

getLegacyLegalizerInfo().computeTables();
}

@@ -834,9 +840,7 @@ static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
/// Gets the two common "VL" operands: an all-ones mask and the vector length.
/// VecTy is a scalable vector type.
static std::pair<MachineInstrBuilder, MachineInstrBuilder>
buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
MachineRegisterInfo &MRI) {
LLT VecTy = Dst.getLLTTy(MRI);
buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
assert(VecTy.isScalableVector() && "Expecting scalable container type");
const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
LLT XLenTy(STI.getXLenVT());
@@ -890,7 +894,7 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
// Handle case of s64 element vectors on rv32
if (XLenTy.getSizeInBits() == 32 &&
VecTy.getElementType().getSizeInBits() == 64) {
auto [_, VL] = buildDefaultVLOps(Dst, MIB, MRI);
auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
MRI);
MI.eraseFromParent();
@@ -1025,6 +1029,134 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
return true;
}

bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
LegalizerHelper &Helper,
MachineIRBuilder &MIB) const {
GInsertSubvector &IS = cast<GInsertSubvector>(MI);

MachineRegisterInfo &MRI = *MIB.getMRI();

Register Dst = IS.getReg(0);
Register BigVec = IS.getBigVec();
Register LitVec = IS.getSubVec();
uint64_t Idx = IS.getIndexImm();

LLT BigTy = MRI.getType(BigVec);
LLT LitTy = MRI.getType(LitVec);

if (Idx == 0 ||
MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
return true;

// We don't have the ability to slide mask vectors up indexed by their i1
// elements; the smallest we can do is i8. Often we are able to bitcast to
// equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
// vectors and truncate back down after the insert.
if (LitTy.getElementType() == LLT::scalar(1)) {
auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
Comment on lines +1055 to +1058

Contributor:

Could do this in the legalization rule instead of in the custom logic

Contributor Author (michaelmaitland):

Are you sure? Above this code is

if (Idx == 0 ||
      MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)

This means that the legalization rule needs to check the immediate value to make sure it's not zero, which I couldn't figure out how to do, and we need to check that BigVec is not a G_IMPLICIT_DEF, which I also don't know how to do as part of the legalization rule.

Contributor:

Yes, if you require that, you can't do it in the rule. But do you really need this to be in the rule? I would expect folding out the undef part to only require an optimizing combine.

Contributor Author (michaelmaitland, Oct 2, 2024):

Maybe we can do the undef check in a combine, but I still don't think it's possible to check that the index is not zero, is it? I don't think it's possible to have a legalIf that checks whether the idx is zero.

Contributor Author (michaelmaitland, Oct 2, 2024):

Also, if we promote the INSERT_SUBVECTOR to do the combine later, I am concerned that we will be left with an INSERT_SUBVECTOR that has a larger type than needed.
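
For reference on this thread: the predicate passed to legalIf/customIf receives only a LegalityQuery (opcode, operand LLTs, and memory-operand descriptors), not the MachineInstr itself, so neither the index immediate nor the G_IMPLICIT_DEF producer of BigVec can be tested in the rule. A sketch of that limitation (illustrative only, not part of the patch):

getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
    .customIf([](const LegalityQuery &Query) {
      LLT BigTy = Query.Types[0]; // big (destination) vector type
      LLT LitTy = Query.Types[1]; // inserted subvector type
      // No Query.MI and no access to immediates here, so the Idx == 0 /
      // G_IMPLICIT_DEF checks stay in legalizeInsertSubvector() (or would
      // move to a combine, as discussed above).
      return BigTy.isScalableVector() && LitTy.isScalableVector();
    });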

return Helper.bitcast(
IS, 0,
LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));

// We can't slide this mask vector up indexed by its i1 elements.
// This poses a problem when we wish to insert a scalable vector which
// can't be re-expressed as a larger type. Just choose the slow path and
// extend to a larger type, then truncate back down.
LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
return Helper.widenScalar(IS, 0, ExtBigTy);
}

const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
unsigned SubRegIdx, RemIdx;
std::tie(SubRegIdx, RemIdx) =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);

TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
assert(isPowerOf2_64(
STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
bool ExactlyVecRegSized =
STI.expandVScale(LitTy.getSizeInBits())
.isKnownMultipleOf(STI.expandVScale(VecRegSize));

// If the Idx has been completely eliminated and this subvector's size is a
// vector register or a multiple thereof, or the surrounding elements are
// undef, then this is a subvector insert which naturally aligns to a vector
// register. These can easily be handled using subregister manipulation.
if (RemIdx == 0 && ExactlyVecRegSized)
return true;

// If the subvector is smaller than a vector register, then the insertion
// must preserve the undisturbed elements of the register. We do this by
// lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
// (which resolves to a subregister copy), performing a VSLIDEUP to place the
// subvector within the vector register, and an INSERT_SUBVECTOR of that
// LMUL=1 type back into the larger vector (resolving to another subregister
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
// to avoid allocating a large register group to hold our subvector.

// VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
// OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
// (in our case undisturbed). This means we can set up a subvector insertion
// where OFFSET is the insertion offset, and the VL is the OFFSET plus the
// size of the subvector.
const LLT XLenTy(STI.getXLenVT());
LLT InterLitTy = BigTy;
Register AlignedExtract = BigVec;
unsigned AlignedIdx = Idx - RemIdx;
if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
getLMUL1Ty(BigTy).getSizeInBits())) {
InterLitTy = getLMUL1Ty(BigTy);
// Extract a subvector equal to the nearest full vector register type. This
// should resolve to a G_EXTRACT on a subreg.
AlignedExtract =
MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
}

auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
LitVec, 0);

auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());

// If we're inserting into the lowest elements, use a tail undisturbed
// vmv.v.v.
MachineInstrBuilder Inserted;
bool NeedInsertSubvec =
TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
Register InsertedDst =
NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
if (RemIdx == 0) {
Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
{AlignedExtract, Insert, VL});
} else {
auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
// Construct the vector length corresponding to RemIdx + length(LitTy).
VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
// Use tail agnostic policy if we're inserting over InterLitTy's tail.
ElementCount EndIndex =
ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
if (STI.expandVScale(EndIndex) ==
STI.expandVScale(InterLitTy.getElementCount()))
Policy = RISCVII::TAIL_AGNOSTIC;

Inserted =
MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
{AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
}

// If required, insert this subvector back into the correct vector register.
// This should resolve to an INSERT_SUBREG instruction.
if (NeedInsertSubvec)
MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);

MI.eraseFromParent();
return true;
}
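A worked trace of the slide-up path above, under assumed inputs (LitVec : <vscale x 1 x i32>, BigVec : <vscale x 4 x i32>, Idx = 1) and assuming decomposeSubvectorInsertExtractToSubRegs reports RemIdx = 1 for that case; illustrative only, not generated output:

// InterLitTy     = getLMUL1Ty(<vscale x 4 x i32>) = <vscale x 2 x i32>
// AlignedIdx     = Idx - RemIdx = 0
// AlignedExtract = G_EXTRACT_SUBVECTOR BigVec, 0            (subregister copy)
// Insert         = G_INSERT_SUBVECTOR undef, LitVec, 0
// SlideupAmt     = G_VSCALE 1                               (RemIdx)
// VL             = SlideupAmt + G_VSCALE 1                  (RemIdx + length(LitTy))
// Policy         = TAIL_AGNOSTIC, since RemIdx + length(LitTy) fills InterLitTy
// Inserted       = G_VSLIDEUP_VL AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy
// Dst            = G_INSERT_SUBVECTOR BigVec, Inserted, 0   (back into the register group)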

bool RISCVLegalizerInfo::legalizeCustom(
LegalizerHelper &Helper, MachineInstr &MI,
LostDebugLocObserver &LocObserver) const {
@@ -1092,6 +1224,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
return legalizeSplatVector(MI, MIRBuilder);
case TargetOpcode::G_EXTRACT_SUBVECTOR:
return legalizeExtractSubvector(MI, MIRBuilder);
case TargetOpcode::G_INSERT_SUBVECTOR:
return legalizeInsertSubvector(MI, Helper, MIRBuilder);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, Helper, MIRBuilder);
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -47,6 +47,8 @@ class RISCVLegalizerInfo : public LegalizerInfo {
bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool legalizeExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool legalizeInsertSubvector(MachineInstr &MI, LegalizerHelper &Helper,
MachineIRBuilder &MIB) const;
bool legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper,
MachineIRBuilder &MIB) const;
};
17 changes: 17 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -67,3 +67,20 @@ def G_VSLIDEDOWN_VL : RISCVGenericInstruction {
}
def : GINodeEquiv<G_VSLIDEDOWN_VL, riscv_slidedown_vl>;

// Pseudo equivalent to a RISCVISD::VMV_V_V_VL
def G_VMV_V_V_VL : RISCVGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$vec, type2:$vl);
let hasSideEffects = false;
}
def : GINodeEquiv<G_VMV_V_V_VL, riscv_vmv_v_v_vl>;

// Pseudo equivalent to a RISCVISD::VSLIDEUP_VL
def G_VSLIDEUP_VL : RISCVGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$merge, type0:$vec, type1:$idx, type2:$mask,
type3:$vl, type4:$policy);
let hasSideEffects = false;
}
def : GINodeEquiv<G_VSLIDEUP_VL, riscv_slideup_vl>;
