@@ -9720,21 +9720,6 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9720
9720
Vec, Mask, VL, DL, DAG, Subtarget);
9721
9721
}
9722
9722
9723
- /// Returns true if \p LHS is known to be equal to \p RHS. If \p Subtarget
9724
- /// reports an exact VLEN, vscale is known too, so scalable counts are first
9725
- /// converted to their fixed equivalents; otherwise they are compared as-is.
9726
- static bool isKnownEQ(ElementCount LHS, ElementCount RHS,
9727
- const RISCVSubtarget &Subtarget) {
9728
- if (auto VLen = Subtarget.getRealVLen()) { // Only when an exact VLEN is reported.
9729
- const unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; // vscale implied by that VLEN.
9730
- if (LHS.isScalable())
9731
- LHS = ElementCount::getFixed(LHS.getKnownMinValue() * Vscale); // Scalable -> fixed.
9732
- if (RHS.isScalable())
9733
- RHS = ElementCount::getFixed(RHS.getKnownMinValue() * Vscale); // Scalable -> fixed.
9734
- }
9735
- return LHS == RHS; // Plain ElementCount equality on the (possibly converted) counts.
9736
- }
9737
-
9738
9723
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9739
9724
SelectionDAG &DAG) const {
9740
9725
SDValue Vec = Op.getOperand(0);
@@ -9872,29 +9857,25 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9872
9857
RemIdx = ElementCount::getScalable(Decompose.second);
9873
9858
}
9874
9859
9875
- RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(ContainerSubVecVT);
9876
- bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9877
- SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9878
- SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9879
- bool AlignedToVecReg = !IsSubVecPartReg;
9880
- if (SubVecVT.isFixedLengthVector())
9881
- AlignedToVecReg &= SubVecVT.getSizeInBits() ==
9882
- ContainerSubVecVT.getSizeInBits().getKnownMinValue() *
9883
- (*VLen / RISCV::RVVBitsPerBlock);
9860
+ TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9861
+ bool ExactlyVecRegSized =
9862
+ Subtarget.expandVScale(SubVecVT.getSizeInBits())
9863
+ .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
9884
9864
9885
9865
// 1. If the Idx has been completely eliminated and this subvector's size is
9886
9866
// a vector register or a multiple thereof, or the surrounding elements are
9887
9867
// undef, then this is a subvector insert which naturally aligns to a vector
9888
9868
// register. These can easily be handled using subregister manipulation.
9889
- // 2. If the subvector isn't exactly aligned to a vector register group, then
9890
- // the insertion must preserve the undisturbed elements of the register. We do
9891
- // this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector
9892
- // type (which resolves to a subregister copy), performing a VSLIDEUP to place
9893
- // the subvector within the vector register, and an INSERT_SUBVECTOR of that
9894
- // LMUL=1 type back into the larger vector (resolving to another subregister
9895
- // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9896
- // to avoid allocating a large register group to hold our subvector.
9897
- if (RemIdx.isZero() && (AlignedToVecReg || Vec.isUndef())) {
9869
+ // 2. If the subvector's size isn't an exact multiple of a vector register,
9870
+ // then the insertion must preserve the undisturbed elements of the register.
9871
+ // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9872
+ // vector type (which resolves to a subregister copy), performing a VSLIDEUP
9873
+ // to place the subvector within the vector register, and an INSERT_SUBVECTOR
9874
+ // of that LMUL=1 type back into the larger vector (resolving to another
9875
+ // subregister operation). See below for how our VSLIDEUP works. We go via a
9876
+ // LMUL=1 type to avoid allocating a large register group to hold our
9877
+ // subvector.
9878
+ if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
9898
9879
if (SubVecVT.isFixedLengthVector()) {
9899
9880
// We may get NoSubRegister if inserting at index 0 and the subvec
9900
9881
// container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
@@ -9941,7 +9922,8 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9941
9922
9942
9923
// Use tail agnostic policy if we're inserting over InterSubVT's tail.
9943
9924
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9944
- if (isKnownEQ(EndIndex, InterSubVT.getVectorElementCount(), Subtarget))
9925
+ if (Subtarget.expandVScale(EndIndex) ==
9926
+ Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
9945
9927
Policy = RISCVII::TAIL_AGNOSTIC;
9946
9928
9947
9929
// If we're inserting into the lowest elements, use a tail undisturbed
0 commit comments