diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5ad4da43bca7d..685861a911655 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5355,14 +5355,17 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, const Value *Vec = Op->getOperand(0); const Value *Elt = Op->getOperand(1); auto *CIdx = dyn_cast(Op->getOperand(2)); - // Early out if the index is non-constant or out-of-range. unsigned NumElts = DemandedElts.getBitWidth(); - if (!CIdx || CIdx->getValue().uge(NumElts)) - return; + APInt DemandedVecElts = DemandedElts; + bool NeedsElt = true; + // If we know the index we are inserting too, clear it from Vec check. + if (CIdx && CIdx->getValue().ult(NumElts)) { + DemandedVecElts.clearBit(CIdx->getZExtValue()); + NeedsElt = DemandedElts[CIdx->getZExtValue()]; + } - unsigned EltIdx = CIdx->getZExtValue(); // Do we demand the inserted element? - if (DemandedElts[EltIdx]) { + if (NeedsElt) { computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q); // If we don't know any bits, early out. if (Known.isUnknown()) @@ -5371,10 +5374,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, Known.KnownFPClasses = fcNone; } - // We don't need the base vector element that has been inserted. - APInt DemandedVecElts = DemandedElts; - DemandedVecElts.clearBit(EltIdx); - if (!!DemandedVecElts) { + // Do we need anymore elements from Vec? + if (!DemandedVecElts.isZero()) { KnownFPClass Known2; computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, Depth + 1, Q); diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index 4df647cf3bb5b..4370cea3c285c 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -1513,6 +1513,25 @@ define <4 x float> @insertelement_constant_chain() { ret <4 x float> %ins.3 } +define <4 x float> @insertelement_non_constant_chain(i32 %idx) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf nzero sub) <4 x float> @insertelement_non_constant_chain +; CHECK-SAME: (i32 [[IDX:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x float> poison, float 1.000000e+00, i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> [[INS_0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float -9.000000e+00, i32 2 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <4 x float> [[INS_2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <4 x float> [[INS_2]], float 4.000000e+00, i32 [[IDX]] +; CHECK-NEXT: ret <4 x float> [[INS_3]] +; + %ins.0 = insertelement <4 x float> poison, float 1.0, i32 0 + %ins.1 = insertelement <4 x float> %ins.0, float 0.0, i32 1 + %ins.2 = insertelement <4 x float> %ins.1, float -9.0, i32 2 + %ins.3 = insertelement <4 x float> %ins.2, float 3.0, i32 3 + %ins.4 = insertelement <4 x float> %ins.2, float 4.0, i32 %idx + ret <4 x float> %ins.4 +} + define @insertelement_scalable_constant_chain() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define @insertelement_scalable_constant_chain @@ -1582,7 +1601,7 @@ define float @insertelement_extractelement_unknown(<4 x float> nofpclass(zero) % define <4 x float> @insertelement_index_oob_chain() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define <4 x float> @insertelement_index_oob_chain +; CHECK-LABEL: define nofpclass(nan ninf nzero sub norm) <4 x float> @insertelement_index_oob_chain ; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> zeroinitializer, float 0x7FF0000000000000, i32 4 ; CHECK-NEXT: ret <4 x float> [[INSERT]]