diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 243f685cf25e2..4b081205eba10 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -140,6 +140,7 @@ class VectorCombine { bool foldShuffleOfCastops(Instruction &I); bool foldShuffleOfShuffles(Instruction &I); bool foldPermuteOfIntrinsic(Instruction &I); + bool foldShufflesOfLengthChangingShuffles(Instruction &I); bool foldShuffleOfIntrinsics(Instruction &I); bool foldShuffleToIdentity(Instruction &I); bool foldShuffleFromReductions(Instruction &I); @@ -2878,6 +2879,195 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) { return true; } +/// Try to convert a chain of length-preserving shuffles that are fed by +/// length-changing shuffles from the same source, e.g. a chain of length 3: +/// +/// "shuffle (shuffle (shuffle x, (shuffle y, undef)), +/// (shuffle y, undef)), +/// (shuffle y, undef)" +/// +/// into a single shuffle fed by a length-changing shuffle: +/// +/// "shuffle x, (shuffle y, undef)" +/// +/// Such chains arise e.g. from folding extract/insert sequences. 
+bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &I) { + FixedVectorType *TrunkType = dyn_cast(I.getType()); + if (!TrunkType) + return false; + + unsigned ChainLength = 0; + SmallVector Mask; + SmallVector YMask; + InstructionCost OldCost = 0; + InstructionCost NewCost = 0; + Value *Trunk = &I; + unsigned NumTrunkElts = TrunkType->getNumElements(); + Value *Y = nullptr; + + for (;;) { + // Match the current trunk against (commutations of) the pattern + // "shuffle trunk', (shuffle y, undef)" + ArrayRef OuterMask; + Value *OuterV0, *OuterV1; + if (ChainLength != 0 && !Trunk->hasOneUse()) + break; + if (!match(Trunk, m_Shuffle(m_Value(OuterV0), m_Value(OuterV1), + m_Mask(OuterMask)))) + break; + if (OuterV0->getType() != TrunkType) { + // This shuffle is not length-preserving, so it cannot be part of the + // chain. + break; + } + + ArrayRef InnerMask0, InnerMask1; + Value *A0, *A1, *B0, *B1; + bool Match0 = + match(OuterV0, m_Shuffle(m_Value(A0), m_Value(B0), m_Mask(InnerMask0))); + bool Match1 = + match(OuterV1, m_Shuffle(m_Value(A1), m_Value(B1), m_Mask(InnerMask1))); + bool Match0Leaf = Match0 && A0->getType() != I.getType(); + bool Match1Leaf = Match1 && A1->getType() != I.getType(); + if (Match0Leaf == Match1Leaf) { + // Only handle the case of exactly one leaf in each step. The "two leaves" + // case is handled by foldShuffleOfShuffles. 
+ break; + } + + SmallVector CommutedOuterMask; + if (Match0Leaf) { + std::swap(OuterV0, OuterV1); + std::swap(InnerMask0, InnerMask1); + std::swap(A0, A1); + std::swap(B0, B1); + llvm::append_range(CommutedOuterMask, OuterMask); + for (int &M : CommutedOuterMask) { + if (M == PoisonMaskElem) + continue; + if (M < (int)NumTrunkElts) + M += NumTrunkElts; + else + M -= NumTrunkElts; + } + OuterMask = CommutedOuterMask; + } + if (!OuterV1->hasOneUse()) + break; + + if (!isa(A1)) { + if (!Y) + Y = A1; + else if (Y != A1) + break; + } + if (!isa(B1)) { + if (!Y) + Y = B1; + else if (Y != B1) + break; + } + + auto *YType = cast(A1->getType()); + int NumLeafElts = YType->getNumElements(); + SmallVector LocalYMask(InnerMask1); + for (int &M : LocalYMask) { + if (M >= NumLeafElts) + M -= NumLeafElts; + } + + InstructionCost LocalOldCost = + TTI.getInstructionCost(cast(Trunk), CostKind) + + TTI.getInstructionCost(cast(OuterV1), CostKind); + + // Handle the initial (start of chain) case. + if (!ChainLength) { + Mask.assign(OuterMask); + YMask.assign(LocalYMask); + OldCost = NewCost = LocalOldCost; + Trunk = OuterV0; + ChainLength++; + continue; + } + + // For the non-root case, first attempt to combine masks. + SmallVector NewYMask(YMask); + bool Valid = true; + for (auto [CombinedM, LeafM] : llvm::zip(NewYMask, LocalYMask)) { + if (LeafM == -1 || CombinedM == LeafM) + continue; + if (CombinedM == -1) { + CombinedM = LeafM; + } else { + Valid = false; + break; + } + } + if (!Valid) + break; + + SmallVector NewMask; + NewMask.reserve(NumTrunkElts); + for (int M : Mask) { + if (M < 0 || M >= static_cast(NumTrunkElts)) + NewMask.push_back(M); + else + NewMask.push_back(OuterMask[M]); + } + + // Break the chain if adding this new step complicates the shuffles such + // that it would increase the new cost by more than the old cost of this + // step. 
+ InstructionCost LocalNewCost = + TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, TrunkType, + YType, NewYMask, CostKind) + + TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, TrunkType, + TrunkType, NewMask, CostKind); + + if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost) + break; + + LLVM_DEBUG({ + if (ChainLength == 1) { + dbgs() << "Found chain of shuffles fed by length-changing shuffles: " + << I << '\n'; + } + dbgs() << " next chain link: " << *Trunk << '\n' + << " old cost: " << (OldCost + LocalOldCost) + << " new cost: " << LocalNewCost << '\n'; + }); + + Mask = NewMask; + YMask = NewYMask; + OldCost += LocalOldCost; + NewCost = LocalNewCost; + Trunk = OuterV0; + ChainLength++; + } + if (ChainLength <= 1) + return false; + + if (llvm::all_of(Mask, [&](int M) { + return M < 0 || M >= static_cast(NumTrunkElts); + })) { + // Produce a canonical simplified form if all elements are sourced from Y. + for (int &M : Mask) { + if (M >= static_cast(NumTrunkElts)) + M = YMask[M - NumTrunkElts]; + } + Value *Root = + Builder.CreateShuffleVector(Y, PoisonValue::get(Y->getType()), Mask); + replaceValue(I, *Root); + return true; + } + + Value *Leaf = + Builder.CreateShuffleVector(Y, PoisonValue::get(Y->getType()), YMask); + Value *Root = Builder.CreateShuffleVector(Trunk, Leaf, Mask); + replaceValue(I, *Root); + return true; +} + /// Try to convert /// "shuffle (intrinsic), (intrinsic)" into "intrinsic (shuffle), (shuffle)". 
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) { @@ -4368,22 +4558,15 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { SmallVector Mask(NumDstElts, PoisonMaskElem); bool NeedExpOrNarrow = NumSrcElts != NumDstElts; - bool IsExtIdxInBounds = ExtIdx < NumDstElts; bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec); if (NeedDstSrcSwap) { SK = TargetTransformInfo::SK_PermuteSingleSrc; - if (!IsExtIdxInBounds && NeedExpOrNarrow) - Mask[InsIdx] = 0; - else - Mask[InsIdx] = ExtIdx; + Mask[InsIdx] = ExtIdx % NumDstElts; std::swap(DstVec, SrcVec); } else { SK = TargetTransformInfo::SK_PermuteTwoSrc; std::iota(Mask.begin(), Mask.end(), 0); - if (!IsExtIdxInBounds && NeedExpOrNarrow) - Mask[InsIdx] = NumDstElts; - else - Mask[InsIdx] = ExtIdx + NumDstElts; + Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts; } // Cost @@ -4404,14 +4587,11 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0, nullptr, {DstVec, SrcVec}); } else { - // When creating length-changing-vector, always create with a Mask whose - // first element has an ExtIdx, so that the first element of the vector - // being created is always the target to be extracted. + // When creating a length-changing-vector, always try to keep the relevant + // element in an equivalent position, so that bulk shuffles are more likely + // to be useful. 
ExtToVecMask.assign(NumDstElts, PoisonMaskElem); - if (IsExtIdxInBounds) - ExtToVecMask[ExtIdx] = ExtIdx; - else - ExtToVecMask[0] = ExtIdx; + ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx; // Add cost for expanding or narrowing NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DstVecTy, SrcVecTy, ExtToVecMask, CostKind); @@ -4799,6 +4979,8 @@ bool VectorCombine::run() { return true; if (foldPermuteOfIntrinsic(I)) return true; + if (foldShufflesOfLengthChangingShuffles(I)) + return true; if (foldShuffleOfIntrinsics(I)) return true; if (foldSelectShuffle(I)) diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll index 7a415f4cb71d0..6c92892949175 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll @@ -6,38 +6,10 @@ define <32 x i8> @extract_insert_chain(<8 x i8> %in0, <8 x i8> %in1, <8 x i8> %i ; OPT-SAME: <8 x i8> [[IN0:%.*]], <8 x i8> [[IN1:%.*]], <8 x i8> [[IN2:%.*]], <8 x i8> [[IN3:%.*]]) #[[ATTR0:[0-9]+]] { ; OPT-NEXT: [[ENTRY:.*:]] ; OPT-NEXT: [[O_1_7:%.*]] = shufflevector <8 x i8> [[IN0]], <8 x i8> [[IN1]], <32 x i32> -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_2_0:%.*]] = shufflevector <32 x i8> [[O_1_7]], <32 x i8> [[TMP1]], <32 x i32> -; OPT-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_2_1:%.*]] = shufflevector <32 x i8> [[O_2_0]], <32 x i8> [[TMP8]], <32 x i32> -; OPT-NEXT: [[TMP16:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_2_2:%.*]] = shufflevector <32 x i8> [[O_2_1]], <32 x i8> [[TMP16]], <32 x i32> -; OPT-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_2_3:%.*]] = shufflevector <32 x i8> [[O_2_2]], <32 x i8> [[TMP3]], <32 x i32> -; OPT-NEXT: [[TMP4:%.*]] = 
shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_2_4:%.*]] = shufflevector <32 x i8> [[O_2_3]], <32 x i8> [[TMP4]], <32 x i32> -; OPT-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_2_5:%.*]] = shufflevector <32 x i8> [[O_2_4]], <32 x i8> [[TMP5]], <32 x i32> -; OPT-NEXT: [[TMP6:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_2_6:%.*]] = shufflevector <32 x i8> [[O_2_5]], <32 x i8> [[TMP6]], <32 x i32> -; OPT-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_2_7:%.*]] = shufflevector <32 x i8> [[O_2_6]], <32 x i8> [[TMP7]], <32 x i32> -; OPT-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_3_0:%.*]] = shufflevector <32 x i8> [[O_2_7]], <32 x i8> [[TMP2]], <32 x i32> -; OPT-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_3_1:%.*]] = shufflevector <32 x i8> [[O_3_0]], <32 x i8> [[TMP9]], <32 x i32> -; OPT-NEXT: [[TMP10:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_3_2:%.*]] = shufflevector <32 x i8> [[O_3_1]], <32 x i8> [[TMP10]], <32 x i32> -; OPT-NEXT: [[TMP11:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_3_3:%.*]] = shufflevector <32 x i8> [[O_3_2]], <32 x i8> [[TMP11]], <32 x i32> -; OPT-NEXT: [[TMP12:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_3_4:%.*]] = shufflevector <32 x i8> [[O_3_3]], <32 x i8> [[TMP12]], <32 x i32> -; OPT-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_3_5:%.*]] = shufflevector <32 x i8> [[O_3_4]], <32 x i8> [[TMP13]], <32 x i32> -; OPT-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_3_6:%.*]] = shufflevector <32 x i8> [[O_3_5]], <32 x i8> [[TMP14]], <32 x i32> -; 
OPT-NEXT: [[TMP15:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_3_7:%.*]] = shufflevector <32 x i8> [[O_3_6]], <32 x i8> [[TMP15]], <32 x i32> +; OPT-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_7:%.*]] = shufflevector <32 x i8> [[O_1_7]], <32 x i8> [[TMP2]], <32 x i32> +; OPT-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_7:%.*]] = shufflevector <32 x i8> [[O_2_7]], <32 x i8> [[TMP3]], <32 x i32> ; OPT-NEXT: ret <32 x i8> [[O_3_7]] ; entry: @@ -116,22 +88,8 @@ entry: define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) { ; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening( ; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17 -; OPT-NEXT: [[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18 -; OPT-NEXT: [[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19 -; OPT-NEXT: [[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21 -; OPT-NEXT: [[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22 -; OPT-NEXT: [[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23 -; OPT-NEXT: [[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> -; OPT-NEXT: [[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1 -; OPT-NEXT: [[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2 -; OPT-NEXT: [[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3 -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> -; OPT-NEXT: [[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> [[TMP1]], <8 x i32> -; OPT-NEXT: [[O_5:%.*]] = insertelement <8 x i8> [[O_4]], i8 [[I_5]], i32 5 -; OPT-NEXT: [[O_6:%.*]] = insertelement <8 x i8> [[O_5]], i8 [[I_6]], i32 6 -; OPT-NEXT: [[O_7:%.*]] = insertelement <8 x i8> [[O_6]], i8 [[I_7]], i32 7 -; OPT-NEXT: ret <8 x i8> [[O_7]] +; OPT-NEXT: [[TMP1:%.*]] = shufflevector 
<32 x i8> [[IN]], <32 x i8> poison, <8 x i32> +; OPT-NEXT: ret <8 x i8> [[TMP1]] ; %i.0 = extractelement <32 x i8> %in, i64 16 %i.1 = extractelement <32 x i8> %in, i64 17 diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/shuffles-of-length-changing-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/shuffles-of-length-changing-shuffles.ll index e028b367a186c..3e5a43849cccd 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/shuffles-of-length-changing-shuffles.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/shuffles-of-length-changing-shuffles.ll @@ -4,10 +4,8 @@ define <8 x i8> @extending0(<8 x i8> %a, <4 x i8> %b) { ; OPT-LABEL: define <8 x i8> @extending0( ; OPT-SAME: <8 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; OPT-NEXT: [[EXT0:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> [[B]], <8 x i32> -; OPT-NEXT: [[EXT1:%.*]] = shufflevector <4 x i8> poison, <4 x i8> [[B]], <8 x i32> -; OPT-NEXT: [[MERGE0:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[EXT0]], <8 x i32> -; OPT-NEXT: [[MERGE1:%.*]] = shufflevector <8 x i8> [[EXT1]], <8 x i8> [[MERGE0]], <8 x i32> +; OPT-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <8 x i32> +; OPT-NEXT: [[MERGE1:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[TMP1]], <8 x i32> ; OPT-NEXT: ret <8 x i8> [[MERGE1]] ; %ext0 = shufflevector <4 x i8> %b, <4 x i8> %b, <8 x i32> @@ -36,10 +34,8 @@ define <8 x i8> @extending_conflict(<8 x i8> %a, <4 x i8> %b) { define <4 x i8> @shrinking0(<4 x i8> %a, <8 x i8> %b) { ; OPT-LABEL: define <4 x i8> @shrinking0( ; OPT-SAME: <4 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[SHRINK0:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> [[B]], <4 x i32> -; OPT-NEXT: [[SHRINK1:%.*]] = shufflevector <8 x i8> poison, <8 x i8> [[B]], <4 x i32> -; OPT-NEXT: [[MERGE0:%.*]] = shufflevector <4 x i8> [[A]], <4 x i8> [[SHRINK0]], <4 x i32> -; OPT-NEXT: [[MERGE1:%.*]] = shufflevector <4 x i8> [[MERGE0]], <4 x i8> [[SHRINK1]], <4 x i32> +; OPT-NEXT: 
[[TMP1:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> +; OPT-NEXT: [[MERGE1:%.*]] = shufflevector <4 x i8> [[A]], <4 x i8> [[TMP1]], <4 x i32> ; OPT-NEXT: ret <4 x i8> [[MERGE1]] ; %shrink0 = shufflevector <8 x i8> %b, <8 x i8> %b, <4 x i32> diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll index e85c092b1b213..228f161698bb2 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll @@ -140,10 +140,14 @@ define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) } define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 -; CHECK-NEXT: ret <2 x double> [[INS]] +; SSE-LABEL: @src_ins0_v2f64_ext3_v4f64( +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; SSE-NEXT: ret <2 x double> [[INS]] +; +; AVX-LABEL: @src_ins0_v2f64_ext3_v4f64( +; AVX-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 +; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; AVX-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 3 %ins = insertelement <2 x double> poison, double %ext, i32 0 diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll index 193ad36616a4a..e591ea55a453d 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll @@ -136,8 +136,8 @@ define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> 
%a, <4 x double> %b) { ; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 3 @@ -185,8 +185,8 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 3 diff --git a/llvm/test/Transforms/VectorCombine/X86/pr126085.ll b/llvm/test/Transforms/VectorCombine/X86/pr126085.ll index f596807027db6..d29cdb3d95c81 100644 --- a/llvm/test/Transforms/VectorCombine/X86/pr126085.ll +++ b/llvm/test/Transforms/VectorCombine/X86/pr126085.ll @@ -6,8 +6,8 @@ define i32 @test(ptr %a0) { ; CHECK-SAME: ptr [[A0:%.*]]) { ; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[A0]], align 1 ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <16 x i8> [[LOAD]], <16 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i8> [[LOAD]], i64 11 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i8> [[SHUF]], i8 [[ELT]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD]], <16 x i8> poison, 
<4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x i8> [[SHUF]], <4 x i8> [[TMP1]], <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i8> [[INS]] to i32 ; CHECK-NEXT: ret i32 [[RES]] ;