diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index ea53a1acebd1d..a28401bd84930 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3067,42 +3067,73 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { m_ConstantInt(InsIdx)))) return false; - auto *VecTy = dyn_cast(I.getType()); - if (!VecTy || SrcVec->getType() != VecTy) + auto *DstVecTy = dyn_cast(I.getType()); + auto *SrcVecTy = dyn_cast(SrcVec->getType()); + // We can try combining vectors with different element counts. + if (!DstVecTy || !SrcVecTy || + SrcVecTy->getElementType() != DstVecTy->getElementType()) return false; - unsigned NumElts = VecTy->getNumElements(); - if (ExtIdx >= NumElts || InsIdx >= NumElts) + unsigned NumDstElts = DstVecTy->getNumElements(); + unsigned NumSrcElts = SrcVecTy->getNumElements(); + if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1) return false; // Insertion into poison is a cheaper single operand shuffle. 
TargetTransformInfo::ShuffleKind SK; - SmallVector Mask(NumElts, PoisonMaskElem); - if (isa(DstVec) && !isa(SrcVec)) { + SmallVector Mask(NumDstElts, PoisonMaskElem); + + bool NeedExpOrNarrow = NumSrcElts != NumDstElts; + bool IsExtIdxInBounds = ExtIdx < NumDstElts; + bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec); + if (NeedDstSrcSwap) { SK = TargetTransformInfo::SK_PermuteSingleSrc; - Mask[InsIdx] = ExtIdx; + if (!IsExtIdxInBounds && NeedExpOrNarrow) + Mask[InsIdx] = 0; + else + Mask[InsIdx] = ExtIdx; std::swap(DstVec, SrcVec); } else { SK = TargetTransformInfo::SK_PermuteTwoSrc; std::iota(Mask.begin(), Mask.end(), 0); - Mask[InsIdx] = ExtIdx + NumElts; + if (!IsExtIdxInBounds && NeedExpOrNarrow) + Mask[InsIdx] = NumDstElts; + else + Mask[InsIdx] = ExtIdx + NumDstElts; } // Cost auto *Ins = cast(&I); auto *Ext = cast(I.getOperand(1)); InstructionCost InsCost = - TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx); + TTI.getVectorInstrCost(*Ins, DstVecTy, CostKind, InsIdx); InstructionCost ExtCost = - TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx); + TTI.getVectorInstrCost(*Ext, DstVecTy, CostKind, ExtIdx); InstructionCost OldCost = ExtCost + InsCost; - // Ignore 'free' identity insertion shuffle. - // TODO: getShuffleCost should return TCC_Free for Identity shuffles. InstructionCost NewCost = 0; - if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts)) - NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr, - {DstVec, SrcVec}); + SmallVector ExtToVecMask; + if (!NeedExpOrNarrow) { + // Ignore 'free' identity insertion shuffle. + // TODO: getShuffleCost should return TCC_Free for Identity shuffles. 
+ if (!ShuffleVectorInst::isIdentityMask(Mask, NumSrcElts)) + NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, + {DstVec, SrcVec}); + } else { + // When creating a length-changing vector, keep the extracted element in + // lane ExtIdx when that lane exists in the destination width; otherwise + // place it in lane 0. Mask[InsIdx] above selects the matching lane. + ExtToVecMask.assign(NumDstElts, PoisonMaskElem); + if (IsExtIdxInBounds) + ExtToVecMask[ExtIdx] = ExtIdx; + else + ExtToVecMask[0] = ExtIdx; + // Add cost for expanding or narrowing + NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, + DstVecTy, ExtToVecMask, CostKind); + NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind); + } + if (!Ext->hasOneUse()) NewCost += ExtCost; @@ -3113,9 +3144,16 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { if (OldCost < NewCost) return false; + if (NeedExpOrNarrow) { + if (!NeedDstSrcSwap) + SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask); + else + DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask); + } + // Canonicalize undef param to RHS to help further folds. 
if (isa(DstVec) && !isa(SrcVec)) { - ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); + ShuffleVectorInst::commuteShuffleMask(Mask, NumDstElts); std::swap(DstVec, SrcVec); } diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll new file mode 100644 index 0000000000000..0a9386c0b8db1 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll @@ -0,0 +1,196 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX + + +define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> poison, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> poison, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64( +; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, 
double [[EXT]], i32 2 +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; AVX-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> poison, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64( +; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3 +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; AVX-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> poison, double %ext, i32 3 + ret <4 x double> %ins +} + +define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64( +; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0 +; AVX-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> poison, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> 
@src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> poison, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> poison, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> poison, double %ext, i32 3 + ret <4 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> poison, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> 
@src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> poison, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> poison, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> poison, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> poison, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( +; 
CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> poison, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> poison, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> poison, double %ext, i32 1 + ret <2 x double> %ins +} + diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll new file mode 100644 index 0000000000000..41200e517f5ed --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX + + +define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x 
double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> %a, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> %a, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> %a, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> %a, double %ext, i32 3 + ret <4 x double> %ins +} + +define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64( +; SSE-NEXT: [[TMP1:%.*]] = 
shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64( +; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0 +; AVX-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> %a, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> %a, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> %a, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement 
<2 x double> %b, i32 1 + %ins = insertelement <4 x double> %a, double %ext, i32 3 + ret <4 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> %a, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> %a, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> %a, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x 
double> %a, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> %a, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> %a, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> %a, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> %a, double %ext, i32 1 + ret <2 x double> 
%ins +} + diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll index 2db1e21b3e95a..3b2aa5e6603b0 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll @@ -589,8 +589,8 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer ; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use( ; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 ; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4 -; CHECK-NEXT: [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0 -; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> poison, i32 [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %l = load <1 x i32>, ptr %p, align 4