diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 5b40e49714069..4d5cd963e0926 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1376,7 +1376,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { return TargetTTI->getShuffleCost( IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy, - AdjustMask, CostKind, 0, nullptr, {}, Shuffle); + AdjustMask, CostKind, 0, nullptr, Operands, Shuffle); } // Narrowing shuffle - perform shuffle at original wider width and @@ -1385,7 +1385,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { InstructionCost ShuffleCost = TargetTTI->getShuffleCost( IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, - VecSrcTy, AdjustMask, CostKind, 0, nullptr, {}, Shuffle); + VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle); SmallVector ExtractMask(Mask.size()); std::iota(ExtractMask.begin(), ExtractMask.end(), 0); diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 4ffa6349871ba..d7ec3c16bec21 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2631,6 +2631,16 @@ class ShuffleVectorInst : public Instruction { return isInterleaveMask(Mask, Factor, NumInputElts, StartIndexes); } + /// Check if the mask is a DE-interleave mask of the given factor + /// \p Factor like: + /// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor> + static bool isDeInterleaveMaskOfFactor(ArrayRef Mask, unsigned Factor, + unsigned &Index); + static bool isDeInterleaveMaskOfFactor(ArrayRef Mask, unsigned Factor) { + unsigned Unused; + return isDeInterleaveMaskOfFactor(Mask, Factor, Unused); + } + /// Checks if the shuffle is a bit rotation of the first operand across /// multiple subelements, e.g: /// diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp 
index 438ac1c3cc6e2..8989eabbe6df2 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -200,28 +200,6 @@ FunctionPass *llvm::createInterleavedAccessPass() { return new InterleavedAccess(); } -/// Check if the mask is a DE-interleave mask of the given factor -/// \p Factor like: -/// -static bool isDeInterleaveMaskOfFactor(ArrayRef Mask, unsigned Factor, - unsigned &Index) { - // Check all potential start indices from 0 to (Factor - 1). - for (Index = 0; Index < Factor; Index++) { - unsigned i = 0; - - // Check that elements are in ascending order by Factor. Ignore undef - // elements. - for (; i < Mask.size(); i++) - if (Mask[i] >= 0 && static_cast(Mask[i]) != Index + i * Factor) - break; - - if (i == Mask.size()) - return true; - } - - return false; -} - /// Check if the mask is a DE-interleave mask for an interleaved load. /// /// E.g. DE-interleave masks (Factor = 2) could be: @@ -238,7 +216,7 @@ static bool isDeInterleaveMask(ArrayRef Mask, unsigned &Factor, // Make sure we don't produce a load wider than the input load. 
if (Mask.size() * Factor > NumLoadElements) return false; - if (isDeInterleaveMaskOfFactor(Mask, Factor, Index)) + if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index)) return true; } @@ -333,8 +311,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( for (auto *Shuffle : Shuffles) { if (Shuffle->getType() != VecTy) return false; - if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, - Index)) + if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor( + Shuffle->getShuffleMask(), Factor, Index)) return false; assert(Shuffle->getShuffleMask().size() <= NumLoadElements); @@ -343,8 +321,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( for (auto *Shuffle : BinOpShuffles) { if (Shuffle->getType() != VecTy) return false; - if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor, - Index)) + if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor( + Shuffle->getShuffleMask(), Factor, Index)) return false; assert(Shuffle->getShuffleMask().size() <= NumLoadElements); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index cec02e2ce6338..d2babc748731a 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2978,6 +2978,31 @@ bool ShuffleVectorInst::isInterleaveMask( return true; } +/// Check if the mask is a DE-interleave mask of the given factor +/// \p Factor like: +/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor> +bool ShuffleVectorInst::isDeInterleaveMaskOfFactor(ArrayRef Mask, + unsigned Factor, + unsigned &Index) { + // Check all potential start indices from 0 to (Factor - 1). + for (unsigned Idx = 0; Idx < Factor; Idx++) { + unsigned I = 0; + + // Check that elements are in ascending order by Factor. Ignore undef + // elements. + for (; I < Mask.size(); I++) + if (Mask[I] >= 0 && static_cast(Mask[I]) != Idx + I * Factor) + break; + + if (I == Mask.size()) { + Index = Idx; + return true; + } + } + + return false; +} + /// Try to lower a vector shuffle as a bit rotation. 
/// /// Look for a repeated rotation pattern in each sub group. diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e80931a03f30b..700242b88346c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3827,9 +3827,18 @@ InstructionCost AArch64TTIImpl::getShuffleCost( Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() && Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) { + // Check for LD3/LD4 instructions, which are represented in llvm IR as + // deinterleaving-shuffle(load). The shuffle cost could potentially be free, + // but we model it with a cost of max(1, LT.first / 4) so that LD3/LD4 have + // a higher cost than just the load. + if (Args.size() >= 1 && isa(Args[0]) && + (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 3) || + ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 4))) + return std::max(1, LT.first / 4); + // Check for ST3/ST4 instructions, which are represented in llvm IR as // store(interleaving-shuffle). The shuffle cost could potentially be free, - // but we model it with a cost of LT.first so that LD3/LD3 have a higher + // but we model it with a cost of LT.first so that ST3/ST4 have a higher // cost than just the store. 
if (CxtI && CxtI->hasOneUse() && isa(*CxtI->user_begin()) && (ShuffleVectorInst::isInterleaveMask( diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll index 106f2f9edc2e4..bf81d7aa5e689 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll @@ -572,45 +572,45 @@ define void @vld3(ptr %p) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <24 x i8>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = load <48 x i8>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = load <6 x i16>, ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <12 x i16>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector 
<12 x i16> %v4i16, <12 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = load <24 x i16>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = load <48 x i16>, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_1 = shufflevector <48 x i16> 
%v16i16, <48 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = load <6 x i32>, ptr %p, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = load <12 x i32>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = load <24 x i32>, ptr %p, align 128 -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 6 for instruction: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32 = load <48 x i32>, ptr %p, align 256 -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <6 x i64>, ptr %p, align 64 ; CHECK-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> @@ -639,45 +639,45 @@ define void @vld3(ptr %p) { ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <24 x i8>, ptr %p, align 32 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = load <48 x i8>, ptr %p, align 64 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> -; 
CODESIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = load <6 x i16>, ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <12 x i16>, ptr %p, align 32 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> 
+; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = load <24 x i16>, ptr %p, align 64 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = load <48 x i16>, ptr %p, align 128 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_1 = shufflevector <48 x i16> 
%v16i16, <48 x i16> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = load <6 x i32>, ptr %p, align 32 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = load <12 x i32>, ptr %p, align 64 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = load <24 x i32>, ptr 
%p, align 128 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32 = load <48 x i32>, ptr %p, align 256 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %v2i64 = load <6 x i64>, ptr %p, align 64 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> @@ -780,55 +780,55 @@ define void @vld4(ptr %p) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 
= load <64 x i8>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32 -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x 
i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x 
i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found 
an estimated cost of 6 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256 -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> @@ -863,55 +863,55 @@ define void @vld4(ptr %p) { ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> 
undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, 
<2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> 
undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256 -; CODESIZE-NEXT: Cost Model: Found 
an estimated cost of 12 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32>