diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b74417f4606e7..87315c358b0e0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9713,6 +9713,23 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
         }
         VecCost = std::min(VecCost, IntrinsicCost);
       }
+      if (auto *SI = dyn_cast<SelectInst>(VL0)) {
+        auto *CondType =
+            getWidenedType(SI->getCondition()->getType(), VL.size());
+        unsigned CondNumElements = CondType->getNumElements();
+        unsigned VecTyNumElements = getNumElements(VecTy);
+        assert(VecTyNumElements >= CondNumElements &&
+               VecTyNumElements % CondNumElements == 0 &&
+               "Cannot vectorize Instruction::Select");
+        if (CondNumElements != VecTyNumElements) {
+          // When the condition is a scalar i1 but the operands are vectors, we
+          // need to replicate each condition lane to match their width.
+          VecCost += TTI->getShuffleCost(
+              TTI::SK_PermuteSingleSrc, CondType,
+              createReplicatedMask(VecTyNumElements / CondNumElements,
+                                   CondNumElements));
+        }
+      }
       return VecCost + CommonCost;
     };
     return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -13196,6 +13213,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           False = Builder.CreateIntCast(False, VecTy, GetOperandSignedness(2));
       }
 
+      unsigned CondNumElements = getNumElements(Cond->getType());
+      unsigned TrueNumElements = getNumElements(True->getType());
+      assert(TrueNumElements >= CondNumElements &&
+             TrueNumElements % CondNumElements == 0 &&
+             "Cannot vectorize Instruction::Select");
+      assert(TrueNumElements == getNumElements(False->getType()) &&
+             "Cannot vectorize Instruction::Select");
+      if (CondNumElements != TrueNumElements) {
+        // When the condition is a scalar i1 but the operands are vectors, we
+        // need to replicate each condition lane to match their width.
+        Cond = Builder.CreateShuffleVector(
+            Cond, createReplicatedMask(TrueNumElements / CondNumElements,
+                                       CondNumElements));
+      }
+      assert(getNumElements(Cond->getType()) == TrueNumElements &&
+             "Cannot vectorize Instruction::Select");
       Value *V = Builder.CreateSelect(Cond, True, False);
       V = FinalShuffle(V, E, VecTy);
 
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index c2dc6d0ab73b7..a6e1061189980 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -58,3 +58,33 @@ entry:
   store <8 x i16> %4, ptr %5, align 2
   ret void
 }
+
+define void @test3(ptr %x, ptr %y, ptr %z) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x ptr> poison, ptr [[X:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> [[TMP0]], ptr [[Y:%.*]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x ptr> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[X]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[Y]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]]
+; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr [[Z:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = getelementptr inbounds i32, ptr %x, i64 4
+  %1 = getelementptr inbounds i32, ptr %y, i64 4
+  %2 = load <4 x i32>, ptr %x, align 4
+  %3 = load <4 x i32>, ptr %0, align 4
+  %4 = load <4 x i32>, ptr %y, align 4
+  %5 = load <4 x i32>, ptr %1, align 4
+  %6 = icmp eq ptr %x, null
+  %7 = icmp eq ptr %y, null
+  %8 = select i1 %6, <4 x i32> %2, <4 x i32> %4
+  %9 = select i1 %7, <4 x i32> %3, <4 x i32> %5
+  %10 = getelementptr inbounds i32, ptr %z, i64 4
+  store <4 x i32> %8, ptr %z, align 4
+  store <4 x i32> %9, ptr %10, align 4
+  ret void
+}