diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index bd1bee3a88887..167d36b687580 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2426,6 +2426,12 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe { return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized()); } + /// Set mask number \p Idx to \p V. Normalized blends have no mask 0. + void setMask(unsigned Idx, VPValue *V) { + assert((Idx > 0 || !isNormalized()) && "First index has no mask!"); + Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V); + } + void execute(VPTransformState &State) override { llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends"); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 2e9a36adbbf3c..0c27d535b680e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -67,10 +67,6 @@ class VPPredicator { return EdgeMaskCache[{Src, Dst}] = Mask; } - /// Given a phi \p PhiR, try to see if its incoming blocks all share a common - /// edge and return its mask. - VPValue *findCommonEdgeMask(const VPPhi *PhiR) const; - public: /// Returns the precomputed predicate of the edge from \p Src to \p Dst. 
VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const { @@ -232,21 +228,6 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) { setEdgeMask(Src, DefaultDst, DefaultMask); } -VPValue *VPPredicator::findCommonEdgeMask(const VPPhi *PhiR) const { - VPValue *EdgeMask = getEdgeMask(PhiR->getIncomingBlock(0), PhiR->getParent()); - VPValue *CommonEdgeMask; - if (!EdgeMask || - !match(EdgeMask, m_LogicalAnd(m_VPValue(CommonEdgeMask), m_VPValue()))) - return nullptr; - for (const VPBasicBlock *InVPBB : drop_begin(PhiR->incoming_blocks())) { - EdgeMask = getEdgeMask(InVPBB, PhiR->getParent()); - assert(EdgeMask && "Both null and non-null edge masks found"); - if (!match(EdgeMask, m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue()))) - return nullptr; - } - return CommonEdgeMask; -} - void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { SmallVector Phis; for (VPRecipeBase &R : VPBB->phis()) @@ -258,7 +239,6 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { // be duplications since this is a simple recursive scan, but future // optimizations will clean it up. - VPValue *CommonEdgeMask = findCommonEdgeMask(PhiR); SmallVector OperandsWithMask; for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) { OperandsWithMask.push_back(InVPV); @@ -269,14 +249,6 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) { break; } - // If all incoming blocks share a common edge, remove it from the mask. 
- if (CommonEdgeMask) { - VPValue *X; - if (match(EdgeMask, - m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue(X)))) - EdgeMask = X; - } - OperandsWithMask.push_back(EdgeMask); } PHINode *IRPhi = cast_or_null(PhiR->getUnderlyingValue()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2560b1af2e5aa..7de94717f56e5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1108,6 +1108,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith( Builder.createLogicalAnd(X, Builder.createOr(Y, Z))); + // x && !x -> false + if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X))))) + return Def->replaceAllUsesWith(Plan->getOrAddLiveIn( + ConstantInt::getFalse(VPTypeAnalysis(*Plan).inferScalarType(Def)))); + if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) return Def->replaceAllUsesWith(X); @@ -1318,6 +1323,23 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { } } +/// If all masks of \p Blend are logical-ands sharing the same first operand, +/// replace each mask by its second operand. Normalized blends are skipped. +static void removeCommonBlendMask(VPBlendRecipe *Blend) { + if (Blend->isNormalized()) + return; + VPValue *CommonEdgeMask; + if (!match(Blend->getMask(0), + m_LogicalAnd(m_VPValue(CommonEdgeMask), m_VPValue()))) + return; + for (unsigned I = 0; I < Blend->getNumIncomingValues(); I++) + if (!match(Blend->getMask(I), + m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue()))) + return; + for (unsigned I = 0; I < Blend->getNumIncomingValues(); I++) + Blend->setMask(I, Blend->getMask(I)->getDefiningRecipe()->getOperand(1)); +} + /// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes /// to make sure the masks are simplified. 
static void simplifyBlends(VPlan &Plan) { @@ -1328,6 +1350,8 @@ static void simplifyBlends(VPlan &Plan) { if (!Blend) continue; + removeCommonBlendMask(Blend); + // Try to remove redundant blend recipes. SmallPtrSet UniqueValues; if (Blend->isNormalized() || !match(Blend->getMask(0), m_False())) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index 8b212f4ef9706..b330b6cd82c0a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -26,13 +26,9 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1001, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i32 [[TMP25]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP25]] to i64 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv8i32() -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult [[TMP10]], [[BROADCAST_SPLAT8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP9:%.*]] = select [[TMP13]], [[TMP14]], zeroinitializer @@ -42,9 +38,8 @@ define 
void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP19:%.*]] = select [[TMP17]], [[TMP18]], zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP14]], splat (i1 true) ; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP13]], [[TMP20]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP11]], [[TMP21]], zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP21]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] ; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0( zeroinitializer, ptr align 2 [[TMP24]], [[TMP22]], i32 [[TMP25]]) diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 306bdc0030154..8c7624e570cf5 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -130,8 +130,7 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI]], <4 x i64> undef +; CHECK-NEXT: [[PREDPHI1:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI1]], <4 x i64> undef ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP1]] @@ -146,7 +145,7 @@ define void @blend_chain_iv(i1 %c) { ; 
CHECK-NEXT: store i16 0, ptr [[TMP6]], align 2 ; CHECK-NEXT: store i16 0, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[PREDPHI]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[PREDPHI1]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: