From 29bfcd064cf5d26d03d84857c678173386e565d4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 20 Mar 2025 21:41:54 +0000 Subject: [PATCH 1/3] [VPlan] Invert condition if needed when creating inner regions. --- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 11 +++++++++++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +++ .../outer-loop-inner-latch-successors.ll | 6 +++--- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index f24d42256caef..b803081804aac 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -420,6 +420,17 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0]; auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; + // We are canonicalizing the successors of the latch when introducing the + // region. We will exit the region of the latch condition is true; invert the + // original condition if the original CFG branches to the header on true. + if (!LatchVPBB->getSingleSuccessor() && + LatchVPBB->getSuccessors()[0] == HeaderVPB) { + auto *Term = cast(LatchVPBB)->getTerminator(); + auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)}); + Not->insertBefore(Term); + Term->setOperand(0, Not); + } + VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB); VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB); VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3d82742f0adab..ac12b0923b5ab 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -55,6 +55,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( make_early_inc_range(make_range(VPBB->begin(), EndIter))) { VPValue *VPV = Ingredient.getVPSingleValue(); + if (!VPV->getUnderlyingValue()) + continue; + Instruction *Inst = cast(VPV->getUnderlyingValue()); VPRecipeBase *NewRecipe = nullptr; diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll index 388da8540646f..afd1308a2d24a 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll @@ -4,7 +4,6 @@ @A = common global [1024 x i64] zeroinitializer, align 16 @B = common global [1024 x i64] zeroinitializer, align 16 -; FIXME: The exit condition of the inner loop is incorrect when vectorizing. define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-LABEL: define void @inner_latch_header_first_successor( ; CHECK-SAME: i64 [[N:%.*]], i32 [[C:%.*]], i64 [[M:%.*]]) { @@ -35,8 +34,9 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[TMP3]] = add nsw <4 x i64> [[TMP2]], [[VEC_PHI4]] ; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 -; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[INNER3]] +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 +; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_LATCH]], label %[[INNER3]] ; CHECK: [[VECTOR_LATCH]]: ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[VEC_PHI6]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) From fa812b5c298cdfd75ff30eaa07623f8e7e6e5230 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 26 Apr 2025 21:28:08 +0100 Subject: [PATCH 2/3] !fixup address latest comments, thanks --- .../Vectorize/VPlanConstruction.cpp | 65 +++++++++++-------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index b803081804aac..9ae6d333c5310 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -386,51 +386,60 @@ std::unique_ptr VPlanTransforms::buildPlainCFG( /// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it /// has exactly 2 predecessors (preheader and latch), where the block /// dominates the latch and the preheader dominates the block. If it is a -/// header block return true, making sure the preheader appears first and -/// the latch second. Otherwise return false. -static bool canonicalHeader(VPBlockBase *HeaderVPB, - const VPDominatorTree &VPDT) { +/// header block return true and canonicalize the predecessors of the header +/// (making sure the preheader appears first and the latch second) and the +/// successors of the latch (making sure the loop exit comes first). Otherwise +/// return false. +static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, + const VPDominatorTree &VPDT) { ArrayRef Preds = HeaderVPB->getPredecessors(); if (Preds.size() != 2) return false; auto *PreheaderVPBB = Preds[0]; auto *LatchVPBB = Preds[1]; - if (VPDT.dominates(PreheaderVPBB, HeaderVPB) && - VPDT.dominates(HeaderVPB, LatchVPBB)) - return true; - - std::swap(PreheaderVPBB, LatchVPBB); + if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) || + !VPDT.dominates(HeaderVPB, LatchVPBB)) { + std::swap(PreheaderVPBB, LatchVPBB); - if (VPDT.dominates(PreheaderVPBB, HeaderVPB) && - VPDT.dominates(HeaderVPB, LatchVPBB)) { - // Canonicalize predecessors of header so that preheader is first and latch - // second. - HeaderVPB->swapPredecessors(); - for (VPRecipeBase &R : cast(HeaderVPB)->phis()) - R.swapOperands(); - return true; + if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) || + !VPDT.dominates(HeaderVPB, LatchVPBB)) { + return false; + } else { + // Canonicalize predecessors of header so that preheader is first and + // latch second. + HeaderVPB->swapPredecessors(); + for (VPRecipeBase &R : cast(HeaderVPB)->phis()) + R.swapOperands(); + } } - return false; -} - -/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB. -static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { - auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0]; - auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; - - // We are canonicalizing the successors of the latch when introducing the - // region. We will exit the region of the latch condition is true; invert the + // The two successors of conditional branch match the condition, with the + // first successor corresponding to true and the second to false. We + // canonicalize the successors of the latch when introducing the region, such + // that the latch exits the region when its condition is true; invert the // original condition if the original CFG branches to the header on true. if (!LatchVPBB->getSingleSuccessor() && LatchVPBB->getSuccessors()[0] == HeaderVPB) { + assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors"); auto *Term = cast(LatchVPBB)->getTerminator(); + assert(cast(Term)->getOpcode() == + VPInstruction::BranchOnCond && + "terminator must be a BranchOnCond"); auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)}); Not->insertBefore(Term); Term->setOperand(0, Not); + LatchVPBB->swapSuccessors(); } + return true; +} + +/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB. +static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { + auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0]; + auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; + VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB); VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB); VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); @@ -458,7 +467,7 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, VPDominatorTree VPDT; VPDT.recalculate(Plan); for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry())) - if (canonicalHeader(HeaderVPB, VPDT)) + if (canonicalHeaderAndLatch(HeaderVPB, VPDT)) createLoopRegion(Plan, HeaderVPB); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); From 633934663e3ac969488e9f59dc451904953fa77c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 27 Apr 2025 20:38:10 +0100 Subject: [PATCH 3/3] !fixup address latest comments, thanks --- .../Vectorize/VPlanConstruction.cpp | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 70fb852e87dda..5eb2f058f329f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -403,15 +403,14 @@ static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, std::swap(PreheaderVPBB, LatchVPBB); if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) || - !VPDT.dominates(HeaderVPB, LatchVPBB)) { + !VPDT.dominates(HeaderVPB, LatchVPBB)) return false; - } else { - // Canonicalize predecessors of header so that preheader is first and - // latch second. - HeaderVPB->swapPredecessors(); - for (VPRecipeBase &R : cast(HeaderVPB)->phis()) - R.swapOperands(); - } + + // Canonicalize predecessors of header so that preheader is first and + // latch second. + HeaderVPB->swapPredecessors(); + for (VPRecipeBase &R : cast(HeaderVPB)->phis()) + R.swapOperands(); } // The two successors of conditional branch match the condition, with the @@ -419,18 +418,20 @@ static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, // canonicalize the successors of the latch when introducing the region, such // that the latch exits the region when its condition is true; invert the // original condition if the original CFG branches to the header on true. - if (!LatchVPBB->getSingleSuccessor() && - LatchVPBB->getSuccessors()[0] == HeaderVPB) { - assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors"); - auto *Term = cast(LatchVPBB)->getTerminator(); - assert(cast(Term)->getOpcode() == - VPInstruction::BranchOnCond && - "terminator must be a BranchOnCond"); - auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)}); - Not->insertBefore(Term); - Term->setOperand(0, Not); - LatchVPBB->swapSuccessors(); - } + // Note that the exit edge is not yet connected for top-level loops. + if (LatchVPBB->getSingleSuccessor() || + LatchVPBB->getSuccessors()[0] != HeaderVPB) + return true; + + assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors"); + auto *Term = cast(LatchVPBB)->getTerminator(); + assert(cast(Term)->getOpcode() == + VPInstruction::BranchOnCond && + "terminator must be a BranchOnCond"); + auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)}); + Not->insertBefore(Term); + Term->setOperand(0, Not); + LatchVPBB->swapSuccessors(); return true; }