diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 29f3940ed6fa7..bdd419f909603 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7647,8 +7647,19 @@ DenseMap LoopVectorizationPlanner::executePlan( (!VectorizingEpilogue && !ExpandedSCEVs)) && "expanded SCEVs to reuse can only be used during epilogue vectorization"); - // TODO: Move to VPlan transform stage once the transition to the VPlan-based - // cost model is complete for better cost estimates. + bool IVUpdateMayOverflow = + !isIndvarOverflowCheckKnownFalse(&CM, BestVF, BestUF); + TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow); + bool WithoutRuntimeCheck = + Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; + // Use NUW for the induction increment if we proved that it won't overflow in + // the vector loop or when not folding the tail. In the latter case, we know + // that the canonical induction increment will not overflow as the vector trip + // count is >= increment and a multiple of the increment. + bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None; + // TODO: Move transforms to VPlan transform stage once the transition to the + // VPlan-based cost model is complete for better cost estimates. + VPlanTransforms::convertCanonicalIV(BestVPlan, HasNUW, WithoutRuntimeCheck); VPlanTransforms::unrollByUF(BestVPlan, BestUF, OrigLoop->getHeader()->getContext()); VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE); @@ -8902,29 +8913,26 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, } } -// Add the necessary canonical IV and branch recipes required to control the -// loop. -static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, - DebugLoc DL) { +// Add the required canonical IV along with its loop branch, but w/o its +// increment - which is introduced later. 
+static void addCanonicalIV(VPlan &Plan, Type *IdxTy, DebugLoc DL) { Value *StartIdx = ConstantInt::get(IdxTy, 0); auto *StartV = Plan.getOrAddLiveIn(StartIdx); // Add a VPCanonicalIVPHIRecipe starting at 0 to the header. + // TODO: Introduce a separate scalar phi recipe that can be used for codegen, + // turning VPCanonicalIVPHIRecipe into an 'abstract' recipe which cannot be + // executed directly. auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *Header = TopRegion->getEntryBasicBlock(); Header->insert(CanonicalIVPHI, Header->begin()); - VPBuilder Builder(TopRegion->getExitingBasicBlock()); - // Add a VPInstruction to increment the scalar canonical IV by VF * UF. - auto *CanonicalIVIncrement = Builder.createOverflowingOp( - Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL, - "index.next"); - CanonicalIVPHI->addOperand(CanonicalIVIncrement); - // Add the BranchOnCount VPInstruction to the latch. + VPBuilder Builder(TopRegion->getExitingBasicBlock()); + // TODO: introduce branch-on-count during VPlan final (pre-codegen) lowering. Builder.createNaryOp(VPInstruction::BranchOnCount, - {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); + {CanonicalIVPHI, &Plan.getVectorTripCount()}, DL); } /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the @@ -9230,22 +9238,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { PSE, RequiresScalarEpilogueCheck, CM.foldTailByMasking(), OrigLoop); - // Don't use getDecisionAndClampRange here, because we don't know the UF - // so this function is better to be conservative, rather than to split - // it up into different VPlans. - // TODO: Consider using getDecisionAndClampRange here to split up VPlans. 
- bool IVUpdateMayOverflow = false; - for (ElementCount VF : Range) - IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF); - DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); - TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow); - // Use NUW for the induction increment if we proved that it won't overflow in - // the vector loop or when not folding the tail. In the later case, we know - // that the canonical induction increment will not overflow as the vector trip - // count is >= increment and a multiple of the increment. - bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None; - addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL); + addCanonicalIV(*Plan, Legal->getWidestInductionType(), DL); VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, Builder); @@ -9471,6 +9465,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder)) return nullptr; + // Don't use getDecisionAndClampRange here, because we don't know the UF + // so this function is better to be conservative, rather than to split + // it up into different VPlans. + // TODO: Consider using getDecisionAndClampRange here to split up VPlans. + bool IVUpdateMayOverflow = false; + for (ElementCount VF : Range) + IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF); + + TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow); if (useActiveLaneMask(Style)) { // TODO: Move checks to VPlanTransforms::addActiveLaneMask once // TailFoldingStyle is visible there. @@ -9516,11 +9519,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { Plan->getVectorLoopRegion()->getExitingBasicBlock()->getTerminator(); Term->eraseFromParent(); - // Tail folding is not supported for outer loops, so the induction increment - // is guaranteed to not wrap. 
- bool HasNUW = true; - addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, - DebugLoc()); + addCanonicalIV(*Plan, Legal->getWidestInductionType(), DebugLoc()); // Collect mapping of IR header phis to header phi recipes, to be used in // addScalarResumePhis. @@ -10272,7 +10271,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, isa(U) || isa(U) || cast(U)->getOpcode() == - Instruction::Add; + VPInstruction::BranchOnCount; }) && "the canonical IV should only be used by its increment or " "ScalarIVSteps when resetting the start value"); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index f1228368804be..f26c3bffb07c7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -20,6 +20,7 @@ #include "LoopVectorizationPlanner.h" #include "VPlanCFG.h" #include "VPlanPatternMatch.h" +#include "VPlanVerifier.h" #include "VPlanTransforms.h" #include "VPlanUtils.h" #include "llvm/ADT/PostOrderIterator.h" diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index db45ad8aadbbe..b431977233eb6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3239,7 +3239,8 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { VPCanonicalIVPHIRecipe *clone() override { auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc()); - R->addOperand(getBackedgeValue()); + if (getNumOperands() == 2) + R->addOperand(getBackedgeValue()); return R; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9febd612c644e..1c743276d9f17 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1454,52 +1454,30 @@ void VPlanTransforms::optimize(VPlan &Plan) { licm(Plan); } -// Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace -// the loop 
terminator with a branch-on-cond recipe with the negated -// active-lane-mask as operand. Note that this turns the loop into an -// uncountable one. Only the existing terminator is replaced, all other existing -// recipes/users remain unchanged, except for poison-generating flags being -// dropped from the canonical IV increment. Return the created +// Add a VPActiveLaneMaskPHIRecipe and recipes to compute its start lane mask to +// \p Plan, but w/o its increment in the loop region or adjusting the exit +// conditions of the loop region - those are adjusted later. Return the created // VPActiveLaneMaskPHIRecipe. // // The function uses the following definitions: // -// %TripCount = DataWithControlFlowWithoutRuntimeCheck ? -// calculate-trip-count-minus-VF (original TC) : original TC -// %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ? -// CanonicalIVPhi : CanonicalIVIncrement // %StartV is the canonical induction start value. // // The function adds the following recipes: // // vector.ph: -// %TripCount = calculate-trip-count-minus-VF (original TC) -// [if DataWithControlFlowWithoutRuntimeCheck] // %EntryInc = canonical-iv-increment-for-part %StartV -// %EntryALM = active-lane-mask %EntryInc, %TripCount +// %EntryALM = active-lane-mask %EntryInc, original trip count // // vector.body: // ... -// %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ] -// ... 
-// %InLoopInc = canonical-iv-increment-for-part %IncrementValue -// %ALM = active-lane-mask %InLoopInc, TripCount -// %Negated = Not %ALM -// branch-on-cond %Negated +// %P = active-lane-mask-phi [ %EntryALM, %vector.ph ] // -static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( - VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) { - VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); - VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); +static VPActiveLaneMaskPHIRecipe *createActiveLaneMaskPhi(VPlan &Plan) { auto *CanonicalIVPHI = Plan.getCanonicalIV(); VPValue *StartV = CanonicalIVPHI->getStartValue(); - auto *CanonicalIVIncrement = - cast(CanonicalIVPHI->getBackedgeValue()); - // TODO: Check if dropping the flags is needed if - // !DataAndControlFlowWithoutRuntimeCheck. - CanonicalIVIncrement->dropPoisonGeneratingFlags(); - DebugLoc DL = CanonicalIVIncrement->getDebugLoc(); + DebugLoc DL = CanonicalIVPHI->getDebugLoc(); // We can't use StartV directly in the ActiveLaneMask VPInstruction, since // we have to take unrolling into account. Each part needs to start at // Part * VF @@ -1509,21 +1487,6 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( // Create the ActiveLaneMask instruction using the correct start values. VPValue *TC = Plan.getTripCount(); - VPValue *TripCount, *IncrementValue; - if (!DataAndControlFlowWithoutRuntimeCheck) { - // When the loop is guarded by a runtime overflow check for the loop - // induction variable increment by VF, we can increment the value before - // the get.active.lane mask and use the unmodified tripcount. - IncrementValue = CanonicalIVIncrement; - TripCount = TC; - } else { - // When avoiding a runtime check, the active.lane.mask inside the loop - // uses a modified trip count and the induction variable increment is - // done after the active.lane.mask intrinsic is called. 
- IncrementValue = CanonicalIVPHI; - TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF, - {TC}, DL); - } auto *EntryIncrement = Builder.createOverflowingOp( VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL, "index.part.next"); @@ -1537,24 +1500,6 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( // preheader ActiveLaneMask instruction. auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc()); LaneMaskPhi->insertAfter(CanonicalIVPHI); - - // Create the active lane mask for the next iteration of the loop before the - // original terminator. - VPRecipeBase *OriginalTerminator = EB->getTerminator(); - Builder.setInsertPoint(OriginalTerminator); - auto *InLoopIncrement = - Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart, - {IncrementValue}, {false, false}, DL); - auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask, - {InLoopIncrement, TripCount}, DL, - "active.lane.mask.next"); - LaneMaskPhi->addOperand(ALM); - - // Replace the original terminator with BranchOnCond. We have to invert the - // mask here because a true condition means jumping to the exit block. 
- auto *NotMask = Builder.createNot(ALM, DL); - Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL); - OriginalTerminator->eraseFromParent(); return LaneMaskPhi; } @@ -1620,8 +1565,7 @@ void VPlanTransforms::addActiveLaneMask( cast(*FoundWidenCanonicalIVUser); VPSingleDefRecipe *LaneMask; if (UseActiveLaneMaskForControlFlow) { - LaneMask = addVPLaneMaskPhiAndUpdateExitBranch( - Plan, DataAndControlFlowWithoutRuntimeCheck); + LaneMask = createActiveLaneMaskPhi(Plan); } else { VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV); LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask, @@ -1823,6 +1767,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength( auto *CanonicalIVPHI = Plan.getCanonicalIV(); VPValue *StartV = CanonicalIVPHI->getStartValue(); + VPBasicBlock *Latch = Plan.getVectorLoopRegion()->getExitingBasicBlock(); // Create the ExplicitVectorLengthPhi recipe in the main loop. auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc()); @@ -1841,30 +1786,28 @@ bool VPlanTransforms::tryAddExplicitVectorLength( auto *VPEVL = Builder.createNaryOp(VPInstruction::ExplicitVectorLength, AVL, DebugLoc()); - auto *CanonicalIVIncrement = - cast(CanonicalIVPHI->getBackedgeValue()); VPSingleDefRecipe *OpVPEVL = VPEVL; + VPRecipeBase *LatchBranch = Latch->getTerminator(); if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits(); IVSize != 32) { OpVPEVL = new VPScalarCastRecipe( IVSize < 32 ? Instruction::Trunc : Instruction::ZExt, OpVPEVL, - CanonicalIVPHI->getScalarType(), CanonicalIVIncrement->getDebugLoc()); - OpVPEVL->insertBefore(CanonicalIVIncrement); + CanonicalIVPHI->getScalarType(), CanonicalIVPHI->getDebugLoc()); + OpVPEVL->insertBefore(LatchBranch); } + // TODO: Set flags when introducing the increment here. 
auto *NextEVLIV = - new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi}, - {CanonicalIVIncrement->hasNoUnsignedWrap(), - CanonicalIVIncrement->hasNoSignedWrap()}, - CanonicalIVIncrement->getDebugLoc(), "index.evl.next"); - NextEVLIV->insertBefore(CanonicalIVIncrement); + new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi}, {false, false}, + CanonicalIVPHI->getDebugLoc(), "index.evl.next"); + NextEVLIV->insertBefore(LatchBranch); EVLPhi->addOperand(NextEVLIV); transformRecipestoEVLRecipes(Plan, *VPEVL); - // Replace all uses of VPCanonicalIVPHIRecipe by - // VPEVLBasedIVPHIRecipe except for the canonical IV increment. - CanonicalIVPHI->replaceAllUsesWith(EVLPhi); - CanonicalIVIncrement->setOperand(0, CanonicalIVPHI); + // Replace all uses of VPCanonicalIVPHIRecipe by VPEVLBasedIVPHIRecipe except + // LatchBranch. + CanonicalIVPHI->replaceUsesWithIf( + EVLPhi, [LatchBranch](VPUser &U, unsigned) { return &U != LatchBranch; }); // TODO: support unroll factor > 1. Plan.setUF(1); return true; @@ -2043,6 +1986,98 @@ void VPlanTransforms::createInterleaveGroups( } } +void VPlanTransforms::convertCanonicalIV( + VPlan &Plan, bool HasNUW, bool DataAndControlFlowWithoutRuntimeCheck) { + auto *CanIV = Plan.getCanonicalIV(); + + VPBasicBlock *Latch = Plan.getVectorLoopRegion()->getExitingBasicBlock(); + auto *LatchTerm = Latch->getTerminator(); + VPBuilder Builder(LatchTerm); + DebugLoc DL = CanIV->getDebugLoc(); + // Add a VPInstruction to increment the scalar canonical IV by VF * UF. + auto *CanonicalIVIncrement = + Builder.createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()}, + {HasNUW, false}, DL, "index.next"); + CanIV->addOperand(CanonicalIVIncrement); + + auto FoundLaneMaskPhi = find_if( + Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(), + [](VPRecipeBase &P) { return isa(P); }); + + if (FoundLaneMaskPhi == + Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis().end()) { + + // Update BranchOnCount VPInstruction in the latch to use increment. 
+ // TODO: Should have separate opcodes for separate semantics. + LatchTerm->setOperand(0, CanonicalIVIncrement); + return; + } + + // Now pick the trip count for the in-loop active-lane-mask depending on + // DataAndControlFlowWithoutRuntimeCheck, introduce the phi's increment and a + // conditional branch to control the loop until the lane mask is exhausted. + // Concretely, we add the following recipes: + // + // vector.ph: + // %TripCount = calculate-trip-count-minus-VF (original TC) + // [if DataAndControlFlowWithoutRuntimeCheck, else original TC] + // (existing) %EntryInc = canonical-iv-increment-for-part %StartV + // (existing) %EntryALM = active-lane-mask %EntryInc, original TC (start + // value of the existing active-lane-mask-phi, left unchanged) + // + // vector.body: + // ... + // (existing) %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], + // (added increment) [ %ALM, %vector.body ] + // ... + // %InLoopInc = canonical-iv-increment-for-part %IncrementValue + // %ALM = active-lane-mask %InLoopInc, %TripCount + // %Negated = Not %ALM + // branch-on-cond %Negated (replaces existing BranchOnCount) + + auto *LaneMaskPhi = cast(&*FoundLaneMaskPhi); + auto *VecPreheader = + cast(Plan.getVectorLoopRegion()->getSinglePredecessor()); + Builder.setInsertPoint(VecPreheader); + + VPValue *TC = Plan.getTripCount(); + + // TODO: Check if dropping the flags is needed if + // !DataAndControlFlowWithoutRuntimeCheck. + CanonicalIVIncrement->dropPoisonGeneratingFlags(); + VPValue *TripCount, *IncrementValue; + if (!DataAndControlFlowWithoutRuntimeCheck) { + // When the loop is guarded by a runtime overflow check for the loop + // induction variable increment by VF, we can increment the value before + // the get.active.lane mask and use the unmodified tripcount. 
+ IncrementValue = CanonicalIVIncrement; + TripCount = TC; + } else { + // When avoiding a runtime check, the active.lane.mask inside the loop + // uses a modified trip count and the induction variable increment is + // done after the active.lane.mask intrinsic is called. + IncrementValue = CanIV; + TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF, + {TC}, DL); + } + // Create the active lane mask for the next iteration of the loop before the + // original terminator. + Builder.setInsertPoint(Latch); + auto *InLoopIncrement = + Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart, + {IncrementValue}, {false, false}, DL); + auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask, + {InLoopIncrement, TripCount}, DL, + "active.lane.mask.next"); + LaneMaskPhi->addOperand(ALM); + + // Replace the original terminator with BranchOnCond. We have to invert the + // mask here because a true condition means jumping to the exit block. + auto *NotMask = Builder.createNot(ALM, DL); + Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL); + LatchTerm->eraseFromParent(); +} + void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index a751b8b5e8dc5..49e7817275698 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -135,6 +135,14 @@ struct VPlanTransforms { BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder); + /// Complete the canonical IV of \p Plan by introducing an explicit increment + /// between its header phi and latch branch on count. 
If there is an + /// active-lane-mask PHI, add its increment (whose trip count depends on \p + /// DataAndControlFlowWithoutRuntimeCheck) and replace the loop region's + /// terminator with a BranchOnCond based on the active-lane-mask. + static void convertCanonicalIV(VPlan &Plan, bool HasNUW, + bool DataAndControlFlowWithoutRuntimeCheck); + /// Lower abstract recipes to concrete ones, that can be codegen'd. static void convertToConcreteRecipes(VPlan &Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index e40af3e2e3d30..5049715ac99a9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -90,8 +90,9 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { } auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV(); - // Canonical IV chain is uniform. - if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue()) + // Canonical IV chain is uniform and so is its backedge value if it exists. 
+ if (V == CanonicalIV || (CanonicalIV->getNumOperands() == 2 && + V == CanonicalIV->getBackedgeValue())) return true; return TypeSwitch(R) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 2c37593be7861..d63e78200e47d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -118,12 +118,12 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] @@ -242,12 +242,12 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]] ; 
CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]]) ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll index c3e8c895fce24..8ef57bee0d380 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll @@ -10,10 +10,10 @@ define i64 @test(ptr %a, ptr %b) #0 { ; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 -; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0> ; CHECK: Cost for VF 8: 30 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] -; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0> ; CHECK: Cost for VF 16: 56 ; CHECK: LV: Selecting VF: 16 entry: @@ -44,11 +44,11 @@ define i64 @test_external_iv_user(ptr %a, ptr %b) #0 { ; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] ; CHECK-NEXT: Cost of 1 for VF 8: exit condition 
instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 -; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0> ; CHECK: Cost for VF 8: 30 ; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] -; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0> ; CHECK: Cost for VF 16: 57 ; CHECK: LV: Selecting VF: vscale x 2 entry: @@ -81,11 +81,11 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 { ; CHECK-NEXT: Cost of 1 for VF 8: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ] ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 -; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0> ; CHECK: Cost for VF 8: 24 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ] -; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0> ; CHECK: Cost for VF 16: 42 ; CHECK: LV: Selecting VF: 16 entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 56a468ed1310b..e14e724ed05a4 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -112,15 +112,15 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[TMP0]]) +; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 +; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; PRED-NEXT: [[TMP16:%.*]] = trunc [[BROADCAST_SPLAT]] to ; PRED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 ; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], [[TMP12]] ; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], [[TMP12]] ; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[TMP0]]) -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PRED-NEXT: [[TMP16:%.*]] = trunc [[BROADCAST_SPLAT]] to ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -280,10 +280,10 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1 ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]]) ; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 2 ; 
PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 2 ; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]]) ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[MUL_X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -454,10 +454,10 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 +; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP0]]) ; PRED-NEXT: [[TMP14:%.*]] = sub i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP14]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP0]]) ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[MUL]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -651,10 +651,10 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 +; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP0]]) ; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 4 ; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 
[[TMP13]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP0]]) ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index cb4fd04d1bc4f..aac12a13fd4ab 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -141,7 +141,6 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -653,11 +652,11 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-ORDERED-TF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]] ; 
CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -915,7 +914,6 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[TMP7]] ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], [[TMP7]] ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1154,7 +1152,6 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 1f7d0b745f929..e4704ff4579e7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -53,12 +53,12 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll index f61d473a12ddb..407d27f05ed9e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -78,10 +78,10 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 
[[TMP1]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index bd8fe69a44630..a52e7f54e4be3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -20,12 +20,12 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -75,12 +75,12 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 +; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-IN-LOOP-NEXT: 
[[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-IN-LOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-IN-LOOP: vector.body: ; CHECK-IN-LOOP-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -148,12 +148,12 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -202,12 +202,12 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 
@llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-IN-LOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-IN-LOOP: vector.body: ; CHECK-IN-LOOP-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -273,12 +273,12 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -341,12 +341,12 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 +; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call 
i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-IN-LOOP-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-IN-LOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-IN-LOOP: vector.body: ; CHECK-IN-LOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 3f33ee883ac48..ec45df0f91ec8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -18,11 +18,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP61]], 16 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4 ; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP26]] @@ -36,6 +31,11 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT1]], i64 
[[UMAX]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[UMAX]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -122,11 +122,6 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP83]], 16 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4 ; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP26]] @@ -140,6 +135,11 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[UMAX]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[UMAX]]) +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: 
[[TMP21:%.*]] = mul i64 [[TMP20]], 16 +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[UMAX]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp ugt i64 [[UMAX]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i64 [[TMP22]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -198,10 +198,10 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[TMP91:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP92:%.*]] = mul i64 [[TMP91]], 12 ; CHECK-NEXT: [[TMP93:%.*]] = add i64 [[INDEX6]], [[TMP92]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX6]], i64 [[TMP9]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP87]], i64 [[TMP9]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT15]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP90]], i64 [[TMP9]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP9]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX6]], i64 [[TMP24]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP87]], i64 [[TMP24]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT15]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP90]], i64 [[TMP24]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP24]]) ; CHECK-NEXT: [[TMP94:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP98:%.*]] = extractelement [[TMP94]], i32 0 ; CHECK-NEXT: br i1 [[TMP98]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 75b2df93c9350..cd9c368129ab0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -18,12 +18,12 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -79,10 +79,10 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[UMAX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[UMAX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[TMP0]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[UMAX]]) ; 
CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -143,12 +143,12 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -218,12 +218,12 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 
[[TMP2]]) ; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP13]], splat (i64 4) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP15]] @@ -294,12 +294,12 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] @@ -371,12 +371,12 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; 
CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -444,12 +444,12 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -540,12 +540,12 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -610,12 +610,12 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] @@ -688,12 +688,12 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll index c119248c0be43..fb487c2a20489 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll @@ -12,7 +12,6 @@ target triple = "aarch64-unknown-linux-gnu" ;; Check that the scalar plan contains the original instructions. ; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' { -; CHECK-NEXT: Live-in [[VFxUF:.*]] = VF * UF ; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count ; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count ; CHECK-EMPTY: @@ -24,7 +23,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]] +; CHECK-NEXT: EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1> ; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]] ; CHECK-NEXT: CLONE [[IDX:.*]] = load [[GEP_IDX]] @@ -33,8 +32,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: CLONE [[HISTVAL:.*]] = load [[GEP_BUCKET]] ; CHECK-NEXT: CLONE [[UPDATE:.*]] = add nsw [[HISTVAL]], ir<1> ; CHECK-NEXT: CLONE store [[UPDATE]], [[GEP_BUCKET]] -; CHECK-NEXT: EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]] -; CHECK-NEXT: EMIT branch-on-count [[IV_NEXT]], [[VTC]] +; CHECK-NEXT: EMIT branch-on-count [[IV]], [[VTC]] ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -59,7 +57,6 @@ target triple = "aarch64-unknown-linux-gnu" ;; Check that the vectorized plan contains a histogram recipe instead. 
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2,vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in [[VFxUF:.*]] = VF * UF ; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count ; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count ; CHECK-EMPTY: @@ -71,7 +68,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]] +; CHECK-NEXT: EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1> ; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]] ; CHECK-NEXT: [[VECP_IDX:vp.*]] = vector-pointer [[GEP_IDX]] @@ -79,8 +76,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: WIDEN-CAST [[EXT_IDX:.*]] = zext [[IDX]] to i64 ; CHECK-NEXT: WIDEN-GEP Inv[Var] [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]] ; CHECK-NEXT: WIDEN-HISTOGRAM buckets: [[GEP_BUCKET]], inc: ir<1> -; CHECK-NEXT: EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]] -; CHECK-NEXT: EMIT branch-on-count [[IV_NEXT]], [[VTC]] +; CHECK-NEXT: EMIT branch-on-count [[IV]], [[VTC]] ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 30ee30b595fcb..d922254089160 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -685,10 +685,10 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw 
nsw i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP5]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index 0c246c6ee93e3..b28cdd12e71d1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -10,8 +10,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-LABEL: LV: Checking a loop in 'test_v4_v4m' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -31,8 +29,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -56,8 +52,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: } ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -77,8 +71,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: 
CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -107,8 +99,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4m' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -128,8 +118,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXST:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -153,8 +141,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: } ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -174,8 +160,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT 
branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -203,8 +187,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -224,8 +206,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -249,8 +229,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: } ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -270,8 +248,6 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index 
0cfb2d5edcc63..0a0894b2d14bd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -211,12 +211,12 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll index ce8492cd77362..52a032ee27c83 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll @@ -12,10 +12,10 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6 ; CHECK-NEXT: entry: ; 
CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -38,12 +38,12 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 -; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]]) -; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 1 +; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) -; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 [[N]]) +; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 [[N]]) +; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; INTERLEAVE-NEXT: [[TMP6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP5]]) ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] @@ -67,8 +67,8 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl i64 [[TMP15]], 1 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]] -; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]]) -; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]]) +; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP6]]) +; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP6]]) ; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; INTERLEAVE: for.cond.cleanup: @@ -98,10 +98,10 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -124,12 +124,12 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; INTERLEAVE-NEXT: 
[[TMP1:%.*]] = shl i64 [[TMP0]], 2 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 -; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]]) -; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 1 +; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) -; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 [[N]]) +; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP3]], i64 [[N]]) +; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; INTERLEAVE-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; INTERLEAVE-NEXT: [[TMP6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP5]]) ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -153,8 +153,8 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl i64 [[TMP15]], 1 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]] -; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]]) -; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]]) +; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP6]]) +; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP6]]) ; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], 
i64 0 ; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; INTERLEAVE: for.cond.cleanup: @@ -200,9 +200,9 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 ; INTERLEAVE-LABEL: define void @test_uniform_not_invariant ; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; INTERLEAVE-NEXT: entry: -; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 2) ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = icmp ne i64 [[N]], 0 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ugt i64 [[N]], 1 +; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 2) ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index ccf8540b4ebf7..8715d03bd1f58 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -8,7 +8,6 @@ target triple = "aarch64-none-unknown-elf" define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK: VPlan 'Initial VPlan for VF={8,16},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -20,7 +19,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi ir<0>, ir<[[REDUCE:%.+]]> (VF scaled 
by 1/4) ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]> @@ -33,8 +32,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a> ; CHECK-NEXT: PARTIAL-REDUCE ir<[[REDUCE]]> = add ir<%mul>, ir<[[ACC]]> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -42,7 +40,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, ir<[[REDUCE]]> ; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-from-end vp<[[RED_RESULT]]>, ir<1> -; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<%1> +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index a119707bed120..5786feef21337 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -7,9 +7,7 @@ target triple = "arm64-apple-ios" ; CHECK-LABEL: LV: Checking a loop in 'test' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count - ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -29,8 +27,7 @@ 
target triple = "arm64-apple-ios" ; CHECK-NEXT: WIDEN-CALL ir<%s> = call reassoc nnan ninf nsz arcp contract afn @llvm.sin.f64(ir<%conv>) (using library function: __simd_sin_v2f64) ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -54,7 +51,6 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: } ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -75,8 +71,7 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: WIDEN-INTRINSIC ir<%s> = call reassoc nnan ninf nsz arcp contract afn llvm.sin(ir<%conv>) ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll index 31d5149272c22..35308d33c2b1a 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll @@ -9,7 +9,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; CHECK-LABEL: VPlan 'Initial VPlan for 
VF={2,4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<%N> = original trip-count @@ -22,7 +21,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_INC:%.*]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.store @@ -49,8 +48,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; CHECK-NEXT: Successor(s): for.body.2 ; CHECK-EMPTY: ; CHECK-NEXT: for.body.2: -; CHECK-NEXT: EMIT vp<[[CAN_INC:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -76,7 +74,6 @@ for.cond.cleanup: define void @safe_dep(ptr %p) { ; CHECK-LABEL: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<512> = original trip-count ; CHECK-EMPTY: @@ -88,7 +85,7 @@ define void @safe_dep(ptr %p) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_INC:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%a1> = getelementptr ir<%p>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VPTR1:%.+]]> = vector-pointer ir<%a1> @@ -97,8 +94,7 @@ define void 
@safe_dep(ptr %p) { ; CHECK-NEXT: CLONE ir<%a2> = getelementptr ir<%p>, ir<%offset> ; CHECK-NEXT: vp<[[VPTR2:%.+]]> = vector-pointer ir<%a2> ; CHECK-NEXT: WIDEN store vp<[[VPTR2]]>, ir<%v> -; CHECK-NEXT: EMIT vp<[[CAN_INC]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index f630f4f21e065..e1d4d2b0d6015 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -54,7 +54,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: @@ -82,8 +81,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -303,7 +301,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: 
%indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: @@ -331,8 +328,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv1> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index 68b36f23de4b0..fd334ffbcf7d5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -33,7 +33,7 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.vp.trunc.nxv1i8.nxv1i16( [[VP_OP1]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv1i8.nxv1p0( [[TMP8]], align 1 zeroinitializer, splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll index 1af03e740ef1a..f348f735d4bb3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll @@ -12,7 +12,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL: VPlan 'Initial VPlan for VF={4},UF>=1' { -; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF ; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count ; IF-EVL-NEXT: Live-in vp<[[BETC:%[0-9]+]]> = backedge-taken count ; IF-EVL-NEXT: Live-in ir<%N> = original trip-count @@ -36,13 +35,11 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> ; IF-EVL-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[MASK]]> -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> -; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV]]>, vp<[[VTC]]> ; IF-EVL-NEXT: No successors ; IF-EVL-NEXT: } ; NO-VP: VPlan 'Initial VPlan for VF={4},UF>=1' { -; NO-VP-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF ; NO-VP-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count ; NO-VP-NEXT: Live-in ir<%N> = original trip-count ; NO-VP-EMPTY: @@ -63,8 +60,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> ; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]> -; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> -; 
NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; NO-VP-NEXT: EMIT branch-on-count vp<[[IV]]>, vp<[[VTC]]> ; NO-VP-NEXT: No successors ; NO-VP-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 32d32a64049ac..e92b9c7e20016 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -5,7 +5,6 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-LABEL: 'test_chained_first_order_recurrences_1' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -29,8 +28,7 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: WIDEN ir<%add> = add vp<[[FOR1_SPLICE]]>, vp<[[FOR2_SPLICE]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.ptr> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -81,7 +79,6 @@ exit: define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-LABEL: 'test_chained_first_order_recurrences_3' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -108,8 +105,7 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: WIDEN ir<%add.2> = add ir<%add.1>, vp<[[FOR3_SPLICE]]> ; 
CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.ptr> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add.2> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -169,7 +165,6 @@ exit: define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-LABEL: 'test_chained_first_order_recurrences_4' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<4098> = original trip-count ; CHECK-EMPTY: @@ -182,7 +177,7 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> @@ -193,8 +188,7 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%for.y.i64> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -245,7 +239,6 @@ ret: define i32 
@test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-LABEL: 'test_chained_first_order_recurrences_5_hoist_to_load' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<4098> = original trip-count ; CHECK-EMPTY: @@ -257,7 +250,7 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> @@ -271,8 +264,7 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-NEXT: WIDEN-CAST ir<%for.y.i64> = sext vp<[[SPLICE_Y]]> to i64 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%for.y.i64> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 0b2e7fe484390..3ec5d518d8fd5 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -8,8 +8,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" define void 
@sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize { ; CHECK-LABEL: sink_replicate_region_1 -; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK: VPlan 'Final VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count @@ -118,8 +117,7 @@ exit: define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-LABEL: sink_replicate_region_2 -; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK: VPlan 'Final VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count @@ -207,7 +205,7 @@ exit: define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-LABEL: sink_replicate_region_3_reduction -; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK: VPlan 'Final VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count @@ -404,7 +402,7 @@ exit: ; Test case that requires sinking a recipe in a replicate region after another replicate region. 
define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias %dst.2, i32 %x, i8 %y) optsize { ; CHECK-LABEL: sink_replicate_region_after_replicate_region -; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK: VPlan 'Final VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count @@ -498,7 +496,7 @@ exit: ; preds = %loop define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias %dst) { ; CHECK-LABEL: need_new_block_after_sinking_pr56146 -; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK: VPlan 'Final VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll index 8759fd0872c10..ecd90930b0bc5 100644 --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -36,7 +36,7 @@ for.end: ; Check for crash exposed by D76992. 
; CHECK-LABEL: 'test' -; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK: VPlan 'Final VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll index 50755f8f05e35..626495f22edae 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable { ; CHECK: digraph VPlan { -; CHECK-NEXT: graph [labelloc=t, fontsize=30; label="Vectorization Plan\nInitial VPlan for VF=\{4\},UF\>=1\nLive-in vp\<[[VFxUF:%.+]]\> = VF * UF\nLive-in vp\<[[VEC_TC:%.+]]\> = vector-trip-count\nLive-in ir\<%n\> = original trip-count\n"] +; CHECK-NEXT: graph [labelloc=t, fontsize=30; label="Vectorization Plan\nFinal VPlan for VF=\{4\},UF\>=1\nLive-in vp\<[[VFxUF:%.+]]\> = VF * UF\nLive-in vp\<[[VEC_TC:%.+]]\> = vector-trip-count\nLive-in ir\<%n\> = original trip-count\n"] ; CHECK-NEXT: node [shape=rect, fontname=Courier, fontsize=30] ; CHECK-NEXT: edge [fontname=Courier, fontsize=30] ; CHECK-NEXT: compound=true diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index be1eb78cab607..9f9db85be6bd2 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -3,7 +3,7 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-LABEL: LV: Checking a loop in 'iv_no_binary_op_in_descriptor' -; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' { +; CHECK: VPlan 'Final VPlan for VF={8},UF=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF 
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count @@ -23,7 +23,6 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%dst>, vp<[[STEPS:%.+]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%iv> -; CHECK-NEXT: EMIT vp<[[CAN_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 00d8de67a3b40..763a31cd923f1 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -9,7 +9,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable { ; CHECK-LABEL: Checking a loop in 'print_call_and_memory' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -21,7 +20,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> @@ -30,8 +29,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: CLONE 
ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -77,7 +75,6 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-LABEL: Checking a loop in 'print_widen_gep_and_select' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -89,7 +86,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr inbounds ir<%y>, ir<%iv> @@ -101,8 +98,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors 
; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -149,7 +145,6 @@ for.end: ; preds = %for.body, %entry define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-LABEL: Checking a loop in 'print_reduction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -161,15 +156,14 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -216,7 +210,6 @@ for.end: ; preds = %for.body, %entry define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr noalias %dst) { ; CHECK-LABEL: Checking a loop in 'print_reduction_with_invariant_store' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -228,15 +221,14 @@ define void @print_reduction_with_invariant_store(i64 
%n, ptr noalias %y, ptr no ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -285,7 +277,6 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-LABEL: Checking a loop in 'print_replicate_predicated_phi' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: @@ -298,7 +289,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: ir<%i> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%i>, ir<5> @@ -324,8 +315,7 @@ define void 
@print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%d> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -378,7 +368,6 @@ for.end: ; preds = %for.inc define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-LABEL: Checking a loop in 'print_interleave_groups' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<256> = original trip-count ; CHECK-EMPTY: @@ -391,7 +380,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<4> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<4> ; CHECK-NEXT: CLONE ir<%gep.AB.0> = getelementptr inbounds ir<@AB>, ir<0>, vp<[[STEPS]]> @@ -406,8 +395,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: store ir<1> to index 1 ; CHECK-NEXT: store ir<2> to index 2 ; CHECK-NEXT: store ir<%AB.3> to index 3 -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -464,7 +452,6 @@ for.end: define float 
@print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-LABEL: Checking a loop in 'print_fmuladd_strict' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -476,7 +463,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> @@ -487,8 +474,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: WIDEN ir<%l.b> = load vp<[[VEC_PTR2]]> ; CHECK-NEXT: EMIT vp<[[FMUL:%.+]]> = fmul nnan ninf nsz ir<%l.a>, ir<%l.b> ; CHECK-NEXT: REDUCE ir<[[MULADD:%.+]]> = ir<%sum.07> + nnan ninf nsz reduce.fadd (vp<[[FMUL]]>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -538,7 +524,6 @@ for.end: define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 { ; CHECK-LABEL: Checking a loop in 'debug_loc_vpinstruction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<128> = original trip-count ; CHECK-EMPTY: @@ -550,7 +535,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-EMPTY: ; 
CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%isd> = getelementptr inbounds ir<%asd>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%isd> @@ -584,8 +569,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]> ir<%psd>/vp<[[SEL2]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%isd> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT:} ; CHECK-NEXT: Successor(s): middle.block @@ -645,7 +629,6 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-LABEL: Checking a loop in 'print_expand_scev' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.*]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: @@ -662,15 +645,14 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]> (truncated to i8) ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * vp<[[EXP_SCEV]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[EXP_SCEV]]> ; CHECK-NEXT: WIDEN ir<%v3> = add 
nuw ir<%iv>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%v3>, ir<%gep> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -717,7 +699,6 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-LABEL: Checking a loop in 'print_exit_value' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -729,15 +710,14 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%iv>, ir<%off> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -782,7 +762,6 @@ exit: define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %z) { ; CHECK-LABEL: Checking a loop in 'print_fast_math_flags' ; CHECK: VPlan 
'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -794,7 +773,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.y> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.y> @@ -805,8 +784,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%div> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -852,7 +830,6 @@ exit: define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-LABEL: Checking a loop in 'print_exact_flags' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -864,7 +841,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = 
CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x> @@ -874,8 +851,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%div.1>, ir<%div.2> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.x> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -920,7 +896,6 @@ exit: define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-LABEL: Checking a loop in 'print_call_flags' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -932,7 +907,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%ld.addr> @@ -963,8 +938,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%st.addr> ; CHECK-NEXT: WIDEN store 
vp<[[VEC_PTR2]]>, ir<%st.value> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -1019,7 +993,6 @@ end: define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-LABEL: Checking a loop in 'print_disjoint_flags' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -1031,7 +1004,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x> @@ -1041,8 +1014,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%or.1>, ir<%or.2> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -1087,7 +1059,6 @@ exit: define void @zext_nneg(ptr noalias %p, ptr noalias %p1) { ; CHECK-LABEL: LV: Checking a loop in 'zext_nneg' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in 
vp<[[VFxUF:%.]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -1099,15 +1070,14 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%p>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN-CAST ir<%zext> = zext nneg ir<%l> ; CHECK-NEXT: REPLICATE store ir<%zext>, ir<%p1> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -1131,7 +1101,6 @@ exit: define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-LABEL: 'print_first_order_recurrence_and_result' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -1153,8 +1122,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: WIDEN ir<%add> = add vp<[[FOR1_SPLICE]]>, ir<1> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.ptr> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): 
middle.block diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll index b58a14952f7a8..53df1277c71d7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -8,7 +8,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-LABEL: sink_with_sideeffects ; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: ir<1024> = original trip-count ; CHECK-EMPTY: @@ -42,8 +41,7 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: } ; CHECK: if.then.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index aa05bb153966e..eb821dc713ab3 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -12,7 +12,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-LABEL: LV: Checking a loop in 'sink1' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -51,8 
+50,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -81,7 +79,6 @@ exit: ; CHECK-LABEL: LV: Checking a loop in 'sink2' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -136,8 +133,7 @@ exit: ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -166,7 +162,6 @@ exit: ; CHECK-LABEL: LV: Checking a loop in 'sink3' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -221,8 +216,7 @@ exit: ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -253,7 +247,6 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: LV: Checking a loop in 'uniform_gep' ; CHECK: VPlan 'Initial VPlan for 
VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<11> = original trip-count @@ -296,8 +289,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: Successor(s): loop.then.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.then.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -329,7 +321,6 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-LABEL: LV: Checking a loop in 'pred_cfg1' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -388,8 +379,7 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): next.0.1 ; CHECK-EMPTY: ; CHECK-NEXT: next.0.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -427,7 +417,6 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-LABEL: LV: Checking a loop in 'pred_cfg2' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -488,8 +477,7 @@ 
define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.1.1 ; CHECK-EMPTY: ; CHECK-NEXT: then.1.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -534,7 +522,6 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-LABEL: LV: Checking a loop in 'pred_cfg3' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -596,8 +583,7 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.1.2 ; CHECK-EMPTY: ; CHECK-NEXT: then.1.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -641,7 +627,6 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-LABEL: LV: Checking a loop in 'merge_3_replicate_region' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -705,8 +690,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.0.4 ; CHECK-EMPTY: ; CHECK-NEXT: then.0.4: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: 
EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -747,7 +731,6 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-LABEL: LV: Checking a loop in 'update_2_uses_in_same_recipe_in_merged_block' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -785,8 +768,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -813,7 +795,6 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-LABEL: LV: Checking a loop in 'recipe_in_merge_candidate_used_by_first_order_recurrence' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -870,8 +851,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -898,7 +878,6 @@ exit: 
define void @update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) { ; CHECK-LABEL: LV: Checking a loop in 'update_multiple_users' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<999> = original trip-count ; CHECK-EMPTY: @@ -932,8 +911,7 @@ define void @update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) { ; CHECK-NEXT: Successor(s): loop.then.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.then.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -965,7 +943,6 @@ exit: define void @sinking_requires_duplication(ptr %addr) { ; CHECK-LABEL: LV: Checking a loop in 'sinking_requires_duplication' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<201> = original trip-count ; CHECK-EMPTY: @@ -1002,8 +979,7 @@ define void @sinking_requires_duplication(ptr %addr) { ; CHECK-NEXT: Successor(s): then.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -1038,7 +1014,6 @@ exit: define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr noalias %dst) optsize { ; CHECK-LABEL: LV: Checking a loop in 'merge_with_dead_gep_between_regions' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; 
CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<%n> = original trip-count @@ -1077,8 +1052,7 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -1128,7 +1102,6 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-LABEL: LV: Checking a loop in 'ptr_induction_remove_dead_recipe' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: @@ -1169,8 +1142,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): if.then.0 ; CHECK-EMPTY: ; CHECK-NEXT: if.then.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll index 9778ef6853a70..07753736ef475 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll @@ -8,7 +8,7 @@ define void @test_unused_interleave(ptr %src, i32 %length) { ; CHECK-LABEL: Checking a loop in 'test_unused_interleave' -; CHECK: VPlan 
'Initial VPlan for VF={4},UF>=1' { +; CHECK: VPlan 'Final VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%0> = VF * UF ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count ; CHECK-NEXT: Live-in ir<%length> = original trip-count