@@ -1235,16 +1235,10 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
12351235static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch (
12361236 VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
12371237 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion ();
1238- VPBasicBlock *EB = TopRegion->getExitingBasicBlock ();
12391238 auto *CanonicalIVPHI = Plan.getCanonicalIV ();
12401239 VPValue *StartV = CanonicalIVPHI->getStartValue ();
12411240
1242- auto *CanonicalIVIncrement =
1243- cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue ());
1244- // TODO: Check if dropping the flags is needed if
1245- // !DataAndControlFlowWithoutRuntimeCheck.
1246- CanonicalIVIncrement->dropPoisonGeneratingFlags ();
1247- DebugLoc DL = CanonicalIVIncrement->getDebugLoc ();
1241+ DebugLoc DL = CanonicalIVPHI->getDebugLoc ();
12481242 // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
12491243 // we have to take unrolling into account. Each part needs to start at
12501244 // Part * VF
@@ -1254,21 +1248,6 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
12541248 // Create the ActiveLaneMask instruction using the correct start values.
12551249 VPValue *TC = Plan.getTripCount ();
12561250
1257- VPValue *TripCount, *IncrementValue;
1258- if (!DataAndControlFlowWithoutRuntimeCheck) {
1259- // When the loop is guarded by a runtime overflow check for the loop
1260- // induction variable increment by VF, we can increment the value before
1261- // the get.active.lane mask and use the unmodified tripcount.
1262- IncrementValue = CanonicalIVIncrement;
1263- TripCount = TC;
1264- } else {
1265- // When avoiding a runtime check, the active.lane.mask inside the loop
1266- // uses a modified trip count and the induction variable increment is
1267- // done after the active.lane.mask intrinsic is called.
1268- IncrementValue = CanonicalIVPHI;
1269- TripCount = Builder.createNaryOp (VPInstruction::CalculateTripCountMinusVF,
1270- {TC}, DL);
1271- }
12721251 auto *EntryIncrement = Builder.createOverflowingOp (
12731252 VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false , false }, DL,
12741253 " index.part.next" );
@@ -1282,24 +1261,6 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
12821261 // preheader ActiveLaneMask instruction.
12831262 auto LaneMaskPhi = new VPActiveLaneMaskPHIRecipe (EntryALM, DebugLoc ());
12841263 LaneMaskPhi->insertAfter (CanonicalIVPHI);
1285-
1286- // Create the active lane mask for the next iteration of the loop before the
1287- // original terminator.
1288- VPRecipeBase *OriginalTerminator = EB->getTerminator ();
1289- Builder.setInsertPoint (OriginalTerminator);
1290- auto *InLoopIncrement =
1291- Builder.createOverflowingOp (VPInstruction::CanonicalIVIncrementForPart,
1292- {IncrementValue}, {false , false }, DL);
1293- auto *ALM = Builder.createNaryOp (VPInstruction::ActiveLaneMask,
1294- {InLoopIncrement, TripCount}, DL,
1295- " active.lane.mask.next" );
1296- LaneMaskPhi->addOperand (ALM);
1297-
1298- // Replace the original terminator with BranchOnCond. We have to invert the
1299- // mask here because a true condition means jumping to the exit block.
1300- auto *NotMask = Builder.createNot (ALM, DL);
1301- Builder.createNaryOp (VPInstruction::BranchOnCond, {NotMask}, DL);
1302- OriginalTerminator->eraseFromParent ();
13031264 return LaneMaskPhi;
13041265}
13051266
@@ -1418,6 +1379,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
14181379 return false ;
14191380 auto *CanonicalIVPHI = Plan.getCanonicalIV ();
14201381 VPValue *StartV = CanonicalIVPHI->getStartValue ();
1382+ VPBasicBlock *Latch = Plan.getVectorLoopRegion ()->getExitingBasicBlock ();
14211383
14221384 // Create the ExplicitVectorLengthPhi recipe in the main loop.
14231385 auto *EVLPhi = new VPEVLBasedIVPHIRecipe (StartV, DebugLoc ());
@@ -1426,22 +1388,18 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
14261388 {EVLPhi, Plan.getTripCount ()});
14271389 VPEVL->insertBefore (*Header, Header->getFirstNonPhi ());
14281390
1429- auto *CanonicalIVIncrement =
1430- cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue ());
14311391 VPSingleDefRecipe *OpVPEVL = VPEVL;
14321392 if (unsigned IVSize = CanonicalIVPHI->getScalarType ()->getScalarSizeInBits ();
14331393 IVSize != 32 ) {
14341394 OpVPEVL = new VPScalarCastRecipe (IVSize < 32 ? Instruction::Trunc
14351395 : Instruction::ZExt,
14361396 OpVPEVL, CanonicalIVPHI->getScalarType ());
1437- OpVPEVL-> insertBefore (CanonicalIVIncrement );
1397+ Latch-> appendRecipe (OpVPEVL );
14381398 }
14391399 auto *NextEVLIV =
1440- new VPInstruction (Instruction::Add, {OpVPEVL, EVLPhi},
1441- {CanonicalIVIncrement->hasNoUnsignedWrap (),
1442- CanonicalIVIncrement->hasNoSignedWrap ()},
1443- CanonicalIVIncrement->getDebugLoc (), " index.evl.next" );
1444- NextEVLIV->insertBefore (CanonicalIVIncrement);
1400+ new VPInstruction (Instruction::Add, {OpVPEVL, EVLPhi}, {false , false },
1401+ CanonicalIVPHI->getDebugLoc (), " index.evl.next" );
1402+ Latch->appendRecipe (NextEVLIV);
14451403 EVLPhi->addOperand (NextEVLIV);
14461404
14471405 for (VPValue *HeaderMask : collectAllHeaderMasks (Plan)) {
@@ -1468,9 +1426,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
14681426 recursivelyDeleteDeadRecipes (HeaderMask);
14691427 }
14701428 // Replace all uses of VPCanonicalIVPHIRecipe by
1471- // VPEVLBasedIVPHIRecipe except for the canonical IV increment .
1429+ // VPEVLBasedIVPHIRecipe.
14721430 CanonicalIVPHI->replaceAllUsesWith (EVLPhi);
1473- CanonicalIVIncrement->setOperand (0 , CanonicalIVPHI);
14741431 // TODO: support unroll factor > 1.
14751432 Plan.setUF (1 );
14761433 return true ;
@@ -1572,3 +1529,71 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
15721529 }
15731530 }
15741531}
1532+
1533+ void VPlanTransforms::finalizePlan (VPlan &Plan, bool HasNUW,
1534+ bool DataAndControlFlowWithoutRuntimeCheck) {
1535+ auto *CanIV = Plan.getCanonicalIV ();
1536+
1537+ VPBasicBlock *EB = Plan.getVectorLoopRegion ()->getExitingBasicBlock ();
1538+ VPBuilder Builder (EB);
1539+ DebugLoc DL = CanIV->getDebugLoc ();
1540+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
1541+ auto *CanonicalIVIncrement =
1542+ Builder.createOverflowingOp (Instruction::Add, {CanIV, &Plan.getVFxUF ()},
1543+ {HasNUW, false }, DL, " index.next" );
1544+
1545+ CanIV->addOperand (CanonicalIVIncrement);
1546+
1547+ auto FoundLaneMaskPhi = find_if (
1548+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis (),
1549+ [](VPRecipeBase &P) { return isa<VPActiveLaneMaskPHIRecipe>(P); });
1550+
1551+ if (FoundLaneMaskPhi ==
1552+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ().end ()) {
1553+ // Add the BranchOnCount VPInstruction to the latch.
1554+ Builder.createNaryOp (VPInstruction::BranchOnCount,
1555+ {CanonicalIVIncrement, &Plan.getVectorTripCount ()},
1556+ DL);
1557+ return ;
1558+ }
1559+ auto *LaneMaskPhi = cast<VPActiveLaneMaskPHIRecipe>(&*FoundLaneMaskPhi);
1560+ auto *VecPreheader =
1561+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSinglePredecessor ());
1562+ Builder.setInsertPoint (VecPreheader);
1563+
1564+ VPValue *TC = Plan.getTripCount ();
1565+
1566+ // TODO: Check if dropping the flags is needed if
1567+ // !DataAndControlFlowWithoutRuntimeCheck.
1568+ CanonicalIVIncrement->dropPoisonGeneratingFlags ();
1569+ VPValue *TripCount, *IncrementValue;
1570+ if (!DataAndControlFlowWithoutRuntimeCheck) {
1571+ // When the loop is guarded by a runtime overflow check for the loop
1572+ // induction variable increment by VF, we can increment the value before
1573+ // the get.active.lane mask and use the unmodified tripcount.
1574+ IncrementValue = CanonicalIVIncrement;
1575+ TripCount = TC;
1576+ } else {
1577+ // When avoiding a runtime check, the active.lane.mask inside the loop
1578+ // uses a modified trip count and the induction variable increment is
1579+ // done after the active.lane.mask intrinsic is called.
1580+ IncrementValue = CanIV;
1581+ TripCount = Builder.createNaryOp (VPInstruction::CalculateTripCountMinusVF,
1582+ {TC}, DL);
1583+ }
1584+ // Create the active lane mask for the next iteration of the loop before the
1585+ // original terminator.
1586+ Builder.setInsertPoint (EB);
1587+ auto *InLoopIncrement =
1588+ Builder.createOverflowingOp (VPInstruction::CanonicalIVIncrementForPart,
1589+ {IncrementValue}, {false , false }, DL);
1590+ auto *ALM = Builder.createNaryOp (VPInstruction::ActiveLaneMask,
1591+ {InLoopIncrement, TripCount}, DL,
1592+ " active.lane.mask.next" );
1593+ LaneMaskPhi->addOperand (ALM);
1594+
1595+ // Replace the original terminator with BranchOnCond. We have to invert the
1596+ // mask here because a true condition means jumping to the exit block.
1597+ auto *NotMask = Builder.createNot (ALM, DL);
1598+ Builder.createNaryOp (VPInstruction::BranchOnCond, {NotMask}, DL);
1599+ }
0 commit comments