@@ -1235,16 +1235,10 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
1235
1235
static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch (
1236
1236
VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
1237
1237
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion ();
1238
- VPBasicBlock *EB = TopRegion->getExitingBasicBlock ();
1239
1238
auto *CanonicalIVPHI = Plan.getCanonicalIV ();
1240
1239
VPValue *StartV = CanonicalIVPHI->getStartValue ();
1241
1240
1242
- auto *CanonicalIVIncrement =
1243
- cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue ());
1244
- // TODO: Check if dropping the flags is needed if
1245
- // !DataAndControlFlowWithoutRuntimeCheck.
1246
- CanonicalIVIncrement->dropPoisonGeneratingFlags ();
1247
- DebugLoc DL = CanonicalIVIncrement->getDebugLoc ();
1241
+ DebugLoc DL = CanonicalIVPHI->getDebugLoc ();
1248
1242
// We can't use StartV directly in the ActiveLaneMask VPInstruction, since
1249
1243
// we have to take unrolling into account. Each part needs to start at
1250
1244
// Part * VF
@@ -1254,21 +1248,6 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
1254
1248
// Create the ActiveLaneMask instruction using the correct start values.
1255
1249
VPValue *TC = Plan.getTripCount ();
1256
1250
1257
- VPValue *TripCount, *IncrementValue;
1258
- if (!DataAndControlFlowWithoutRuntimeCheck) {
1259
- // When the loop is guarded by a runtime overflow check for the loop
1260
- // induction variable increment by VF, we can increment the value before
1261
- // the get.active.lane mask and use the unmodified tripcount.
1262
- IncrementValue = CanonicalIVIncrement;
1263
- TripCount = TC;
1264
- } else {
1265
- // When avoiding a runtime check, the active.lane.mask inside the loop
1266
- // uses a modified trip count and the induction variable increment is
1267
- // done after the active.lane.mask intrinsic is called.
1268
- IncrementValue = CanonicalIVPHI;
1269
- TripCount = Builder.createNaryOp (VPInstruction::CalculateTripCountMinusVF,
1270
- {TC}, DL);
1271
- }
1272
1251
auto *EntryIncrement = Builder.createOverflowingOp (
1273
1252
VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false , false }, DL,
1274
1253
" index.part.next" );
@@ -1282,24 +1261,6 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
1282
1261
// preheader ActiveLaneMask instruction.
1283
1262
auto LaneMaskPhi = new VPActiveLaneMaskPHIRecipe (EntryALM, DebugLoc ());
1284
1263
LaneMaskPhi->insertAfter (CanonicalIVPHI);
1285
-
1286
- // Create the active lane mask for the next iteration of the loop before the
1287
- // original terminator.
1288
- VPRecipeBase *OriginalTerminator = EB->getTerminator ();
1289
- Builder.setInsertPoint (OriginalTerminator);
1290
- auto *InLoopIncrement =
1291
- Builder.createOverflowingOp (VPInstruction::CanonicalIVIncrementForPart,
1292
- {IncrementValue}, {false , false }, DL);
1293
- auto *ALM = Builder.createNaryOp (VPInstruction::ActiveLaneMask,
1294
- {InLoopIncrement, TripCount}, DL,
1295
- " active.lane.mask.next" );
1296
- LaneMaskPhi->addOperand (ALM);
1297
-
1298
- // Replace the original terminator with BranchOnCond. We have to invert the
1299
- // mask here because a true condition means jumping to the exit block.
1300
- auto *NotMask = Builder.createNot (ALM, DL);
1301
- Builder.createNaryOp (VPInstruction::BranchOnCond, {NotMask}, DL);
1302
- OriginalTerminator->eraseFromParent ();
1303
1264
return LaneMaskPhi;
1304
1265
}
1305
1266
@@ -1418,6 +1379,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
1418
1379
return false ;
1419
1380
auto *CanonicalIVPHI = Plan.getCanonicalIV ();
1420
1381
VPValue *StartV = CanonicalIVPHI->getStartValue ();
1382
+ VPBasicBlock *Latch = Plan.getVectorLoopRegion ()->getExitingBasicBlock ();
1421
1383
1422
1384
// Create the ExplicitVectorLengthPhi recipe in the main loop.
1423
1385
auto *EVLPhi = new VPEVLBasedIVPHIRecipe (StartV, DebugLoc ());
@@ -1426,22 +1388,18 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
1426
1388
{EVLPhi, Plan.getTripCount ()});
1427
1389
VPEVL->insertBefore (*Header, Header->getFirstNonPhi ());
1428
1390
1429
- auto *CanonicalIVIncrement =
1430
- cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue ());
1431
1391
VPSingleDefRecipe *OpVPEVL = VPEVL;
1432
1392
if (unsigned IVSize = CanonicalIVPHI->getScalarType ()->getScalarSizeInBits ();
1433
1393
IVSize != 32 ) {
1434
1394
OpVPEVL = new VPScalarCastRecipe (IVSize < 32 ? Instruction::Trunc
1435
1395
: Instruction::ZExt,
1436
1396
OpVPEVL, CanonicalIVPHI->getScalarType ());
1437
- OpVPEVL-> insertBefore (CanonicalIVIncrement );
1397
+ Latch-> appendRecipe (OpVPEVL );
1438
1398
}
1439
1399
auto *NextEVLIV =
1440
- new VPInstruction (Instruction::Add, {OpVPEVL, EVLPhi},
1441
- {CanonicalIVIncrement->hasNoUnsignedWrap (),
1442
- CanonicalIVIncrement->hasNoSignedWrap ()},
1443
- CanonicalIVIncrement->getDebugLoc (), " index.evl.next" );
1444
- NextEVLIV->insertBefore (CanonicalIVIncrement);
1400
+ new VPInstruction (Instruction::Add, {OpVPEVL, EVLPhi}, {false , false },
1401
+ CanonicalIVPHI->getDebugLoc (), " index.evl.next" );
1402
+ Latch->appendRecipe (NextEVLIV);
1445
1403
EVLPhi->addOperand (NextEVLIV);
1446
1404
1447
1405
for (VPValue *HeaderMask : collectAllHeaderMasks (Plan)) {
@@ -1468,9 +1426,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
1468
1426
recursivelyDeleteDeadRecipes (HeaderMask);
1469
1427
}
1470
1428
// Replace all uses of VPCanonicalIVPHIRecipe by
1471
- // VPEVLBasedIVPHIRecipe except for the canonical IV increment .
1429
+ // VPEVLBasedIVPHIRecipe.
1472
1430
CanonicalIVPHI->replaceAllUsesWith (EVLPhi);
1473
- CanonicalIVIncrement->setOperand (0 , CanonicalIVPHI);
1474
1431
// TODO: support unroll factor > 1.
1475
1432
Plan.setUF (1 );
1476
1433
return true ;
@@ -1572,3 +1529,71 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
1572
1529
}
1573
1530
}
1574
1531
}
1532
+
1533
+ void VPlanTransforms::finalizePlan (VPlan &Plan, bool HasNUW,
1534
+ bool DataAndControlFlowWithoutRuntimeCheck) {
1535
+ auto *CanIV = Plan.getCanonicalIV ();
1536
+
1537
+ VPBasicBlock *EB = Plan.getVectorLoopRegion ()->getExitingBasicBlock ();
1538
+ VPBuilder Builder (EB);
1539
+ DebugLoc DL = CanIV->getDebugLoc ();
1540
+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
1541
+ auto *CanonicalIVIncrement =
1542
+ Builder.createOverflowingOp (Instruction::Add, {CanIV, &Plan.getVFxUF ()},
1543
+ {HasNUW, false }, DL, " index.next" );
1544
+
1545
+ CanIV->addOperand (CanonicalIVIncrement);
1546
+
1547
+ auto FoundLaneMaskPhi = find_if (
1548
+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis (),
1549
+ [](VPRecipeBase &P) { return isa<VPActiveLaneMaskPHIRecipe>(P); });
1550
+
1551
+ if (FoundLaneMaskPhi ==
1552
+ Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ().end ()) {
1553
+ // Add the BranchOnCount VPInstruction to the latch.
1554
+ Builder.createNaryOp (VPInstruction::BranchOnCount,
1555
+ {CanonicalIVIncrement, &Plan.getVectorTripCount ()},
1556
+ DL);
1557
+ return ;
1558
+ }
1559
+ auto *LaneMaskPhi = cast<VPActiveLaneMaskPHIRecipe>(&*FoundLaneMaskPhi);
1560
+ auto *VecPreheader =
1561
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSinglePredecessor ());
1562
+ Builder.setInsertPoint (VecPreheader);
1563
+
1564
+ VPValue *TC = Plan.getTripCount ();
1565
+
1566
+ // TODO: Check if dropping the flags is needed if
1567
+ // !DataAndControlFlowWithoutRuntimeCheck.
1568
+ CanonicalIVIncrement->dropPoisonGeneratingFlags ();
1569
+ VPValue *TripCount, *IncrementValue;
1570
+ if (!DataAndControlFlowWithoutRuntimeCheck) {
1571
+ // When the loop is guarded by a runtime overflow check for the loop
1572
+ // induction variable increment by VF, we can increment the value before
1573
+ // the get.active.lane mask and use the unmodified tripcount.
1574
+ IncrementValue = CanonicalIVIncrement;
1575
+ TripCount = TC;
1576
+ } else {
1577
+ // When avoiding a runtime check, the active.lane.mask inside the loop
1578
+ // uses a modified trip count and the induction variable increment is
1579
+ // done after the active.lane.mask intrinsic is called.
1580
+ IncrementValue = CanIV;
1581
+ TripCount = Builder.createNaryOp (VPInstruction::CalculateTripCountMinusVF,
1582
+ {TC}, DL);
1583
+ }
1584
+ // Create the active lane mask for the next iteration of the loop before the
1585
+ // original terminator.
1586
+ Builder.setInsertPoint (EB);
1587
+ auto *InLoopIncrement =
1588
+ Builder.createOverflowingOp (VPInstruction::CanonicalIVIncrementForPart,
1589
+ {IncrementValue}, {false , false }, DL);
1590
+ auto *ALM = Builder.createNaryOp (VPInstruction::ActiveLaneMask,
1591
+ {InLoopIncrement, TripCount}, DL,
1592
+ " active.lane.mask.next" );
1593
+ LaneMaskPhi->addOperand (ALM);
1594
+
1595
+ // Replace the original terminator with BranchOnCond. We have to invert the
1596
+ // mask here because a true condition means jumping to the exit block.
1597
+ auto *NotMask = Builder.createNot (ALM, DL);
1598
+ Builder.createNaryOp (VPInstruction::BranchOnCond, {NotMask}, DL);
1599
+ }
0 commit comments