@@ -226,8 +226,7 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
226
226
InnerLoopVectorizer *ILV, VPlan *Plan,
227
227
LLVMContext &Ctx)
228
228
: VF(VF), UF(UF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229
- LVer(nullptr ),
230
- TypeAnalysis(Plan->getCanonicalIV ()->getScalarType(), Ctx) {}
229
+ LVer(nullptr ), TypeAnalysis(IntegerType::get(Ctx, 64 ), Ctx) {}
231
230
232
231
Value *VPTransformState::get (VPValue *Def, const VPIteration &Instance) {
233
232
if (Def->isLiveIn ())
@@ -278,8 +277,8 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
278
277
// Place the code for broadcasting invariant variables in the new preheader.
279
278
IRBuilder<>::InsertPointGuard Guard (Builder);
280
279
if (SafeToHoist) {
281
- BasicBlock *LoopVectorPreHeader = CFG. VPBB2IRBB [cast<VPBasicBlock>(
282
- Plan-> getVectorLoopRegion ()-> getSinglePredecessor ())];
280
+ BasicBlock *LoopVectorPreHeader =
281
+ CFG. VPBB2IRBB [cast<VPBasicBlock>(Plan-> getEntry ())];
283
282
if (LoopVectorPreHeader)
284
283
Builder.SetInsertPoint (LoopVectorPreHeader->getTerminator ());
285
284
}
@@ -934,7 +933,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
934
933
935
934
IRBuilder<> Builder (State.CFG .PrevBB ->getTerminator ());
936
935
// FIXME: Model VF * UF computation completely in VPlan.
937
- assert (VFxUF.getNumUsers () && " VFxUF expected to always have users" );
936
+ // assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
938
937
if (VF.getNumUsers ()) {
939
938
Value *RuntimeVF = getRuntimeVF (Builder, TCTy, State.VF );
940
939
VF.setUnderlyingValue (RuntimeVF);
@@ -1005,8 +1004,13 @@ void VPlan::execute(VPTransformState *State) {
1005
1004
// skeleton creation, so we can only create the VPIRBasicBlocks now during
1006
1005
// VPlan execution rather than earlier during VPlan construction.
1007
1006
BasicBlock *MiddleBB = State->CFG .ExitBB ;
1008
- VPBasicBlock *MiddleVPBB =
1009
- cast<VPBasicBlock>(getVectorLoopRegion ()->getSingleSuccessor ());
1007
+ VPBlockBase *Leaf = nullptr ;
1008
+ for (VPBlockBase *VPB : vp_depth_first_shallow (getEntry ()))
1009
+ if (VPB->getNumSuccessors () == 0 ) {
1010
+ Leaf = VPB;
1011
+ break ;
1012
+ }
1013
+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(Leaf->getSinglePredecessor ());
1010
1014
// Find the VPBB for the scalar preheader, relying on the current structure
1011
1015
// when creating the middle block and its successrs: if there's a single
1012
1016
// predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1034,64 +1038,66 @@ void VPlan::execute(VPTransformState *State) {
1034
1038
for (VPBlockBase *Block : vp_depth_first_shallow (Entry))
1035
1039
Block->execute (State);
1036
1040
1037
- VPBasicBlock *LatchVPBB = getVectorLoopRegion ()->getExitingBasicBlock ();
1038
- BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1039
-
1040
- // Fix the latch value of canonical, reduction and first-order recurrences
1041
- // phis in the vector loop.
1042
- VPBasicBlock *Header = getVectorLoopRegion ()->getEntryBasicBlock ();
1043
- for (VPRecipeBase &R : Header->phis ()) {
1044
- // Skip phi-like recipes that generate their backedege values themselves.
1045
- if (isa<VPWidenPHIRecipe>(&R))
1046
- continue ;
1047
-
1048
- if (isa<VPWidenPointerInductionRecipe>(&R) ||
1049
- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1050
- PHINode *Phi = nullptr ;
1051
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1052
- Phi = cast<PHINode>(State->get (R.getVPSingleValue (), 0 ));
1053
- } else {
1054
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1055
- assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1056
- " recipe generating only scalars should have been replaced" );
1057
- auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi, 0 ));
1058
- Phi = cast<PHINode>(GEP->getPointerOperand ());
1059
- }
1060
-
1061
- Phi->setIncomingBlock (1 , VectorLatchBB);
1041
+ if (auto *LoopRegion =
1042
+ dyn_cast<VPRegionBlock>(getEntry ()->getSingleSuccessor ())) {
1043
+ VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock ();
1044
+ BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
1045
+
1046
+ // Fix the latch value of canonical, reduction and first-order recurrences
1047
+ // phis in the vector loop.
1048
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
1049
+ for (VPRecipeBase &R : Header->phis ()) {
1050
+ // Skip phi-like recipes that generate their backedege values themselves.
1051
+ if (isa<VPWidenPHIRecipe>(&R))
1052
+ continue ;
1062
1053
1063
- // Move the last step to the end of the latch block. This ensures
1064
- // consistent placement of all induction updates.
1065
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1066
- Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1067
- continue ;
1068
- }
1054
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
1055
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1056
+ PHINode *Phi = nullptr ;
1057
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1058
+ Phi = cast<PHINode>(State->get (R.getVPSingleValue (), 0 ));
1059
+ } else {
1060
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1061
+ assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
1062
+ " recipe generating only scalars should have been replaced" );
1063
+ auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi, 0 ));
1064
+ Phi = cast<PHINode>(GEP->getPointerOperand ());
1065
+ }
1066
+
1067
+ Phi->setIncomingBlock (1 , VectorLatchBB);
1068
+
1069
+ // Move the last step to the end of the latch block. This ensures
1070
+ // consistent placement of all induction updates.
1071
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
1072
+ Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
1073
+ continue ;
1074
+ }
1069
1075
1070
- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1071
- // For canonical IV, first-order recurrences and in-order reduction phis,
1072
- // only a single part is generated, which provides the last part from the
1073
- // previous iteration. For non-ordered reductions all UF parts are
1074
- // generated.
1075
- bool SinglePartNeeded =
1076
- isa<VPCanonicalIVPHIRecipe>(PhiR) ||
1077
- isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1078
- (isa<VPReductionPHIRecipe>(PhiR) &&
1079
- cast<VPReductionPHIRecipe>(PhiR)->isOrdered ());
1080
- bool NeedsScalar =
1081
- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1082
- (isa<VPReductionPHIRecipe>(PhiR) &&
1083
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1084
- unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF ;
1085
-
1086
- for (unsigned Part = 0 ; Part < LastPartForNewPhi; ++Part) {
1087
- Value *Phi = State->get (PhiR, Part, NeedsScalar);
1088
- Value *Val =
1089
- State->get (PhiR->getBackedgeValue (),
1090
- SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
1091
- cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
1076
+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1077
+ // For canonical IV, first-order recurrences and in-order reduction phis,
1078
+ // only a single part is generated, which provides the last part from the
1079
+ // previous iteration. For non-ordered reductions all UF parts are
1080
+ // generated.
1081
+ bool SinglePartNeeded =
1082
+ isa<VPCanonicalIVPHIRecipe>(PhiR) ||
1083
+ isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1084
+ (isa<VPReductionPHIRecipe>(PhiR) &&
1085
+ cast<VPReductionPHIRecipe>(PhiR)->isOrdered ());
1086
+ bool NeedsScalar =
1087
+ isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1088
+ (isa<VPReductionPHIRecipe>(PhiR) &&
1089
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1090
+ unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF ;
1091
+
1092
+ for (unsigned Part = 0 ; Part < LastPartForNewPhi; ++Part) {
1093
+ Value *Phi = State->get (PhiR, Part, NeedsScalar);
1094
+ Value *Val =
1095
+ State->get (PhiR->getBackedgeValue (),
1096
+ SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
1097
+ cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
1098
+ }
1092
1099
}
1093
1100
}
1094
-
1095
1101
State->CFG .DTU .flush ();
1096
1102
assert (State->CFG .DTU .getDomTree ().verify (
1097
1103
DominatorTree::VerificationLevel::Fast) &&
0 commit comments