@@ -170,9 +170,7 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
170
170
}
171
171
172
172
void VPBlockBase::setPlan (VPlan *ParentPlan) {
173
- assert (
174
- (ParentPlan->getEntry () == this || ParentPlan->getPreheader () == this ) &&
175
- " Can only set plan on its entry or preheader block." );
173
+ assert (ParentPlan->getEntry () == this && " Can only set plan on its entry." );
176
174
Plan = ParentPlan;
177
175
}
178
176
@@ -823,16 +821,25 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
823
821
}
824
822
#endif
825
823
824
+ VPlan::VPlan (VPBasicBlock *OriginalPreheader, VPValue *TC,
825
+ VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader)
826
+ : VPlan(OriginalPreheader, TC, ScalarHeader) {
827
+ VPBlockUtils::connectBlocks (OriginalPreheader, EntryVectorPreHeader);
828
+ }
829
+
830
+ VPlan::VPlan (VPBasicBlock *OriginalPreheader,
831
+ VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader)
832
+ : VPlan(OriginalPreheader, ScalarHeader) {
833
+ VPBlockUtils::connectBlocks (OriginalPreheader, EntryVectorPreHeader);
834
+ }
835
+
826
836
VPlan::~VPlan () {
827
837
if (Entry) {
828
838
VPValue DummyValue;
829
839
for (VPBlockBase *Block : vp_depth_first_shallow (Entry))
830
840
Block->dropAllReferences (&DummyValue);
831
841
832
842
VPBlockBase::deleteCFG (Entry);
833
-
834
- Preheader->dropAllReferences (&DummyValue);
835
- delete Preheader;
836
843
}
837
844
for (VPValue *VPV : VPLiveInsToFree)
838
845
delete VPV;
@@ -855,9 +862,16 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
855
862
VPIRBasicBlock *Entry =
856
863
VPIRBasicBlock::fromBasicBlock (TheLoop->getLoopPreheader ());
857
864
VPBasicBlock *VecPreheader = new VPBasicBlock (" vector.ph" );
865
+ // Connect entry only to vector preheader initially. Entry will also be
866
+ // connected to the scalar preheader later, during skeleton creation when
867
+ // runtime guards are added as needed. Note that when executing the VPlan for
868
+ // an epilogue vector loop, the original entry block here will be replaced by
869
+ // a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after
870
+ // generating code for the main vector loop.
871
+ VPBlockUtils::connectBlocks (Entry, VecPreheader);
858
872
VPIRBasicBlock *ScalarHeader =
859
873
VPIRBasicBlock::fromBasicBlock (TheLoop->getHeader ());
860
- auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
874
+ auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader);
861
875
862
876
// Create SCEV and VPValue for the trip count.
863
877
// We use the symbolic max backedge-taken-count, which works also when
@@ -981,15 +995,21 @@ void VPlan::execute(VPTransformState *State) {
981
995
State->CFG .DTU .applyUpdates (
982
996
{{DominatorTree::Delete, VectorPreHeader, State->CFG .ExitBB }});
983
997
984
- // Replace regular VPBB's for the middle and scalar preheader blocks with
985
- // VPIRBasicBlocks wrapping their IR blocks. The IR blocks are created during
986
- // skeleton creation, so we can only create the VPIRBasicBlocks now during
987
- // VPlan execution rather than earlier during VPlan construction.
998
+ // Replace regular VPBB's for the vector preheader, middle and scalar
999
+ // preheader blocks with VPIRBasicBlocks wrapping their IR blocks. The IR
1000
+ // blocks are created during skeleton creation, so we can only create the
1001
+ // VPIRBasicBlocks now during VPlan execution rather than earlier during VPlan
1002
+ // construction.
988
1003
BasicBlock *MiddleBB = State->CFG .ExitBB ;
989
- VPBasicBlock *MiddleVPBB = getMiddleBlock ();
990
1004
BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor ();
1005
+ replaceVPBBWithIRVPBB (getVectorPreheader (), VectorPreHeader);
1006
+ replaceVPBBWithIRVPBB (getMiddleBlock (), MiddleBB);
991
1007
replaceVPBBWithIRVPBB (getScalarPreheader (), ScalarPh);
992
- replaceVPBBWithIRVPBB (MiddleVPBB, MiddleBB);
1008
+
1009
+ LLVM_DEBUG (dbgs () << " Executing best plan with VF=" << State->VF
1010
+ << " , UF=" << getUF () << ' \n ' );
1011
+ setName (" Final VPlan" );
1012
+ LLVM_DEBUG (dump ());
993
1013
994
1014
LLVM_DEBUG (dbgs () << " Executing best plan with VF=" << State->VF
995
1015
<< " , UF=" << getUF () << ' \n ' );
@@ -1062,9 +1082,6 @@ void VPlan::execute(VPTransformState *State) {
1062
1082
}
1063
1083
1064
1084
State->CFG .DTU .flush ();
1065
- assert (State->CFG .DTU .getDomTree ().verify (
1066
- DominatorTree::VerificationLevel::Fast) &&
1067
- " DT not preserved correctly" );
1068
1085
}
1069
1086
1070
1087
InstructionCost VPlan::cost (ElementCount VF, VPCostContext &Ctx) {
@@ -1117,11 +1134,6 @@ void VPlan::print(raw_ostream &O) const {
1117
1134
1118
1135
printLiveIns (O);
1119
1136
1120
- if (!getPreheader ()->empty ()) {
1121
- O << " \n " ;
1122
- getPreheader ()->print (O, " " , SlotTracker);
1123
- }
1124
-
1125
1137
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
1126
1138
RPOT (getEntry ());
1127
1139
for (const VPBlockBase *Block : RPOT) {
@@ -1155,6 +1167,21 @@ std::string VPlan::getName() const {
1155
1167
return Out;
1156
1168
}
1157
1169
1170
+ VPRegionBlock *VPlan::getVectorLoopRegion () {
1171
+ // TODO: Cache if possible.
1172
+ for (VPBlockBase *B : vp_depth_first_shallow (getEntry ()))
1173
+ if (auto *R = dyn_cast<VPRegionBlock>(B))
1174
+ return R;
1175
+ return nullptr ;
1176
+ }
1177
+
1178
+ const VPRegionBlock *VPlan::getVectorLoopRegion () const {
1179
+ for (const VPBlockBase *B : vp_depth_first_shallow (getEntry ()))
1180
+ if (auto *R = dyn_cast<VPRegionBlock>(B))
1181
+ return R;
1182
+ return nullptr ;
1183
+ }
1184
+
1158
1185
LLVM_DUMP_METHOD
1159
1186
void VPlan::printDOT (raw_ostream &O) const {
1160
1187
VPlanPrinter Printer (O, *this );
@@ -1205,7 +1232,6 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1205
1232
1206
1233
VPlan *VPlan::duplicate () {
1207
1234
// Clone blocks.
1208
- VPBasicBlock *NewPreheader = Preheader->clone ();
1209
1235
const auto &[NewEntry, __] = cloneFrom (Entry);
1210
1236
1211
1237
BasicBlock *ScalarHeaderIRBB = getScalarHeader ()->getIRBasicBlock ();
@@ -1215,8 +1241,7 @@ VPlan *VPlan::duplicate() {
1215
1241
return VPIRBB && VPIRBB->getIRBasicBlock () == ScalarHeaderIRBB;
1216
1242
}));
1217
1243
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
1218
- auto *NewPlan =
1219
- new VPlan (NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1244
+ auto *NewPlan = new VPlan (cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1220
1245
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1221
1246
for (VPValue *OldLiveIn : VPLiveInsToFree) {
1222
1247
Old2NewVPValues[OldLiveIn] =
@@ -1236,7 +1261,6 @@ VPlan *VPlan::duplicate() {
1236
1261
// else NewTripCount will be created and inserted into Old2NewVPValues when
1237
1262
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
1238
1263
1239
- remapOperands (Preheader, NewPreheader, Old2NewVPValues);
1240
1264
remapOperands (Entry, NewEntry, Old2NewVPValues);
1241
1265
1242
1266
// Initialize remaining fields of cloned VPlan.
@@ -1288,8 +1312,6 @@ void VPlanPrinter::dump() {
1288
1312
OS << " edge [fontname=Courier, fontsize=30]\n " ;
1289
1313
OS << " compound=true\n " ;
1290
1314
1291
- dumpBlock (Plan.getPreheader ());
1292
-
1293
1315
for (const VPBlockBase *Block : vp_depth_first_shallow (Plan.getEntry ()))
1294
1316
dumpBlock (Block);
1295
1317
@@ -1550,7 +1572,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
1550
1572
assignName (Plan.BackedgeTakenCount );
1551
1573
for (VPValue *LI : Plan.VPLiveInsToFree )
1552
1574
assignName (LI);
1553
- assignNames (Plan.getPreheader ());
1554
1575
1555
1576
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1556
1577
RPOT (VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry ()));
0 commit comments