@@ -131,7 +131,7 @@ class SelectInstToUnfold {
131
131
explicit operator bool () const { return SI && SIUse; }
132
132
};
133
133
134
- void unfold (DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
134
+ void unfold (DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
135
135
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
136
136
std::vector<BasicBlock *> *NewBBs);
137
137
@@ -142,6 +142,7 @@ class DFAJumpThreading {
142
142
: AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
143
143
144
144
bool run (Function &F);
145
+ bool LoopInfoBroken;
145
146
146
147
private:
147
148
void
@@ -157,7 +158,7 @@ class DFAJumpThreading {
157
158
158
159
std::vector<SelectInstToUnfold> NewSIsToUnfold;
159
160
std::vector<BasicBlock *> NewBBs;
160
- unfold (&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs);
161
+ unfold (&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
161
162
162
163
// Put newly discovered select instructions into the work list.
163
164
for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
@@ -201,7 +202,7 @@ void createBasicBlockAndSinkSelectInst(
201
202
/// created basic blocks into \p NewBBs.
202
203
///
203
204
/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
204
- void unfold (DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
205
+ void unfold (DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
205
206
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
206
207
std::vector<BasicBlock *> *NewBBs) {
207
208
SelectInst *SI = SIToUnfold.getInst ();
@@ -307,6 +308,12 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
307
308
DTU->applyUpdates ({{DominatorTree::Insert, StartBlock, TT},
308
309
{DominatorTree::Insert, StartBlock, FT}});
309
310
311
+ // Preserve LoopInfo: add the new blocks to the loop that contains the select.
312
+ if (Loop *L = LI->getLoopFor (SI->getParent ())) {
313
+ for (BasicBlock *NewBB : *NewBBs)
314
+ L->addBasicBlockToLoop (NewBB, *LI);
315
+ }
316
+
310
317
// The select is now dead.
311
318
assert (SI->use_empty () && " Select must be dead now" );
312
319
SI->eraseFromParent ();
@@ -522,9 +529,10 @@ struct MainSwitch {
522
529
};
523
530
524
531
struct AllSwitchPaths {
525
- AllSwitchPaths (const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE)
526
- : Switch(MSwitch->getInstr ()), SwitchBlock(Switch->getParent ()),
527
- ORE(ORE) {}
532
+ AllSwitchPaths (const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
533
+ LoopInfo *LI)
534
+ : Switch(MSwitch->getInstr ()), SwitchBlock(Switch->getParent ()), ORE(ORE),
535
+ LI(LI) {}
528
536
529
537
std::vector<ThreadingPath> &getThreadingPaths () { return TPaths; }
530
538
unsigned getNumThreadingPaths () { return TPaths.size (); }
@@ -596,6 +604,12 @@ struct AllSwitchPaths {
596
604
597
605
Visited.insert (BB);
598
606
607
+ // Stop once we reach a BB that is not inside any loop, since its successors
608
+ // have no impact on the DFA.
609
+ // TODO: Do we need to stop exploring if BB is in an outer loop of the switch?
610
+ if (!LI->getLoopFor (BB))
611
+ return Res;
612
+
599
613
// Some blocks have multiple edges to the same successor, and this set
600
614
// is used to prevent a duplicate path from being generated
601
615
SmallSet<BasicBlock *, 4 > Successors;
@@ -737,6 +751,7 @@ struct AllSwitchPaths {
737
751
BasicBlock *SwitchBlock;
738
752
OptimizationRemarkEmitter *ORE;
739
753
std::vector<ThreadingPath> TPaths;
754
+ LoopInfo *LI;
740
755
};
741
756
742
757
struct TransformDFA {
@@ -1283,6 +1298,7 @@ bool DFAJumpThreading::run(Function &F) {
1283
1298
1284
1299
SmallVector<AllSwitchPaths, 2 > ThreadableLoops;
1285
1300
bool MadeChanges = false ;
1301
+ LoopInfoBroken = false ;
1286
1302
1287
1303
for (BasicBlock &BB : F) {
1288
1304
auto *SI = dyn_cast<SwitchInst>(BB.getTerminator ());
@@ -1304,7 +1320,7 @@ bool DFAJumpThreading::run(Function &F) {
1304
1320
if (!Switch.getSelectInsts ().empty ())
1305
1321
MadeChanges = true ;
1306
1322
1307
- AllSwitchPaths SwitchPaths (&Switch, ORE);
1323
+ AllSwitchPaths SwitchPaths (&Switch, ORE, LI );
1308
1324
SwitchPaths.run ();
1309
1325
1310
1326
if (SwitchPaths.getNumThreadingPaths () > 0 ) {
@@ -1315,10 +1331,15 @@ bool DFAJumpThreading::run(Function &F) {
1315
1331
// strict requirement but it can cause buggy behavior if there is an
1316
1332
// overlap of blocks in different opportunities. There is a lot of room to
1317
1333
// experiment with catching more opportunities here.
1334
+ // NOTE: To relax this constraint, we must handle LoopInfo invalidation.
1318
1335
break ;
1319
1336
}
1320
1337
}
1321
1338
1339
+ #ifndef NDEBUG // Assertion-enabled builds only: check LoopInfo against the DomTree.
1340
+ LI->verify (*DT);
1341
+ #endif
1342
+
1322
1343
SmallPtrSet<const Value *, 32 > EphValues;
1323
1344
if (ThreadableLoops.size () > 0 )
1324
1345
CodeMetrics::collectEphemeralValues (&F, AC, EphValues);
@@ -1327,6 +1348,7 @@ bool DFAJumpThreading::run(Function &F) {
1327
1348
TransformDFA Transform (&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1328
1349
Transform.run ();
1329
1350
MadeChanges = true ;
1351
+ LoopInfoBroken = true ;
1330
1352
}
1331
1353
1332
1354
#ifdef EXPENSIVE_CHECKS
@@ -1347,11 +1369,13 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
1347
1369
LoopInfo &LI = AM.getResult <LoopAnalysis>(F);
1348
1370
TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
1349
1371
OptimizationRemarkEmitter ORE (&F);
1350
-
1351
- if (!DFAJumpThreading (&AC, &DT, &LI, &TTI, &ORE) .run (F))
1372
+ DFAJumpThreading ThreadImpl (&AC, &DT, &LI, &TTI, &ORE);
1373
+ if (!ThreadImpl .run (F))
1352
1374
return PreservedAnalyses::all ();
1353
1375
1354
1376
PreservedAnalyses PA;
1355
1377
PA.preserve <DominatorTreeAnalysis>();
1378
+ if (!ThreadImpl.LoopInfoBroken )
1379
+ PA.preserve <LoopAnalysis>();
1356
1380
return PA;
1357
1381
}
0 commit comments