@@ -67,8 +67,6 @@ static cl::opt<unsigned int> MaxMemInstrCount(
67
67
68
68
namespace {
69
69
70
- using LoopVector = SmallVector<Loop *, 8 >;
71
-
72
70
// TODO: Check if we can use a sparse matrix here.
73
71
using CharMatrix = std::vector<std::vector<char >>;
74
72
@@ -84,6 +82,14 @@ static cl::opt<unsigned int> MaxLoopNestDepth(
84
82
" loop-interchange-max-loop-nest-depth" , cl::init(10 ), cl::Hidden,
85
83
cl::desc(" Maximum depth of loop nest considered for the transform" ));
86
84
85
+ // When set, interchange only the loop nests that request it via loop metadata.
86
+ // TODO: Once this pass is enabled by default, remove this option and use the
87
+ // value of PipelineTuningOptions.
88
+ static cl::opt<bool > OnlyWhenForced (
89
+ " loop-interchange-only-when-forced" , cl::init(false ), cl::ReallyHidden,
90
+ cl::desc(
91
+ " Apply interchanges only when explicitly specified metadata exists" ));
92
+
87
93
#ifndef NDEBUG
88
94
static void printDepMatrix (CharMatrix &DepMatrix) {
89
95
for (auto &Row : DepMatrix) {
@@ -233,7 +239,7 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
233
239
return true ;
234
240
}
235
241
236
- static void populateWorklist (Loop &L, LoopVector &LoopList) {
242
+ static void populateWorklist (Loop &L, SmallVectorImpl<Loop *> &LoopList) {
237
243
LLVM_DEBUG (dbgs () << " Calling populateWorklist on Func: "
238
244
<< L.getHeader ()->getParent ()->getName () << " Loop: %"
239
245
<< L.getHeader ()->getName () << ' \n ' );
@@ -245,7 +251,7 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
245
251
// nested.
246
252
// Discard all loops above it added into Worklist.
247
253
if (Vec->size () != 1 ) {
248
- LoopList = {} ;
254
+ LoopList. clear () ;
249
255
return ;
250
256
}
251
257
@@ -256,27 +262,6 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
256
262
LoopList.push_back (CurrentLoop);
257
263
}
258
264
259
- static bool hasSupportedLoopDepth (SmallVectorImpl<Loop *> &LoopList,
260
- OptimizationRemarkEmitter &ORE) {
261
- unsigned LoopNestDepth = LoopList.size ();
262
- if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
263
- LLVM_DEBUG (dbgs () << " Unsupported depth of loop nest " << LoopNestDepth
264
- << " , the supported range is [" << MinLoopNestDepth
265
- << " , " << MaxLoopNestDepth << " ].\n " );
266
- Loop **OuterLoop = LoopList.begin ();
267
- ORE.emit ([&]() {
268
- return OptimizationRemarkMissed (DEBUG_TYPE, " UnsupportedLoopNestDepth" ,
269
- (*OuterLoop)->getStartLoc (),
270
- (*OuterLoop)->getHeader ())
271
- << " Unsupported depth of loop nest, the supported range is ["
272
- << std::to_string (MinLoopNestDepth) << " , "
273
- << std::to_string (MaxLoopNestDepth) << " ].\n " ;
274
- });
275
- return false ;
276
- }
277
- return true ;
278
- }
279
-
280
265
static bool isComputableLoopNest (ScalarEvolution *SE,
281
266
ArrayRef<Loop *> LoopList) {
282
267
for (Loop *L : LoopList) {
@@ -299,6 +284,26 @@ static bool isComputableLoopNest(ScalarEvolution *SE,
299
284
300
285
namespace {
301
286
287
+ // / LoopInterchangeList manages the list of loops and the range to which the
288
+ // / interchange may be applied.
289
+ struct LoopInterchangeList {
290
+ SmallVector<Loop *, 8 > LoopList;
291
+ unsigned ListBegin = 0 ;
292
+ unsigned ListEnd = 0 ;
293
+
294
+ LoopInterchangeList (LoopNest &LN)
295
+ : LoopList(LN.getLoops()), ListBegin(0 ), ListEnd(LoopList.size()) {}
296
+
297
+ LoopInterchangeList (Loop &L) {
298
+ populateWorklist (L, LoopList);
299
+ ListBegin = 0 ;
300
+ ListEnd = LoopList.size ();
301
+ }
302
+
303
+ void checkMetadata (bool OnlyWhenForced);
304
+ bool hasSupportedLoopDepth (OptimizationRemarkEmitter &ORE);
305
+ };
306
+
302
307
// / LoopInterchangeLegality checks if it is legal to interchange the loop.
303
308
class LoopInterchangeLegality {
304
309
public:
@@ -439,39 +444,38 @@ struct LoopInterchange {
439
444
bool run (Loop *L) {
440
445
if (L->getParentLoop ())
441
446
return false ;
442
- SmallVector<Loop *, 8 > LoopList;
443
- populateWorklist (*L, LoopList);
444
- return processLoopList (LoopList);
447
+ LoopInterchangeList LIL (*L);
448
+ return processLoopList (LIL);
445
449
}
446
450
447
- bool run (LoopNest &LN ) {
448
- SmallVector<Loop *, 8 > LoopList (LN. getLoops ()) ;
451
+ bool run (LoopInterchangeList &LIL ) {
452
+ const auto & LoopList = LIL. LoopList ;
449
453
for (unsigned I = 1 ; I < LoopList.size (); ++I)
450
454
if (LoopList[I]->getParentLoop () != LoopList[I - 1 ])
451
455
return false ;
452
- return processLoopList (LoopList );
456
+ return processLoopList (LIL );
453
457
}
454
458
455
- unsigned selectLoopForInterchange (ArrayRef<Loop *> LoopList ) {
459
+ unsigned selectLoopForInterchange (LoopInterchangeList &LIL ) {
456
460
// TODO: Add a better heuristic to select the loop to be interchanged based
457
461
// on the dependence matrix. Currently we select the innermost loop.
458
- return LoopList. size () - 1 ;
462
+ return LIL. ListEnd - 1 ;
459
463
}
460
464
461
- bool processLoopList (SmallVectorImpl<Loop *> &LoopList ) {
465
+ bool processLoopList (LoopInterchangeList &LIL ) {
462
466
bool Changed = false ;
463
467
464
468
// Ensure proper loop nest depth.
465
- assert (hasSupportedLoopDepth (LoopList, *ORE) &&
469
+ assert (LIL. hasSupportedLoopDepth (*ORE) &&
466
470
" Unsupported depth of loop nest." );
467
471
468
- unsigned LoopNestDepth = LoopList.size ();
472
+ unsigned LoopNestDepth = LIL. LoopList .size ();
469
473
470
474
LLVM_DEBUG (dbgs () << " Processing LoopList of size = " << LoopNestDepth
471
475
<< " \n " );
472
476
473
477
CharMatrix DependencyMatrix;
474
- Loop *OuterMostLoop = *(LoopList.begin ());
478
+ Loop *OuterMostLoop = *(LIL. LoopList .begin ());
475
479
if (!populateDependencyMatrix (DependencyMatrix, LoopNestDepth,
476
480
OuterMostLoop, DI, SE, ORE)) {
477
481
LLVM_DEBUG (dbgs () << " Populating dependency matrix failed\n " );
@@ -488,7 +492,7 @@ struct LoopInterchange {
488
492
return false ;
489
493
}
490
494
491
- unsigned SelecLoopId = selectLoopForInterchange (LoopList );
495
+ unsigned SelectLoopId = selectLoopForInterchange (LIL );
492
496
// Obtain the loop vector returned from loop cache analysis beforehand,
493
497
// and put each <Loop, index> pair into a map for constant time query
494
498
// later. Indices in loop vector represent the optimal order of the
@@ -504,19 +508,20 @@ struct LoopInterchange {
504
508
CostMap[LoopCosts[i].first ] = i;
505
509
}
506
510
}
511
+
507
512
// We try to achieve the globally optimal memory access for the loopnest,
508
513
// and do interchange based on a bubble-sort fashion. We start from
509
514
// the innermost loop, move it outwards to the best possible position
510
515
// and repeat this process.
511
- for (unsigned j = SelecLoopId ; j > 0 ; j--) {
516
+ for (unsigned j = LIL. ListEnd - LIL. ListBegin - 1 ; j > 0 ; j--) {
512
517
bool ChangedPerIter = false ;
513
- for (unsigned i = SelecLoopId ; i > SelecLoopId - j; i--) {
514
- bool Interchanged = processLoop (LoopList[i], LoopList[i - 1 ], i, i - 1 ,
515
- DependencyMatrix, CostMap);
518
+ for (unsigned i = SelectLoopId ; i > SelectLoopId - j; i--) {
519
+ bool Interchanged = processLoop (LIL. LoopList [i], LIL. LoopList [i - 1 ], i,
520
+ i - 1 , DependencyMatrix, CostMap);
516
521
if (!Interchanged)
517
522
continue ;
518
523
// Loops interchanged, update LoopList accordingly.
519
- std::swap (LoopList[i - 1 ], LoopList[i]);
524
+ std::swap (LIL. LoopList [i - 1 ], LIL. LoopList [i]);
520
525
// Update the DependencyMatrix
521
526
interChangeDependencies (DependencyMatrix, i, i - 1 );
522
527
@@ -526,6 +531,7 @@ struct LoopInterchange {
526
531
ChangedPerIter |= Interchanged;
527
532
Changed |= Interchanged;
528
533
}
534
+
529
535
// Early abort if there was no interchange during an entire round of
530
536
// moving loops outwards.
531
537
if (!ChangedPerIter)
@@ -572,6 +578,70 @@ struct LoopInterchange {
572
578
573
579
} // end anonymous namespace
574
580
581
+ bool LoopInterchangeList::hasSupportedLoopDepth (
582
+ OptimizationRemarkEmitter &ORE) {
583
+ unsigned LoopNestDepth = ListEnd - ListBegin;
584
+ if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
585
+ LLVM_DEBUG (dbgs () << " Unsupported depth of loop nest " << LoopNestDepth
586
+ << " , the supported range is [" << MinLoopNestDepth
587
+ << " , " << MaxLoopNestDepth << " ].\n " );
588
+ Loop *OuterLoop = LoopList[ListBegin];
589
+ ORE.emit ([&]() {
590
+ return OptimizationRemarkMissed (DEBUG_TYPE, " UnsupportedLoopNestDepth" ,
591
+ OuterLoop->getStartLoc (),
592
+ OuterLoop->getHeader ())
593
+ << " Unsupported depth of loop nest, the supported range is ["
594
+ << std::to_string (MinLoopNestDepth) << " , "
595
+ << std::to_string (MaxLoopNestDepth) << " ].\n " ;
596
+ });
597
+ return false ;
598
+ }
599
+ return true ;
600
+ }
601
+
602
+ // Check the metadata for interchange. The outermost one is taken into account
603
+ // and nested ones are ignored. The metadata affects the entire loop nest such
604
+ // that the outermost loop is the loop for which the metadata is specified. For
605
+ // example, in the following example, the loop-interchange will be performed
606
+ // only to the outermost two loops, and the second pragma is ignored.
607
+ //
608
+ // for (...)
609
+ // for (...)
610
+ // #pragma clang loop interchange(disable)
611
+ // for (...)
612
+ // #pragma clang loop interchange(enable)
613
+ // for (...)
614
+ // for (...)
615
+ // Stmt
616
+ //
617
+ void LoopInterchangeList::checkMetadata (bool OnlyWhenForced) {
618
+ ListBegin = 0 ;
619
+ ListEnd = LoopList.size ();
620
+
621
+ for (unsigned I = 0 ; I != LoopList.size (); I++) {
622
+ Loop *L = LoopList[I];
623
+ auto Value = findStringMetadataForLoop (L, " llvm.loop.interchange.enable" );
624
+ if (!Value)
625
+ continue ;
626
+
627
+ const MDOperand *Op = *Value;
628
+ assert (Op && mdconst::hasa<ConstantInt>(*Op) && " invalid metadata" );
629
+ bool Enabled = mdconst::extract<ConstantInt>(*Op)->getZExtValue ();
630
+ if (Enabled && OnlyWhenForced) {
631
+ ListBegin = I;
632
+ } else if (!Enabled && !OnlyWhenForced) {
633
+ ListEnd = I;
634
+ } else if (OnlyWhenForced) {
635
+ ListEnd = 0 ;
636
+ }
637
+ break ;
638
+ }
639
+
640
+ LLVM_DEBUG (
641
+ dbgs () << " LoopInterchange will be applied to the range: [from, to]=["
642
+ << ListBegin << " , " << ListEnd - 1 << " ]\n " ;);
643
+ }
644
+
575
645
bool LoopInterchangeLegality::containsUnsafeInstructions (BasicBlock *BB) {
576
646
return any_of (*BB, [](const Instruction &I) {
577
647
return I.mayHaveSideEffects () || I.mayReadFromMemory ();
@@ -1748,7 +1818,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1748
1818
LoopStandardAnalysisResults &AR,
1749
1819
LPMUpdater &U) {
1750
1820
Function &F = *LN.getParent ();
1751
- SmallVector<Loop *, 8 > LoopList (LN. getLoops () );
1821
+ LoopInterchangeList LIL (LN);
1752
1822
1753
1823
if (MaxMemInstrCount < 1 ) {
1754
1824
LLVM_DEBUG (dbgs () << " MaxMemInstrCount should be at least 1" );
@@ -1757,14 +1827,19 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1757
1827
OptimizationRemarkEmitter ORE (&F);
1758
1828
1759
1829
// Ensure minimum depth of the loop nest to do the interchange.
1760
- if (!hasSupportedLoopDepth (LoopList, ORE))
1830
+ if (!LIL. hasSupportedLoopDepth (ORE))
1761
1831
return PreservedAnalyses::all ();
1762
1832
// Ensure computable loop nest.
1763
- if (!isComputableLoopNest (&AR.SE , LoopList)) {
1833
+ if (!isComputableLoopNest (&AR.SE , LIL. LoopList )) {
1764
1834
LLVM_DEBUG (dbgs () << " Not valid loop candidate for interchange\n " );
1765
1835
return PreservedAnalyses::all ();
1766
1836
}
1767
1837
1838
+ LIL.checkMetadata (OnlyWhenForced);
1839
+ // Re-check the depth: the metadata may have narrowed the candidate range.
1840
+ if (!LIL.hasSupportedLoopDepth (ORE))
1841
+ return PreservedAnalyses::all ();
1842
+
1768
1843
ORE.emit ([&]() {
1769
1844
return OptimizationRemarkAnalysis (DEBUG_TYPE, " Dependence" ,
1770
1845
LN.getOutermostLoop ().getStartLoc (),
@@ -1776,7 +1851,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1776
1851
std::unique_ptr<CacheCost> CC =
1777
1852
CacheCost::getCacheCost (LN.getOutermostLoop (), AR, DI);
1778
1853
1779
- if (!LoopInterchange (&AR.SE , &AR.LI , &DI, &AR.DT , CC, &ORE).run (LN ))
1854
+ if (!LoopInterchange (&AR.SE , &AR.LI , &DI, &AR.DT , CC, &ORE).run (LIL ))
1780
1855
return PreservedAnalyses::all ();
1781
1856
U.markLoopNestChanged (true );
1782
1857
return getLoopPassPreservedAnalyses ();
0 commit comments