Skip to content

Commit 2236c18

Browse files
committed
[LoopInterchange] Add metadata to control loop-interchange
This patch adds metadata to enable or disable loop interchange for a loop nest. It is a prelude to introducing a new pragma directive for loop interchange, as other loop optimizations (unroll, vectorize, distribute, etc.) already have.
1 parent 8a0914c commit 2236c18

File tree

2 files changed

+447
-47
lines changed

2 files changed

+447
-47
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 122 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ static cl::opt<unsigned int> MaxMemInstrCount(
6767

6868
namespace {
6969

70-
using LoopVector = SmallVector<Loop *, 8>;
71-
7270
// TODO: Check if we can use a sparse matrix here.
7371
using CharMatrix = std::vector<std::vector<char>>;
7472

@@ -84,6 +82,14 @@ static cl::opt<unsigned int> MaxLoopNestDepth(
8482
"loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
8583
cl::desc("Maximum depth of loop nest considered for the transform"));
8684

85+
// Whether interchange is restricted to loops that carry explicit metadata.
86+
// TODO: Once this pass is enabled by default, remove this option and use the
87+
// value of PipelineTuningOptions.
88+
static cl::opt<bool> OnlyWhenForced(
89+
"loop-interchange-only-when-forced", cl::init(false), cl::ReallyHidden,
90+
cl::desc(
91+
"Apply interchanges only when explicitly specified metadata exists"));
92+
8793
#ifndef NDEBUG
8894
static void printDepMatrix(CharMatrix &DepMatrix) {
8995
for (auto &Row : DepMatrix) {
@@ -233,7 +239,7 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
233239
return true;
234240
}
235241

236-
static void populateWorklist(Loop &L, LoopVector &LoopList) {
242+
static void populateWorklist(Loop &L, SmallVectorImpl<Loop *> &LoopList) {
237243
LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
238244
<< L.getHeader()->getParent()->getName() << " Loop: %"
239245
<< L.getHeader()->getName() << '\n');
@@ -245,7 +251,7 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
245251
// nested.
246252
// Discard all loops above it added into Worklist.
247253
if (Vec->size() != 1) {
248-
LoopList = {};
254+
LoopList.clear();
249255
return;
250256
}
251257

@@ -256,27 +262,6 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
256262
LoopList.push_back(CurrentLoop);
257263
}
258264

259-
static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList,
260-
OptimizationRemarkEmitter &ORE) {
261-
unsigned LoopNestDepth = LoopList.size();
262-
if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
263-
LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
264-
<< ", the supported range is [" << MinLoopNestDepth
265-
<< ", " << MaxLoopNestDepth << "].\n");
266-
Loop **OuterLoop = LoopList.begin();
267-
ORE.emit([&]() {
268-
return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
269-
(*OuterLoop)->getStartLoc(),
270-
(*OuterLoop)->getHeader())
271-
<< "Unsupported depth of loop nest, the supported range is ["
272-
<< std::to_string(MinLoopNestDepth) << ", "
273-
<< std::to_string(MaxLoopNestDepth) << "].\n";
274-
});
275-
return false;
276-
}
277-
return true;
278-
}
279-
280265
static bool isComputableLoopNest(ScalarEvolution *SE,
281266
ArrayRef<Loop *> LoopList) {
282267
for (Loop *L : LoopList) {
@@ -299,6 +284,26 @@ static bool isComputableLoopNest(ScalarEvolution *SE,
299284

300285
namespace {
301286

287+
/// LoopInterchangeList manages the list of loops and the range to which the
288+
/// interchange may be applied.
289+
struct LoopInterchangeList {
290+
SmallVector<Loop *, 8> LoopList;
291+
unsigned ListBegin = 0;
292+
unsigned ListEnd = 0;
293+
294+
LoopInterchangeList(LoopNest &LN)
295+
: LoopList(LN.getLoops()), ListBegin(0), ListEnd(LoopList.size()) {}
296+
297+
LoopInterchangeList(Loop &L) {
298+
populateWorklist(L, LoopList);
299+
ListBegin = 0;
300+
ListEnd = LoopList.size();
301+
}
302+
303+
void checkMetadata(bool OnlyWhenForced);
304+
bool hasSupportedLoopDepth(OptimizationRemarkEmitter &ORE);
305+
};
306+
302307
/// LoopInterchangeLegality checks if it is legal to interchange the loop.
303308
class LoopInterchangeLegality {
304309
public:
@@ -439,39 +444,38 @@ struct LoopInterchange {
439444
bool run(Loop *L) {
440445
if (L->getParentLoop())
441446
return false;
442-
SmallVector<Loop *, 8> LoopList;
443-
populateWorklist(*L, LoopList);
444-
return processLoopList(LoopList);
447+
LoopInterchangeList LIL(*L);
448+
return processLoopList(LIL);
445449
}
446450

447-
bool run(LoopNest &LN) {
448-
SmallVector<Loop *, 8> LoopList(LN.getLoops());
451+
bool run(LoopInterchangeList &LIL) {
452+
const auto &LoopList = LIL.LoopList;
449453
for (unsigned I = 1; I < LoopList.size(); ++I)
450454
if (LoopList[I]->getParentLoop() != LoopList[I - 1])
451455
return false;
452-
return processLoopList(LoopList);
456+
return processLoopList(LIL);
453457
}
454458

455-
unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
459+
unsigned selectLoopForInterchange(LoopInterchangeList &LIL) {
456460
// TODO: Add a better heuristic to select the loop to be interchanged based
457461
// on the dependence matrix. Currently we select the innermost loop.
458-
return LoopList.size() - 1;
462+
return LIL.ListEnd - 1;
459463
}
460464

461-
bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
465+
bool processLoopList(LoopInterchangeList &LIL) {
462466
bool Changed = false;
463467

464468
// Ensure proper loop nest depth.
465-
assert(hasSupportedLoopDepth(LoopList, *ORE) &&
469+
assert(LIL.hasSupportedLoopDepth(*ORE) &&
466470
"Unsupported depth of loop nest.");
467471

468-
unsigned LoopNestDepth = LoopList.size();
472+
unsigned LoopNestDepth = LIL.LoopList.size();
469473

470474
LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
471475
<< "\n");
472476

473477
CharMatrix DependencyMatrix;
474-
Loop *OuterMostLoop = *(LoopList.begin());
478+
Loop *OuterMostLoop = *(LIL.LoopList.begin());
475479
if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
476480
OuterMostLoop, DI, SE, ORE)) {
477481
LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
@@ -488,7 +492,7 @@ struct LoopInterchange {
488492
return false;
489493
}
490494

491-
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
495+
unsigned SelectLoopId = selectLoopForInterchange(LIL);
492496
// Obtain the loop vector returned from loop cache analysis beforehand,
493497
// and put each <Loop, index> pair into a map for constant time query
494498
// later. Indices in loop vector represent the optimal order of the
@@ -504,19 +508,20 @@ struct LoopInterchange {
504508
CostMap[LoopCosts[i].first] = i;
505509
}
506510
}
511+
507512
// We try to achieve the globally optimal memory access for the loopnest,
508513
// and do interchange in a bubble-sort fashion. We start from
509514
// the innermost loop, move it outwards to the best possible position
510515
// and repeat this process.
511-
for (unsigned j = SelecLoopId; j > 0; j--) {
516+
for (unsigned j = LIL.ListEnd - LIL.ListBegin - 1; j > 0; j--) {
512517
bool ChangedPerIter = false;
513-
for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
514-
bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
515-
DependencyMatrix, CostMap);
518+
for (unsigned i = SelectLoopId; i > SelectLoopId - j; i--) {
519+
bool Interchanged = processLoop(LIL.LoopList[i], LIL.LoopList[i - 1], i,
520+
i - 1, DependencyMatrix, CostMap);
516521
if (!Interchanged)
517522
continue;
518523
// Loops interchanged, update LoopList accordingly.
519-
std::swap(LoopList[i - 1], LoopList[i]);
524+
std::swap(LIL.LoopList[i - 1], LIL.LoopList[i]);
520525
// Update the DependencyMatrix
521526
interChangeDependencies(DependencyMatrix, i, i - 1);
522527

@@ -526,6 +531,7 @@ struct LoopInterchange {
526531
ChangedPerIter |= Interchanged;
527532
Changed |= Interchanged;
528533
}
534+
529535
// Early abort if there was no interchange during an entire round of
530536
// moving loops outwards.
531537
if (!ChangedPerIter)
@@ -572,6 +578,70 @@ struct LoopInterchange {
572578

573579
} // end anonymous namespace
574580

581+
/// Return true if the number of loops in [ListBegin, ListEnd) lies within
/// [MinLoopNestDepth, MaxLoopNestDepth]. Otherwise print the reason to the
/// debug stream, emit a missed-optimization remark through \p ORE when there
/// is a loop to attach it to, and return false.
bool LoopInterchangeList::hasSupportedLoopDepth(
    OptimizationRemarkEmitter &ORE) {
  unsigned LoopNestDepth = ListEnd - ListBegin;
  if (LoopNestDepth >= MinLoopNestDepth && LoopNestDepth <= MaxLoopNestDepth)
    return true;

  LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
                    << ", the supported range is [" << MinLoopNestDepth
                    << ", " << MaxLoopNestDepth << "].\n");

  // The remark is anchored on the outermost loop of the range. LoopList can
  // be empty (populateWorklist clears it when it gives up on a nest), and in
  // that case indexing it would be out of bounds, so skip the remark.
  if (ListBegin >= LoopList.size())
    return false;

  Loop *OuterLoop = LoopList[ListBegin];
  ORE.emit([&]() {
    return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
                                    OuterLoop->getStartLoc(),
                                    OuterLoop->getHeader())
           << "Unsupported depth of loop nest, the supported range is ["
           << std::to_string(MinLoopNestDepth) << ", "
           << std::to_string(MaxLoopNestDepth) << "].\n";
  });
  return false;
}
601+
602+
// Check the metadata for interchange. The outermost one is taken into account
603+
// and nested ones are ignored. The metadata affects the entire loop nest such
604+
// that the outermost loop is the loop for which the metadata is specified. For
605+
// example, in the following example, the loop-interchange will be performed
606+
// only to the outermost two loops, and the second pragma is ignored.
607+
//
608+
// for (...)
609+
// for (...)
610+
// #pragma clang loop interchange(disable)
611+
// for (...)
612+
// #pragma clang loop interchange(enable)
613+
// for (...)
614+
// for (...)
615+
// Stmt
616+
//
617+
void LoopInterchangeList::checkMetadata(bool OnlyWhenForced) {
618+
ListBegin = 0;
619+
ListEnd = LoopList.size();
620+
621+
for (unsigned I = 0; I != LoopList.size(); I++) {
622+
Loop *L = LoopList[I];
623+
auto Value = findStringMetadataForLoop(L, "llvm.loop.interchange.enable");
624+
if (!Value)
625+
continue;
626+
627+
const MDOperand *Op = *Value;
628+
assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
629+
bool Enabled = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
630+
if (Enabled && OnlyWhenForced) {
631+
ListBegin = I;
632+
} else if (!Enabled && !OnlyWhenForced) {
633+
ListEnd = I;
634+
} else if (OnlyWhenForced) {
635+
ListEnd = 0;
636+
}
637+
break;
638+
}
639+
640+
LLVM_DEBUG(
641+
dbgs() << "LoopInterchange will be applied to the range: [from, to]=["
642+
<< ListBegin << ", " << ListEnd - 1 << "]\n";);
643+
}
644+
575645
bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
576646
return any_of(*BB, [](const Instruction &I) {
577647
return I.mayHaveSideEffects() || I.mayReadFromMemory();
@@ -1748,7 +1818,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17481818
LoopStandardAnalysisResults &AR,
17491819
LPMUpdater &U) {
17501820
Function &F = *LN.getParent();
1751-
SmallVector<Loop *, 8> LoopList(LN.getLoops());
1821+
LoopInterchangeList LIL(LN);
17521822

17531823
if (MaxMemInstrCount < 1) {
17541824
LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
@@ -1757,14 +1827,19 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17571827
OptimizationRemarkEmitter ORE(&F);
17581828

17591829
// Ensure minimum depth of the loop nest to do the interchange.
1760-
if (!hasSupportedLoopDepth(LoopList, ORE))
1830+
if (!LIL.hasSupportedLoopDepth(ORE))
17611831
return PreservedAnalyses::all();
17621832
// Ensure computable loop nest.
1763-
if (!isComputableLoopNest(&AR.SE, LoopList)) {
1833+
if (!isComputableLoopNest(&AR.SE, LIL.LoopList)) {
17641834
LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
17651835
return PreservedAnalyses::all();
17661836
}
17671837

1838+
LIL.checkMetadata(OnlyWhenForced);
1839+
// Ensure the depth again.
1840+
if (!LIL.hasSupportedLoopDepth(ORE))
1841+
return PreservedAnalyses::all();
1842+
17681843
ORE.emit([&]() {
17691844
return OptimizationRemarkAnalysis(DEBUG_TYPE, "Dependence",
17701845
LN.getOutermostLoop().getStartLoc(),
@@ -1776,7 +1851,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17761851
std::unique_ptr<CacheCost> CC =
17771852
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
17781853

1779-
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1854+
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LIL))
17801855
return PreservedAnalyses::all();
17811856
U.markLoopNestChanged(true);
17821857
return getLoopPassPreservedAnalyses();

0 commit comments

Comments
 (0)