Skip to content

Commit d15f3e8

Browse files
authored
[LoopInterchange] Constrain LI within supported loop nest depth (#118656)
This patch is an extension of #115128. After profiling the LLVM test-suite, I see many loop nests whose depth exceeds `MaxLoopNestDepth`, which is 10. Exiting early for them saves compile time because it avoids computing DependenceInfo and CacheCost. Please see the 'bound-max-depth' branch on compile-time-tracker.
1 parent de209fa commit d15f3e8

File tree

3 files changed

+125
-14
lines changed

3 files changed

+125
-14
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,15 @@ using CharMatrix = std::vector<std::vector<char>>;
7474

7575
} // end anonymous namespace
7676

77+
// Minimum loop depth supported.
78+
static cl::opt<unsigned int> MinLoopNestDepth(
79+
"loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden,
80+
cl::desc("Minimum depth of loop nest considered for the transform"));
81+
7782
// Maximum loop depth supported.
78-
static const unsigned MaxLoopNestDepth = 10;
83+
static cl::opt<unsigned int> MaxLoopNestDepth(
84+
"loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
85+
cl::desc("Maximum depth of loop nest considered for the transform"));
7986

8087
#ifndef NDEBUG
8188
static void printDepMatrix(CharMatrix &DepMatrix) {
@@ -244,10 +251,22 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
244251
LoopList.push_back(CurrentLoop);
245252
}
246253

247-
static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
254+
static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList,
255+
OptimizationRemarkEmitter &ORE) {
248256
unsigned LoopNestDepth = LoopList.size();
249-
if (LoopNestDepth < 2) {
250-
LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
257+
if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
258+
LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
259+
<< ", the supported range is [" << MinLoopNestDepth
260+
<< ", " << MaxLoopNestDepth << "].\n");
261+
Loop **OuterLoop = LoopList.begin();
262+
ORE.emit([&]() {
263+
return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
264+
(*OuterLoop)->getStartLoc(),
265+
(*OuterLoop)->getHeader())
266+
<< "Unsupported depth of loop nest, the supported range is ["
267+
<< std::to_string(MinLoopNestDepth) << ", "
268+
<< std::to_string(MaxLoopNestDepth) << "].\n";
269+
});
251270
return false;
252271
}
253272
return true;
@@ -435,15 +454,11 @@ struct LoopInterchange {
435454
bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
436455
bool Changed = false;
437456

438-
// Ensure minimum loop nest depth.
439-
assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth.");
457+
// Ensure proper loop nest depth.
458+
assert(hasSupportedLoopDepth(LoopList, *ORE) &&
459+
"Unsupported depth of loop nest.");
440460

441461
unsigned LoopNestDepth = LoopList.size();
442-
if (LoopNestDepth > MaxLoopNestDepth) {
443-
LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
444-
<< MaxLoopNestDepth << "\n");
445-
return false;
446-
}
447462
if (!isComputableLoopNest(LoopList)) {
448463
LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
449464
return false;
@@ -1735,14 +1750,15 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17351750
LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
17361751
return PreservedAnalyses::all();
17371752
}
1753+
OptimizationRemarkEmitter ORE(&F);
17381754

17391755
// Ensure minimum depth of the loop nest to do the interchange.
1740-
if (!hasMinimumLoopDepth(LoopList))
1756+
if (!hasSupportedLoopDepth(LoopList, ORE))
17411757
return PreservedAnalyses::all();
17421758
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
17431759
std::unique_ptr<CacheCost> CC =
17441760
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
1745-
OptimizationRemarkEmitter ORE(&F);
1761+
17461762
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
17471763
return PreservedAnalyses::all();
17481764
U.markLoopNestChanged(true);

llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6
1515
; CHECK-NOT: Delinearizing
1616
; CHECK-NOT: Strides:
1717
; CHECK-NOT: Terms:
18-
; CHECK: Loop doesn't contain minimum nesting level.
18+
; CHECK: Unsupported depth of loop nest 1, the supported range is [2, 10].
1919

2020
define void @foo() {
2121
entry:
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \
2+
; RUN: -disable-output 2>&1 | FileCheck %s
3+
4+
; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \
5+
; RUN: -loop-interchange-max-loop-nest-depth=12 -disable-output 2>&1 | \
6+
; RUN: FileCheck --allow-empty -check-prefix=CHECK-MAX %s
7+
8+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9+
10+
; CHECK: Unsupported depth of loop nest, the supported range is [2, 10].
11+
; CHECK-MAX-NOT: Unsupported depth of loop nest, the supported range is [2, 10].
12+
define void @big_loop_nest() {
13+
entry:
14+
br label %for1.header
15+
16+
for1.header:
17+
%j = phi i64 [ 0, %entry ], [ %j.next, %for1.inc ]
18+
br label %for2.header
19+
for2.header:
20+
%k = phi i64 [ 0, %for1.header ], [ %k.next, %for2.inc ]
21+
br label %for3.header
22+
for3.header:
23+
%l = phi i64 [ 0, %for2.header ], [ %l.next, %for3.inc ]
24+
br label %for4.header
25+
for4.header:
26+
%m = phi i64 [ 0, %for3.header ], [ %m.next, %for4.inc ]
27+
br label %for5.header
28+
for5.header:
29+
%n = phi i64 [ 0, %for4.header ], [ %n.next, %for5.inc ]
30+
br label %for6.header
31+
for6.header:
32+
%o = phi i64 [ 0, %for5.header ], [ %o.next, %for6.inc ]
33+
br label %for7.header
34+
for7.header:
35+
%p = phi i64 [ 0, %for6.header ], [ %p.next, %for7.inc ]
36+
br label %for8.header
37+
for8.header:
38+
%q = phi i64 [ 0, %for7.header ], [ %q.next, %for8.inc ]
39+
br label %for9.header
40+
for9.header:
41+
%r = phi i64 [ 0, %for8.header ], [ %r.next, %for9.inc ]
42+
br label %for10.header
43+
for10.header:
44+
%s = phi i64 [ 0, %for9.header ], [ %s.next, %for10.inc ]
45+
br label %for11
46+
for11:
47+
%t = phi i64 [ %t.next, %for11 ], [ 0, %for10.header ]
48+
%t.next = add nuw nsw i64 %t, 1
49+
%exitcond = icmp eq i64 %t.next, 99
50+
br i1 %exitcond, label %for1.inc, label %for11
51+
52+
for1.inc:
53+
%j.next = add nuw nsw i64 %j, 1
54+
%exitcond26 = icmp eq i64 %j.next, 99
55+
br i1 %exitcond26, label %for2.inc, label %for1.header
56+
for2.inc:
57+
%k.next = add nuw nsw i64 %k, 1
58+
%exitcond27 = icmp eq i64 %j.next, 99
59+
br i1 %exitcond27, label %for3.inc, label %for2.header
60+
for3.inc:
61+
%l.next = add nuw nsw i64 %l, 1
62+
%exitcond28 = icmp eq i64 %l.next, 99
63+
br i1 %exitcond28, label %for4.inc, label %for3.header
64+
for4.inc:
65+
%m.next = add nuw nsw i64 %m, 1
66+
%exitcond29 = icmp eq i64 %m.next, 99
67+
br i1 %exitcond29, label %for5.inc, label %for4.header
68+
for5.inc:
69+
%n.next = add nuw nsw i64 %n, 1
70+
%exitcond30 = icmp eq i64 %n.next, 99
71+
br i1 %exitcond30, label %for6.inc, label %for5.header
72+
for6.inc:
73+
%o.next = add nuw nsw i64 %o, 1
74+
%exitcond31 = icmp eq i64 %o.next, 99
75+
br i1 %exitcond31, label %for7.inc, label %for6.header
76+
for7.inc:
77+
%p.next = add nuw nsw i64 %p, 1
78+
%exitcond32 = icmp eq i64 %p.next, 99
79+
br i1 %exitcond32, label %for8.inc, label %for7.header
80+
for8.inc:
81+
%q.next = add nuw nsw i64 %q, 1
82+
%exitcond33 = icmp eq i64 %q.next, 99
83+
br i1 %exitcond33, label %for9.inc, label %for8.header
84+
for9.inc:
85+
%r.next = add nuw nsw i64 %r, 1
86+
%exitcond34 = icmp eq i64 %q.next, 99
87+
br i1 %exitcond34, label %for10.inc, label %for9.header
88+
for10.inc:
89+
%s.next = add nuw nsw i64 %s, 1
90+
%exitcond35 = icmp eq i64 %s.next, 99
91+
br i1 %exitcond35, label %for.end, label %for10.header
92+
93+
for.end:
94+
ret void
95+
}

0 commit comments

Comments
 (0)