Skip to content

[LoopInterchange] Constrain LI within supported loop nest depth #118656

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 29 additions & 13 deletions llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,15 @@ using CharMatrix = std::vector<std::vector<char>>;

} // end anonymous namespace

// Smallest loop nest depth the pass will consider. Exposed as the hidden
// command-line option -loop-interchange-min-loop-nest-depth (default 2);
// the depth check rejects any nest shallower than this value.
static cl::opt<unsigned int> MinLoopNestDepth(
"loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden,
cl::desc("Minimum depth of loop nest considered for the transform"));

// Maximum loop depth supported.
static const unsigned MaxLoopNestDepth = 10;
// Largest loop nest depth the pass will consider. Exposed as the hidden
// command-line option -loop-interchange-max-loop-nest-depth (default 10);
// the depth check rejects any nest deeper than this value.
static cl::opt<unsigned int> MaxLoopNestDepth(
"loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
cl::desc("Maximum depth of loop nest considered for the transform"));

#ifndef NDEBUG
static void printDepMatrix(CharMatrix &DepMatrix) {
Expand Down Expand Up @@ -244,10 +251,22 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
LoopList.push_back(CurrentLoop);
}

static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList,
OptimizationRemarkEmitter &ORE) {
unsigned LoopNestDepth = LoopList.size();
if (LoopNestDepth < 2) {
LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer to make MaxLoopNestDepth and MinLoopNestDepth tunable parameters rather than hardcoded numbers, so that we can control them more flexibly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: maybe a small clarification of the message:

"Unsupported loop nest depth of " << LoopNestDepth << ", the supported range is ..."

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

<< ", the supported range is [" << MinLoopNestDepth
<< ", " << MaxLoopNestDepth << "].\n");
Loop **OuterLoop = LoopList.begin();
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth",
(*OuterLoop)->getStartLoc(),
(*OuterLoop)->getHeader())
<< "Unsupported depth of loop nest, the supported range is ["
<< std::to_string(MinLoopNestDepth) << ", "
<< std::to_string(MaxLoopNestDepth) << "].\n";
});
return false;
}
return true;
Expand Down Expand Up @@ -435,15 +454,11 @@ struct LoopInterchange {
bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
bool Changed = false;

// Ensure minimum loop nest depth.
assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth.");
// Ensure proper loop nest depth.
assert(hasSupportedLoopDepth(LoopList, *ORE) &&
"Unsupported depth of loop nest.");

unsigned LoopNestDepth = LoopList.size();
if (LoopNestDepth > MaxLoopNestDepth) {
LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
<< MaxLoopNestDepth << "\n");
return false;
}
if (!isComputableLoopNest(LoopList)) {
LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
return false;
Expand Down Expand Up @@ -1735,14 +1750,15 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
return PreservedAnalyses::all();
}
OptimizationRemarkEmitter ORE(&F);

// Ensure minimum depth of the loop nest to do the interchange.
if (!hasMinimumLoopDepth(LoopList))
if (!hasSupportedLoopDepth(LoopList, ORE))
return PreservedAnalyses::all();
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
std::unique_ptr<CacheCost> CC =
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
OptimizationRemarkEmitter ORE(&F);

if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
U.markLoopNestChanged(true);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6
; CHECK-NOT: Delinearizing
; CHECK-NOT: Strides:
; CHECK-NOT: Terms:
; CHECK: Loop doesn't contain minimum nesting level.
; CHECK: Unsupported depth of loop nest 1, the supported range is [2, 10].

define void @foo() {
entry:
Expand Down
95 changes: 95 additions & 0 deletions llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \
; RUN: -disable-output 2>&1 | FileCheck %s

; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \
; RUN: -loop-interchange-max-loop-nest-depth=12 -disable-output 2>&1 | \
; RUN: FileCheck --allow-empty -check-prefix=CHECK-MAX %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; CHECK: Unsupported depth of loop nest, the supported range is [2, 10].
; CHECK-MAX-NOT: Unsupported depth of loop nest, the supported range is [2, 10].
; Loop nest with eleven loop headers: for1.header .. for10.header plus the
; self-looping block for11. Every counter starts at 0 and its latch leaves the
; loop once the incremented value equals 99. The function has no memory
; accesses; it exists only to exercise the loop-nest-depth limit.
define void @big_loop_nest() {
entry:
  br label %for1.header

; Header blocks: each one only defines its counter phi and falls through to
; the next header.
for1.header:
  %j = phi i64 [ 0, %entry ], [ %j.next, %for1.inc ]
  br label %for2.header
for2.header:
  %k = phi i64 [ 0, %for1.header ], [ %k.next, %for2.inc ]
  br label %for3.header
for3.header:
  %l = phi i64 [ 0, %for2.header ], [ %l.next, %for3.inc ]
  br label %for4.header
for4.header:
  %m = phi i64 [ 0, %for3.header ], [ %m.next, %for4.inc ]
  br label %for5.header
for5.header:
  %n = phi i64 [ 0, %for4.header ], [ %n.next, %for5.inc ]
  br label %for6.header
for6.header:
  %o = phi i64 [ 0, %for5.header ], [ %o.next, %for6.inc ]
  br label %for7.header
for7.header:
  %p = phi i64 [ 0, %for6.header ], [ %p.next, %for7.inc ]
  br label %for8.header
for8.header:
  %q = phi i64 [ 0, %for7.header ], [ %q.next, %for8.inc ]
  br label %for9.header
for9.header:
  %r = phi i64 [ 0, %for8.header ], [ %r.next, %for9.inc ]
  br label %for10.header
for10.header:
  %s = phi i64 [ 0, %for9.header ], [ %s.next, %for10.inc ]
  br label %for11
; Single-block loop: header and latch are the same block.
for11:
  %t = phi i64 [ %t.next, %for11 ], [ 0, %for10.header ]
  %t.next = add nuw nsw i64 %t, 1
  %exitcond = icmp eq i64 %t.next, 99
  br i1 %exitcond, label %for1.inc, label %for11

; Latch blocks: each increments its own counter and tests that same counter
; against 99.
for1.inc:
  %j.next = add nuw nsw i64 %j, 1
  %exitcond26 = icmp eq i64 %j.next, 99
  br i1 %exitcond26, label %for2.inc, label %for1.header
for2.inc:
  %k.next = add nuw nsw i64 %k, 1
  ; Test this latch's own counter (%k.next), matching the other latches.
  %exitcond27 = icmp eq i64 %k.next, 99
  br i1 %exitcond27, label %for3.inc, label %for2.header
for3.inc:
  %l.next = add nuw nsw i64 %l, 1
  %exitcond28 = icmp eq i64 %l.next, 99
  br i1 %exitcond28, label %for4.inc, label %for3.header
for4.inc:
  %m.next = add nuw nsw i64 %m, 1
  %exitcond29 = icmp eq i64 %m.next, 99
  br i1 %exitcond29, label %for5.inc, label %for4.header
for5.inc:
  %n.next = add nuw nsw i64 %n, 1
  %exitcond30 = icmp eq i64 %n.next, 99
  br i1 %exitcond30, label %for6.inc, label %for5.header
for6.inc:
  %o.next = add nuw nsw i64 %o, 1
  %exitcond31 = icmp eq i64 %o.next, 99
  br i1 %exitcond31, label %for7.inc, label %for6.header
for7.inc:
  %p.next = add nuw nsw i64 %p, 1
  %exitcond32 = icmp eq i64 %p.next, 99
  br i1 %exitcond32, label %for8.inc, label %for7.header
for8.inc:
  %q.next = add nuw nsw i64 %q, 1
  %exitcond33 = icmp eq i64 %q.next, 99
  br i1 %exitcond33, label %for9.inc, label %for8.header
for9.inc:
  %r.next = add nuw nsw i64 %r, 1
  ; Test this latch's own counter (%r.next), matching the other latches.
  %exitcond34 = icmp eq i64 %r.next, 99
  br i1 %exitcond34, label %for10.inc, label %for9.header
for10.inc:
  %s.next = add nuw nsw i64 %s, 1
  %exitcond35 = icmp eq i64 %s.next, 99
  br i1 %exitcond35, label %for.end, label %for10.header

for.end:
  ret void
}
Loading