diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 5bcc5e41a0e87..d366e749c7370 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -74,8 +74,15 @@ using CharMatrix = std::vector>; } // end anonymous namespace +// Minimum loop depth supported. +static cl::opt<unsigned int> MinLoopNestDepth( + "loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, + cl::desc("Minimum depth of loop nest considered for the transform")); + // Maximum loop depth supported. -static const unsigned MaxLoopNestDepth = 10; +static cl::opt<unsigned int> MaxLoopNestDepth( + "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, + cl::desc("Maximum depth of loop nest considered for the transform")); #ifndef NDEBUG static void printDepMatrix(CharMatrix &DepMatrix) { @@ -244,10 +251,25 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) { LoopList.push_back(CurrentLoop); } -static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) { +/// Returns true if the number of loops in \p LoopList lies within +/// [MinLoopNestDepth, MaxLoopNestDepth]. Otherwise reports the rejection via +/// LLVM_DEBUG and a missed remark at the list's first loop and returns false. +static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList, + OptimizationRemarkEmitter &ORE) { unsigned LoopNestDepth = LoopList.size(); - if (LoopNestDepth < 2) { - LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); + if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) { + LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth + << ", the supported range is [" << MinLoopNestDepth + << ", " << MaxLoopNestDepth << "].\n"); + Loop *OuterLoop = LoopList.front(); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Unsupported depth of loop nest, the supported range is [" + << std::to_string(MinLoopNestDepth) << ", " + << std::to_string(MaxLoopNestDepth) << "]."; + }); return false; } return true; @@ -435,15 +457,11 @@ struct LoopInterchange { bool 
processLoopList(SmallVectorImpl<Loop *> &LoopList) { bool Changed = false; - // Ensure minimum loop nest depth. - assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth."); + // Ensure proper loop nest depth. + assert(hasSupportedLoopDepth(LoopList, *ORE) && + "Unsupported depth of loop nest."); unsigned LoopNestDepth = LoopList.size(); - if (LoopNestDepth > MaxLoopNestDepth) { - LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than " - << MaxLoopNestDepth << "\n"); - return false; - } if (!isComputableLoopNest(LoopList)) { LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); return false; @@ -1735,14 +1753,15 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1"); return PreservedAnalyses::all(); } + OptimizationRemarkEmitter ORE(&F); - // Ensure minimum depth of the loop nest to do the interchange. - if (!hasMinimumLoopDepth(LoopList)) + // Ensure the depth of the loop nest is within the supported range. + if (!hasSupportedLoopDepth(LoopList, ORE)) return PreservedAnalyses::all(); DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); std::unique_ptr<CacheCost> CC = CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); - OptimizationRemarkEmitter ORE(&F); + if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) return PreservedAnalyses::all(); U.markLoopNestChanged(true); diff --git a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll index 788e1b0157d80..d1cf33acd2831 100644 --- a/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll +++ b/llvm/test/Transforms/LoopInterchange/bail-out-one-loop.ll @@ -15,7 +15,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6 ; CHECK-NOT: Delinearizing ; CHECK-NOT: Strides: ; CHECK-NOT: Terms: -; CHECK: Loop doesn't contain minimum nesting level. +; CHECK: Unsupported depth of loop nest 1, the supported range is [2, 10]. 
define void @foo() { entry: diff --git a/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll new file mode 100644 index 0000000000000..3252d3c0d7069 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/deep-loop-nest.ll @@ -0,0 +1,97 @@ +; Check that a loop nest deeper than the default maximum (10) is rejected with a +; missed remark, and that no such remark appears once the maximum is raised to 12. +; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \ +; RUN: -disable-output 2>&1 | FileCheck %s + +; RUN: opt < %s -passes=loop-interchange -pass-remarks-missed='loop-interchange' \ +; RUN: -loop-interchange-max-loop-nest-depth=12 -disable-output 2>&1 | \ +; RUN: FileCheck --allow-empty -check-prefix=CHECK-MAX %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK: Unsupported depth of loop nest, the supported range is [2, 10]. +; CHECK-MAX-NOT: Unsupported depth of loop nest +define void @big_loop_nest() { +entry: + br label %for1.header + +for1.header: + %j = phi i64 [ 0, %entry ], [ %j.next, %for1.inc ] + br label %for2.header +for2.header: + %k = phi i64 [ 0, %for1.header ], [ %k.next, %for2.inc ] + br label %for3.header +for3.header: + %l = phi i64 [ 0, %for2.header ], [ %l.next, %for3.inc ] + br label %for4.header +for4.header: + %m = phi i64 [ 0, %for3.header ], [ %m.next, %for4.inc ] + br label %for5.header +for5.header: + %n = phi i64 [ 0, %for4.header ], [ %n.next, %for5.inc ] + br label %for6.header +for6.header: + %o = phi i64 [ 0, %for5.header ], [ %o.next, %for6.inc ] + br label %for7.header +for7.header: + %p = phi i64 [ 0, %for6.header ], [ %p.next, %for7.inc ] + br label %for8.header +for8.header: + %q = phi i64 [ 0, %for7.header ], [ %q.next, %for8.inc ] + br label %for9.header +for9.header: + %r = phi i64 [ 0, %for8.header ], [ %r.next, %for9.inc ] + br label %for10.header +for10.header: + %s = phi i64 [ 0, %for9.header ], [ %s.next, %for10.inc ] + br label %for11 +for11: + %t = phi i64 [ %t.next, %for11 ], [ 0, %for10.header ] + %t.next = add nuw nsw i64 %t, 1 + 
%exitcond = icmp eq i64 %t.next, 99 + br i1 %exitcond, label %for1.inc, label %for11 + +for1.inc: + %j.next = add nuw nsw i64 %j, 1 + %exitcond26 = icmp eq i64 %j.next, 99 + br i1 %exitcond26, label %for2.inc, label %for1.header +for2.inc: + %k.next = add nuw nsw i64 %k, 1 + %exitcond27 = icmp eq i64 %k.next, 99 + br i1 %exitcond27, label %for3.inc, label %for2.header +for3.inc: + %l.next = add nuw nsw i64 %l, 1 + %exitcond28 = icmp eq i64 %l.next, 99 + br i1 %exitcond28, label %for4.inc, label %for3.header +for4.inc: + %m.next = add nuw nsw i64 %m, 1 + %exitcond29 = icmp eq i64 %m.next, 99 + br i1 %exitcond29, label %for5.inc, label %for4.header +for5.inc: + %n.next = add nuw nsw i64 %n, 1 + %exitcond30 = icmp eq i64 %n.next, 99 + br i1 %exitcond30, label %for6.inc, label %for5.header +for6.inc: + %o.next = add nuw nsw i64 %o, 1 + %exitcond31 = icmp eq i64 %o.next, 99 + br i1 %exitcond31, label %for7.inc, label %for6.header +for7.inc: + %p.next = add nuw nsw i64 %p, 1 + %exitcond32 = icmp eq i64 %p.next, 99 + br i1 %exitcond32, label %for8.inc, label %for7.header +for8.inc: + %q.next = add nuw nsw i64 %q, 1 + %exitcond33 = icmp eq i64 %q.next, 99 + br i1 %exitcond33, label %for9.inc, label %for8.header +for9.inc: + %r.next = add nuw nsw i64 %r, 1 + %exitcond34 = icmp eq i64 %r.next, 99 + br i1 %exitcond34, label %for10.inc, label %for9.header +for10.inc: + %s.next = add nuw nsw i64 %s, 1 + %exitcond35 = icmp eq i64 %s.next, 99 + br i1 %exitcond35, label %for.end, label %for10.header + +for.end: + ret void +}