diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 8f4c0c88336ac..9768ae8d0904c 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -307,6 +307,7 @@ TransformationMode hasUnrollAndJamTransformation(const Loop *L); TransformationMode hasVectorizeTransformation(const Loop *L); TransformationMode hasDistributeTransformation(const Loop *L); TransformationMode hasLICMVersioningTransformation(const Loop *L); +TransformationMode hasInterchangeTransformation(const Loop *L); /// @} /// Set input string into loop metadata by keeping other values intact. diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 967be109a7ba6..6957e5bf899e1 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -51,6 +51,18 @@ using namespace llvm; #define DEBUG_TYPE "loop-interchange" +/// @{ +/// Metadata attribute names +static const char *const LLVMLoopInterchangeFollowupAll = + "llvm.loop.interchange.followup_all"; +static const char *const LLVMLoopInterchangeFollowupNextOuter = + "llvm.loop.interchange.followup_next_outer"; +static const char *const LLVMLoopInterchangeFollowupOuter = + "llvm.loop.interchange.followup_outer"; +static const char *const LLVMLoopInterchangeFollowupInner = + "llvm.loop.interchange.followup_inner"; +/// @} + STATISTIC(LoopsInterchanged, "Number of loops interchanged"); static cl::opt LoopInterchangeCostThreshold( @@ -65,6 +77,14 @@ static cl::opt MaxMemInstrCount( "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time")); +// Whether to apply by default. +// TODO: Once this pass is enabled by default, remove this option and use the +// value of PipelineTuningOptions. 
+static cl::opt OnlyWhenForced( + "loop-interchange-only-when-forced", cl::init(false), cl::ReallyHidden, + cl::desc( + "Apply interchanges only when explicitly specified metadata exists")); + namespace { using LoopVector = SmallVector; @@ -297,6 +317,16 @@ static bool isComputableLoopNest(ScalarEvolution *SE, return true; } +static std::optional findMetadata(Loop *L) { + auto Value = findStringMetadataForLoop(L, "llvm.loop.interchange.enable"); + if (!Value) + return std::nullopt; + + const MDOperand *Op = *Value; + assert(Op && mdconst::hasa(*Op) && "invalid metadata"); + return mdconst::extract(*Op)->getZExtValue(); +} + namespace { /// LoopInterchangeLegality checks if it is legal to interchange the loop. @@ -504,6 +534,12 @@ struct LoopInterchange { CostMap[LoopCosts[i].first] = i; } } + + // If OnlyWhenForced is true, only process loops for which interchange is + // explicitly enabled. + if (OnlyWhenForced) + return processEnabledLoop(LoopList, DependencyMatrix, CostMap); + // We try to achieve the globally optimal memory access for the loopnest, // and do interchange based on a bubble-sort fasion. We start from // the innermost loop, move it outwards to the best possible position @@ -530,19 +566,30 @@ struct LoopInterchange { const DenseMap &CostMap) { Loop *OuterLoop = LoopList[OuterLoopId]; Loop *InnerLoop = LoopList[InnerLoopId]; + MDNode *LoopID = OuterLoop->getLoopID(); LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); + std::optional OuterLoopEnabled = findMetadata(OuterLoop); + std::optional InnerLoopEnabled = findMetadata(InnerLoop); + if (OuterLoopEnabled == false || InnerLoopEnabled == false) { + LLVM_DEBUG(dbgs() << "Not interchanging loops. It is disabled.\n"); + return false; + } LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { LLVM_DEBUG(dbgs() << "Not interchanging loops. 
Cannot prove legality.\n"); return false; } LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); - LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); - if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, - DependencyMatrix, CostMap, CC)) { - LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); - return false; + + // If the interchange is explicitly enabled, skip the profitability check. + if (OuterLoopEnabled != true) { + LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); + if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, + DependencyMatrix, CostMap, CC)) { + LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); + return false; + } } ORE->emit([&]() { @@ -567,8 +614,161 @@ struct LoopInterchange { LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n"; printDepMatrix(DependencyMatrix)); + // The next outer loop, or nullptr if TargetLoop is the outermost one. + Loop *NextOuterLoop = nullptr; + if (0 < OuterLoopId) + NextOuterLoop = LoopList[OuterLoopId - 1]; + + // Update the metadata. 
+ std::optional MDNextOuterLoopID = + makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll, + LLVMLoopInterchangeFollowupNextOuter}); + std::optional MDOuterLoopID = + makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll, + LLVMLoopInterchangeFollowupOuter}); + std::optional MDInnerLoopID = + makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll, + LLVMLoopInterchangeFollowupInner}); + if (MDNextOuterLoopID) { + if (NextOuterLoop) { + NextOuterLoop->setLoopID(*MDNextOuterLoopID); + } else { + LLVM_DEBUG( + dbgs() << "New metadata for the next outer loop is ignored.\n"); + } + } + if (MDOuterLoopID) + OuterLoop->setLoopID(*MDOuterLoopID); + if (MDInnerLoopID) + InnerLoop->setLoopID(*MDInnerLoopID); + return true; } + + bool processEnabledLoop(SmallVectorImpl &LoopList, + std::vector> &DependencyMatrix, + const DenseMap &CostMap) { + bool Changed = false; + + // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L. + DenseMap Loop2Index; + for (unsigned I = 0; I != LoopList.size(); I++) + Loop2Index[LoopList[I]] = I; + + // Hold outer loops to be exchanged (i.e., loops that have + // "llvm.loop.interchange.enable" is true), in the current nest order. + SmallVector Worklist; + + // Helper funciton to try to add a new loop into the Worklist. Return false + // if there is a duplicate in the loop to be interchanged. + auto AddLoopIfEnabled = [&](Loop *L) { + if (findMetadata(L) == true) { + if (!Worklist.empty()) { + // Because the loops are sorted in the order of the current nest, it + // is sufficient to compare with the last element. 
+ unsigned InnerLoopId = Loop2Index[Worklist.back()] + 1; + unsigned OuterLoopId = Loop2Index[L]; + if (OuterLoopId <= InnerLoopId) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "AmbiguousOrder", + L->getStartLoc(), L->getHeader()) + << "The loops to be interchanged are overlapping."; + }); + return false; + } + } + Worklist.push_back(L); + } + return true; + }; + + // Initialize Worklist. To process the loops in inner-loop-first order, add + // them to the worklist in the outer-loop-first order. + for (unsigned I = 0; I != LoopList.size(); I++) + if (!AddLoopIfEnabled(LoopList[I])) + return Changed; + + // The number of attempts of exchanges. Used for debug build. + [[maybe_unused]] unsigned Attempts = 0; + + // Process the loops. An exchange is applied to two loops, but a metadata + // replacement can be applied to three loops: the two loops plus the next + // outer loop, if it exists. This is because it's necessary to express the + // information about the order of the application of interchanges in cases + // where the target loops to be exchanged are overlapping, e.g., + // + // #pragma clang loop interchange(enable) + // for(int i=0;i= LoopList.size()) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "InnermostLoop", + TargetLoop->getStartLoc(), + TargetLoop->getHeader()) + << "The metadata is invalid with an innermost loop."; + }); + break; + } + bool Interchanged = processLoop(LoopList, InnerLoopId, OuterLoopId, + DependencyMatrix, CostMap); + if (!Interchanged) { + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInterchanged", + TargetLoop->getStartLoc(), + TargetLoop->getHeader()) + << "Failed to perform explicitly specified loop interchange."; + }); + break; + } + + // The next outer loop, or nullptr if TargetLoop is the outermost one. 
+ Loop *NextOuterLoop = nullptr; + if (0 < OuterLoopId) + NextOuterLoop = LoopList[OuterLoopId - 1]; + Loop *OuterLoop = LoopList[OuterLoopId]; + Loop *InnerLoop = LoopList[InnerLoopId]; + Changed = true; + Loop2Index[OuterLoop] = OuterLoopId; + Loop2Index[InnerLoop] = InnerLoopId; + + // Add new elements, paying attention to the order. + bool Valid = true; + if (NextOuterLoop) + Valid &= AddLoopIfEnabled(NextOuterLoop); + Valid &= AddLoopIfEnabled(OuterLoop); + Valid &= AddLoopIfEnabled(InnerLoop); + if (!Valid) + break; + + // Check that the number of attempts of interchanges hasn't exceeded the + // upper limit. It would lead an infinite loops. + LLVM_DEBUG({ + // There is no deep meaning behind the current value (square of the size + // of LoopList). + unsigned MaxAttemptsCount = LoopList.size() * LoopList.size(); + Attempts++; + assert(Attempts <= MaxAttemptsCount && + "The number of attempts of interchanges exceeded the limit. An " + "infinite loop may have occured because the metadata was not " + "properly deleted after each exchange."); + }); + } + + return Changed; + } }; } // end anonymous namespace diff --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp index e53019768e881..a6854bb6b0f3a 100644 --- a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp +++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp @@ -80,6 +80,17 @@ static void warnAboutLeftoverTransformations(Loop *L, "requested transformation; the transformation might be disabled or " "specified as part of an unsupported transformation ordering"); } + + if (hasInterchangeTransformation(L) == TM_ForcedByUser) { + LLVM_DEBUG(dbgs() << "Leftover interchange transformation\n"); + ORE->emit( + DiagnosticInfoOptimizationFailure(DEBUG_TYPE, + "FailedRequestedInterchange", + L->getStartLoc(), L->getHeader()) + << "loop not interchanged: the optimizer was unable to perform the " + "requested transformation; the transformation might be 
disabled or " + "specified as part of an unsupported transformation ordering"); + } } static void warnAboutLeftoverTransformations(Function *F, LoopInfo *LI, diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 42c70d2c163b5..297ccd39506d0 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -444,6 +444,16 @@ TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) { return TM_Unspecified; } +TransformationMode llvm::hasInterchangeTransformation(const Loop *L) { + if (getBooleanLoopAttribute(L, "llvm.loop.interchange.enable")) + return TM_ForcedByUser; + + if (hasDisableAllTransformsHint(L)) + return TM_Disable; + + return TM_Unspecified; +} + /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of basic blocks includes the starting point. SmallVector llvm::collectChildrenInLoop(DominatorTree *DT, diff --git a/llvm/test/Transforms/LoopInterchange/metadata-disable.ll b/llvm/test/Transforms/LoopInterchange/metadata-disable.ll new file mode 100644 index 0000000000000..af7af8892cb35 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/metadata-disable.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=0 --cache-line-size=64 -S < %s | FileCheck %s + +; Check that the interchange is not applied to the loop that is disabled by +; metadata. The original code is as below: +; +; for (int i=0; i<128; i++) +; for (int j=0; j<128; j++) +; #pragma clang loop interchange(disable) +; for (int k=0; k<128; k++) +; for (int l=0; l<128; l++) +; a[l][k][j][i]++; +; +; Since interchanges are not applied to the k-loop, the pair (i, j) is the +; only candidate for exchange. 
+ +@a = dso_local local_unnamed_addr global [128 x [128 x [128 x [128 x i32]]]] zeroinitializer, align 4 + +define void @f() { +; CHECK-LABEL: define void @f() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[FOR_J_HEADER_PREHEADER:.*]] +; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]: +; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]] +; CHECK: [[FOR_I_HEADER]]: +; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[IV_I_NEXT:%.*]], %[[FOR_I_CLEANUP:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_K_HEADER:.*]] +; CHECK: [[FOR_J_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_J_HEADER:.*]] +; CHECK: [[FOR_J_HEADER]]: +; CHECK-NEXT: [[IV_J:%.*]] = phi i64 [ [[IV_J_NEXT:%.*]], %[[FOR_J_CLEANUP:.*]] ], [ 0, %[[FOR_J_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]] +; CHECK: [[FOR_K_HEADER]]: +; CHECK-NEXT: [[IV_K:%.*]] = phi i64 [ 0, %[[FOR_I_HEADER]] ], [ [[IV_K_NEXT:%.*]], %[[FOR_K_CLEANUP:.*]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV_L:%.*]] = phi i64 [ 0, %[[FOR_K_HEADER]] ], [ [[TMP0:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds nuw [128 x [128 x [128 x [128 x i32]]]], ptr @a, i64 [[IV_L]], i64 [[IV_K]], i64 [[IV_J]], i64 [[IV_I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[VAL]], 1 +; CHECK-NEXT: store i32 [[INC]], ptr [[PTR]], align 4 +; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[IV_L]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 128 +; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_K_CLEANUP]], label %[[FOR_BODY]] +; CHECK: [[FOR_K_CLEANUP]]: +; CHECK-NEXT: [[IV_K_NEXT]] = add nuw nsw i64 [[IV_K]], 1 +; CHECK-NEXT: [[EXITCOND_K:%.*]] = icmp eq i64 [[IV_K_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_K]], label %[[FOR_I_CLEANUP]], label %[[FOR_K_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[FOR_J_CLEANUP]]: +; CHECK-NEXT: [[IV_J_NEXT]] = add nuw nsw i64 [[IV_J]], 1 +; CHECK-NEXT: 
[[EXITCOND_J:%.*]] = icmp eq i64 [[IV_J_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_J]], label %[[EXIT:.*]], label %[[FOR_J_HEADER]] +; CHECK: [[FOR_I_CLEANUP]]: +; CHECK-NEXT: [[IV_I_NEXT]] = add nuw nsw i64 [[IV_I]], 1 +; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[IV_I_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_I]], label %[[FOR_J_CLEANUP]], label %[[FOR_I_HEADER]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.i.header + +for.i.header: + %iv.i = phi i64 [ 0, %entry ], [ %iv.i.next, %for.i.cleanup ] + br label %for.j.header + +for.j.header: + %iv.j = phi i64 [ 0, %for.i.header ], [ %iv.j.next, %for.j.cleanup ] + br label %for.k.header + +for.k.header: + %iv.k = phi i64 [ 0, %for.j.header ], [ %iv.k.next, %for.k.cleanup ] + br label %for.body + +for.body: + %iv.l = phi i64 [ 0, %for.k.header ], [ %iv.l.next, %for.body ] + %ptr = getelementptr inbounds nuw [128 x [128 x [128 x [128 x i32]]]], ptr @a, i64 %iv.l, i64 %iv.k, i64 %iv.j, i64 %iv.i + %val = load i32, ptr %ptr, align 4 + %inc = add nuw nsw i32 %val, 1 + store i32 %inc, ptr %ptr, align 4 + %iv.l.next = add nuw nsw i64 %iv.l, 1 + %exitcond.l = icmp eq i64 %iv.l.next, 128 + br i1 %exitcond.l, label %for.k.cleanup, label %for.body + +for.k.cleanup: + %iv.k.next = add nuw nsw i64 %iv.k, 1 + %exitcond.k = icmp eq i64 %iv.k.next, 128 + br i1 %exitcond.k, label %for.j.cleanup, label %for.k.header, !llvm.loop !0 + +for.j.cleanup: + %iv.j.next = add nuw nsw i64 %iv.j, 1 + %exitcond.j = icmp eq i64 %iv.j.next, 128 + br i1 %exitcond.j, label %for.i.cleanup, label %for.j.header + +for.i.cleanup: + %iv.i.next = add nuw nsw i64 %iv.i, 1 + %exitcond.i = icmp eq i64 %iv.i.next, 128 + br i1 %exitcond.i, label %exit, label %for.i.header + +exit: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.interchange.enable", i1 false} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.interchange.enable", i1 false} +;. 
diff --git a/llvm/test/Transforms/LoopInterchange/metadata-interruption.ll b/llvm/test/Transforms/LoopInterchange/metadata-interruption.ll new file mode 100644 index 0000000000000..5e1d35b46ff4f --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/metadata-interruption.ll @@ -0,0 +1,94 @@ +; RUN: opt < %s -passes=loop-interchange -loop-interchange-only-when-forced=1 -pass-remarks-output=%t -disable-output +; RUN: FileCheck -input-file %t %s + +; Test that loop-interchange stops processing for some reason even though +; some loops have metadata specifying that interchange is enabled. + +@a = dso_local local_unnamed_addr global [128 x [128 x [128 x i32]]] zeroinitializer, align 4 + +; CHECK: --- !Missed +; CHECK-NEXT: Pass: loop-interchange +; CHECK-NEXT: Name: Innermost +; CHECK-NEXT: Function: enable_innermost +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: The metadata is invalid with an innermost loop. +define void @enable_innermost() { +entry: + br label %for.i.header + +for.i.header: + %iv.i = phi i64 [ 0, %entry ], [ %iv.i.next, %for.i.cleanup ] + br label %for.j.header + +for.j.header: + %iv.j = phi i64 [ 0, %for.i.header ], [ %iv.j.next, %for.j.cleanup ] + br label %for.body + +for.body: + %iv.k = phi i64 [ 0, %for.j.header ], [ %iv.k.next, %for.body ] + %ptr = getelementptr inbounds nuw [128 x [128 x [128 x i32]]], ptr @a, i64 %iv.k, i64 %iv.j, i64 %iv.i + %val = load i32, ptr %ptr, align 4 + %inc = add nuw nsw i32 %val, 1 + store i32 %inc, ptr %ptr, align 4 + %iv.k.next = add nuw nsw i64 %iv.k, 1 + %exitcond.k = icmp eq i64 %iv.k.next, 128 + br i1 %exitcond.k, label %for.j.cleanup, label %for.body, !llvm.loop !0 + +for.j.cleanup: + %iv.j.next = add nuw nsw i64 %iv.j, 1 + %exitcond.j = icmp eq i64 %iv.j.next, 128 + br i1 %exitcond.j, label %for.i.cleanup, label %for.j.header + +for.i.cleanup: + %iv.i.next = add nuw nsw i64 %iv.i, 1 + %exitcond.i = icmp eq i64 %iv.i.next, 128 + br i1 %exitcond.i, label %exit, label %for.i.header + +exit: + ret void +} + +; 
CHECK: --- !Missed +; CHECK-NEXT: Pass: loop-interchange +; CHECK-NEXT: Name: AmbiguousOrder +; CHECK-NEXT: Function: ambiguous_order +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: The loops to be interchanged are overlapping. +define void @ambiguous_order() { +entry: + br label %for.i.header + +for.i.header: + %iv.i = phi i64 [ 0, %entry ], [ %iv.i.next, %for.i.cleanup ] + br label %for.j.header + +for.j.header: + %iv.j = phi i64 [ 0, %for.i.header ], [ %iv.j.next, %for.j.cleanup ] + br label %for.body + +for.body: + %iv.k = phi i64 [ 0, %for.j.header ], [ %iv.k.next, %for.body ] + %ptr = getelementptr inbounds nuw [128 x [128 x [128 x i32]]], ptr @a, i64 %iv.k, i64 %iv.j, i64 %iv.i + %val = load i32, ptr %ptr, align 4 + %inc = add nuw nsw i32 %val, 1 + store i32 %inc, ptr %ptr, align 4 + %iv.k.next = add nuw nsw i64 %iv.k, 1 + %exitcond.k = icmp eq i64 %iv.k.next, 128 + br i1 %exitcond.k, label %for.j.cleanup, label %for.body + +for.j.cleanup: + %iv.j.next = add nuw nsw i64 %iv.j, 1 + %exitcond.j = icmp eq i64 %iv.j.next, 128 + br i1 %exitcond.j, label %for.i.cleanup, label %for.j.header, !llvm.loop !0 + +for.i.cleanup: + %iv.i.next = add nuw nsw i64 %iv.i, 1 + %exitcond.i = icmp eq i64 %iv.i.next, 128 + br i1 %exitcond.i, label %exit, label %for.i.header, !llvm.loop !0 + +exit: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.interchange.enable", i1 true} diff --git a/llvm/test/Transforms/LoopInterchange/metadata.ll b/llvm/test/Transforms/LoopInterchange/metadata.ll new file mode 100644 index 0000000000000..83ee9fdc0aa67 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/metadata.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=1 --cache-line-size=64 -S < %s | FileCheck %s + +@a = dso_local local_unnamed_addr global [128 x [128 x [128 x i32]]] zeroinitializer, align 4 + +; Check that the interchanges are 
applied in the expected order. The original +; code looks as follows: +; +; #pragma clang loop interchange(enable) +; for (int j=0; j<128; j++) +; #pragma clang loop interchange(enable) +; for (int i=0; i<128; i++) +; for (int k=0; k<128; k++) +; a[k][j][i]++; +; +; At first the interchange is applied to the i-loop and the k-loop. The +; follow-up metadata is attached to the outermost loop, and then the +; interchange is applied to the j-loop and the k-loop. +; +define void @f() { +; CHECK-LABEL: define void @f() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_J_HEADER_PREHEADER:.*]]: +; CHECK-NEXT: br label %[[FOR_J_HEADER:.*]] +; CHECK: [[FOR_J_HEADER]]: +; CHECK-NEXT: [[IV_J:%.*]] = phi i64 [ [[IV_I_NEXT1:%.*]], %[[FOR_J_CLEANUP:.*]] ], [ 0, %[[FOR_J_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER:.*]] +; CHECK: [[FOR_I_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]] +; CHECK: [[FOR_I_HEADER]]: +; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[IV_I_NEXT:%.*]], %[[FOR_I_CLEANUP:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_BODY_SPLIT1:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV_K:%.*]] = phi i64 [ [[IV_J_NEXT:%.*]], %[[FOR_BODY_SPLIT:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_J_HEADER_PREHEADER]] +; CHECK: [[FOR_BODY_SPLIT1]]: +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds nuw [128 x [128 x [128 x i32]]], ptr @a, i64 [[IV_K]], i64 [[IV_J]], i64 [[IV_I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[VAL]], 1 +; CHECK-NEXT: store i32 [[INC]], ptr [[PTR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[IV_K]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 128 +; CHECK-NEXT: br label %[[FOR_I_CLEANUP]] +; CHECK: [[FOR_BODY_SPLIT]]: +; CHECK-NEXT: [[IV_J_NEXT]] = add nuw nsw i64 
[[IV_K]], 1 +; CHECK-NEXT: [[EXITCOND_J:%.*]] = icmp eq i64 [[IV_J_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_J]], label %[[EXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[FOR_I_CLEANUP]]: +; CHECK-NEXT: [[IV_I_NEXT]] = add nuw nsw i64 [[IV_I]], 1 +; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[IV_I_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_I]], label %[[FOR_J_CLEANUP]], label %[[FOR_I_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[FOR_J_CLEANUP]]: +; CHECK-NEXT: [[IV_I_NEXT1]] = add nuw nsw i64 [[IV_J]], 1 +; CHECK-NEXT: [[EXITCOND_I1:%.*]] = icmp eq i64 [[IV_I_NEXT1]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_I1]], label %[[FOR_BODY_SPLIT]], label %[[FOR_J_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.j.header + +for.j.header: + %iv.j = phi i64 [ 0, %entry ], [ %iv.j.next, %for.j.cleanup ] + br label %for.i.header + +for.i.header: + %iv.i = phi i64 [ 0, %for.j.header ], [ %iv.i.next, %for.i.cleanup ] + br label %for.body + +for.body: + %iv.k = phi i64 [ 0, %for.i.header ], [ %iv.k.next, %for.body ] + %ptr = getelementptr inbounds nuw [128 x [128 x [128 x i32]]], ptr @a, i64 %iv.k, i64 %iv.j, i64 %iv.i + %val = load i32, ptr %ptr, align 4 + %inc = add nuw nsw i32 %val, 1 + store i32 %inc, ptr %ptr, align 4 + %iv.k.next = add nuw nsw i64 %iv.k, 1 + %exitcond.k = icmp eq i64 %iv.k.next, 128 + br i1 %exitcond.k, label %for.i.cleanup, label %for.body + +for.i.cleanup: + %iv.i.next = add nuw nsw i64 %iv.i, 1 + %exitcond.i = icmp eq i64 %iv.i.next, 128 + br i1 %exitcond.i, label %for.j.cleanup, label %for.i.header, !llvm.loop !0 + +for.j.cleanup: + %iv.j.next = add nuw nsw i64 %iv.j, 1 + %exitcond.j = icmp eq i64 %iv.j.next, 128 + br i1 %exitcond.j, label %exit, label %for.j.header + +exit: + ret void +} + +!0 = distinct !{!0, !1, !2, !3, !4, !5} +!1 = !{!"llvm.loop.interchange.enable", i1 true} +!2 = !{!"llvm.loop.interchange.followup_all", !{!"FolloupAll"}} +!3 = 
!{!"llvm.loop.interchange.followup_inner", !{!"FollowupInner0"}} +!4 = !{!"llvm.loop.interchange.followup_outer", !{!"FollowupOuter0"}} +!5 = !{!"llvm.loop.interchange.followup_next_outer", !1, !2, !6, !7} +!6 = !{!"llvm.loop.interchange.followup_inner", !{!"FollowupInner1"}} +!7 = !{!"llvm.loop.interchange.followup_outer", !{!"FollowupOuter1"}} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"FolloupAll"} +; CHECK: [[META2]] = !{!"FollowupInner1"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META4:![0-9]+]]} +; CHECK: [[META4]] = !{!"FollowupOuter0"} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META6:![0-9]+]]} +; CHECK: [[META6]] = !{!"FollowupOuter1"} +;. diff --git a/llvm/test/Transforms/LoopTransformWarning/interchange-remarks-missed.ll b/llvm/test/Transforms/LoopTransformWarning/interchange-remarks-missed.ll new file mode 100644 index 0000000000000..e0a8dace4b649 --- /dev/null +++ b/llvm/test/Transforms/LoopTransformWarning/interchange-remarks-missed.ll @@ -0,0 +1,103 @@ +; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s +; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml +; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s + +; C/C++ code for tests +; +; float a[200][200]; +; void f() { +; #pragma clang loop interchange(enable) +; for (int i = 0; i < 10; i++) { +; for (int j = 0; j < 10; j++) { +; a[j*j][i+5] += a[j+5][i*i]; +; } +; } +; } + +; CHECK: warning: source.c:6:3: loop not interchanged: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering + +; YAML: --- !Failure +; YAML-NEXT: Pass: transform-warning +; YAML-NEXT: Name: FailedRequestedInterchange +; YAML-NEXT: DebugLoc: { File: source.c, Line: 6, Column: 3 } +; 
YAML-NEXT: Function: test_interchange_enable +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not interchanged: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering' +; YAML-NEXT: ... + +@a = dso_local local_unnamed_addr global [200 x [200 x float]] zeroinitializer, align 4 + +define dso_local void @test_interchange_enable() !dbg !18 { +entry: + br label %for.cond1.preheader, !dbg !30 + +for.cond1.preheader: ; preds = %entry, %for.cond.cleanup3 + %indvars.iv27 = phi i64 [ 0, %entry ], [ %indvars.iv.next28, %for.cond.cleanup3 ] + %0 = mul nuw nsw i64 %indvars.iv27, %indvars.iv27 + br label %for.body4, !dbg !32 + +for.cond.cleanup: ; preds = %for.cond.cleanup3 + ret void, !dbg !33 + +for.cond.cleanup3: ; preds = %for.body4 + %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1, !dbg !34 + %exitcond31.not = icmp eq i64 %indvars.iv.next28, 10, !dbg !35 + br i1 %exitcond31.not, label %for.cond.cleanup, label %for.cond1.preheader, !dbg !30, !llvm.loop !36 + +for.body4: ; preds = %for.cond1.preheader, %for.body4 + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ] + %arrayidx6 = getelementptr inbounds nuw [200 x [200 x float]], ptr @a, i64 0, i64 %indvars.iv, i64 %0, !dbg !41 + %1 = load float, ptr %arrayidx6, align 4, !dbg !41, !tbaa !44 + %2 = mul nuw nsw i64 %indvars.iv, %indvars.iv, !dbg !48 + %arrayidx11 = getelementptr inbounds nuw [200 x [200 x float]], ptr @a, i64 0, i64 %2, i64 %indvars.iv27, !dbg !49 + %3 = load float, ptr %arrayidx11, align 4, !dbg !50, !tbaa !44 + %add = fadd float %1, %3, !dbg !50 + store float %add, ptr %arrayidx11, align 4, !dbg !50, !tbaa !44 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !51 + %exitcond.not = icmp eq i64 %indvars.iv.next, 10, !dbg !52 + br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4, !dbg !32, !llvm.loop !53 +} + +!llvm.module.flags = !{!9, !10} + 
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3) +!3 = !DIFile(filename: "source.c", directory: ".") +!9 = !{i32 7, !"Dwarf Version", i32 5} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!18 = distinct !DISubprogram(name: "test_interchange_enable", scope: !3, file: !3, line: 4, type: !19, scopeLine: 4, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{null} +!21 = !{!22, !25} +!22 = !DILocalVariable(name: "i", scope: !23, file: !3, line: 6, type: !24) +!23 = distinct !DILexicalBlock(scope: !18, file: !3, line: 6, column: 3) +!24 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!25 = !DILocalVariable(name: "j", scope: !26, file: !3, line: 7, type: !24) +!26 = distinct !DILexicalBlock(scope: !27, file: !3, line: 7, column: 5) +!27 = distinct !DILexicalBlock(scope: !28, file: !3, line: 6, column: 32) +!28 = distinct !DILexicalBlock(scope: !23, file: !3, line: 6, column: 3) +!29 = !DILocation(line: 0, scope: !23) +!30 = !DILocation(line: 6, column: 3, scope: !23) +!31 = !DILocation(line: 0, scope: !26) +!32 = !DILocation(line: 7, column: 5, scope: !26) +!33 = !DILocation(line: 11, column: 1, scope: !18) +!34 = !DILocation(line: 6, column: 28, scope: !28) +!35 = !DILocation(line: 6, column: 21, scope: !28) +!36 = distinct !{!36, !30, !37, !38, !39, !40} +!37 = !DILocation(line: 10, column: 3, scope: !23) +!38 = !{!"llvm.loop.mustprogress"} +!39 = !{!"llvm.loop.unroll.disable"} +!40 = !{!"llvm.loop.interchange.enable", i1 true} +!41 = !DILocation(line: 8, column: 20, scope: !42) +!42 = distinct !DILexicalBlock(scope: !43, file: !3, line: 7, column: 34) +!43 = distinct !DILexicalBlock(scope: !26, file: !3, line: 7, column: 5) +!44 = !{!45, !45, i64 0} +!45 = !{!"float", !46, i64 0} +!46 = !{!"omnipotent char", !47, i64 0} +!47 = !{!"Simple C/C++ TBAA"} +!48 = !DILocation(line: 8, column: 10, scope: !42) +!49 = !DILocation(line: 8, 
column: 7, scope: !42) +!50 = !DILocation(line: 8, column: 17, scope: !42) +!51 = !DILocation(line: 7, column: 30, scope: !43) +!52 = !DILocation(line: 7, column: 23, scope: !43) +!53 = distinct !{!53, !32, !54, !38, !39} +!54 = !DILocation(line: 9, column: 5, scope: !26)