Skip to content

Commit 2418ad8

Browse files
committed
Fix metadata to express the application order explicitly.
1 parent 1b11ebe commit 2418ad8

File tree

4 files changed

+434
-339
lines changed

4 files changed

+434
-339
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 138 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ using namespace llvm;
5555
/// Metadata attribute names
5656
static const char *const LLVMLoopInterchangeFollowupAll =
5757
"llvm.loop.interchange.followup_all";
58+
static const char *const LLVMLoopInterchangeFollowupNextOuter =
59+
"llvm.loop.interchange.followup_next_outer";
5860
static const char *const LLVMLoopInterchangeFollowupOuter =
5961
"llvm.loop.interchange.followup_outer";
6062
static const char *const LLVMLoopInterchangeFollowupInner =
@@ -533,6 +535,8 @@ struct LoopInterchange {
533535
}
534536
}
535537

538+
// If OnlyWhenForced is true, only process loops for which interchange is
539+
// explicitly enabled.
536540
if (OnlyWhenForced)
537541
return processEnabledLoop(LoopList, DependencyMatrix, CostMap);
538542

@@ -564,8 +568,10 @@ struct LoopInterchange {
564568
Loop *InnerLoop = LoopList[InnerLoopId];
565569
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
566570
<< " and OuterLoopId = " << OuterLoopId << "\n");
567-
if (findMetadata(OuterLoop) == false || findMetadata(InnerLoop) == false)
571+
if (findMetadata(OuterLoop) == false || findMetadata(InnerLoop) == false) {
572+
LLVM_DEBUG(dbgs() << "Not interchanging loops. It is disabled.\n");
568573
return false;
574+
}
569575
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
570576
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
571577
LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
@@ -608,41 +614,145 @@ struct LoopInterchange {
608614
std::vector<std::vector<char>> &DependencyMatrix,
609615
const DenseMap<const Loop *, unsigned> &CostMap) {
610616
bool Changed = false;
611-
for (unsigned InnerLoopId = LoopList.size() - 1; InnerLoopId > 0;
612-
InnerLoopId--) {
613-
unsigned OuterLoopId = InnerLoopId - 1;
614-
if (findMetadata(LoopList[OuterLoopId]) != true)
615-
continue;
616617

617-
MDNode *MDOrigLoopID = LoopList[OuterLoopId]->getLoopID();
618-
bool Interchanged =
619-
processLoop(LoopList[InnerLoopId], LoopList[OuterLoopId], InnerLoopId,
620-
OuterLoopId, DependencyMatrix, CostMap);
621-
622-
// TODO: Consolidate the duplicate code in `processLoopList`.
623-
if (Interchanged) {
624-
std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
625-
// Update the DependencyMatrix
626-
interChangeDependencies(DependencyMatrix, InnerLoopId, OuterLoopId);
618+
// Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
619+
DenseMap<Loop *, unsigned> Loop2Index;
620+
for (unsigned I = 0; I != LoopList.size(); I++)
621+
Loop2Index[LoopList[I]] = I;
622+
623+
// Hold outer loops to be exchanged (i.e., loops for which
624+
// "llvm.loop.interchange.enable" is true), in the current nest order.
625+
SmallVector<Loop *, 4> Worklist;
626+
627+
// Helper function to try to add a new loop into the Worklist. Return false
628+
// if there is a duplicate in the loop to be interchanged.
629+
auto AddLoopIfEnabled = [&](Loop *L) {
630+
if (findMetadata(L) == true) {
631+
if (!Worklist.empty()) {
632+
// Because the loops are sorted in the order of the current nest, it
633+
// is sufficient to compare with the last element.
634+
unsigned InnerLoopId = Loop2Index[Worklist.back()] + 1;
635+
unsigned OuterLoopId = Loop2Index[L];
636+
if (OuterLoopId <= InnerLoopId) {
637+
ORE->emit([&]() {
638+
return OptimizationRemarkMissed(DEBUG_TYPE, "AmbiguousOrder",
639+
L->getStartLoc(), L->getHeader())
640+
<< "The loops to be interchanged are overlapping.";
641+
});
642+
return false;
643+
}
644+
}
645+
Worklist.push_back(L);
646+
}
647+
return true;
648+
};
627649

628-
LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
629-
printDepMatrix(DependencyMatrix));
650+
// Initialize Worklist. To process the loops in inner-loop-first order, add
651+
// them to the worklist in the outer-loop-first order.
652+
for (unsigned I = 0; I != LoopList.size(); I++)
653+
if (!AddLoopIfEnabled(LoopList[I]))
654+
return Changed;
655+
656+
// Set an upper bound on the number of transformations to avoid an infinite
657+
// loop. There is no deep meaning behind the current value (square of the
658+
// size of LoopList).
659+
// TODO: Is this really necessary?
660+
const unsigned MaxAttemptsCount = LoopList.size() * LoopList.size();
661+
unsigned Attempts = 0;
662+
663+
// Process the loops. An exchange is applied to two loops, but a metadata
664+
// replacement can be applied to three loops: the two loops plus the next
665+
// outer loop, if it exists. This is because it's necessary to express the
666+
// information about the order of the application of interchanges in cases
667+
// where the target loops to be exchanged are overlapping, e.g.,
668+
//
669+
// #pragma clang loop interchange(enable)
670+
// for(int i=0;i<N;i++)
671+
// #pragma clang loop interchange(enable)
672+
// for (int j=0;j<N;j++)
673+
// for (int k=0;k<N;k++)
674+
// ...
675+
//
676+
// In this case we will exchange the innermost two loops first. Then the
677+
// follow-up metadata including enabling interchange is attached on the
678+
// outermost loop, and it is enqueued as the next candidate to be processed.
679+
while (!Worklist.empty() && Attempts < MaxAttemptsCount) {
680+
Loop *TargetLoop = Worklist.pop_back_val();
681+
assert(findMetadata(TargetLoop) == true &&
682+
"Some metadata was unexpectedlly removed");
683+
unsigned OuterLoopId = Loop2Index[TargetLoop];
684+
unsigned InnerLoopId = OuterLoopId + 1;
685+
if (InnerLoopId >= LoopList.size()) {
686+
ORE->emit([&]() {
687+
return OptimizationRemarkMissed(DEBUG_TYPE, "InnermostLoop",
688+
TargetLoop->getStartLoc(),
689+
TargetLoop->getHeader())
690+
<< "The metadata is invalid with an innermost loop.";
691+
});
692+
break;
693+
}
694+
MDNode *LoopID = TargetLoop->getLoopID();
695+
bool Interchanged = processLoop(LoopList, InnerLoopId, OuterLoopId,
696+
DependencyMatrix, CostMap);
697+
if (!Interchanged) {
698+
ORE->emit([&]() {
699+
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInterchanged",
700+
TargetLoop->getStartLoc(),
701+
TargetLoop->getHeader())
702+
<< "Failed to perform explicitly specified loop interchange.";
703+
});
704+
break;
630705
}
631706

632-
std::optional<MDNode *> MDOuterLoopID =
633-
makeFollowupLoopID(MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
634-
LLVMLoopInterchangeFollowupOuter});
635-
if (MDOuterLoopID)
636-
LoopList[OuterLoopId]->setLoopID(*MDOuterLoopID);
707+
// The next outer loop, or nullptr if TargetLoop is the outermost one.
708+
Loop *NextOuterLoop = nullptr;
709+
if (0 < OuterLoopId)
710+
NextOuterLoop = LoopList[OuterLoopId - 1];
711+
Loop *OuterLoop = LoopList[OuterLoopId];
712+
Loop *InnerLoop = LoopList[InnerLoopId];
713+
Attempts++;
714+
Changed = true;
715+
Loop2Index[OuterLoop] = OuterLoopId;
716+
Loop2Index[InnerLoop] = InnerLoopId;
637717

718+
// Update the metadata.
719+
std::optional<MDNode *> MDNextOuterLoopID =
720+
makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
721+
LLVMLoopInterchangeFollowupNextOuter});
722+
std::optional<MDNode *> MDOuterLoopID =
723+
makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
724+
LLVMLoopInterchangeFollowupOuter});
638725
std::optional<MDNode *> MDInnerLoopID =
639-
makeFollowupLoopID(MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
640-
LLVMLoopInterchangeFollowupInner});
726+
makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
727+
LLVMLoopInterchangeFollowupInner});
728+
if (MDNextOuterLoopID) {
729+
if (NextOuterLoop) {
730+
NextOuterLoop->setLoopID(*MDNextOuterLoopID);
731+
} else {
732+
LLVM_DEBUG(dbgs()
733+
<< "New metadata for the next outer loop is ignored.\n");
734+
}
735+
}
736+
if (MDOuterLoopID)
737+
OuterLoop->setLoopID(*MDOuterLoopID);
641738
if (MDInnerLoopID)
642-
LoopList[InnerLoopId]->setLoopID(*MDInnerLoopID);
643-
644-
Changed |= Interchanged;
739+
InnerLoop->setLoopID(*MDInnerLoopID);
740+
741+
// Add new elements, paying attention to the order.
742+
bool Valid = true;
743+
if (NextOuterLoop)
744+
Valid &= AddLoopIfEnabled(NextOuterLoop);
745+
Valid &= AddLoopIfEnabled(OuterLoop);
746+
Valid &= AddLoopIfEnabled(InnerLoop);
747+
if (!Valid)
748+
break;
645749
}
750+
751+
LLVM_DEBUG({
752+
if (!Worklist.empty())
753+
dbgs() << "Some metadata was ignored because the maximum number of "
754+
"attempts was reached.\n";
755+
});
646756
return Changed;
647757
}
648758
};
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=loop-interchange -loop-interchange-only-when-forced=0 --cache-line-size=64 -S < %s | FileCheck %s
3+
4+
; Check that the interchange is not applied to the loop that is disabled by
5+
; metadata. The original code is as below:
6+
;
7+
; for (int i=0; i<128; i++)
8+
; for (int j=0; j<128; j++)
9+
; #pragma clang loop interchange(disable)
10+
; for (int k=0; k<128; k++)
11+
; for (int l=0; l<128; l++)
12+
; a[l][k][j][i]++;
13+
;
14+
; Since interchanges are not applied to the k-loop, the pair (i, j) is the
15+
; only candidate for exchange.
16+
17+
@a = dso_local local_unnamed_addr global [128 x [128 x [128 x [128 x i32]]]] zeroinitializer, align 4
18+
19+
define void @f() {
20+
; CHECK-LABEL: define void @f() {
21+
; CHECK-NEXT: [[ENTRY:.*:]]
22+
; CHECK-NEXT: br label %[[FOR_J_HEADER_PREHEADER:.*]]
23+
; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]:
24+
; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
25+
; CHECK: [[FOR_I_HEADER]]:
26+
; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[IV_I_NEXT:%.*]], %[[FOR_I_CLEANUP:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ]
27+
; CHECK-NEXT: br label %[[FOR_K_HEADER:.*]]
28+
; CHECK: [[FOR_J_HEADER_PREHEADER]]:
29+
; CHECK-NEXT: br label %[[FOR_J_HEADER:.*]]
30+
; CHECK: [[FOR_J_HEADER]]:
31+
; CHECK-NEXT: [[IV_J:%.*]] = phi i64 [ [[IV_J_NEXT:%.*]], %[[FOR_J_CLEANUP:.*]] ], [ 0, %[[FOR_J_HEADER_PREHEADER]] ]
32+
; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]]
33+
; CHECK: [[FOR_K_HEADER]]:
34+
; CHECK-NEXT: [[IV_K:%.*]] = phi i64 [ 0, %[[FOR_I_HEADER]] ], [ [[IV_K_NEXT:%.*]], %[[FOR_K_CLEANUP:.*]] ]
35+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
36+
; CHECK: [[FOR_BODY]]:
37+
; CHECK-NEXT: [[IV_L:%.*]] = phi i64 [ 0, %[[FOR_K_HEADER]] ], [ [[TMP0:%.*]], %[[FOR_BODY]] ]
38+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds nuw [128 x [128 x [128 x [128 x i32]]]], ptr @a, i64 [[IV_L]], i64 [[IV_K]], i64 [[IV_J]], i64 [[IV_I]]
39+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
40+
; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[VAL]], 1
41+
; CHECK-NEXT: store i32 [[INC]], ptr [[PTR]], align 4
42+
; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[IV_L]], 1
43+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 128
44+
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_K_CLEANUP]], label %[[FOR_BODY]]
45+
; CHECK: [[FOR_K_CLEANUP]]:
46+
; CHECK-NEXT: [[IV_K_NEXT]] = add nuw nsw i64 [[IV_K]], 1
47+
; CHECK-NEXT: [[EXITCOND_K:%.*]] = icmp eq i64 [[IV_K_NEXT]], 128
48+
; CHECK-NEXT: br i1 [[EXITCOND_K]], label %[[FOR_I_CLEANUP]], label %[[FOR_K_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
49+
; CHECK: [[FOR_J_CLEANUP]]:
50+
; CHECK-NEXT: [[IV_J_NEXT]] = add nuw nsw i64 [[IV_J]], 1
51+
; CHECK-NEXT: [[EXITCOND_J:%.*]] = icmp eq i64 [[IV_J_NEXT]], 128
52+
; CHECK-NEXT: br i1 [[EXITCOND_J]], label %[[EXIT:.*]], label %[[FOR_J_HEADER]]
53+
; CHECK: [[FOR_I_CLEANUP]]:
54+
; CHECK-NEXT: [[IV_I_NEXT]] = add nuw nsw i64 [[IV_I]], 1
55+
; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[IV_I_NEXT]], 128
56+
; CHECK-NEXT: br i1 [[EXITCOND_I]], label %[[FOR_J_CLEANUP]], label %[[FOR_I_HEADER]]
57+
; CHECK: [[EXIT]]:
58+
; CHECK-NEXT: ret void
59+
;
60+
entry:
61+
br label %for.i.header
62+
63+
for.i.header:
64+
%iv.i = phi i64 [ 0, %entry ], [ %iv.i.next, %for.i.cleanup ]
65+
br label %for.j.header
66+
67+
for.j.header:
68+
%iv.j = phi i64 [ 0, %for.i.header ], [ %iv.j.next, %for.j.cleanup ]
69+
br label %for.k.header
70+
71+
for.k.header:
72+
%iv.k = phi i64 [ 0, %for.j.header ], [ %iv.k.next, %for.k.cleanup ]
73+
br label %for.body
74+
75+
for.body:
76+
%iv.l = phi i64 [ 0, %for.k.header ], [ %iv.l.next, %for.body ]
77+
%ptr = getelementptr inbounds nuw [128 x [128 x [128 x [128 x i32]]]], ptr @a, i64 %iv.l, i64 %iv.k, i64 %iv.j, i64 %iv.i
78+
%val = load i32, ptr %ptr, align 4
79+
%inc = add nuw nsw i32 %val, 1
80+
store i32 %inc, ptr %ptr, align 4
81+
%iv.l.next = add nuw nsw i64 %iv.l, 1
82+
%exitcond.l = icmp eq i64 %iv.l.next, 128
83+
br i1 %exitcond.l, label %for.k.cleanup, label %for.body
84+
85+
for.k.cleanup:
86+
%iv.k.next = add nuw nsw i64 %iv.k, 1
87+
%exitcond.k = icmp eq i64 %iv.k.next, 128
88+
br i1 %exitcond.k, label %for.j.cleanup, label %for.k.header, !llvm.loop !0
89+
90+
for.j.cleanup:
91+
%iv.j.next = add nuw nsw i64 %iv.j, 1
92+
%exitcond.j = icmp eq i64 %iv.j.next, 128
93+
br i1 %exitcond.j, label %for.i.cleanup, label %for.j.header
94+
95+
for.i.cleanup:
96+
%iv.i.next = add nuw nsw i64 %iv.i, 1
97+
%exitcond.i = icmp eq i64 %iv.i.next, 128
98+
br i1 %exitcond.i, label %exit, label %for.i.header
99+
100+
exit:
101+
ret void
102+
}
103+
104+
!0 = distinct !{!0, !1}
105+
!1 = !{!"llvm.loop.interchange.enable", i1 false}
106+
;.
107+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
108+
; CHECK: [[META1]] = !{!"llvm.loop.interchange.enable", i1 false}
109+
;.

0 commit comments

Comments
 (0)