Skip to content

[LoopInterchange] Add metadata to control loop-interchange #127474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ TransformationMode hasUnrollAndJamTransformation(const Loop *L);
TransformationMode hasVectorizeTransformation(const Loop *L);
TransformationMode hasDistributeTransformation(const Loop *L);
TransformationMode hasLICMVersioningTransformation(const Loop *L);
TransformationMode hasInterchangeTransformation(const Loop *L);
/// @}

/// Set input string into loop metadata by keeping other values intact.
Expand Down
210 changes: 205 additions & 5 deletions llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ using namespace llvm;

#define DEBUG_TYPE "loop-interchange"

/// @{
/// Names of the follow-up loop attributes consulted after an interchange.
/// They are passed to makeFollowupLoopID to build the loop IDs that replace
/// the metadata on the next outer, the outer and the inner loop; the "all"
/// name is included in every one of those lookups.
static constexpr const char *LLVMLoopInterchangeFollowupAll =
    "llvm.loop.interchange.followup_all";
static constexpr const char *LLVMLoopInterchangeFollowupNextOuter =
    "llvm.loop.interchange.followup_next_outer";
static constexpr const char *LLVMLoopInterchangeFollowupOuter =
    "llvm.loop.interchange.followup_outer";
static constexpr const char *LLVMLoopInterchangeFollowupInner =
    "llvm.loop.interchange.followup_inner";
/// @}

STATISTIC(LoopsInterchanged, "Number of loops interchanged");

static cl::opt<int> LoopInterchangeCostThreshold(
Expand All @@ -65,6 +77,14 @@ static cl::opt<unsigned int> MaxMemInstrCount(
"in the dependency matrix. Higher value may lead to more interchanges "
"at the cost of compile-time"));

// Restricts the pass to explicitly requested interchanges. When this flag is
// set, the pass hands the loop nest to processEnabledLoop instead of running
// its usual search; the flag is off by default and hidden from the regular
// option listing.
// TODO: Once this pass is enabled by default, remove this option and use the
// value of PipelineTuningOptions.
static cl::opt<bool> OnlyWhenForced(
"loop-interchange-only-when-forced", cl::init(false), cl::ReallyHidden,
cl::desc(
"Apply interchanges only when explicitly specified metadata exists"));

namespace {

using LoopVector = SmallVector<Loop *, 8>;
Expand Down Expand Up @@ -297,6 +317,16 @@ static bool isComputableLoopNest(ScalarEvolution *SE,
return true;
}

/// Read the "llvm.loop.interchange.enable" attribute attached to \p L.
///
/// \returns std::nullopt when the loop does not carry the attribute. When the
/// attribute is present with a constant integer value, returns whether that
/// value is non-zero. An attribute written without a value, or with a value
/// that is not a constant integer, is treated as enabled; this is the same
/// interpretation getBooleanLoopAttribute applies, so the result agrees with
/// hasInterchangeTransformation instead of asserting (or dereferencing a null
/// operand in release builds).
static std::optional<bool> findMetadata(Loop *L) {
  auto Value = findStringMetadataForLoop(L, "llvm.loop.interchange.enable");
  if (!Value)
    return std::nullopt;

  // A null operand means the attribute has a name but no value.
  const MDOperand *Op = *Value;
  if (!Op || !Op->get() || !mdconst::hasa<ConstantInt>(*Op))
    return true;

  return mdconst::extract<ConstantInt>(*Op)->getZExtValue() != 0;
}

namespace {

/// LoopInterchangeLegality checks if it is legal to interchange the loop.
Expand Down Expand Up @@ -504,6 +534,12 @@ struct LoopInterchange {
CostMap[LoopCosts[i].first] = i;
}
}

// If OnlyWhenForced is true, only process loops for which interchange is
// explicitly enabled.
if (OnlyWhenForced)
return processEnabledLoop(LoopList, DependencyMatrix, CostMap);

// We try to achieve the globally optimal memory access for the loopnest,
// and do interchange based on a bubble-sort fashion. We start from
// the innermost loop, move it outwards to the best possible position
Expand All @@ -530,19 +566,30 @@ struct LoopInterchange {
const DenseMap<const Loop *, unsigned> &CostMap) {
Loop *OuterLoop = LoopList[OuterLoopId];
Loop *InnerLoop = LoopList[InnerLoopId];
MDNode *LoopID = OuterLoop->getLoopID();
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
std::optional<bool> OuterLoopEnabled = findMetadata(OuterLoop);
std::optional<bool> InnerLoopEnabled = findMetadata(InnerLoop);
if (OuterLoopEnabled == false || InnerLoopEnabled == false) {
LLVM_DEBUG(dbgs() << "Not interchanging loops. It is disabled.\n");
return false;
}
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
return false;
}
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
DependencyMatrix, CostMap, CC)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;

// If the interchange is explicitly enabled, skip the profitability check.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is reasonable, so I have changed the code to skip the profitability check when the loop has metadata that explicitly requests the interchange.

if (OuterLoopEnabled != true) {
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
DependencyMatrix, CostMap, CC)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;
}
}

ORE->emit([&]() {
Expand All @@ -567,8 +614,161 @@ struct LoopInterchange {
LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
printDepMatrix(DependencyMatrix));

// The next outer loop, or nullptr if OuterLoop is the outermost one.
Loop *NextOuterLoop = nullptr;
if (0 < OuterLoopId)
NextOuterLoop = LoopList[OuterLoopId - 1];

// Update the metadata.
std::optional<MDNode *> MDNextOuterLoopID =
makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
LLVMLoopInterchangeFollowupNextOuter});
std::optional<MDNode *> MDOuterLoopID =
makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
LLVMLoopInterchangeFollowupOuter});
std::optional<MDNode *> MDInnerLoopID =
makeFollowupLoopID(LoopID, {LLVMLoopInterchangeFollowupAll,
LLVMLoopInterchangeFollowupInner});
if (MDNextOuterLoopID) {
if (NextOuterLoop) {
NextOuterLoop->setLoopID(*MDNextOuterLoopID);
} else {
LLVM_DEBUG(
dbgs() << "New metadata for the next outer loop is ignored.\n");
}
}
if (MDOuterLoopID)
OuterLoop->setLoopID(*MDOuterLoopID);
if (MDInnerLoopID)
InnerLoop->setLoopID(*MDInnerLoopID);

return true;
}

// Interchange only the loops of LoopList whose "llvm.loop.interchange.enable"
// metadata is true. Each such loop is interchanged with the loop directly
// inside it by calling processLoop. Processing stops at the first request
// that cannot be honored (overlapping requests, a request on the innermost
// loop, or a failed interchange); an OptimizationRemarkMissed is emitted in
// each of those cases. Returns true if at least one interchange was
// performed.
bool processEnabledLoop(SmallVectorImpl<Loop *> &LoopList,
std::vector<std::vector<char>> &DependencyMatrix,
const DenseMap<const Loop *, unsigned> &CostMap) {
bool Changed = false;

// Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
DenseMap<Loop *, unsigned> Loop2Index;
for (unsigned I = 0; I != LoopList.size(); I++)
Loop2Index[LoopList[I]] = I;

// Hold the outer loops to be exchanged (i.e., loops whose
// "llvm.loop.interchange.enable" metadata is true), in the current nest order.
SmallVector<Loop *, 4> Worklist;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As for the cases where the loops to be interchanged do not interfere with each other, I plan to handle them independently. That is, the following code

#pragma clang loop interchange(enable)
for (i=0; i<N; i++)
  for (j=0; j<N; j++)
    #pragma clang loop interchange(enable)
    for (k=0; k<N; k++)
      for (l=0; l<N; l++)
        ...

will be translated like as

!interchange_ij = !{!"interchange_ij", !interchange_enable}
!interchange_kl = !{!"interchange_kl", !interchange_enable}
!interchange_enable = !{!"llvm.loop.interchange.enable", i1 true}

not as follows.

!interchange_kl = !{!"interchange_kl", !interchange_enable, !followup_ij}
!interchange_enable = !{!"llvm.loop.interchange.enable", i1 true}
!followup_ij = !{"followup_next_next_outer", ...}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine, but also the less interesting case.


// Helper function that appends L to the Worklist when interchange is
// explicitly enabled on it. Returns false (after emitting a missed-optimization
// remark) if L overlaps the pair of loops headed by the last Worklist entry,
// i.e. L is that entry itself or its directly nested loop; returns true
// otherwise, including when L is not enabled and nothing is appended.
auto AddLoopIfEnabled = [&](Loop *L) {
if (findMetadata(L) == true) {
if (!Worklist.empty()) {
// Because the loops are sorted in the order of the current nest, it
// is sufficient to compare with the last element.
unsigned InnerLoopId = Loop2Index[Worklist.back()] + 1;
unsigned OuterLoopId = Loop2Index[L];
if (OuterLoopId <= InnerLoopId) {
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "AmbiguousOrder",
L->getStartLoc(), L->getHeader())
<< "The loops to be interchanged are overlapping.";
});
return false;
}
}
Worklist.push_back(L);
}
return true;
};

// Initialize Worklist. To process the loops in inner-loop-first order, add
// them to the worklist in the outer-loop-first order.
for (unsigned I = 0; I != LoopList.size(); I++)
if (!AddLoopIfEnabled(LoopList[I]))
return Changed;

// The number of attempted exchanges. It is only incremented and checked
// inside the LLVM_DEBUG block at the end of the loop below.
[[maybe_unused]] unsigned Attempts = 0;

// Process the loops. An exchange is applied to two loops, but a metadata
// replacement can be applied to three loops: the two loops plus the next
// outer loop, if it exists. This is because it's necessary to express the
// information about the order of the application of interchanges in cases
// where the target loops to be exchanged are overlapping, e.g.,
//
// #pragma clang loop interchange(enable)
// for(int i=0;i<N;i++)
// #pragma clang loop interchange(enable)
// for (int j=0;j<N;j++)
// for (int k=0;k<N;k++)
// ...
//
// In this case we will exchange the innermost two loops at first, the
// follow-up metadata including enabling interchange is attached on the
// outermost loop, and it is enqueued as the next candidate to be processed.
//
// NOTE(review): if both i and j carry the enable metadata at the same time,
// AddLoopIfEnabled appears to reject the nest as overlapping during the
// worklist initialization above. Confirm that this example is meant to rely
// on follow-up metadata rather than on two simultaneous enable attributes.
while (!Worklist.empty()) {
// Entries were pushed outer-loop-first, so popping from the back yields the
// innermost pending request.
Loop *TargetLoop = Worklist.pop_back_val();
assert(findMetadata(TargetLoop) == true &&
"Some metadata was unexpectedlly removed");
unsigned OuterLoopId = Loop2Index[TargetLoop];
unsigned InnerLoopId = OuterLoopId + 1;
// A request on the last loop of LoopList has no inner loop to swap with.
if (InnerLoopId >= LoopList.size()) {
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "InnermostLoop",
TargetLoop->getStartLoc(),
TargetLoop->getHeader())
<< "The metadata is invalid with an innermost loop.";
});
break;
}
bool Interchanged = processLoop(LoopList, InnerLoopId, OuterLoopId,
DependencyMatrix, CostMap);
if (!Interchanged) {
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInterchanged",
TargetLoop->getStartLoc(),
TargetLoop->getHeader())
<< "Failed to perform explicitly specified loop interchange.";
});
break;
}

// The next outer loop, or nullptr if TargetLoop is the outermost one.
Loop *NextOuterLoop = nullptr;
if (0 < OuterLoopId)
NextOuterLoop = LoopList[OuterLoopId - 1];
// Re-read both positions from LoopList and refresh their Loop2Index entries.
Loop *OuterLoop = LoopList[OuterLoopId];
Loop *InnerLoop = LoopList[InnerLoopId];
Changed = true;
Loop2Index[OuterLoop] = OuterLoopId;
Loop2Index[InnerLoop] = InnerLoopId;

// Add new elements, paying attention to the order. All three calls are made
// even if an earlier one fails, because `&=` does not short-circuit.
bool Valid = true;
if (NextOuterLoop)
Valid &= AddLoopIfEnabled(NextOuterLoop);
Valid &= AddLoopIfEnabled(OuterLoop);
Valid &= AddLoopIfEnabled(InnerLoop);
if (!Valid)
break;

// Check that the number of attempted interchanges hasn't exceeded the
// upper limit. Exceeding it would indicate an infinite loop.
LLVM_DEBUG({
// There is no deep meaning behind the current value (square of the size
// of LoopList).
unsigned MaxAttemptsCount = LoopList.size() * LoopList.size();
Attempts++;
assert(Attempts <= MaxAttemptsCount &&
"The number of attempts of interchanges exceeded the limit. An "
"infinite loop may have occured because the metadata was not "
"properly deleted after each exchange.");
});
}

return Changed;
}
};

} // end anonymous namespace
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,17 @@ static void warnAboutLeftoverTransformations(Loop *L,
"requested transformation; the transformation might be disabled or "
"specified as part of an unsupported transformation ordering");
}

if (hasInterchangeTransformation(L) == TM_ForcedByUser) {
LLVM_DEBUG(dbgs() << "Leftover interchange transformation\n");
ORE->emit(
DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
"FailedRequestedInterchange",
L->getStartLoc(), L->getHeader())
<< "loop not interchanged: the optimizer was unable to perform the "
"requested transformation; the transformation might be disabled or "
"specified as part of an unsupported transformation ordering");
}
Comment on lines +84 to +93
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding the loop interchange metadata to https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp, such that for whatever reason there is still interchange instructions on some loops, the user gets a warning.

Is this what you intended?

}

static void warnAboutLeftoverTransformations(Function *F, LoopInfo *LI,
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,16 @@ TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
return TM_Unspecified;
}

/// Classify how loop interchange has been requested for \p L.
///
/// \returns TM_ForcedByUser when the "llvm.loop.interchange.enable" attribute
/// is set on the loop, TM_Disable when the loop carries the disable-all-
/// transforms hint, and TM_Unspecified otherwise. The enable attribute takes
/// precedence over the disable-all hint.
TransformationMode llvm::hasInterchangeTransformation(const Loop *L) {
  const bool ExplicitlyEnabled =
      getBooleanLoopAttribute(L, "llvm.loop.interchange.enable");
  if (ExplicitlyEnabled)
    return TM_ForcedByUser;

  return hasDisableAllTransformsHint(L) ? TM_Disable : TM_Unspecified;
}

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of basic blocks includes the starting point.
SmallVector<BasicBlock *, 16> llvm::collectChildrenInLoop(DominatorTree *DT,
Expand Down
Loading