Skip to content

Commit 887ef54

Browse files
committed
consider leaf descendants to include more candidates for outlining
1 parent e9adcc4 commit 887ef54

15 files changed

+453
-34
lines changed

llvm/include/llvm/Support/SuffixTree.h

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ class SuffixTree {
4242
/// Each element is an integer representing an instruction in the module.
4343
ArrayRef<unsigned> Str;
4444

45+
/// Whether to consider leaf descendants or only leaf children.
46+
bool OutlinerLeafDescendants;
47+
4548
/// A repeated substring in the tree.
4649
struct RepeatedSubstring {
4750
/// The length of the string.
@@ -130,11 +133,27 @@ class SuffixTree {
130133
/// this step.
131134
unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd);
132135

136+
/// This vector contains all leaf nodes of this suffix tree. These leaf nodes
137+
/// are identified using post-order depth-first traversal, so that the order
138+
/// of these leaf nodes in the vector matches the order of the leaves in the
139+
/// tree from left to right if one were to draw the tree on paper.
140+
std::vector<SuffixTreeLeafNode *> LeafNodes;
141+
142+
/// Perform a post-order depth-first traversal of the tree and perform two
143+
/// tasks during the traversal. The first is to populate LeafNodes, adding
144+
/// nodes in order of the traversal. The second is to keep track of the leaf
145+
/// descendants of every internal node by assigning values to LeftLeafIdx
146+
/// and RightLeafIdx fields of SuffixTreeNode for all internal nodes.
147+
void setLeafNodes();
148+
133149
public:
134150
/// Construct a suffix tree from a sequence of unsigned integers.
135151
///
136152
/// \param Str The string to construct the suffix tree for.
137-
SuffixTree(const ArrayRef<unsigned> &Str);
153+
/// \param OutlinerLeafDescendants Whether to consider leaf descendants or
154+
/// only leaf children (used by Machine Outliner).
155+
SuffixTree(const ArrayRef<unsigned> &Str,
156+
bool OutlinerLeafDescendants = false);
138157

139158
/// Iterator for finding all repeated substrings in the suffix tree.
140159
struct RepeatedSubstringIterator {
@@ -154,6 +173,12 @@ class SuffixTree {
154173
/// instruction lengths.
155174
const unsigned MinLength = 2;
156175

176+
/// Vector of leaf nodes of the suffix tree.
177+
const std::vector<SuffixTreeLeafNode *> &LeafNodes;
178+
179+
/// Whether to consider leaf descendants or only leaf children.
180+
bool OutlinerLeafDescendants = !LeafNodes.empty();
181+
157182
/// Move the iterator to the next repeated substring.
158183
void advance();
159184

@@ -179,7 +204,10 @@ class SuffixTree {
179204
return !(*this == Other);
180205
}
181206

182-
RepeatedSubstringIterator(SuffixTreeInternalNode *N) : N(N) {
207+
RepeatedSubstringIterator(
208+
SuffixTreeInternalNode *N,
209+
const std::vector<SuffixTreeLeafNode *> &LeafNodes = {})
210+
: N(N), LeafNodes(LeafNodes) {
183211
// Do we have a non-null node?
184212
if (!N)
185213
return;
@@ -191,7 +219,7 @@ class SuffixTree {
191219
};
192220

193221
typedef RepeatedSubstringIterator iterator;
194-
iterator begin() { return iterator(Root); }
222+
iterator begin() { return iterator(Root, LeafNodes); }
195223
iterator end() { return iterator(nullptr); }
196224
};
197225

llvm/include/llvm/Support/SuffixTreeNode.h

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,17 @@ struct SuffixTreeNode {
4646
/// the root to this node.
4747
unsigned ConcatLen = 0;
4848

49+
/// These two indices give a range of indices for its leaf descendants.
50+
/// Imagine drawing a tree on paper and assigning a unique index to each leaf
51+
/// node in monotonically increasing order from left to right. This way of
52+
/// numbering the leaf nodes allows us to associate a continuous range of
53+
/// indices with each internal node. For example, if a node has leaf
54+
/// descendants with indices i, i+1, ..., j, then its LeftLeafIdx is i and
55+
/// its RightLeafIdx is j. These indices are for LeafNodes in the SuffixTree
56+
/// class, which is constructed using post-order depth-first traversal.
57+
unsigned LeftLeafIdx = EmptyIdx;
58+
unsigned RightLeafIdx = EmptyIdx;
59+
4960
public:
5061
// LLVM RTTI boilerplate.
5162
NodeKind getKind() const { return Kind; }
@@ -56,6 +67,18 @@ struct SuffixTreeNode {
5667
/// \returns the end index of this node.
5768
virtual unsigned getEndIdx() const = 0;
5869

70+
/// \returns the index of this node's leftmost leaf node.
71+
unsigned getLeftLeafIdx() const;
72+
73+
/// \returns the index of this node's rightmost leaf node.
74+
unsigned getRightLeafIdx() const;
75+
76+
/// Set the index of the leftmost leaf node of this node to \p Idx.
77+
void setLeftLeafIdx(unsigned Idx);
78+
79+
/// Set the index of the rightmost leaf node of this node to \p Idx.
80+
void setRightLeafIdx(unsigned Idx);
81+
5982
/// Advance this node's StartIdx by \p Inc.
6083
void incrementStartIdx(unsigned Inc);
6184

@@ -168,4 +191,4 @@ struct SuffixTreeLeafNode : SuffixTreeNode {
168191
virtual ~SuffixTreeLeafNode() = default;
169192
};
170193
} // namespace llvm
171-
#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
194+
#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H

llvm/lib/CodeGen/MachineOutliner.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ static cl::opt<unsigned> OutlinerBenefitThreshold(
121121
cl::desc(
122122
"The minimum size in bytes before an outlining candidate is accepted"));
123123

124+
static cl::opt<bool> OutlinerLeafDescendants(
125+
"outliner-leaf-descendants", cl::init(true), cl::Hidden,
126+
cl::desc("Consider all leaf descendants of internal nodes of the suffix "
127+
"tree as candidates for outlining (if false, only leaf children "
128+
"are considered)"));
129+
124130
namespace {
125131

126132
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -576,7 +582,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
576582
void MachineOutliner::findCandidates(
577583
InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
578584
FunctionList.clear();
579-
SuffixTree ST(Mapper.UnsignedVec);
585+
SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
580586

581587
// First, find all of the repeated substrings in the tree of minimum length
582588
// 2.

llvm/lib/Support/SuffixTree.cpp

Lines changed: 93 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/Support/SuffixTree.h"
14+
#include "llvm/ADT/SmallPtrSet.h"
1415
#include "llvm/Support/Allocator.h"
1516
#include "llvm/Support/Casting.h"
1617
#include "llvm/Support/SuffixTreeNode.h"
18+
#include <stack>
1719

1820
using namespace llvm;
1921

@@ -26,7 +28,9 @@ static size_t numElementsInSubstring(const SuffixTreeNode *N) {
2628
return N->getEndIdx() - N->getStartIdx() + 1;
2729
}
2830

29-
SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
31+
SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
32+
bool OutlinerLeafDescendants)
33+
: Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
3034
Root = insertRoot();
3135
Active.Node = Root;
3236

@@ -46,6 +50,11 @@ SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
4650
// Set the suffix indices of each leaf.
4751
assert(Root && "Root node can't be nullptr!");
4852
setSuffixIndices();
53+
54+
// Collect all leaf nodes of the suffix tree. And for each internal node,
55+
// record the range of leaf nodes that are descendants of it.
56+
if (OutlinerLeafDescendants)
57+
setLeafNodes();
4958
}
5059

5160
SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
@@ -105,6 +114,68 @@ void SuffixTree::setSuffixIndices() {
105114
}
106115
}
107116

117+
void SuffixTree::setLeafNodes() {
118+
// A stack that keeps track of nodes to visit for post-order DFS traversal.
119+
std::stack<SuffixTreeNode *> ToVisit;
120+
ToVisit.push(Root);
121+
122+
// This keeps track of the index of the next leaf node to be added to
123+
// the LeafNodes vector of the suffix tree.
124+
unsigned LeafCounter = 0;
125+
126+
// This keeps track of nodes whose children have been added to the stack
127+
// during the post-order depth-first traversal of the tree.
128+
llvm::SmallPtrSet<SuffixTreeInternalNode *, 32> ChildrenAddedToStack;
129+
130+
// Traverse the tree in post-order.
131+
while (!ToVisit.empty()) {
132+
SuffixTreeNode *CurrNode = ToVisit.top();
133+
ToVisit.pop();
134+
if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
135+
// The current node is an internal node.
136+
if (ChildrenAddedToStack.find(CurrInternalNode) !=
137+
ChildrenAddedToStack.end()) {
138+
// If the children of the current node have been added to the stack,
139+
// then this is the second time we visit this node and at this point,
140+
// all of its children have already been processed. Now, we can
141+
// set its LeftLeafIdx and RightLeafIdx.
142+
auto it = CurrInternalNode->Children.begin();
143+
if (it != CurrInternalNode->Children.end()) {
144+
// Get the first child to use its RightLeafIdx. The RightLeafIdx is
145+
// used as the first child is the initial one added to the stack, so
146+
// it's the last one to be processed. This implies that the leaf
147+
// descendants of the first child are assigned the largest index
148+
// numbers.
149+
CurrNode->setRightLeafIdx(it->second->getRightLeafIdx());
150+
// Get the last child to use its LeftLeafIdx.
151+
while (std::next(it) != CurrInternalNode->Children.end())
152+
it = std::next(it);
153+
CurrNode->setLeftLeafIdx(it->second->getLeftLeafIdx());
154+
assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
155+
"LeftLeafIdx should not be larger than RightLeafIdx");
156+
}
157+
} else {
158+
// This is the first time we visit this node. This means that its
159+
// children have not been added to the stack yet. Hence, we will add
160+
// the current node back to the stack and add its children to the
161+
// stack for processing.
162+
ToVisit.push(CurrNode);
163+
for (auto &ChildPair : CurrInternalNode->Children)
164+
ToVisit.push(ChildPair.second);
165+
ChildrenAddedToStack.insert(CurrInternalNode);
166+
}
167+
} else {
168+
// The current node is a leaf node.
169+
// We can simply set its LeftLeafIdx and RightLeafIdx.
170+
CurrNode->setLeftLeafIdx(LeafCounter);
171+
CurrNode->setRightLeafIdx(LeafCounter);
172+
LeafCounter++;
173+
auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode);
174+
LeafNodes.push_back(CurrLeafNode);
175+
}
176+
}
177+
}
178+
108179
unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
109180
SuffixTreeInternalNode *NeedsLink = nullptr;
110181

@@ -230,6 +301,7 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
230301

231302
// Each leaf node represents a repeat of a string.
232303
SmallVector<unsigned> RepeatedSubstringStarts;
304+
SmallVector<SuffixTreeLeafNode *> LeafDescendants;
233305

234306
// Continue visiting nodes until we find one which repeats more than once.
235307
while (!InternalNodesToVisit.empty()) {
@@ -241,30 +313,35 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
241313
// it's too short, we'll quit.
242314
unsigned Length = Curr->getConcatLen();
243315

244-
// Iterate over each child, saving internal nodes for visiting, and
245-
// leaf nodes' SuffixIdx in RepeatedSubstringStarts. Internal nodes
246-
// represent individual strings, which may repeat.
247-
for (auto &ChildPair : Curr->Children) {
316+
// Iterate over each child, saving internal nodes for visiting.
317+
// Internal nodes represent individual strings, which may repeat.
318+
for (auto &ChildPair : Curr->Children)
248319
// Save all of this node's children for processing.
249320
if (auto *InternalChild =
250-
dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) {
321+
dyn_cast<SuffixTreeInternalNode>(ChildPair.second))
251322
InternalNodesToVisit.push_back(InternalChild);
252-
continue;
253-
}
254-
255-
if (Length < MinLength)
256-
continue;
257-
258-
// Have an occurrence of a potentially repeated string. Save it.
259-
auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
260-
RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
261-
}
323+
324+
// If length of repeated substring is below threshold, then skip it.
325+
if (Length < MinLength)
326+
continue;
262327

263328
// The root never represents a repeated substring. If we're looking at
264329
// that, then skip it.
265330
if (Curr->isRoot())
266331
continue;
267332

333+
// Collect leaf children or all leaf descendants, depending on
// OutlinerLeafDescendants.
334+
if (!OutlinerLeafDescendants) {
335+
for (auto &ChildPair : Curr->Children)
336+
if (auto *Leaf = dyn_cast<SuffixTreeLeafNode>(ChildPair.second))
337+
RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
338+
} else {
339+
LeafDescendants.assign(LeafNodes.begin() + Curr->getLeftLeafIdx(),
340+
LeafNodes.begin() + Curr->getRightLeafIdx() + 1);
341+
for (SuffixTreeLeafNode *Leaf : LeafDescendants)
342+
RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
343+
}
344+
268345
// Do we have any repeated substrings?
269346
if (RepeatedSubstringStarts.size() < 2)
270347
continue;

llvm/lib/Support/SuffixTreeNode.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,8 @@ unsigned SuffixTreeLeafNode::getEndIdx() const {
3838

3939
unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; }
4040
void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; }
41+
42+
unsigned SuffixTreeNode::getLeftLeafIdx() const { return LeftLeafIdx; }
43+
unsigned SuffixTreeNode::getRightLeafIdx() const { return RightLeafIdx; }
44+
void SuffixTreeNode::setLeftLeafIdx(unsigned Idx) { LeftLeafIdx = Idx; }
45+
void SuffixTreeNode::setRightLeafIdx(unsigned Idx) { RightLeafIdx = Idx; }

llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
2+
# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs -outliner-leaf-descendants=false %s -o - | FileCheck %s
33

44
# Outlining CFI instructions is unsafe if we cannot outline all of the CFI
55
# instructions from a function. This shows that we choose not to outline the

0 commit comments

Comments
 (0)