Skip to content

Commit e19a5fc

Browse files
authored
[FuncSpec] Improve accounting of specialization codesize growth (#113448)
Only accumulate the codesize increase of functions that are actually specialized, rather than for every candidate specialization that we analyse. This fixes a subtle bug where prior analysis of candidate specializations that were deemed unprofitable could prevent subsequent profitable candidates from being recognised.
1 parent ec427df commit e19a5fc

File tree

3 files changed

+73
-23
lines changed

3 files changed

+73
-23
lines changed

llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,16 @@ struct Spec {
138138
// Profitability of the specialization.
139139
unsigned Score;
140140

141+
// Number of instructions in the specialization.
142+
unsigned CodeSize;
143+
141144
// List of call sites, matching this specialization.
142145
SmallVector<CallBase *> CallSites;
143146

144-
Spec(Function *F, const SpecSig &S, unsigned Score)
145-
: F(F), Sig(S), Score(Score) {}
146-
Spec(Function *F, const SpecSig &&S, unsigned Score)
147-
: F(F), Sig(S), Score(Score) {}
147+
Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
148+
: F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
149+
Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
150+
: F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
148151
};
149152

150153
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,18 @@ FunctionSpecializer::~FunctionSpecializer() {
643643
cleanUpSSA();
644644
}
645645

646+
/// Get the unsigned Value of given Cost object. Assumes the Cost is always
647+
/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
648+
/// always Valid.
649+
static unsigned getCostValue(const Cost &C) {
650+
int64_t Value = *C.getValue();
651+
652+
assert(Value >= 0 && "CodeSize and Latency cannot be negative");
653+
// It is safe to down cast since we know the arguments cannot be negative and
654+
// Cost is of type int64_t.
655+
return static_cast<unsigned>(Value);
656+
}
657+
646658
/// Attempt to specialize functions in the module to enable constant
647659
/// propagation across function boundaries.
648660
///
@@ -757,6 +769,11 @@ bool FunctionSpecializer::run() {
757769
SmallVector<Function *> Clones;
758770
for (unsigned I = 0; I < NSpecs; ++I) {
759771
Spec &S = AllSpecs[BestSpecs[I]];
772+
773+
// Accumulate the codesize growth for the function, now we are creating the
774+
// specialization.
775+
FunctionGrowth[S.F] += S.CodeSize;
776+
760777
S.Clone = createSpecialization(S.F, S.Sig);
761778

762779
// Update the known call sites to call the clone.
@@ -835,18 +852,6 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
835852
return Clone;
836853
}
837854

838-
/// Get the unsigned Value of given Cost object. Assumes the Cost is always
839-
/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
840-
/// always Valid.
841-
static unsigned getCostValue(const Cost &C) {
842-
int64_t Value = *C.getValue();
843-
844-
assert(Value >= 0 && "CodeSize and Latency cannot be negative");
845-
// It is safe to down cast since we know the arguments cannot be negative and
846-
// Cost is of type int64_t.
847-
return static_cast<unsigned>(Value);
848-
}
849-
850855
bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
851856
SmallVectorImpl<Spec> &AllSpecs,
852857
SpecMap &SM) {
@@ -922,16 +927,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
922927
}
923928
CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
924929

930+
unsigned CodeSizeSavings = getCostValue(CodeSize);
931+
unsigned SpecSize = FuncSize - CodeSizeSavings;
932+
925933
auto IsProfitable = [&]() -> bool {
926934
// No check required.
927935
if (ForceSpecialization)
928936
return true;
929937

930-
unsigned CodeSizeSavings = getCostValue(CodeSize);
931-
// TODO: We should only accumulate codesize increase of specializations
932-
// that are actually created.
933-
FunctionGrowth[F] += FuncSize - CodeSizeSavings;
934-
935938
LLVM_DEBUG(
936939
dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
937940
<< Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
@@ -962,7 +965,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
962965
if (LatencySavings < MinLatencySavings * FuncSize / 100)
963966
return false;
964967
// Maximum codesize growth.
965-
if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth)
968+
if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
966969
return false;
967970

968971
Score += std::max(CodeSizeSavings, LatencySavings);
@@ -974,7 +977,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
974977
continue;
975978

976979
// Create a new specialisation entry.
977-
auto &Spec = AllSpecs.emplace_back(F, S, Score);
980+
auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
978981
if (CS.getFunction() != F)
979982
Spec.CallSites.push_back(&CS);
980983
const unsigned Index = AllSpecs.size() - 1;
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
2+
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \
3+
; RUN: -funcspec-for-literal-constant=true \
4+
; RUN: -funcspec-min-codesize-savings=50 \
5+
; RUN: -funcspec-min-latency-savings=50 \
6+
; RUN: -funcspec-max-codesize-growth=1 \
7+
; RUN: -S < %s | FileCheck %s
8+
9+
; Verify that we are able to specialize a function successfully after analysis
10+
; of other specializations that are found to not be profitable.
11+
define void @test_specialize_after_failed_analysis(i32 %n) {
12+
entry:
13+
%notspec0 = call i32 @add(i32 0, i32 %n)
14+
%notspec1 = call i32 @add(i32 1, i32 %n)
15+
%spec = call i32 @add(i32 1, i32 1)
16+
ret void
17+
}
18+
19+
define i32 @add(i32 %x, i32 %y) {
20+
entry:
21+
%res = add i32 %x, %y
22+
ret i32 %res
23+
}
24+
; CHECK-LABEL: define void @test_specialize_after_failed_analysis(
25+
; CHECK-SAME: i32 [[N:%.*]]) {
26+
; CHECK-NEXT: [[ENTRY:.*:]]
27+
; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]])
28+
; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]])
29+
; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1)
30+
; CHECK-NEXT: ret void
31+
;
32+
;
33+
; CHECK-LABEL: define i32 @add(
34+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
35+
; CHECK-NEXT: [[ENTRY:.*:]]
36+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]]
37+
; CHECK-NEXT: ret i32 [[RES]]
38+
;
39+
;
40+
; CHECK-LABEL: define internal i32 @add.specialized.1(
41+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
42+
; CHECK-NEXT: [[ENTRY:.*:]]
43+
; CHECK-NEXT: ret i32 poison
44+
;

0 commit comments

Comments
 (0)