Skip to content

Commit b50f42e

Browse files
mingmingl-llvmlravenclaw
authored and committed
[TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (llvm#81442)
Clang's `-fwhole-program-vtables` is required for this optimization to take place. If `-fwhole-program-vtables` is not enabled, this change is a no-op.

* Function-comparison (before):

```
%vtable = load ptr, ptr %obj
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
%cond = icmp eq ptr %func, @callee
br i1 %cond, label bb1, label bb2:

bb1:
  call @callee

bb2:
  call %func
```

* VTable-comparison (after):

```
%vtable = load ptr, ptr %obj
%cond = icmp eq ptr %vtable, @vtable-address-point
br i1 %cond, label bb1, label bb2:

bb1:
  call @callee

bb2:
  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
  %func = load ptr, ptr %vfn
  call %func
```

Key changes:

1. Find out virtual calls and the vtables they come from.
   - The ICP relies on type intrinsic `llvm.type.test` to find out virtual calls and the compatible vtables, and relies on type metadata to find the address point for comparison.
2. ICP pass does cost-benefit analysis and compares vtable only when the number of vtables for a function candidate is within (option specified) threshold.
3. Sink the function addressing and vtable load instruction to indirect fallback.
   - The sink helper functions are simplified versions of `InstCombinerImpl::tryToSinkInstruction`. Currently debug intrinsics are not handled. Ideally `InstCombinerImpl::tryToSinkInstructionDbgValues` and `InstCombinerImpl::tryToSinkInstructionDbgVariableRecords` could be moved into Transforms/Utils/Local.cpp (or another util cpp file) to handle debug intrinsics when moving instructions across basic blocks.
4. Keep value profiles updated
   1) Update vtable value profiles after inlining
   2) For either function-based comparison or vtable-based comparison, update both vtable and indirect call value profiles.
1 parent 21a0bed commit b50f42e

File tree

14 files changed

+1288
-157
lines changed

14 files changed

+1288
-157
lines changed

compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp

Lines changed: 108 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,59 +5,61 @@
55
// ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported
66
// UNSUPPORTED: ppc && host-byteorder-big-endian
77

8-
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o %t-test
9-
// RUN: env LLVM_PROFILE_FILE=%t-test.profraw %t-test
8+
// RUN: rm -rf %t && mkdir %t && cd %t
9+
10+
// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test
11+
// RUN: env LLVM_PROFILE_FILE=test.profraw ./test
1012

1113
// Show vtable profiles from raw profile.
12-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
14+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
1315

1416
// Generate indexed profile from raw profile and show the data.
15-
// RUN: llvm-profdata merge --keep-vtable-symbols %t-test.profraw -o %t-test.profdata
16-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
17+
// RUN: llvm-profdata merge --keep-vtable-symbols test.profraw -o test.profdata
18+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
1719

1820
// Generate text profile from raw and indexed profiles respectively and show the data.
19-
// RUN: llvm-profdata merge --keep-vtable-symbols --text %t-test.profraw -o %t-raw.proftext
20-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-raw.proftext | FileCheck %s --check-prefix=ICTEXT
21-
// RUN: llvm-profdata merge --keep-vtable-symbols --text %t-test.profdata -o %t-indexed.proftext
22-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-indexed.proftext | FileCheck %s --check-prefix=ICTEXT
21+
// RUN: llvm-profdata merge --keep-vtable-symbols --text test.profraw -o raw.proftext
22+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text raw.proftext | FileCheck %s --check-prefix=ICTEXT
23+
// RUN: llvm-profdata merge --keep-vtable-symbols --text test.profdata -o indexed.proftext
24+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text indexed.proftext | FileCheck %s --check-prefix=ICTEXT
2325

2426
// Generate indexed profile from text profiles and show the data
25-
// RUN: llvm-profdata merge --keep-vtable-symbols --binary %t-raw.proftext -o %t-text.profraw
26-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
27-
// RUN: llvm-profdata merge --keep-vtable-symbols --binary %t-indexed.proftext -o %t-text.profdata
28-
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
27+
// RUN: llvm-profdata merge --keep-vtable-symbols --binary raw.proftext -o text.profraw
28+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
29+
// RUN: llvm-profdata merge --keep-vtable-symbols --binary indexed.proftext -o text.profdata
30+
// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
2931

3032
// COMMON: Counters:
3133
// COMMON-NEXT: main:
32-
// COMMON-NEXT: Hash: 0x0f9a16fe6d398548
33-
// COMMON-NEXT: Counters: 2
34+
// COMMON-NEXT: Hash: 0x068617320ec408a0
35+
// COMMON-NEXT: Counters: 4
3436
// COMMON-NEXT: Indirect Call Site Count: 2
3537
// COMMON-NEXT: Number of instrumented vtables: 2
3638
// RAW: Indirect Target Results:
37-
// RAW-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%)
38-
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%)
39-
// RAW-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%)
40-
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%)
39+
// RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
40+
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
41+
// RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
42+
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
4143
// RAW-NEXT: VTable Results:
42-
// RAW-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%)
43-
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
44+
// RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
45+
// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
4446
// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
4547
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
4648
// INDEXED: Indirect Target Results:
47-
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%)
48-
// INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%)
49-
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%)
50-
// INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%)
49+
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
50+
// INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
51+
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
52+
// INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
5153
// INDEXED-NEXT: VTable Results:
52-
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
53-
// INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%)
54+
// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
55+
// INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
5456
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
5557
// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
5658
// COMMON: Instrumentation level: IR entry_first = 0
5759
// COMMON-NEXT: Functions shown: 1
58-
// COMMON-NEXT: Total functions: 6
60+
// COMMON-NEXT: Total functions: 7
5961
// COMMON-NEXT: Maximum function count: 1000
60-
// COMMON-NEXT: Maximum internal block count: 250
62+
// COMMON-NEXT: Maximum internal block count: 1000
6163
// COMMON-NEXT: Statistics for indirect call sites profile:
6264
// COMMON-NEXT: Total number of sites: 2
6365
// COMMON-NEXT: Total number of sites with values: 2
@@ -76,11 +78,13 @@
7678
// ICTEXT: :ir
7779
// ICTEXT: main
7880
// ICTEXT: # Func Hash:
79-
// ICTEXT: 1124236338992350536
81+
// ICTEXT: 470088714870327456
8082
// ICTEXT: # Num Counters:
81-
// ICTEXT: 2
83+
// ICTEXT: 4
8284
// ICTEXT: # Counter Values:
8385
// ICTEXT: 1000
86+
// ICTEXT: 1000
87+
// ICTEXT: 200
8488
// ICTEXT: 1
8589
// ICTEXT: # Num Value Kinds:
8690
// ICTEXT: 2
@@ -89,41 +93,98 @@
8993
// ICTEXT: # NumValueSites:
9094
// ICTEXT: 2
9195
// ICTEXT: 2
92-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii:750
93-
// ICTEXT: _ZN8Derived15func1Eii:250
96+
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
97+
// ICTEXT: _ZN8Derived14funcEii:50
9498
// ICTEXT: 2
95-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii:750
96-
// ICTEXT: _ZN8Derived15func2Eii:250
99+
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
100+
// ICTEXT: _ZN8Derived1D0Ev:250
97101
// ICTEXT: # ValueKind = IPVK_VTableTarget:
98102
// ICTEXT: 2
99103
// ICTEXT: # NumValueSites:
100104
// ICTEXT: 2
101105
// ICTEXT: 2
102-
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
103-
// ICTEXT: _ZTV8Derived1:250
106+
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
107+
// ICTEXT: _ZTV8Derived1:50
104108
// ICTEXT: 2
105109
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
106110
// ICTEXT: _ZTV8Derived1:250
107111

112+
// Test indirect call promotion transformation using vtable profiles.
113+
// - Build with `-g` to enable debug information.
114+
// - In real world settings, ICP pass is disabled in prelink pipeline. In
115+
// the postlink pipeline, ICP is enabled after whole-program-devirtualization
116+
// pass. Do the same thing in this test.
117+
// - Enable `-fwhole-program-vtables` to generate type metadata and intrinsics.
118+
// - Enable `-fno-split-lto-unit` and `-Wl,--lto-whole-program-visibility` to
119+
// preserve type intrinsics for ICP pass.
120+
// RUN: %clangxx -m64 -fprofile-use=test.profdata -Wl,--lto-whole-program-visibility \
121+
// RUN: -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -fuse-ld=lld \
122+
// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \
123+
// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \
124+
// RUN: -mllvm -enable-vtable-profile-use \
125+
// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \
126+
// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \
127+
// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \
128+
// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP"
129+
130+
// For the indirect call site `ptr->func`
131+
// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
132+
// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
133+
//
134+
// For the indirect call site `delete ptr`
135+
// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
136+
// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
137+
138+
// The IR matchers for indirect callsite `ptr->func`.
139+
// IR-LABEL: @main
140+
// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei
141+
// IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]]
142+
// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16)
143+
// IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]],
144+
//
145+
// IR: [[BB1]]:
146+
// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii
147+
// IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]]
148+
//
149+
// IR: [[BB2]]:
150+
// IR: [[CMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV8Derived1, i32 16)
151+
// IR: br i1 [[CMP2]], label %[[BB3:.*]], label %[[BB4:[a-zA-Z0-9_.]+]],
152+
//
153+
// IR: [[BB3]]:
154+
// IR: [[RESBB3:%.*]] = {{.*}}call {{.*}} @_ZN8Derived14funcEii
155+
// IR: br label %[[MERGE1:[a-zA-Z0-9_.]+]],
156+
//
157+
// IR: [[BB4]]:
158+
// IR: [[FUNCPTR:%.*]] = load ptr, ptr [[VTABLE]]
159+
// IR: [[RESBB4:%.*]] = {{.*}}call {{.*}} [[FUNCPTR]]
160+
// IR: br label %[[MERGE1]]
161+
//
162+
// IR: [[MERGE1]]:
163+
// IR: [[RES1:%.*]] = phi i32 [ [[RESBB4]], %[[BB4]] ], [ [[RESBB3]], %[[BB3]] ]
164+
// IR: br label %[[MERGE0]]
165+
//
166+
// IR: [[MERGE0]]:
167+
// IR: [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ]
108168
#include <cstdio>
109169
#include <cstdlib>
110170
class Base {
111171
public:
112-
virtual int func1(int a, int b) = 0;
113-
virtual int func2(int a, int b) = 0;
172+
virtual int func(int a, int b) = 0;
173+
174+
virtual ~Base() {};
114175
};
115176
class Derived1 : public Base {
116177
public:
117-
int func1(int a, int b) override { return a + b; }
178+
int func(int a, int b) override { return a * b; }
118179

119-
int func2(int a, int b) override { return a * b; }
180+
~Derived1() {}
120181
};
121182
namespace {
122183
class Derived2 : public Base {
123184
public:
124-
int func1(int a, int b) override { return a - b; }
185+
int func(int a, int b) override { return a * (a - b); }
125186

126-
int func2(int a, int b) override { return a * (a - b); }
187+
~Derived2() {}
127188
};
128189
} // namespace
129190
__attribute__((noinline)) Base *createType(int a) {
@@ -140,7 +201,10 @@ int main(int argc, char **argv) {
140201
int a = rand();
141202
int b = rand();
142203
Base *ptr = createType(i);
143-
sum += ptr->func1(a, b) + ptr->func2(b, a);
204+
if (i % 5 == 0)
205+
sum += ptr->func(b, a);
206+
207+
delete ptr;
144208
}
145209
printf("sum is %d\n", sum);
146210
return 0;

llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class ICallPromotionAnalysis {
5757
///
5858
/// The returned array space is owned by this class, and overwritten on
5959
/// subsequent calls.
60-
ArrayRef<InstrProfValueData> getPromotionCandidatesForInstruction(
60+
MutableArrayRef<InstrProfValueData> getPromotionCandidatesForInstruction(
6161
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates);
6262
};
6363

llvm/include/llvm/Analysis/IndirectCallVisitor.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
3737
// A heuristic is used to find the address feeding instructions.
3838
static Instruction *tryGetVTableInstruction(CallBase *CB) {
3939
assert(CB != nullptr && "Caller guaranteed");
40-
LoadInst *LI = dyn_cast<LoadInst>(CB->getCalledOperand());
40+
if (!CB->isIndirectCall())
41+
return nullptr;
4142

43+
LoadInst *LI = dyn_cast<LoadInst>(CB->getCalledOperand());
4244
if (LI != nullptr) {
4345
Value *FuncPtr = LI->getPointerOperand(); // GEP (or bitcast)
4446
Value *VTablePtr = FuncPtr->stripInBoundsConstantOffsets();

llvm/include/llvm/ProfileData/InstrProf.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,8 @@ getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
294294
uint32_t MaxNumValueData, uint32_t &ActualNumValueData,
295295
uint64_t &TotalC, bool GetNoICPValue = false);
296296

297+
// TODO: Unify metadata name 'PGOFuncName' and 'PGOName', by supporting read
298+
// of this metadata for backward compatibility and generating 'PGOName' only.
297299
/// Extract the value profile data from \p Inst and returns them if \p Inst is
298300
/// annotated with value profile data. Returns an empty vector otherwise.
299301
SmallVector<InstrProfValueData, 4>
@@ -303,6 +305,8 @@ getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
303305

304306
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
305307

308+
inline StringRef getPGONameMetadataName() { return "PGOName"; }
309+
306310
/// Return the PGOFuncName meta data associated with a function.
307311
MDNode *getPGOFuncNameMetadata(const Function &F);
308312

@@ -311,8 +315,14 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO = false);
311315
/// Create the PGOFuncName meta data if PGOFuncName is different from
312316
/// function's raw name. This should only apply to internal linkage functions
313317
/// declared by users only.
318+
/// TODO: Update all callers to 'createPGONameMetadata' and deprecate this
319+
/// function.
314320
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
315321

322+
/// Create the PGOName metadata if a global object's PGO name is different from
323+
/// its mangled name. This should apply to local-linkage global objects only.
324+
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName);
325+
316326
/// Check if we can use Comdat for profile variables. This will eliminate
317327
/// the duplicated profile variables for Comdat functions.
318328
bool needsComdatForCounter(const GlobalObject &GV, const Module &M);

llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,17 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
8787
return I;
8888
}
8989

90-
ArrayRef<InstrProfValueData>
90+
MutableArrayRef<InstrProfValueData>
9191
ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
9292
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates) {
9393
uint32_t NumVals;
9494
auto Res = getValueProfDataFromInst(*I, IPVK_IndirectCallTarget,
9595
MaxNumPromotions, NumVals, TotalCount);
9696
if (!Res) {
9797
NumCandidates = 0;
98-
return ArrayRef<InstrProfValueData>();
98+
return MutableArrayRef<InstrProfValueData>();
9999
}
100100
ValueDataArray = std::move(Res);
101101
NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount);
102-
return ArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
102+
return MutableArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
103103
}

llvm/lib/ProfileData/InstrProf.cpp

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,12 @@ cl::opt<bool> EnableVTableValueProfiling(
228228
"the types of a C++ pointer. The information is used in indirect "
229229
"call promotion to do selective vtable-based comparison."));
230230

231+
cl::opt<bool> EnableVTableProfileUse(
232+
"enable-vtable-profile-use", cl::init(false),
233+
cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable "
234+
"profiles will be used by ICP pass for more efficient indirect "
235+
"call sequence. If false, type profiles won't be used."));
236+
231237
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
232238
Triple::ObjectFormatType OF,
233239
bool AddSegmentInfo) {
@@ -391,7 +397,7 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO) {
391397
// PGONameMetadata should be set by compiler at profile use time
392398
// and read by symtab creation to look up symbols corresponding to
393399
// a MD5 hash.
394-
return getIRPGOObjectName(V, InLTO, /*PGONameMetadata=*/nullptr);
400+
return getIRPGOObjectName(V, InLTO, V.getMetadata(getPGONameMetadataName()));
395401
}
396402

397403
// See getIRPGOObjectName() for a description of the format.
@@ -480,8 +486,7 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
480486
for (GlobalVariable &G : M.globals()) {
481487
if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
482488
continue;
483-
if (Error E = addVTableWithName(
484-
G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
489+
if (Error E = addVTableWithName(G, getPGOName(G, InLTO)))
485490
return E;
486491
}
487492

@@ -1425,16 +1430,28 @@ MDNode *getPGOFuncNameMetadata(const Function &F) {
14251430
return F.getMetadata(getPGOFuncNameMetadataName());
14261431
}
14271432

1428-
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
1429-
// Only for internal linkage functions.
1430-
if (PGOFuncName == F.getName())
1431-
return;
1432-
// Don't create duplicated meta-data.
1433-
if (getPGOFuncNameMetadata(F))
1433+
static void createPGONameMetadata(GlobalObject &GO, StringRef MetadataName,
1434+
StringRef PGOName) {
1435+
// Only for internal linkage functions or global variables. The name is not
1436+
// the same as PGO name for these global objects.
1437+
if (GO.getName() == PGOName)
14341438
return;
1435-
LLVMContext &C = F.getContext();
1436-
MDNode *N = MDNode::get(C, MDString::get(C, PGOFuncName));
1437-
F.setMetadata(getPGOFuncNameMetadataName(), N);
1439+
1440+
// Don't create duplicated metadata.
1441+
if (GO.getMetadata(MetadataName))
1442+
return;
1443+
1444+
LLVMContext &C = GO.getContext();
1445+
MDNode *N = MDNode::get(C, MDString::get(C, PGOName));
1446+
GO.setMetadata(MetadataName, N);
1447+
}
1448+
1449+
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
1450+
return createPGONameMetadata(F, getPGOFuncNameMetadataName(), PGOFuncName);
1451+
}
1452+
1453+
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName) {
1454+
return createPGONameMetadata(GO, getPGONameMetadataName(), PGOName);
14381455
}
14391456

14401457
bool needsComdatForCounter(const GlobalObject &GO, const Module &M) {

0 commit comments

Comments
 (0)