diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp index c23b2c77321c6..6cf73c6fdbd73 100644 --- a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp +++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp @@ -5,59 +5,61 @@ // ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported // UNSUPPORTED: ppc && host-byteorder-big-endian -// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o %t-test -// RUN: env LLVM_PROFILE_FILE=%t-test.profraw %t-test +// RUN: rm -rf %t && mkdir %t && cd %t + +// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test +// RUN: env LLVM_PROFILE_FILE=test.profraw ./test // Show vtable profiles from raw profile. -// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profraw | FileCheck %s --check-prefixes=COMMON,RAW +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profraw | FileCheck %s --check-prefixes=COMMON,RAW // Generate indexed profile from raw profile and show the data. -// RUN: llvm-profdata merge --keep-vtable-symbols %t-test.profraw -o %t-test.profdata -// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED +// RUN: llvm-profdata merge --keep-vtable-symbols test.profraw -o test.profdata +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED // Generate text profile from raw and indexed profiles respectively and show the data. 
-// RUN: llvm-profdata merge --keep-vtable-symbols --text %t-test.profraw -o %t-raw.proftext -// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-raw.proftext | FileCheck %s --check-prefix=ICTEXT -// RUN: llvm-profdata merge --keep-vtable-symbols --text %t-test.profdata -o %t-indexed.proftext -// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-indexed.proftext | FileCheck %s --check-prefix=ICTEXT +// RUN: llvm-profdata merge --keep-vtable-symbols --text test.profraw -o raw.proftext +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text raw.proftext | FileCheck %s --check-prefix=ICTEXT +// RUN: llvm-profdata merge --keep-vtable-symbols --text test.profdata -o indexed.proftext +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text indexed.proftext | FileCheck %s --check-prefix=ICTEXT // Generate indexed profile from text profiles and show the data -// RUN: llvm-profdata merge --keep-vtable-symbols --binary %t-raw.proftext -o %t-text.profraw -// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED -// RUN: llvm-profdata merge --keep-vtable-symbols --binary %t-indexed.proftext -o %t-text.profdata -// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED +// RUN: llvm-profdata merge --keep-vtable-symbols --binary raw.proftext -o text.profraw +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED +// RUN: llvm-profdata merge --keep-vtable-symbols --binary indexed.proftext -o text.profdata +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED // COMMON: Counters: // COMMON-NEXT: main: -// COMMON-NEXT: Hash: 0x0f9a16fe6d398548 -// COMMON-NEXT: Counters: 2 +// 
COMMON-NEXT: Hash: 0x068617320ec408a0 +// COMMON-NEXT: Counters: 4 // COMMON-NEXT: Indirect Call Site Count: 2 // COMMON-NEXT: Number of instrumented vtables: 2 // RAW: Indirect Target Results: -// RAW-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%) -// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) -// RAW-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%) -// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) +// RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%) +// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%) +// RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%) +// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%) // RAW-NEXT: VTable Results: -// RAW-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) -// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%) +// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%) // RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) // RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) // INDEXED: Indirect Target Results: -// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) -// INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%) -// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) -// INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%) +// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%) +// INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%) +// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%) +// 
INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%) // INDEXED-NEXT: VTable Results: -// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) -// INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) +// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%) +// INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%) // INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) // INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) // COMMON: Instrumentation level: IR entry_first = 0 // COMMON-NEXT: Functions shown: 1 -// COMMON-NEXT: Total functions: 6 +// COMMON-NEXT: Total functions: 7 // COMMON-NEXT: Maximum function count: 1000 -// COMMON-NEXT: Maximum internal block count: 250 +// COMMON-NEXT: Maximum internal block count: 1000 // COMMON-NEXT: Statistics for indirect call sites profile: // COMMON-NEXT: Total number of sites: 2 // COMMON-NEXT: Total number of sites with values: 2 @@ -76,11 +78,13 @@ // ICTEXT: :ir // ICTEXT: main // ICTEXT: # Func Hash: -// ICTEXT: 1124236338992350536 +// ICTEXT: 470088714870327456 // ICTEXT: # Num Counters: -// ICTEXT: 2 +// ICTEXT: 4 // ICTEXT: # Counter Values: // ICTEXT: 1000 +// ICTEXT: 1000 +// ICTEXT: 200 // ICTEXT: 1 // ICTEXT: # Num Value Kinds: // ICTEXT: 2 @@ -89,41 +93,98 @@ // ICTEXT: # NumValueSites: // ICTEXT: 2 // ICTEXT: 2 -// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii:750 -// ICTEXT: _ZN8Derived15func1Eii:250 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150 +// ICTEXT: _ZN8Derived14funcEii:50 // ICTEXT: 2 -// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii:750 -// ICTEXT: _ZN8Derived15func2Eii:250 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750 +// ICTEXT: _ZN8Derived1D0Ev:250 // ICTEXT: # ValueKind = IPVK_VTableTarget: // ICTEXT: 2 // 
ICTEXT: # NumValueSites: // ICTEXT: 2 // ICTEXT: 2 -// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750 -// ICTEXT: _ZTV8Derived1:250 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150 +// ICTEXT: _ZTV8Derived1:50 // ICTEXT: 2 // ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750 // ICTEXT: _ZTV8Derived1:250 +// Test indirect call promotion transformation using vtable profiles. +// - Build with `-g` to enable debug information. +// - In real world settings, ICP pass is disabled in prelink pipeline. In +// the postlink pipeline, ICP is enabled after whole-program-devirtualization +// pass. Do the same thing in this test. +// - Enable `-fwhole-program-vtables` to generate type metadata and intrinsics. +// - Enable `-fno-split-lto-unit` and `-Wl,--lto-whole-program-visibility` to +// preserve type intrinsics for ICP pass. +// RUN: %clangxx -m64 -fprofile-use=test.profdata -Wl,--lto-whole-program-visibility \ +// RUN: -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -fuse-ld=lld \ +// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \ +// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \ +// RUN: -mllvm -enable-vtable-profile-use \ +// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \ +// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \ +// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP" + +// For the indirect call site `ptr->func` +// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E} +// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1} 
+// +// For the indirect call site `delete ptr` +// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E} +// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1} + +// The IR matchers for indirect callsite `ptr->func`. +// IR-LABEL: @main +// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei +// IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]] +// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16) +// IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]], +// +// IR: [[BB1]]: +// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii +// IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]] +// +// IR: [[BB2]]: +// IR: [[CMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV8Derived1, i32 16) +// IR: br i1 [[CMP2]], label %[[BB3:.*]], label %[[BB4:[a-zA-Z0-9_.]+]], +// +// IR: [[BB3]]: +// IR: [[RESBB3:%.*]] = {{.*}}call {{.*}} @_ZN8Derived14funcEii +// IR: br label %[[MERGE1:[a-zA-Z0-9_.]+]], +// +// IR: [[BB4]]: +// IR: [[FUNCPTR:%.*]] = load ptr, ptr [[VTABLE]] +// IR: [[RESBB4:%.*]] = {{.*}}call {{.*}} [[FUNCPTR]] +// IR: br label %[[MERGE1]] +// +// IR: [[MERGE1]]: +// IR: [[RES1:%.*]] = phi i32 [ [[RESBB4]], %[[BB4]] ], [ [[RESBB3]], %[[BB3]] ] +// IR: br label %[[MERGE0]] +// +// IR: [[MERGE0]]: +// IR: [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ] #include #include class Base { public: - virtual int func1(int a, int b) = 0; - virtual int func2(int a, int b) = 0; + virtual int func(int a, int b) = 0; + + virtual ~Base() {}; }; class Derived1 : public Base { public: - int func1(int a, int b) override { return a + b; } + int func(int a, int b) override { 
return a * b; } - int func2(int a, int b) override { return a * b; } + ~Derived1() {} }; namespace { class Derived2 : public Base { public: - int func1(int a, int b) override { return a - b; } + int func(int a, int b) override { return a * (a - b); } - int func2(int a, int b) override { return a * (a - b); } + ~Derived2() {} }; } // namespace __attribute__((noinline)) Base *createType(int a) { @@ -140,7 +201,10 @@ int main(int argc, char **argv) { int a = rand(); int b = rand(); Base *ptr = createType(i); - sum += ptr->func1(a, b) + ptr->func2(b, a); + if (i % 5 == 0) + sum += ptr->func(b, a); + + delete ptr; } printf("sum is %d\n", sum); return 0; diff --git a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h index e0e8a7cda9369..9c2be12fce2fb 100644 --- a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h +++ b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h @@ -57,7 +57,7 @@ class ICallPromotionAnalysis { /// /// The returned array space is owned by this class, and overwritten on /// subsequent calls. - ArrayRef getPromotionCandidatesForInstruction( + MutableArrayRef getPromotionCandidatesForInstruction( const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates); }; diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h index 66c972572b06c..6c424038070dc 100644 --- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h +++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h @@ -37,8 +37,10 @@ struct PGOIndirectCallVisitor : public InstVisitor { // A heuristic is used to find the address feeding instructions. 
static Instruction *tryGetVTableInstruction(CallBase *CB) { assert(CB != nullptr && "Caller guaranteed"); - LoadInst *LI = dyn_cast(CB->getCalledOperand()); + if (!CB->isIndirectCall()) + return nullptr; + LoadInst *LI = dyn_cast(CB->getCalledOperand()); if (LI != nullptr) { Value *FuncPtr = LI->getPointerOperand(); // GEP (or bitcast) Value *VTablePtr = FuncPtr->stripInBoundsConstantOffsets(); diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 7fa6d44990a14..50e6f1d3b9b1f 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -294,6 +294,8 @@ getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue = false); +// TODO: Unify metadata name 'PGOFuncName' and 'PGOName', by supporting read +// of this metadata for backward compatibility and generating 'PGOName' only. /// Extract the value profile data from \p Inst and returns them if \p Inst is /// annotated with value profile data. Returns an empty vector otherwise. SmallVector @@ -303,6 +305,8 @@ getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } +inline StringRef getPGONameMetadataName() { return "PGOName"; } + /// Return the PGOFuncName meta data associated with a function. MDNode *getPGOFuncNameMetadata(const Function &F); @@ -311,8 +315,14 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO = false); /// Create the PGOFuncName meta data if PGOFuncName is different from /// function's raw name. This should only apply to internal linkage functions /// declared by users only. +/// TODO: Update all callers to 'createPGONameMetadata' and deprecate this +/// function. 
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); +/// Create the PGOName metadata if a global object's PGO name is different from +/// its mangled name. This should apply to local-linkage global objects only. +void createPGONameMetadata(GlobalObject &GO, StringRef PGOName); + /// Check if we can use Comdat for profile variables. This will eliminate /// the duplicated profile variables for Comdat functions. bool needsComdatForCounter(const GlobalObject &GV, const Module &M); diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index a71ab23a30902..f43666f0037b6 100644 --- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -87,7 +87,7 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates( return I; } -ArrayRef +MutableArrayRef ICallPromotionAnalysis::getPromotionCandidatesForInstruction( const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates) { uint32_t NumVals; @@ -95,9 +95,9 @@ ICallPromotionAnalysis::getPromotionCandidatesForInstruction( MaxNumPromotions, NumVals, TotalCount); if (!Res) { NumCandidates = 0; - return ArrayRef(); + return MutableArrayRef(); } ValueDataArray = std::move(Res); NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount); - return ArrayRef(ValueDataArray.get(), NumVals); + return MutableArrayRef(ValueDataArray.get(), NumVals); } diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index c7749f33d9af5..9dbaa2ca0f020 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -228,6 +228,12 @@ cl::opt EnableVTableValueProfiling( "the types of a C++ pointer. 
The information is used in indirect " "call promotion to do selective vtable-based comparison.")); +cl::opt EnableVTableProfileUse( + "enable-vtable-profile-use", cl::init(false), + cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " + "profiles will be used by ICP pass for more efficient indirect " + "call sequence. If false, type profiles won't be used.")); + std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo) { @@ -391,7 +397,7 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO) { // PGONameMetadata should be set by compiler at profile use time // and read by symtab creation to look up symbols corresponding to // a MD5 hash. - return getIRPGOObjectName(V, InLTO, /*PGONameMetadata=*/nullptr); + return getIRPGOObjectName(V, InLTO, V.getMetadata(getPGONameMetadataName())); } // See getIRPGOObjectName() for a discription of the format. @@ -480,8 +486,7 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { for (GlobalVariable &G : M.globals()) { if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) continue; - if (Error E = addVTableWithName( - G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr))) + if (Error E = addVTableWithName(G, getPGOName(G, InLTO))) return E; } @@ -1425,16 +1430,28 @@ MDNode *getPGOFuncNameMetadata(const Function &F) { return F.getMetadata(getPGOFuncNameMetadataName()); } -void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { - // Only for internal linkage functions. - if (PGOFuncName == F.getName()) - return; - // Don't create duplicated meta-data. - if (getPGOFuncNameMetadata(F)) +static void createPGONameMetadata(GlobalObject &GO, StringRef MetadataName, + StringRef PGOName) { + // Only for internal linkage functions or global variables. The name is not + // the same as PGO name for these global objects. 
+ if (GO.getName() == PGOName) return; - LLVMContext &C = F.getContext(); - MDNode *N = MDNode::get(C, MDString::get(C, PGOFuncName)); - F.setMetadata(getPGOFuncNameMetadataName(), N); + + // Don't create duplicated metadata. + if (GO.getMetadata(MetadataName)) + return; + + LLVMContext &C = GO.getContext(); + MDNode *N = MDNode::get(C, MDString::get(C, PGOName)); + GO.setMetadata(MetadataName, N); +} + +void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { + return createPGONameMetadata(F, getPGOFuncNameMetadataName(), PGOFuncName); +} + +void createPGONameMetadata(GlobalObject &GO, StringRef PGOName) { + return createPGONameMetadata(GO, getPGONameMetadataName(), PGOName); } bool needsComdatForCounter(const GlobalObject &GO, const Module &M) { diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index fe9eaae9ac7ea..68f4544b82e1b 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -13,13 +13,16 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" @@ -40,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +55,12 @@ using namespace llvm; STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate 
sites."); +extern cl::opt MaxNumVTableAnnotations; + +namespace llvm { +extern cl::opt EnableVTableProfileUse; +} + // Command line option to disable indirect-call promotion with the default as // false. This is for debug purpose. static cl::opt DisableICP("disable-icp", cl::init(false), cl::Hidden, @@ -103,13 +113,196 @@ static cl::opt ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); +// Indirect call promotion pass will fall back to function-based comparison if +// vtable-count / function-count is smaller than this threshold. +static cl::opt ICPVTablePercentageThreshold( + "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden, + cl::desc("The percentage threshold of vtable-count / function-count for " + "cost-benefit analysis.")); + +// Although comparing vtables can save a vtable load, we may need to compare +// vtable pointer with multiple vtable address points due to class inheritance. +// Comparing with multiple vtables inserts additional instructions on hot code +// path, and doing so for an earlier candidate delays the comparisons for later +// candidates. For the last candidate, only the fallback path is affected. +// We allow multiple vtable comparison for the last function candidate and use +// the option below to cap the number of vtables. +static cl::opt ICPMaxNumVTableLastCandidate( + "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, + cl::desc("The maximum number of vtable for the last candidate.")); + namespace { +// The key is a vtable global variable, and the value is a map. +// In the inner map, the key represents address point offsets and the value is a +// constant for this address point. +using VTableAddressPointOffsetValMap = + SmallDenseMap>; + +// A struct to collect type information for a virtual call site. +struct VirtualCallSiteInfo { + // The offset from the address point to virtual function in the vtable. 
+ uint64_t FunctionOffset; + // The instruction that computes the address point of vtable. + Instruction *VPtr; + // The compatible type used in LLVM type intrinsics. + StringRef CompatibleTypeStr; +}; + +// The key is a virtual call, and value is its type information. +using VirtualCallSiteTypeInfoMap = + SmallDenseMap; + +// The key is vtable GUID, and value is its value profile count. +using VTableGUIDCountsMap = SmallDenseMap; + +// Return the address point offset of the given compatible type. +// +// Type metadata of a vtable specifies the types that can contain a pointer to +// this vtable, for example, `Base*` can be a pointer to a derived type +// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html +static std::optional +getAddressPointOffset(const GlobalVariable &VTableVar, + StringRef CompatibleType) { + SmallVector Types; + VTableVar.getMetadata(LLVMContext::MD_type, Types); + + for (MDNode *Type : Types) + if (auto *TypeId = dyn_cast(Type->getOperand(1).get()); + TypeId && TypeId->getString() == CompatibleType) + return cast( + cast(Type->getOperand(0))->getValue()) + ->getZExtValue(); + + return std::nullopt; +} + +// Return a constant representing the vtable's address point specified by the +// offset. +static Constant *getVTableAddressPointOffset(GlobalVariable *VTable, + uint32_t AddressPointOffset) { + Module &M = *VTable->getParent(); + LLVMContext &Context = M.getContext(); + assert(AddressPointOffset < + M.getDataLayout().getTypeAllocSize(VTable->getValueType()) && + "Out-of-bound access"); + + return ConstantExpr::getInBoundsGetElementPtr( + Type::getInt8Ty(Context), VTable, + llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset)); +} + +// Return the basic block in which Use `U` is used via its `UserInst`. 
+static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) { + if (PHINode *PN = dyn_cast(UserInst)) + return PN->getIncomingBlock(U); + + return UserInst->getParent(); +} + +// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users +// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()` +// is the sole predecessor of `DestBB` and `DestBB` is dominated by +// `Inst->getParent()`. +static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) { + // 'BB' is used only by assert. + [[maybe_unused]] BasicBlock *BB = Inst->getParent(); + + assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 && + DestBB->getUniquePredecessor() == BB && + "Guaranteed by ICP transformation"); + + BasicBlock *UserBB = nullptr; + for (Use &Use : Inst->uses()) { + User *User = Use.getUser(); + // Do checked cast since IR verifier guarantees that the user of an + // instruction must be an instruction. See `Verifier::visitInstruction`. + Instruction *UserInst = cast(User); + // We can sink debug or pseudo instructions together with Inst. + if (UserInst->isDebugOrPseudoInst()) + continue; + UserBB = getUserBasicBlock(Use, UserInst); + // Do not sink if Inst is used in a basic block that is not DestBB. + // TODO: Sink to the common dominator of all user blocks. + if (UserBB != DestBB) + return false; + } + return UserBB != nullptr; +} + +// For the virtual call dispatch sequence, try to sink vtable load instructions +// to the cold indirect call fallback. +// FIXME: Move the sink eligibility check below to a utility function in +// Transforms/Utils/ directory. +static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { + if (!isDestBBSuitableForSink(I, DestBlock)) + return false; + + // Do not move control-flow-involving, volatile loads, vaarg, alloca + // instructions, etc. 
+ if (isa(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || + isa(I)) + return false; + + // Do not sink convergent call instructions. + if (const auto *C = dyn_cast(I)) + if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent()) + return false; + + // Do not move an instruction that may write to memory. + if (I->mayWriteToMemory()) + return false; + + // We can only sink load instructions if there is nothing between the load and + // the end of block that could change the value. + if (I->mayReadFromMemory()) { + // We already know that SrcBlock is the unique predecessor of DestBlock. + for (BasicBlock::iterator Scan = std::next(I->getIterator()), + E = I->getParent()->end(); + Scan != E; ++Scan) { + // Note alias analysis can tell whether two pointers can point to the + // same object in memory or not thereby find further opportunities to + // sink. + if (Scan->mayWriteToMemory()) + return false; + } + } + + BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); + I->moveBefore(*DestBlock, InsertPos); + + // TODO: Sink debug intrinsic users of I to 'DestBlock'. + // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and + // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have + // the core logic to do this. + return true; +} + +// Try to sink instructions after VPtr to the indirect call fallback. +// Return the number of sunk IR instructions. +static int tryToSinkInstructions(BasicBlock *OriginalBB, + BasicBlock *IndirectCallBB) { + int SinkCount = 0; + // Do not sink across a critical edge for simplicity. + if (IndirectCallBB->getUniquePredecessor() != OriginalBB) + return SinkCount; + // Sink all eligible instructions in OriginalBB in reverse order. 
+ for (Instruction &I : + llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB)))) + if (tryToSinkInstruction(&I, IndirectCallBB)) + SinkCount++; + + return SinkCount; +} + // Promote indirect calls to conditional direct calls, keeping track of // thresholds. class IndirectCallPromoter { private: Function &F; + Module &M; + + ProfileSummaryInfo *PSI = nullptr; // Symtab that maps indirect call profile values to function names and // defines. @@ -117,6 +310,11 @@ class IndirectCallPromoter { const bool SamplePGO; + // A map from a virtual call to its type information. + const VirtualCallSiteTypeInfoMap &VirtualCSInfo; + + VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal; + OptimizationRemarkEmitter &ORE; // A struct that records the direct target and it's call count. @@ -124,6 +322,16 @@ class IndirectCallPromoter { Function *const TargetFunction; const uint64_t Count; + // The following fields only exists for promotion candidates with vtable + // information. + // + // Due to class inheritance, one virtual call candidate can come from + // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and + // counts for 'TargetFunction'. `AddressPoints` stores the vtable address + // points for comparison. + VTableGUIDCountsMap VTableGUIDAndCounts; + SmallVector AddressPoints; + PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} }; @@ -137,18 +345,60 @@ class IndirectCallPromoter { uint64_t TotalCount, uint32_t NumCandidates); // Promote a list of targets for one indirect-call callsite by comparing - // indirect callee with functions. Returns true if there are IR + // indirect callee with functions. Return true if there are IR // transformations and false otherwise. 
- bool tryToPromoteWithFuncCmp(CallBase &CB, + bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr, ArrayRef Candidates, uint64_t TotalCount, ArrayRef ICallProfDataRef, - uint32_t NumCandidates); + uint32_t NumCandidates, + VTableGUIDCountsMap &VTableGUIDCounts); + + // Promote a list of targets for one indirect call by comparing vtables with + // functions. Return true if there are IR transformations and false + // otherwise. + bool tryToPromoteWithVTableCmp( + CallBase &CB, Instruction *VPtr, + const std::vector &Candidates, + uint64_t TotalFuncCount, uint32_t NumCandidates, + MutableArrayRef ICallProfDataRef, + VTableGUIDCountsMap &VTableGUIDCounts); + + // Return true if it's profitable to compare vtables for the callsite. + bool isProfitableToCompareVTables( + const CallBase &CB, const std::vector &Candidates, + uint64_t TotalCount); + + // Given an indirect callsite and the list of function candidates, compute + // the following vtable information in output parameters and return vtable + // pointer if type profiles exist. + // - Populate `VTableGUIDCounts` with (vtable GUID, count) pairs using !prof + // metadata attached on the vtable pointer. + // - For each function candidate, finds out the vtables from which it gets + // called and stores them in the promotion candidate. 
+ Instruction *computeVTableInfos(const CallBase *CB, + VTableGUIDCountsMap &VTableGUIDCounts, + std::vector &Candidates); + + Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV, + uint64_t AddressPointOffset); + + void updateFuncValueProfiles(CallBase &CB, ArrayRef VDs, + uint64_t Sum, uint32_t MaxMDCount); + + void updateVPtrValueProfiles(Instruction *VPtr, + VTableGUIDCountsMap &VTableGUIDCounts); public: - IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO, - OptimizationRemarkEmitter &ORE) - : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} + IndirectCallPromoter( + Function &Func, Module &M, ProfileSummaryInfo *PSI, + InstrProfSymtab *Symtab, bool SamplePGO, + const VirtualCallSiteTypeInfoMap &VirtualCSInfo, + VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal, + OptimizationRemarkEmitter &ORE) + : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO), + VirtualCSInfo(VirtualCSInfo), + VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {} IndirectCallPromoter(const IndirectCallPromoter &) = delete; IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete; @@ -244,25 +494,127 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite( return Ret; } +Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar( + GlobalVariable *GV, uint64_t AddressPointOffset) { + auto [Iter, Inserted] = + VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr); + if (Inserted) + Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset); + return Iter->second; +} + +Instruction *IndirectCallPromoter::computeVTableInfos( + const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap, + std::vector &Candidates) { + if (!EnableVTableProfileUse) + return nullptr; + + // Take the following code sequence as an example, here is how the code works + // @vtable1 = {[n x ptr] [... ptr @func1]} + // @vtable2 = {[m x ptr] [... 
ptr @func2]} + // + // %vptr = load ptr, ptr %d, !prof !0 + // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1") + // tail call void @llvm.assume(i1 %0) + // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1 + // %1 = load ptr, ptr %vfn + // call void %1(ptr %d), !prof !1 + // + // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50} + // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50} + // + // Step 1. Find out the %vptr instruction for indirect call and use its !prof + // to populate `GUIDCountsMap`. + // Step 2. For each vtable-guid, look up its definition from symtab. LTO can + // make vtable definitions visible across modules. + // Step 3. Compute the byte offset of the virtual call, by adding vtable + // address point offset and function's offset relative to vtable address + // point. For each function candidate, this step tells us the vtable from + // which it comes, and the vtable address point to compare %vptr with. + + // Only virtual calls have virtual call site info. + auto Iter = VirtualCSInfo.find(CB); + if (Iter == VirtualCSInfo.end()) + return nullptr; + + LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #" + << NumOfPGOICallsites << "\n"); + + const auto &VirtualCallInfo = Iter->second; + Instruction *VPtr = VirtualCallInfo.VPtr; + + SmallDenseMap CalleeIndexMap; + for (size_t I = 0; I < Candidates.size(); I++) + CalleeIndexMap[Candidates[I].TargetFunction] = I; + + uint32_t ActualNumValueData = 0; + uint64_t TotalVTableCount = 0; + auto VTableValueDataArray = getValueProfDataFromInst( + *VirtualCallInfo.VPtr, IPVK_VTableTarget, MaxNumVTableAnnotations, + ActualNumValueData, TotalVTableCount); + if (VTableValueDataArray.get() == nullptr) + return VPtr; + + // Compute the functions and counts from each vtable.
+ for (size_t j = 0; j < ActualNumValueData; j++) { + uint64_t VTableVal = VTableValueDataArray[j].Value; + GUIDCountsMap[VTableVal] = VTableValueDataArray[j].Count; + GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal); + if (!VTableVar) { + LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal + << "; maybe the vtable isn't imported\n"); + continue; + } + + std::optional MaybeAddressPointOffset = + getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr); + if (!MaybeAddressPointOffset) + continue; + + const uint64_t AddressPointOffset = *MaybeAddressPointOffset; + + Function *Callee = nullptr; + std::tie(Callee, std::ignore) = getFunctionAtVTableOffset( + VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M); + if (!Callee) + continue; + auto CalleeIndexIter = CalleeIndexMap.find(Callee); + if (CalleeIndexIter == CalleeIndexMap.end()) + continue; + + auto &Candidate = Candidates[CalleeIndexIter->second]; + // There shouldn't be duplicate GUIDs in one !prof metadata (except + // duplicated zeros), so assign counters directly won't cause overwrite or + // counter loss. + Candidate.VTableGUIDAndCounts[VTableVal] = VTableValueDataArray[j].Count; + Candidate.AddressPoints.push_back( + getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset)); + } + + return VPtr; +} + +// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight. +// Scales uint64_t counters down to uint32_t if necessary to prevent overflow. 
+static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, + uint64_t FalseWeight) { + MDBuilder MDB(Context); + uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight)); + return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale), + scaleBranchCount(FalseWeight, Scale)); +} + CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE) { + CallBase &NewInst = promoteCallWithIfThenElse( + CB, DirectCallee, + createBranchWeights(CB.getContext(), Count, TotalCount - Count)); - uint64_t ElseCount = TotalCount - Count; - uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); - uint64_t Scale = calculateCountScale(MaxCount); - MDBuilder MDB(CB.getContext()); - MDNode *BranchWeights = MDB.createBranchWeights( - scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); - - CallBase &NewInst = - promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights); - - if (AttachProfToDirectCall) { + if (AttachProfToDirectCall) setBranchWeights(NewInst, {static_cast(Count)}, /*IsExpected=*/false); - } using namespace ore; @@ -278,34 +630,175 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, // Promote indirect-call to conditional direct-call for one callsite. 
bool IndirectCallPromoter::tryToPromoteWithFuncCmp( - CallBase &CB, ArrayRef Candidates, uint64_t TotalCount, - ArrayRef ICallProfDataRef, uint32_t NumCandidates) { + CallBase &CB, Instruction *VPtr, ArrayRef Candidates, + uint64_t TotalCount, ArrayRef ICallProfDataRef, + uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) { uint32_t NumPromoted = 0; for (const auto &C : Candidates) { - uint64_t Count = C.Count; - pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO, - &ORE); - assert(TotalCount >= Count); - TotalCount -= Count; + uint64_t FuncCount = C.Count; + pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount, + SamplePGO, &ORE); + assert(TotalCount >= FuncCount); + TotalCount -= FuncCount; NumOfPGOICallPromotion++; NumPromoted++; - } + if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty()) + continue; + + // After a virtual call candidate gets promoted, update the vtable's counts + // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents + // a vtable from which the virtual call is loaded. Compute the sum and use + // 128-bit APInt to improve accuracy. + uint64_t SumVTableCount = 0; + for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) + SumVTableCount += VTableCount; + + for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) { + APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/); + APFuncCount *= VTableCount; + VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue(); + } + } if (NumPromoted == 0) return false; - // Adjust the MD.prof metadata. First delete the old one. - CB.setMetadata(LLVMContext::MD_prof, nullptr); - assert(NumPromoted <= ICallProfDataRef.size() && "Number of promoted functions should not be greater than the number " "of values in profile metadata"); + + // Update value profiles on the indirect call. 
+ updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount, + NumCandidates); + updateVPtrValueProfiles(VPtr, VTableGUIDCounts); + return true; +} + +void IndirectCallPromoter::updateFuncValueProfiles( + CallBase &CB, ArrayRef CallVDs, uint64_t TotalCount, + uint32_t MaxMDCount) { + // First clear the existing !prof. + CB.setMetadata(LLVMContext::MD_prof, nullptr); // Annotate the remaining value profiles if counter is not zero. if (TotalCount != 0) - annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted), - TotalCount, IPVK_IndirectCallTarget, NumCandidates); + annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget, + MaxMDCount); +} + +void IndirectCallPromoter::updateVPtrValueProfiles( + Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) { + if (!EnableVTableProfileUse || VPtr == nullptr || + !VPtr->getMetadata(LLVMContext::MD_prof)) + return; + VPtr->setMetadata(LLVMContext::MD_prof, nullptr); + std::vector VTableValueProfiles; + uint64_t TotalVTableCount = 0; + for (auto [GUID, Count] : VTableGUIDCounts) { + if (Count == 0) + continue; + + VTableValueProfiles.push_back({GUID, Count}); + TotalVTableCount += Count; + } + llvm::sort(VTableValueProfiles, + [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) { + return LHS.Count > RHS.Count; + }); + + annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount, + IPVK_VTableTarget, VTableValueProfiles.size()); +} + +bool IndirectCallPromoter::tryToPromoteWithVTableCmp( + CallBase &CB, Instruction *VPtr, + const std::vector &Candidates, uint64_t TotalFuncCount, + uint32_t NumCandidates, + MutableArrayRef ICallProfDataRef, + VTableGUIDCountsMap &VTableGUIDCounts) { + SmallVector PromotedFuncCount; + + for (const auto &Candidate : Candidates) { + for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) + VTableGUIDCounts[GUID] -= Count; + + // 'OriginalBB' is the basic block of indirect call. 
After each candidate + // is promoted, a new basic block is created for the indirect fallback basic + // block and indirect call `CB` is moved into this new BB. + BasicBlock *OriginalBB = CB.getParent(); + promoteCallWithVTableCmp( + CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints, + createBranchWeights(CB.getContext(), Candidate.Count, + TotalFuncCount - Candidate.Count)); + + int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent()); + + ORE.emit([&]() { + OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB); + + const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; + Remark << "Promote indirect call to " + << ore::NV("DirectCallee", Candidate.TargetFunction) + << " with count " << ore::NV("Count", Candidate.Count) + << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink " + << ore::NV("SinkCount", SinkCount) + << " instruction(s) and compare " + << ore::NV("VTable", VTableGUIDAndCounts.size()) + << " vtable(s): {"; + + // Sort GUIDs so remark message is deterministic. 
+ std::set GUIDSet; + for (auto [GUID, Count] : VTableGUIDAndCounts) + GUIDSet.insert(GUID); + for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) { + if (Iter != GUIDSet.begin()) + Remark << ", "; + Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter)); + } + + Remark << "}"; + + return Remark; + }); + + PromotedFuncCount.push_back(Candidate.Count); + + assert(TotalFuncCount >= Candidate.Count && + "Within one prof metadata, total count is the sum of counts from " + "individual pairs"); + // Use std::min since 'TotalFuncCount' is the saturated sum of individual + // counts, see + // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288 + TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count); + NumOfPGOICallPromotion++; + } + if (PromotedFuncCount.empty()) + return false; + + // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has + // a distinct 'VPtr'. + // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be + // used to load multiple virtual functions. The vtable profiles need to be + // updated properly in that case (e.g., for each indirect call annotate both + // type profiles and function profiles in one !prof). + for (size_t I = 0; I < PromotedFuncCount.size(); I++) + ICallProfDataRef[I].Count -= + std::min(PromotedFuncCount[I], ICallProfDataRef[I].Count); + // Sort value profiles by count in descending order. + llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS, + const InstrProfValueData &RHS) { + return LHS.Count > RHS.Count; + }); + // Drop the pair if count is zero.
+ ArrayRef VDs( + ICallProfDataRef.begin(), + llvm::upper_bound(ICallProfDataRef, 0U, + [](uint64_t Count, const InstrProfValueData &ProfData) { + return ProfData.Count <= Count; + })); + updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates); + updateVPtrValueProfiles(VPtr, VTableGUIDCounts); return true; } @@ -322,14 +815,151 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { if (!NumCandidates || (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount))) continue; + auto PromotionCandidates = getPromotionCandidatesForCallSite( *CB, ICallProfDataRef, TotalCount, NumCandidates); - Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount, - ICallProfDataRef, NumCandidates); + + VTableGUIDCountsMap VTableGUIDCounts; + Instruction *VPtr = + computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates); + + if (isProfitableToCompareVTables(*CB, PromotionCandidates, TotalCount)) + Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates, + TotalCount, NumCandidates, + ICallProfDataRef, VTableGUIDCounts); + else + Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates, + TotalCount, ICallProfDataRef, + NumCandidates, VTableGUIDCounts); } return Changed; } +// TODO: Return false if the function addressing and vtable load instructions +// cannot sink to indirect fallback. 
+bool IndirectCallPromoter::isProfitableToCompareVTables( + const CallBase &CB, const std::vector &Candidates, + uint64_t TotalCount) { + if (!EnableVTableProfileUse || Candidates.empty()) + return false; + LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #" + << NumOfPGOICallsites << CB << "\n"); + uint64_t RemainingVTableCount = TotalCount; + const size_t CandidateSize = Candidates.size(); + for (size_t I = 0; I < CandidateSize; I++) { + auto &Candidate = Candidates[I]; + auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; + + LLVM_DEBUG(dbgs() << " Candidate " << I << " FunctionCount: " + << Candidate.Count << ", VTableCounts:"); + // Add [[maybe_unused]] since GUID and Count are only used by LLVM_DEBUG. + for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts) + LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() + << ", " << Count << "}"); + LLVM_DEBUG(dbgs() << "\n"); + + uint64_t CandidateVTableCount = 0; + for (auto &[GUID, Count] : VTableGUIDAndCounts) + CandidateVTableCount += Count; + + if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) { + LLVM_DEBUG( + dbgs() << " function count " << Candidate.Count + << " and its vtable sum count " << CandidateVTableCount + << " have discrepancies. Bail out vtable comparison.\n"); + return false; + } + + RemainingVTableCount -= Candidate.Count; + + // 'MaxNumVTable' limits the number of vtables to make vtable comparison + // profitable. Comparing multiple vtables for one function candidate will + // insert additional instructions on the hot path, and allowing more than + // one vtable for non-last candidates may or may not elongate the dependency + // chain for the subsequent candidates. Set its value to 1 for non-last + // candidate and allow option to override it for the last candidate.
+ int MaxNumVTable = 1; + if (I == CandidateSize - 1) + MaxNumVTable = ICPMaxNumVTableLastCandidate; + + if ((int)Candidate.AddressPoints.size() > MaxNumVTable) { + LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got " + << Candidate.AddressPoints.size() + << " vtables. Bail out for vtable comparison.\n"); + return false; + } + } + + // If the indirect fallback is not cold, don't compare vtables. + if (PSI && PSI->hasProfileSummary() && + !PSI->isColdCount(RemainingVTableCount)) { + LLVM_DEBUG(dbgs() << " Indirect fallback basic block is not cold. Bail " + "out for vtable comparison.\n"); + return false; + } + + return true; +} + +// For virtual calls in the module, collect per-callsite information which will +// be used to associate an ICP candidate with a vtable and a specific function +// in the vtable. With type intrinsics (llvm.type.test), we can find virtual +// calls in a compile-time efficient manner (by iterating its users) and more +// importantly use the compatible type later to figure out the function byte +// offset relative to the start of vtables. +static void +computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, + VirtualCallSiteTypeInfoMap &VirtualCSInfo) { + // Right now only llvm.type.test is used to find out virtual call sites. + // With ThinLTO and whole-program-devirtualization, llvm.type.test and + // llvm.public.type.test are emitted, and llvm.public.type.test is either + // refined to llvm.type.test or dropped before indirect-call-promotion pass. + // + // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted. + // Find out virtual calls by looking at users of llvm.type.checked.load in + // that case. 
+ Function *TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + if (!TypeTestFunc || TypeTestFunc->use_empty()) + return; + + auto &FAM = MAM.getResult(M).getManager(); + auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult(F); + }; + // Iterate all type.test calls to find all indirect calls. + for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { + auto *CI = dyn_cast(U.getUser()); + if (!CI) + continue; + auto *TypeMDVal = cast(CI->getArgOperand(1)); + if (!TypeMDVal) + continue; + auto *CompatibleTypeId = dyn_cast(TypeMDVal->getMetadata()); + if (!CompatibleTypeId) + continue; + + // Find out all devirtualizable call sites given a llvm.type.test + // intrinsic call. + SmallVector DevirtCalls; + SmallVector Assumes; + auto &DT = LookupDomTree(*CI->getFunction()); + findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); + + for (auto &DevirtCall : DevirtCalls) { + CallBase &CB = DevirtCall.CB; + // Given an indirect call, try to find the instruction which loads a + // pointer to virtual table. + Instruction *VTablePtr = + PGOIndirectCallVisitor::tryGetVTableInstruction(&CB); + if (!VTablePtr) + continue; + VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr, + CompatibleTypeId->getString()}; + } + } +} + // A wrapper function that does the actual work. static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, bool SamplePGO, ModuleAnalysisManager &MAM) { @@ -342,6 +972,20 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, return false; } bool Changed = false; + VirtualCallSiteTypeInfoMap VirtualCSInfo; + + if (EnableVTableProfileUse) + computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo); + + // VTableAddressPointOffsetVal stores the vtable address points. The vtable + // address point of a given (vtable, address point offset) pair is static (doesn't + // change after being computed once).
+ // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map + // entry the first time a pair is seen, as + // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter + // repeatedly on each function. + VTableAddressPointOffsetValMap VTableAddressPointOffsetVal; + for (auto &F : M) { if (F.isDeclaration() || F.hasOptNone()) continue; @@ -350,7 +994,9 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, MAM.getResult(M).getManager(); auto &ORE = FAM.getResult(F); - IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE); + IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO, + VirtualCSInfo, + VTableAddressPointOffsetVal, ORE); bool FuncChanged = CallPromoter.processFunction(PSI); if (ICPDUMPAFTER && FuncChanged) { LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 572d37a2b3e55..d10b58b17f42f 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -320,6 +320,8 @@ static cl::opt PGOFunctionCriticalEdgeThreshold( cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold.")); +extern cl::opt MaxNumVTableAnnotations; + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -332,6 +334,7 @@ extern cl::opt ViewBlockFreqFuncName; // Command line option to enable vtable value profiling. 
Defined in // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= extern cl::opt EnableVTableValueProfiling; +extern cl::opt EnableVTableProfileUse; extern cl::opt ProfileCorrelate; } // namespace llvm @@ -1728,6 +1731,14 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { llvm_unreachable("Unknown visiting mode"); } +static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) { + if (ValueProfKind == IPVK_MemOPSize) + return MaxNumMemOPAnnotations; + if (ValueProfKind == llvm::IPVK_VTableTarget) + return MaxNumVTableAnnotations; + return MaxNumAnnotations; +} + // Traverse all valuesites and annotate the instructions for all value kind. void PGOUseFunc::annotateValueSites() { if (isValueProfilingDisabled()) @@ -1762,10 +1773,10 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) { LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind << "): Index = " << ValueSiteIndex << " out of " << NumValueSites << "\n"); - annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, - static_cast(Kind), ValueSiteIndex, - Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations - : MaxNumAnnotations); + annotateValueSite( + *M, *I.AnnotatedInst, ProfileRecord, + static_cast(Kind), ValueSiteIndex, + getMaxNumAnnotations(static_cast(Kind))); ValueSiteIndex++; } } @@ -2054,6 +2065,16 @@ static bool annotateAllFunctions( return false; } + if (EnableVTableProfileUse) { + for (GlobalVariable &G : M.globals()) { + if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) + continue; + + // Create the PGOFuncName meta data. + createPGONameMetadata(G, getPGOName(G, false /* InLTO*/)); + } + } + // Add the profile summary (read from the header of the indexed summary) here // so that we can use it below when reading counters (which checks if the // function should be marked with a cold or inlinehint attribute). 
@@ -2229,7 +2250,6 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, }; auto *PSI = &MAM.getResult(M); - if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f2130e4c286aa..0725addfbb90a 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" @@ -56,6 +57,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -1976,16 +1978,28 @@ void llvm::updateProfileCallee( ? 0 : PriorEntryCount + EntryDelta; + auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, + const uint64_t PriorEntryCount) { + Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB); + if (VPtr) + scaleProfData(*VPtr, NewEntryCount, PriorEntryCount); + }; + // During inlining ? 
if (VMap) { uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; for (auto Entry : *VMap) { if (isa(Entry.first)) - if (auto *CI = dyn_cast_or_null(Entry.second)) + if (auto *CI = dyn_cast_or_null(Entry.second)) { CI->updateProfWeight(CloneEntryCount, PriorEntryCount); + updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); + } + if (isa(Entry.first)) - if (auto *II = dyn_cast_or_null(Entry.second)) + if (auto *II = dyn_cast_or_null(Entry.second)) { II->updateProfWeight(CloneEntryCount, PriorEntryCount); + updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); + } } } @@ -1996,10 +2010,14 @@ void llvm::updateProfileCallee( // No need to update the callsite if it is pruned during inlining. if (!VMap || VMap->count(&BB)) for (Instruction &I : BB) { - if (CallInst *CI = dyn_cast(&I)) + if (CallInst *CI = dyn_cast(&I)) { CI->updateProfWeight(NewEntryCount, PriorEntryCount); - if (InvokeInst *II = dyn_cast(&I)) + updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount); + } + if (InvokeInst *II = dyn_cast(&I)) { II->updateProfWeight(NewEntryCount, PriorEntryCount); + updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); + } } } } diff --git a/llvm/test/Transforms/Inline/update_invoke_prof.ll b/llvm/test/Transforms/Inline/update_invoke_prof.ll index f6b86dfe5bb1b..12eb7dbf418c5 100644 --- a/llvm/test/Transforms/Inline/update_invoke_prof.ll +++ b/llvm/test/Transforms/Inline/update_invoke_prof.ll @@ -1,6 +1,7 @@ -; Test that branch weights and value profiles associated with invoke are updated -; in both caller and callee after inline, but invoke instructions with taken or -; not taken branch probabilities are not updated. +; Tests that instructions with value profiles and count-type branch weights are +; updated in both caller and callee after inline, but invoke instructions with +; taken or not taken branch probabilities are not updated. + ; RUN: opt < %s -passes='require,cgscc(inline)' -S | FileCheck %s declare i32 @__gxx_personality_v0(...) 
@@ -15,21 +16,23 @@ declare void @callee1(ptr %func) declare void @callee2(ptr %func) -define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 { +define void @callee(ptr %obj) personality ptr @__gxx_personality_v0 !prof !17 { + %vtable = load ptr, ptr %obj, !prof !21 + %func = load ptr, ptr %vtable invoke void %func() - to label %next unwind label %lpad, !prof !18 + to label %next unwind label %lpad, !prof !18 next: invoke void @callee1(ptr %func) - to label %cont unwind label %lpad, !prof !19 + to label %cont unwind label %lpad, !prof !19 cont: invoke void @callee2(ptr %func) - to label %ret unwind label %lpad, !prof !20 + to label %ret unwind label %lpad, !prof !20 lpad: %exn = landingpad {ptr, i32} - cleanup + cleanup unreachable ret: @@ -57,26 +60,41 @@ ret: !18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600} !19 = !{!"branch_weights", i32 1500} !20 = !{!"branch_weights", i32 1234, i32 5678} +!21 = !{!"VP", i32 2, i64 1500, i64 789, i64 900, i64 321, i64 600} -; CHECK-LABEL: @caller( -; CHECK: invoke void %func( -; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]] -; CHECK: invoke void @callee1( -; CHECK-NEXT: {{.*}} !prof ![[PROF2:[0-9]+]] -; CHECK: invoke void @callee2( -; CHECK-NEXT: {{.*}} !prof ![[PROF3:[0-9]+]] - -; CHECK-LABL: @callee( -; CHECK: invoke void %func( -; CHECK-NEXT: {{.*}} !prof ![[PROF4:[0-9]+]] -; CHECK: invoke void @callee1( -; CHECK-NEXT: {{.*}} !prof ![[PROF5:[0-9]+]] -; CHECK: invoke void @callee2( -; CHECK-NEXT: {{.*}} !prof ![[PROF3]] +; CHECK-LABEL: define void @caller( +; CHECK-SAME: ptr [[FUNC:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF14:![0-9]+]] { +; CHECK-NEXT: [[VTABLE_I:%.*]] = load ptr, ptr [[FUNC]], align 8, !prof [[PROF15:![0-9]+]] +; CHECK-NEXT: [[FUNC_I:%.*]] = load ptr, ptr [[VTABLE_I]], align 8 +; CHECK-NEXT: invoke void [[FUNC_I]]() +; CHECK-NEXT: to label %[[NEXT_I:.*]] unwind label %[[LPAD_I:.*]], !prof [[PROF16:![0-9]+]] +; CHECK: [[NEXT_I]]: +; CHECK-NEXT: invoke void 
@callee1(ptr [[FUNC_I]]) +; CHECK-NEXT: to label %[[CONT_I:.*]] unwind label %[[LPAD_I]], !prof [[PROF17:![0-9]+]] +; CHECK: [[CONT_I]]: +; CHECK-NEXT: invoke void @callee2(ptr [[FUNC_I]]) +; CHECK-NEXT: to label %[[CALLEE_EXIT:.*]] unwind label %[[LPAD_I]], !prof [[PROF18:![0-9]+]] +; +; CHECK-LABEL: define void @callee( +; CHECK-SAME: ptr [[OBJ:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF19:![0-9]+]] { +; CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]], align 8, !prof [[PROF20:![0-9]+]] +; CHECK-NEXT: [[FUNC:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; CHECK-NEXT: invoke void [[FUNC]]() +; CHECK-NEXT: to label %[[NEXT:.*]] unwind label %[[LPAD:.*]], !prof [[PROF21:![0-9]+]] +; CHECK: [[NEXT]]: +; CHECK-NEXT: invoke void @callee1(ptr [[FUNC]]) +; CHECK-NEXT: to label %[[CONT:.*]] unwind label %[[LPAD]], !prof [[PROF22:![0-9]+]] +; CHECK: [[CONT]]: +; CHECK-NEXT: invoke void @callee2(ptr [[FUNC]]) +; CHECK-NEXT: to label %[[RET:.*]] unwind label %[[LPAD]], !prof [[PROF18]] -; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 1000, i64 123, i64 600, i64 456, i64 400} -; CHECK: ![[PROF2]] = !{!"branch_weights", i32 1000} -; CHECK: ![[PROF3]] = !{!"branch_weights", i32 1234, i32 5678} -; CHECK: ![[PROF4]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200} -; CHECK: ![[PROF5]] = !{!"branch_weights", i32 500} +; CHECK: [[PROF14]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF15]] = !{!"VP", i32 2, i64 1000, i64 789, i64 600, i64 321, i64 400} +; CHECK: [[PROF16]] = !{!"VP", i32 0, i64 1000, i64 123, i64 600, i64 456, i64 400} +; CHECK: [[PROF17]] = !{!"branch_weights", i32 1000} +; CHECK: [[PROF18]] = !{!"branch_weights", i32 1234, i32 5678} +; CHECK: [[PROF19]] = !{!"function_entry_count", i64 500} +; CHECK: [[PROF20]] = !{!"VP", i32 2, i64 500, i64 789, i64 300, i64 321, i64 200} +; CHECK: [[PROF21]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200} +; CHECK: [[PROF22]] = !{!"branch_weights", i32 500} diff --git 
a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll index daa95e93b68ec..96aa35fb572de 100644 --- a/llvm/test/Transforms/Inline/update_value_profile.ll +++ b/llvm/test/Transforms/Inline/update_value_profile.ll @@ -2,33 +2,33 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; When 'callee' is inlined into caller1 and caller2, the indirect call value -; profiles of the inlined copy should be scaled based on callers' profiles, -; and the indirect call value profiles in 'callee' should be updated. -define i32 @callee(ptr %0, i32 %1) !prof !20 { +; When 'callee' is inlined into caller1 and caller2, the indirect call and vtable +; value profiles of the inlined copy should be scaled based on callers' profiles. +; The indirect call and vtable value profiles in 'callee' should be updated. +define i32 @callee(ptr %0, i32 %1) !prof !19 { ; CHECK-LABEL: define i32 @callee( ; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] { -; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF1:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF2:![0-9]+]] ; CHECK-NEXT: ret i32 [[TMP6]] ; - %3 = load ptr, ptr %0 + %3 = load ptr, ptr %0, !prof !15 %5 = getelementptr inbounds i8, ptr %3, i64 8 %6 = load ptr, ptr %5 - %7 = tail call i32 %6(ptr %0, i32 %1), !prof !17 + %7 = tail call i32 %6(ptr %0, i32 %1), !prof !16 ret i32 %7 } -define i32 @caller1(i32 %0) !prof !18 { +define i32 @caller1(i32 %0) !prof !17 { ; CHECK-LABEL: define i32 @caller1( -; 
CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF2:![0-9]+]] { +; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF3:![0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]]) -; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF4:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF3:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]] ; CHECK-NEXT: ret i32 [[TMP6]] ; %2 = tail call ptr @_Z10createTypei(i32 %0) @@ -36,14 +36,14 @@ define i32 @caller1(i32 %0) !prof !18 { ret i32 %3 } -define i32 @caller2(i32 %0) !prof !19 { +define i32 @caller2(i32 %0) !prof !18 { ; CHECK-LABEL: define i32 @caller2( -; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF4:![0-9]+]] { +; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF6:![0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]]) -; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF7:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF8:![0-9]+]] ; CHECK-NEXT: ret i32 [[TMP6]] ; %2 = tail call ptr @_Z10createTypei(i32 %0) @@ -67,15 +67,19 @@ declare ptr @_Z10createTypei(i32) !12 = !{i32 10000, i64 100, i32 1} !13 = !{i32 999000, i64 100, i32 1} !14 = !{i32 999999, i64 1, i32 2} -!17 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600} -!18 = !{!"function_entry_count", i64 1000} -!19 
= !{!"function_entry_count", i64 600} -!20 = !{!"function_entry_count", i64 1700} +!15 = !{!"VP", i32 2, i64 1600, i64 321, i64 1000, i64 789, i64 600} +!16 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600} +!17 = !{!"function_entry_count", i64 1000} +!18 = !{!"function_entry_count", i64 600} +!19 = !{!"function_entry_count", i64 1700} ;. ; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100} -; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35} -; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000} -; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352} -; CHECK: [[PROF4]] = !{!"function_entry_count", i64 600} -; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211} +; CHECK: [[PROF1]] = !{!"VP", i32 2, i64 94, i64 321, i64 58, i64 789, i64 35} +; CHECK: [[PROF2]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35} +; CHECK: [[PROF3]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF4]] = !{!"VP", i32 2, i64 941, i64 321, i64 588, i64 789, i64 352} +; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352} +; CHECK: [[PROF6]] = !{!"function_entry_count", i64 600} +; CHECK: [[PROF7]] = !{!"VP", i32 2, i64 564, i64 321, i64 352, i64 789, i64 211} +; CHECK: [[PROF8]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211} ;. 
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll new file mode 100644 index 0000000000000..c77be3b1ed244 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 + +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0 +@Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2 +@Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6 + +@Derived1 = constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3 +@Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7 +@Derived3 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8 + +; VTABLE-CMP: remark: :0:0: Promote indirect call to Derived1_bar with count 600 out of 1600, sink 2 instruction(s) and compare 1 vtable(s): {Derived1} +; VTABLE-CMP: remark: :0:0: Promote indirect call to 
Derived2_bar with count 500 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {Derived2} +; VTABLE-CMP: remark: :0:0: Promote indirect call to Base1_bar with count 400 out of 500, sink 2 instruction(s) and compare 2 vtable(s): {Derived3, Base1} + +define void @test(ptr %d) { +; VTABLE-CMP-LABEL: define void @test( +; VTABLE-CMP-SAME: ptr [[D:%.*]]) { +; VTABLE-CMP-NEXT: [[ENTRY:.*:]] +; VTABLE-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]] +; VTABLE-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1") +; VTABLE-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) +; VTABLE-CMP-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived1, i32 40) +; VTABLE-CMP-NEXT: br i1 [[TMP1]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]] +; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG]]: +; VTABLE-CMP-NEXT: call void @Derived1_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP:.*]] +; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT]]: +; VTABLE-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived2, i32 64) +; VTABLE-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]] +; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG1]]: +; VTABLE-CMP-NEXT: call void @Derived2_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3:.*]] +; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT2]]: +; VTABLE-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Base1, i32 16) +; VTABLE-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived3, i32 16) +; VTABLE-CMP-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; VTABLE-CMP-NEXT: br i1 [[TMP5]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]] +; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG4]]: +; VTABLE-CMP-NEXT: call void 
@Base1_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6:.*]] +; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT5]]: +; VTABLE-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1 +; VTABLE-CMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8 +; VTABLE-CMP-NEXT: call void [[TMP6]](ptr [[D]]) +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6]] +; VTABLE-CMP: [[IF_END_ICP6]]: +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3]] +; VTABLE-CMP: [[IF_END_ICP3]]: +; VTABLE-CMP-NEXT: br label %[[IF_END_ICP]] +; VTABLE-CMP: [[IF_END_ICP]]: +; VTABLE-CMP-NEXT: ret void +; +; FUNC-CMP-LABEL: define void @test( +; FUNC-CMP-SAME: ptr [[D:%.*]]) { +; FUNC-CMP-NEXT: [[ENTRY:.*:]] +; FUNC-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]] +; FUNC-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1") +; FUNC-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) +; FUNC-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1 +; FUNC-CMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8 +; FUNC-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @Derived1_bar +; FUNC-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]] +; FUNC-CMP: [[IF_TRUE_DIRECT_TARG]]: +; FUNC-CMP-NEXT: call void @Derived1_bar(ptr [[D]]) +; FUNC-CMP-NEXT: br label %[[IF_END_ICP:.*]] +; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT]]: +; FUNC-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP1]], @Derived2_bar +; FUNC-CMP-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]] +; FUNC-CMP: [[IF_TRUE_DIRECT_TARG1]]: +; FUNC-CMP-NEXT: call void @Derived2_bar(ptr [[D]]) +; FUNC-CMP-NEXT: br label %[[IF_END_ICP3:.*]] +; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT2]]: +; FUNC-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @Base1_bar +; FUNC-CMP-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label 
%[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]] +; FUNC-CMP: [[IF_TRUE_DIRECT_TARG4]]: +; FUNC-CMP-NEXT: call void @Base1_bar(ptr [[D]]) +; FUNC-CMP-NEXT: br label %[[IF_END_ICP6:.*]] +; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT5]]: +; FUNC-CMP-NEXT: call void [[TMP1]](ptr [[D]]) +; FUNC-CMP-NEXT: br label %[[IF_END_ICP6]] +; FUNC-CMP: [[IF_END_ICP6]]: +; FUNC-CMP-NEXT: br label %[[IF_END_ICP3]] +; FUNC-CMP: [[IF_END_ICP3]]: +; FUNC-CMP-NEXT: br label %[[IF_END_ICP]] +; FUNC-CMP: [[IF_END_ICP]]: +; FUNC-CMP-NEXT: ret void +; +entry: + %vtable = load ptr, ptr %d, !prof !9 + %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"Base1") + tail call void @llvm.assume(i1 %0) + %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 + %1 = load ptr, ptr %vfn + call void %1(ptr %d), !prof !10 + ret void +} + +define void @Base1_bar(ptr %this) { + ret void +} + +define void @Derived1_bar(ptr %this) { + ret void +} + +define void @Derived2_bar(ptr %this) { + ret void +} + + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.assume(i1) +declare i32 @Base2_foo(ptr) +declare i32 @Base1_foo(ptr) +declare void @Base3_foo(ptr) + +!0 = !{i64 16, !"Base1"} +!1 = !{i64 40, !"Base1"} +!2 = !{i64 16, !"Base2"} +!3 = !{i64 16, !"Derived1"} +!4 = !{i64 64, !"Base1"} +!5 = !{i64 40, !"Base2"} +!6 = !{i64 16, !"Base3"} +!7 = !{i64 16, !"Derived2"} +!8 = !{i64 16, !"Derived3"} +!9 = !{!"VP", i32 2, i64 1600, i64 -4123858694673519054, i64 600, i64 -7211198353767973908, i64 500, i64 -3574436251470806727, i64 200, i64 6288809125658696740, i64 200, i64 12345678, i64 100} +!10 = !{!"VP", i32 0, i64 1600, i64 3827408714133779784, i64 600, i64 5837445539218476403, i64 500, i64 -9064955852395570538, i64 400, i64 56781234, i64 100} +;. 
+; VTABLE-COMMON: [[PROF9]] = !{!"VP", i32 2, i64 100, i64 12345678, i64 100} +; VTABLE-COMMON: [[PROF10]] = !{!"branch_weights", i32 600, i32 1000} +; VTABLE-COMMON: [[PROF11]] = !{!"branch_weights", i32 500, i32 500} +; VTABLE-COMMON: [[PROF12]] = !{!"branch_weights", i32 400, i32 100} + diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll new file mode 100644 index 0000000000000..6d3a6972f6885 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll @@ -0,0 +1,125 @@ +; RUN: opt < %s -passes='pgo-icall-prom' -enable-vtable-profile-use -S | FileCheck %s --check-prefix=VTABLE + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1 +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3 + +@.str = private constant [15 x i8] c"out of tickets\00" + +define i32 @test(ptr %b) personality ptr @__gxx_personality_v0 { +; VTABLE-LABEL: define i32 @test( +; VTABLE-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 { +; VTABLE-NEXT: [[ENTRY:.*:]] +; VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8 +; VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base") +; VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) +; VTABLE-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV7Derived, i32 16) +; VTABLE-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF4:![0-9]+]] +; VTABLE: [[IF_TRUE_DIRECT_TARG]]: +; VTABLE-NEXT: [[TMP2:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr [[B]]) +; VTABLE-NEXT: to label %[[IF_END_ICP:.*]] unwind label %[[LPAD:.*]] +; 
VTABLE: [[IF_FALSE_ORIG_INDIRECT]]: +; VTABLE-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV4Base, i32 16) +; VTABLE-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF5:![0-9]+]] +; VTABLE: [[IF_TRUE_DIRECT_TARG1]]: +; VTABLE-NEXT: [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr [[B]]) +; VTABLE-NEXT: to label %[[IF_END_ICP3:.*]] unwind label %[[LPAD]] +; VTABLE: [[IF_FALSE_ORIG_INDIRECT2]]: +; VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; VTABLE-NEXT: [[CALL:%.*]] = invoke i32 [[TMP1]](ptr [[B]]) +; VTABLE-NEXT: to label %[[IF_END_ICP3]] unwind label %[[LPAD]] +; VTABLE: [[IF_END_ICP3]]: +; VTABLE-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL]], %[[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], %[[IF_TRUE_DIRECT_TARG1]] ] +; VTABLE-NEXT: br label %[[IF_END_ICP]] +; VTABLE: [[IF_END_ICP]]: +; VTABLE-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP6]], %[[IF_END_ICP3]] ], [ [[TMP2]], %[[IF_TRUE_DIRECT_TARG]] ] +; VTABLE-NEXT: br label %[[NEXT:.*]] +; VTABLE: [[NEXT]]: +; VTABLE-NEXT: ret i32 [[TMP7]] +; VTABLE: [[LPAD]]: +; VTABLE-NEXT: [[EXN:%.*]] = landingpad { ptr, i32 } +; VTABLE-NEXT: cleanup +; VTABLE-NEXT: unreachable +; +entry: + %vtable = load ptr, ptr %b, !prof !4 + %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base") + tail call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable + %call = invoke i32 %1(ptr %b) to label %next unwind label %lpad, !prof !5 + +next: + ret i32 %call + +lpad: + %exn = landingpad {ptr, i32} + cleanup + unreachable +} + +declare void @make_error(ptr, ptr, i32) +declare i32 @get_ticket_id() +declare ptr @__cxa_allocate_exception(i64) + +define i32 @_ZN4Base10get_ticketEv(ptr %this) personality ptr @__gxx_personality_v0 { +entry: + %call = tail call i32 @get_ticket_id() + %cmp.not = icmp eq i32 %call, -1 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: + ret i32 %call + +if.end: + %exception = tail call ptr 
@__cxa_allocate_exception(i64 1) + invoke void @make_error(ptr %exception, ptr @.str, i32 1) + to label %invoke.cont unwind label %lpad + +invoke.cont: + unreachable + +lpad: + %0 = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } %0 +} + +define i32 @_ZN7Derived10get_ticketEv(ptr %this) personality ptr @__gxx_personality_v0 { +entry: + %call = tail call i32 @get_ticket_id() + %cmp.not = icmp eq i32 %call, -1 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: + ret i32 %call + +if.end: + %exception = tail call ptr @__cxa_allocate_exception(i64 1) + invoke void @make_error(ptr %exception, ptr @.str, i32 2) + to label %invoke.cont unwind label %lpad + +invoke.cont: + unreachable + +lpad: + %0 = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } %0 +} + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.assume(i1) +declare i32 @__gxx_personality_v0(...) + +!0 = !{i64 16, !"_ZTS4Base"} +!1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"} +!2 = !{i64 16, !"_ZTS7Derived"} +!3 = !{i64 16, !"_ZTSM7DerivedFivE.virtual"} +!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700} +!5 = !{!"VP", i32 0, i64 1600, i64 14811317294552474744, i64 900, i64 9261744921105590125, i64 700} + +; VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700} +; VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0} +;. 
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll new file mode 100644 index 0000000000000..fb9ec0d0c85ff --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -S 2>&1 | FileCheck %s --check-prefixes=VTABLE,REMARK + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; REMARK: remark: :0:0: Promote indirect call to _ZN7Derived5func1Eii with count 900 out of 1600, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV7Derived} +; REMARK: remark: :0:0: Promote indirect call to _ZN4Base5func1Eii with count 700 out of 700, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV4Base} + +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, !type !0, !type !1, !type !2, !type !3 +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, !type !0, !type !1 + +define i32 @test_tail_call(ptr %ptr, i32 %a, i32 %b) { +; VTABLE-LABEL: define i32 @test_tail_call( +; VTABLE-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) { +; VTABLE-NEXT: entry: +; VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8 +; VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base") +; VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]]) +; VTABLE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV7Derived, i32 16) +; VTABLE-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF4:![0-9]+]] +; VTABLE: if.true.direct_targ: +; VTABLE-NEXT: [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]]) +; VTABLE-NEXT: ret i32 [[TMP3]] +; VTABLE: 3: +; VTABLE-NEXT: [[TMP4:%.*]] 
= icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV4Base, i32 16) +; VTABLE-NEXT: br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF5:![0-9]+]] +; VTABLE: if.true.direct_targ1: +; VTABLE-NEXT: [[TMP6:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]]) +; VTABLE-NEXT: ret i32 [[TMP6]] +; VTABLE: 6: +; VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; VTABLE-NEXT: [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr [[PTR]], i32 [[A]], i32 [[B]]) +; VTABLE-NEXT: ret i32 [[CALL]] +; +entry: + %vtable = load ptr, ptr %ptr, !prof !4 + %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base") + tail call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable + %call = musttail call i32 %1(ptr %ptr, i32 %a, i32 %b), !prof !5 + ret i32 %call +} + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN7Derived5func1Eii(ptr %this, i32 %a, i32 %b) { +entry: + %sub = sub nsw i32 %a, %b + ret i32 %sub +} + +define i32 @_ZN4Base5func1Eii(ptr %this, i32 %a, i32 %b) { +entry: + %add = add nsw i32 %b, %a + ret i32 %add +} + + +!0 = !{i64 16, !"_ZTS4Base"} +!1 = !{i64 16, !"_ZTSM4BaseFiiiE.virtual"} +!2 = !{i64 16, !"_ZTS7Derived"} +!3 = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"} +!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700} +!5 = !{!"VP", i32 0, i64 1600, i64 7889036118036845314, i64 900, i64 10495086226207060333, i64 700} + +; VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700} +; VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0}