Skip to content

Commit c2d8926

Browse files
authored
[llvm][ctx_profile] Add instrumentation (#90136)
This adds callsite instrumentation to PGOInstrumentation, *if* contextual profiling is requested. Requesting contextual profiling also enables inserting counters in the entry basic block and disables value profiling (disabling value profiling is a point-in-time change). This change also adds the skeleton of the contextual profiling lowering pass, just so we can introduce the flag that enables contextual profiling and the API to check it. The actual lowering pass will be introduced in a subsequent patch. (Tracking Issue: #89287, RFC referenced there.)
1 parent a2be1b8 commit c2d8926

File tree

5 files changed

+149
-5
lines changed

5 files changed

+149
-5
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===-- PGOCtxProfLowering.h - Contextual PGO Instr. Lowering ---*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file declares the PGOCtxProfLoweringPass class.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
13+
#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
14+
15+
namespace llvm {
16+
class Type;
17+
18+
/// Skeleton of the contextual PGO instrumentation lowering pass.
///
/// For now the class only exposes the query that tells other instrumentation
/// code whether contextual IR PGO has been requested.
class PGOCtxProfLoweringPass {
public:
  /// Returns true when contextual IR PGO instrumentation is enabled.
  static bool isContextualIRPGOEnabled();

  /// The pass carries no state; default construction is all that is needed.
  explicit PGOCtxProfLoweringPass() = default;
};
23+
} // namespace llvm
24+
#endif

llvm/lib/Transforms/Instrumentation/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ add_llvm_component_library(LLVMInstrumentation
1414
InstrProfiling.cpp
1515
KCFI.cpp
1616
LowerAllowCheckPass.cpp
17+
PGOCtxProfLowering.cpp
1718
PGOForceFunctionAttrs.cpp
1819
PGOInstrumentation.cpp
1920
PGOMemOPSizeOpt.cpp
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
10+
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
11+
#include "llvm/Support/CommandLine.h"
12+
13+
using namespace llvm;
14+
15+
// Hidden, list-valued command line option (-profile-context-root) naming the
// functions that act as roots for contextual profiling. Contextual IR PGO is
// considered enabled whenever this list is non-empty.
static cl::list<std::string>
    ContextRoots("profile-context-root", cl::Hidden,
                 cl::desc("A function name, assumed to be global, which will "
                          "be treated as the root of an interesting graph, "
                          "which will be profiled independently from other "
                          "similar graphs."));
21+
22+
bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
23+
return !ContextRoots.empty();
24+
}

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
#include "llvm/Transforms/Instrumentation.h"
111111
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
112112
#include "llvm/Transforms/Instrumentation/CFGMST.h"
113+
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
113114
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
114115
#include "llvm/Transforms/Utils/MisExpect.h"
115116
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -333,6 +334,20 @@ extern cl::opt<bool> EnableVTableValueProfiling;
333334
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
334335
} // namespace llvm
335336

337+
bool shouldInstrumentEntryBB() {
338+
return PGOInstrumentEntry ||
339+
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
340+
}
341+
342+
// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
343+
// profiling implicitly captures indirect call cases, but not other values.
344+
// Supporting other values is relatively straight-forward - just another counter
345+
// range within the context.
346+
bool isValueProfilingDisabled() {
347+
return DisableValueProfiling ||
348+
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
349+
}
350+
336351
// Return a string describing the branch condition that can be
337352
// used in static branch probability heuristics:
338353
static std::string getBranchCondString(Instruction *TI) {
@@ -379,7 +394,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
379394
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
380395
if (IsCS)
381396
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
382-
if (PGOInstrumentEntry)
397+
if (shouldInstrumentEntryBB())
383398
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
384399
if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
385400
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
@@ -861,7 +876,7 @@ static void instrumentOneFunc(
861876
}
862877

863878
FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
864-
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
879+
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
865880
PGOBlockCoverage);
866881

867882
auto Name = FuncInfo.FuncNameVar;
@@ -883,6 +898,43 @@ static void instrumentOneFunc(
883898
unsigned NumCounters =
884899
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
885900

901+
if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
902+
auto *CSIntrinsic =
903+
Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
904+
// We want to count the instrumentable callsites, then instrument them. This
905+
// is because the llvm.instrprof.callsite intrinsic has an argument (like
906+
// the other instrprof intrinsics) capturing the total number of
907+
// instrumented objects (counters, or callsites, in this case). In this
908+
// case, we want that value so we can readily pass it to the compiler-rt
909+
// APIs that may have to allocate memory based on the nr of callsites.
910+
// The traversal logic is the same for both counting and instrumentation,
911+
// just needs to be done in succession.
912+
auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
913+
for (auto &BB : F)
914+
for (auto &Instr : BB)
915+
if (auto *CS = dyn_cast<CallBase>(&Instr)) {
916+
if ((CS->getCalledFunction() &&
917+
CS->getCalledFunction()->isIntrinsic()) ||
918+
dyn_cast<InlineAsm>(CS->getCalledOperand()))
919+
continue;
920+
Visitor(CS);
921+
}
922+
};
923+
// First, count callsites.
924+
uint32_t TotalNrCallsites = 0;
925+
Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
926+
927+
// Now instrument.
928+
uint32_t CallsiteIndex = 0;
929+
Visit([&](auto *CB) {
930+
IRBuilder<> Builder(CB);
931+
Builder.CreateCall(CSIntrinsic,
932+
{Name, CFGHash, Builder.getInt32(TotalNrCallsites),
933+
Builder.getInt32(CallsiteIndex++),
934+
CB->getCalledOperand()});
935+
});
936+
}
937+
886938
uint32_t I = 0;
887939
if (PGOTemporalInstrumentation) {
888940
NumCounters += PGOBlockCoverage ? 8 : 1;
@@ -914,7 +966,7 @@ static void instrumentOneFunc(
914966
FuncInfo.FunctionHash);
915967
assert(I == NumCounters);
916968

917-
if (DisableValueProfiling)
969+
if (isValueProfilingDisabled())
918970
return;
919971

920972
NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
@@ -1676,7 +1728,7 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
16761728

16771729
// Traverse all valuesites and annotate the instructions for all value kind.
16781730
void PGOUseFunc::annotateValueSites() {
1679-
if (DisableValueProfiling)
1731+
if (isValueProfilingDisabled())
16801732
return;
16811733

16821734
// Create the PGOFuncName meta data.
@@ -1779,7 +1831,7 @@ static bool InstrumentAllFunctions(
17791831
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
17801832
// For the context-sensitive instrumentation, we should have a separate pass
17811833
// (before LTO/ThinLTO linking) to create these variables.
1782-
if (!IsCS)
1834+
if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
17831835
createIRLevelProfileFlagVar(M, /*IsCS=*/false);
17841836

17851837
Triple TT(M.getTargetTriple());
@@ -2018,6 +2070,8 @@ static bool annotateAllFunctions(
20182070
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
20192071
if (PGOInstrumentEntry.getNumOccurrences() > 0)
20202072
InstrumentFuncEntry = PGOInstrumentEntry;
2073+
InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
2074+
20212075
bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
20222076
for (auto &F : M) {
20232077
if (skipPGOUse(F))
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
2+
; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
3+
; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s
4+
5+
declare void @bar()
6+
7+
;.
8+
; INSTRUMENT: @__profn_foo = private constant [3 x i8] c"foo"
9+
;.
10+
; @foo has two callsites: an indirect call through %fct (block "yes") and a
; direct call to @bar (block "no"). With -profile-context-root set, the checks
; below expect each call to be immediately preceded by an
; llvm.instrprof.callsite call whose last three arguments are the total number
; of callsites (2), the callsite's index (0, then 1) and the called operand.
; They also expect an llvm.instrprof.increment counter at the start of the
; entry block.
define void @foo(i32 %a, ptr %fct) {
11+
; INSTRUMENT-LABEL: define void @foo(
12+
; INSTRUMENT-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) {
13+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
14+
; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
15+
; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
16+
; INSTRUMENT: yes:
17+
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
18+
; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
19+
; INSTRUMENT-NEXT: call void [[FCT]](i32 [[A]])
20+
; INSTRUMENT-NEXT: br label [[EXIT:%.*]]
21+
; INSTRUMENT: no:
22+
; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
23+
; INSTRUMENT-NEXT: call void @bar()
24+
; INSTRUMENT-NEXT: br label [[EXIT]]
25+
; INSTRUMENT: exit:
26+
; INSTRUMENT-NEXT: ret void
27+
;
28+
%t = icmp eq i32 %a, 0
29+
br i1 %t, label %yes, label %no
30+
yes:
31+
call void %fct(i32 %a)
32+
br label %exit
33+
no:
34+
call void @bar()
35+
br label %exit
36+
exit:
37+
ret void
38+
}
39+
;.
40+
; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
41+
;.

0 commit comments

Comments
 (0)