Skip to content

[llvm][ctx_profile] Add instrumentation #90136

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===-- PGOCtxProfLowering.h - Contextual PGO Instr. Lowering ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the PGOCtxProfLoweringPass class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H

namespace llvm {
class Type;

class PGOCtxProfLoweringPass {
public:
explicit PGOCtxProfLoweringPass() = default;
static bool isContextualIRPGOEnabled();
};
} // namespace llvm
#endif
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Instrumentation/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ add_llvm_component_library(LLVMInstrumentation
InstrProfiling.cpp
KCFI.cpp
LowerAllowCheckPass.cpp
PGOCtxProfLowering.cpp
PGOForceFunctionAttrs.cpp
PGOInstrumentation.cpp
PGOMemOPSizeOpt.cpp
Expand Down
24 changes: 24 additions & 0 deletions llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//

#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

static cl::list<std::string> ContextRoots(
"profile-context-root", cl::Hidden,
cl::desc(
"A function name, assumed to be global, which will be treated as the "
"root of an interesting graph, which will be profiled independently "
"from other similar graphs."));

bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
return !ContextRoots.empty();
}
64 changes: 59 additions & 5 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
#include "llvm/Transforms/Instrumentation/CFGMST.h"
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
Expand Down Expand Up @@ -333,6 +334,20 @@ extern cl::opt<bool> EnableVTableValueProfiling;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm

bool shouldInstrumentEntryBB() {
return PGOInstrumentEntry ||
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
}

// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
// profiling implicitly captures indirect call cases, but not other values.
// Supporting other values is relatively straight-forward - just another counter
// range within the context.
bool isValueProfilingDisabled() {
return DisableValueProfiling ||
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document why e.g. with a fixme?

}

// Return a string describing the branch condition that can be
// used in static branch probability heuristics:
static std::string getBranchCondString(Instruction *TI) {
Expand Down Expand Up @@ -379,7 +394,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
if (IsCS)
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
if (PGOInstrumentEntry)
if (shouldInstrumentEntryBB())
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
Expand Down Expand Up @@ -861,7 +876,7 @@ static void instrumentOneFunc(
}

FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
PGOBlockCoverage);

auto Name = FuncInfo.FuncNameVar;
Expand All @@ -883,6 +898,43 @@ static void instrumentOneFunc(
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();

if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
auto *CSIntrinsic =
Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
// We want to count the instrumentable callsites, then instrument them. This
// is because the llvm.instrprof.callsite intrinsic has an argument (like
// the other instrprof intrinsics) capturing the total number of
// instrumented objects (counters, or callsites, in this case). In this
// case, we want that value so we can readily pass it to the compiler-rt
// APIs that may have to allocate memory based on the nr of callsites.
// The traversal logic is the same for both counting and instrumentation,
// just needs to be done in succession.
auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
for (auto &BB : F)
for (auto &Instr : BB)
if (auto *CS = dyn_cast<CallBase>(&Instr)) {
if ((CS->getCalledFunction() &&
CS->getCalledFunction()->isIntrinsic()) ||
dyn_cast<InlineAsm>(CS->getCalledOperand()))
continue;
Visitor(CS);
}
};
// First, count callsites.
uint32_t TotalNrCallsites = 0;
Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });

// Now instrument.
uint32_t CallsiteIndex = 0;
Visit([&](auto *CB) {
IRBuilder<> Builder(CB);
Builder.CreateCall(CSIntrinsic,
{Name, CFGHash, Builder.getInt32(TotalNrCallsites),
Builder.getInt32(CallsiteIndex++),
CB->getCalledOperand()});
});
}

uint32_t I = 0;
if (PGOTemporalInstrumentation) {
NumCounters += PGOBlockCoverage ? 8 : 1;
Expand Down Expand Up @@ -914,7 +966,7 @@ static void instrumentOneFunc(
FuncInfo.FunctionHash);
assert(I == NumCounters);

if (DisableValueProfiling)
if (isValueProfilingDisabled())
return;

NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
Expand Down Expand Up @@ -1676,7 +1728,7 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {

// Traverse all valuesites and annotate the instructions for all value kind.
void PGOUseFunc::annotateValueSites() {
if (DisableValueProfiling)
if (isValueProfilingDisabled())
return;

// Create the PGOFuncName meta data.
Expand Down Expand Up @@ -1779,7 +1831,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
createIRLevelProfileFlagVar(M, /*IsCS=*/false);

Triple TT(M.getTargetTriple());
Expand Down Expand Up @@ -2018,6 +2070,8 @@ static bool annotateAllFunctions(
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
if (PGOInstrumentEntry.getNumOccurrences() > 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think by removing this you have changed the behavior of the code? With the old version, I think if the user explicitly specified -pgo-instrument-entry=false it would override instrEntryBBEnabled ? At least I believe that is the behavior when checking if (PGOInstrumentEntry.getNumOccurrences() > 0).

InstrumentFuncEntry = PGOInstrumentEntry;
InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();

bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
for (auto &F : M) {
if (skipPGOUse(F))
Expand Down
41 changes: 41 additions & 0 deletions llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s
Comment on lines +2 to +3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s
; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
; RUN: -S < %s | FileCheck %s

NIT: Why not use the default check prefix CHECK?


declare void @bar()

;.
; INSTRUMENT: @__profn_foo = private constant [3 x i8] c"foo"
;.
define void @foo(i32 %a, ptr %fct) {
; INSTRUMENT-LABEL: define void @foo(
; INSTRUMENT-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) {
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
; INSTRUMENT-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
; INSTRUMENT-NEXT: br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
; INSTRUMENT: yes:
; INSTRUMENT-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
; INSTRUMENT-NEXT: call void [[FCT]](i32 [[A]])
; INSTRUMENT-NEXT: br label [[EXIT:%.*]]
; INSTRUMENT: no:
; INSTRUMENT-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
; INSTRUMENT-NEXT: call void @bar()
; INSTRUMENT-NEXT: br label [[EXIT]]
; INSTRUMENT: exit:
; INSTRUMENT-NEXT: ret void
;
%t = icmp eq i32 %a, 0
br i1 %t, label %yes, label %no
yes:
call void %fct(i32 %a)
br label %exit
no:
call void @bar()
br label %exit
exit:
ret void
}
;.
; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
;.
Loading