Skip to content

Commit 3b22618

Browse files
authored
[ctx_prof] Insert the ctx prof flattener after the module inliner (#107499)
This patch enables experimenting with the contextual profile. Indirect call promotion (ICP) is currently disabled in this case and will be re-enabled in a follow-up patch. The inline cost model / decision making will also be updated later to be context-aware. For now, this just achieves "complete use" of the profile, in that it is ingested, maintained, and sunk to a flat profile once it is no longer needed. Issue [#89287](#89287)
1 parent b0d2411 commit 3b22618

File tree

5 files changed

+48
-10
lines changed

5 files changed

+48
-10
lines changed

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/Analysis/ScopedNoAliasAA.h"
2626
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
2727
#include "llvm/IR/PassManager.h"
28+
#include "llvm/Pass.h"
2829
#include "llvm/Passes/OptimizationLevel.h"
2930
#include "llvm/Passes/PassBuilder.h"
3031
#include "llvm/Support/CommandLine.h"
@@ -1014,6 +1015,11 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
10141015
IP.EnableDeferral = false;
10151016

10161017
MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1018+
if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
1019+
MPM.addPass(GlobalOptPass());
1020+
MPM.addPass(GlobalDCEPass());
1021+
MPM.addPass(PGOCtxProfFlatteningPass());
1022+
}
10171023

10181024
MPM.addPass(createModuleToFunctionPassAdaptor(
10191025
buildFunctionSimplificationPipeline(Level, Phase),
@@ -1740,11 +1746,14 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
17401746
MPM.addPass(GlobalDCEPass());
17411747
return MPM;
17421748
}
1743-
1744-
// Add the core simplification pipeline.
1745-
MPM.addPass(buildModuleSimplificationPipeline(
1746-
Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1747-
1749+
if (!UseCtxProfile.empty()) {
1750+
MPM.addPass(
1751+
buildModuleInlinerPipeline(Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1752+
} else {
1753+
// Add the core simplification pipeline.
1754+
MPM.addPass(buildModuleSimplificationPipeline(
1755+
Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1756+
}
17481757
// Now add the optimization pipeline.
17491758
MPM.addPass(buildModuleOptimizationPipeline(
17501759
Level, ThinOrFullLTOPhase::ThinLTOPostLink));

llvm/lib/Transforms/IPO/ModuleInliner.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,10 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
241241
// the post-inline cleanup and the next DevirtSCCRepeatedPass
242242
// iteration because the next iteration may not happen and we may
243243
// miss inlining it.
244-
if (tryPromoteCall(*ICB))
245-
NewCallee = ICB->getCalledFunction();
244+
// FIXME: enable for ctxprof.
245+
if (!CtxProf)
246+
if (tryPromoteCall(*ICB))
247+
NewCallee = ICB->getCalledFunction();
246248
}
247249
if (NewCallee)
248250
if (!NewCallee->isDeclaration())

llvm/test/Analysis/CtxProfAnalysis/inline.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,23 @@
3131
; CHECK-NEXT: %call2 = call i32 @a(i32 %x) #1
3232
; CHECK-NEXT: br label %exit
3333

34+
; Make sure the postlink thinlto pipeline is aware of ctxprof
35+
; RUN: opt -passes='thinlto<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
36+
; RUN: %t/module.ll -S -o - | FileCheck %s --check-prefix=PIPELINE
37+
38+
; PIPELINE-LABEL: define i32 @entrypoint
39+
; PIPELINE-SAME: !prof ![[ENTRYPOINT_COUNT:[0-9]+]]
40+
; PIPELINE-LABEL: loop.i:
41+
; PIPELINE: br i1 %cond.i, label %loop.i, label %exit, !prof ![[LOOP_BW_INL:[0-9]+]]
42+
; PIPELINE-LABEL: define i32 @a
43+
; PIPELINE-LABEL: loop:
44+
; PIPELINE: br i1 %cond, label %loop, label %exit, !prof ![[LOOP_BW_ORIG:[0-9]+]]
45+
46+
; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 10}
47+
; These are the weights of the inlined @a, where the counters were 2, 100 (2 for entry, 100 for loop)
48+
; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 98, i32 2}
49+
; These are the weights of the un-inlined @a, where the counters were 8, 500 (8 for entry, 500 for loop)
50+
; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 492, i32 8}
3451

3552
;--- module.ll
3653
define i32 @entrypoint(i32 %x) !guid !0 {

llvm/test/Other/opt-hot-cold-split.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='lto-pre-link<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-PRELINK-Os
33
; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='thinlto-pre-link<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-PRELINK-Os
44
; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='lto<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=LTO-POSTLINK-Os
5-
; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -passes='thinlto<Os>' -debug-pass-manager < %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=THINLTO-POSTLINK-Os
5+
; RUN: opt -mtriple=x86_64-- -hot-cold-split=true -LINK-Os
66

77
; REQUIRES: asserts
88

llvm/test/ThinLTO/X86/ctxprof.ll

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,23 @@
99
; RUN: mkdir -p %t
1010
; RUN: split-file %s %t
1111
;
12-
; RUN: opt -module-summary %t/m1.ll -o %t/m1.bc
13-
; RUN: opt -module-summary %t/m2.ll -o %t/m2.bc
12+
; RUN: opt -module-summary -passes=assign-guid %t/m1.ll -o %t/m1.bc
13+
; RUN: opt -module-summary -passes=assign-guid %t/m2.ll -o %t/m2.bc
1414
; RUN: llvm-dis %t/m1.bc -o - | FileCheck %s --check-prefix=GUIDS-1
1515
; RUN: llvm-dis %t/m2.bc -o - | FileCheck %s --check-prefix=GUIDS-2
1616
;
17+
; GUIDS-1-LABEL: @m1_f1
18+
; GUIDS-1-SAME: !guid ![[GUID1:[0-9]+]]
19+
; GUIDS-1: ![[GUID1]] = !{i64 6019442868614718803}
20+
; GUIDS-1: ^0 = module:
1721
; GUIDS-1: name: "m1_f1"
1822
; GUIDS-1-SAME: guid = 6019442868614718803
23+
24+
; note: -2853647799038631862 is 15593096274670919754 printed as a signed i64 (same 64-bit GUID; the !guid metadata is signed, the summary is unsigned)
25+
; GUIDS-2-LABEL: @m2_f1
26+
; GUIDS-2-SAME: !guid ![[GUID2:[0-9]+]]
27+
; GUIDS-2: ![[GUID2]] = !{i64 -2853647799038631862}
28+
; GUIDS-2: ^0 = module:
1929
; GUIDS-2: name: "m2_f1"
2030
; GUIDS-2-SAME: guid = 15593096274670919754
2131
;

0 commit comments

Comments
 (0)