Skip to content

Commit c4a3ffd

Browse files
author
Yonghong Song
committed
[RFC][Transforms][IPO] Add func suffix in ArgumentPromotion and DeadArgumentElimination
The ArgumentPromotion and DeadArgumentElimination passes can change a function's signature while the function name remains the same as before the transformation. This makes tracing with bpf programs hard, because users tend to rely on the function signature from the source. See discussion [1] for details. This patch adds a suffix to functions whose signatures are changed. The suffix lets users know that the function signature has changed and that they need to inspect the IR or binary to find the modified signature before tracing those functions. The suffix for ArgumentPromotion is ".argprom" and the suffix for DeadArgumentElimination is ".argelim" (the DeadArgumentElimination change in this commit appends ".retelim" instead when NumArgumentsEliminated is zero). The suffix also gives users a hint about what kind of transformation has been done. With this patch, I built a recent linux kernel with full LTO enabled. I got 4 functions with only argpromotion, like: set_track_update.argelim.argprom, pmd_trans_huge_lock.argprom, ... I got 1058 functions with only deadargelim, like: process_bit0.argelim, pci_io_ecs_init.argelim, ... I got 3 functions with both argpromotion and deadargelim: set_track_update.argelim.argprom, zero_pud_populate.argelim.argprom, zero_pmd_populate.argelim.argprom. There were some concerns that the function suffix may impact sample-based profiling. I did some experiments which show that this is not the case. Sample profiling gets function names from dwarf, and the function names in dwarf do not have the suffixes added by this patch, so sample profiling works fine with this patch. [1] #104678
1 parent e075dcf commit c4a3ffd

File tree

80 files changed

+326
-323
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+326
-323
lines changed

compiler-rt/test/cfi/stats.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ extern "C" __attribute__((noinline)) void nvcall(A *a) {
2626
}
2727

2828
extern "C" __attribute__((noinline)) A *dcast(A *a) {
29-
// CHECK: stats.cpp:[[@LINE+1]] {{_?}}dcast cfi-derived-cast 24
29+
// CHECK: stats.cpp:[[@LINE+1]] {{_?}}dcast.retelim cfi-derived-cast 24
3030
return (A *)(ABase *)a;
3131
}
3232

3333
extern "C" __attribute__((noinline)) A *ucast(A *a) {
34-
// CHECK: stats.cpp:[[@LINE+1]] {{_?}}ucast cfi-unrelated-cast 81
34+
// CHECK: stats.cpp:[[@LINE+1]] {{_?}}ucast.retelim cfi-unrelated-cast 81
3535
return (A *)(char *)a;
3636
}
3737

llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
215215

216216
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
217217
NF->takeName(F);
218+
NF->setName(NF->getName() + ".argprom");
218219

219220
// Loop over all the callers of the function, transforming the call sites to
220221
// pass in the loaded pointers.

llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,10 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) {
889889
// it again.
890890
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
891891
NF->takeName(F);
892+
if (NumArgumentsEliminated)
893+
NF->setName(NF->getName() + ".argelim");
894+
else
895+
NF->setName(NF->getName() + ".retelim");
892896
NF->IsNewDbgInfoFormat = F->IsNewDbgInfoFormat;
893897

894898
// Loop over all the callers of the function, transforming the call sites to

llvm/test/Analysis/LazyCallGraph/remove-dead-function-spurious-ref-edge.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@ define internal void @a() alwaysinline {
99
}
1010

1111
define internal void @b(ptr) noinline {
12-
; CHECK-LABEL: @b(
12+
; CHECK-LABEL: @b.argprom(
1313
; CHECK-NEXT: ret void
1414
;
1515
ret void
1616
}
1717

1818
define internal void @c() noinline {
1919
; CHECK-LABEL: @c(
20-
; CHECK-NEXT: call void @b()
20+
; CHECK-NEXT: call void @b.argprom()
2121
; CHECK-NEXT: ret void
2222
;
2323
call void @b(ptr @a)

llvm/test/BugPoint/remove_arguments_test.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
declare i32 @test2()
1313

14-
; CHECK: define void @test() {
14+
; CHECK: define void @test.argelim() {
1515
define i32 @test(i32 %A, ptr %B, float %C) {
1616
call i32 @test2()
1717
ret i32 %1

llvm/test/CodeGen/AArch64/arg_promotion.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,16 @@ define dso_local void @caller_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
3838
; CHECK-LABEL: define dso_local void @caller_4xi32(
3939
; CHECK-NEXT: entry:
4040
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
41-
; CHECK-NEXT: call fastcc void @callee_4xi32(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
41+
; CHECK-NEXT: call fastcc void @callee_4xi32.argprom.argprom(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
4242
; CHECK-NEXT: ret void
4343
;
4444
entry:
45-
call fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst)
45+
call fastcc void @callee_4xi32.argprom(ptr noalias %src, ptr noalias %dst)
4646
ret void
4747
}
4848

49-
define internal fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
50-
; CHECK-LABEL: define internal fastcc void @callee_4xi32(
49+
define internal fastcc void @callee_4xi32.argprom(ptr noalias %src, ptr noalias %dst) #1 {
50+
; CHECK-LABEL: define internal fastcc void @callee_4xi32.argprom.argprom(
5151
; CHECK-NEXT: entry:
5252
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
5353
; CHECK-NEXT: ret void
@@ -65,7 +65,7 @@ define dso_local void @caller_i256(ptr noalias %src, ptr noalias %dst) #0 {
6565
; CHECK-LABEL: define dso_local void @caller_i256(
6666
; CHECK-NEXT: entry:
6767
; CHECK-NEXT: [[SRC_VAL:%.*]] = load i256, ptr [[SRC:%.*]], align 16
68-
; CHECK-NEXT: call fastcc void @callee_i256(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
68+
; CHECK-NEXT: call fastcc void @callee_i256.argprom(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
6969
; CHECK-NEXT: ret void
7070
;
7171
entry:
@@ -74,7 +74,7 @@ entry:
7474
}
7575

7676
define internal fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst) #0 {
77-
; CHECK-LABEL: define internal fastcc void @callee_i256(
77+
; CHECK-LABEL: define internal fastcc void @callee_i256.argprom(
7878
; CHECK-NEXT: entry:
7979
; CHECK-NEXT: store i256 [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
8080
; CHECK-NEXT: ret void
@@ -159,7 +159,7 @@ define dso_local void @caller_struct4xi32(ptr noalias %src, ptr noalias %dst) #1
159159
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
160160
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC]], i64 16
161161
; CHECK-NEXT: [[SRC_VAL1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
162-
; CHECK-NEXT: call fastcc void @callee_struct4xi32(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
162+
; CHECK-NEXT: call fastcc void @callee_struct4xi32.argprom(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
163163
; CHECK-NEXT: ret void
164164
;
165165
entry:
@@ -168,7 +168,7 @@ entry:
168168
}
169169

170170
define internal fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 {
171-
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32(
171+
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32.argprom(
172172
; CHECK-NEXT: entry:
173173
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
174174
; CHECK-NEXT: [[DST2:%.*]] = getelementptr inbounds [[STRUCT_4XI32:%.*]], ptr [[DST]], i64 0, i32 1

llvm/test/CodeGen/AMDGPU/internalize.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
; ALL: gvar_used
1111
@gvar_used = addrspace(1) global i32 undef, align 4
1212

13-
; OPT: define internal fastcc void @func_used_noinline(
13+
; OPT: define internal fastcc void @func_used_noinline.argelim(
1414
; OPT-NONE: define fastcc void @func_used_noinline(
1515
define fastcc void @func_used_noinline(ptr addrspace(1) %out, i32 %tid) #1 {
1616
entry:

llvm/test/ThinLTO/X86/memprof-aliased-location1.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,22 +84,22 @@ attributes #0 = { noinline optnone }
8484
;; The first call to foo does not allocate cold memory. It should call the
8585
;; original functions, which ultimately call the original allocation decorated
8686
;; with a "notcold" attribute.
87-
; IR: call {{.*}} @_Z3foov()
87+
; IR: call {{.*}} @_Z3foov.retelim()
8888
;; The second call to foo allocates cold memory. It should call cloned functions
8989
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
90-
; IR: call {{.*}} @_Z3foov.memprof.1()
91-
; IR: define internal {{.*}} @_Z3barv()
90+
; IR: call {{.*}} @_Z3foov.memprof.1.retelim()
91+
; IR: define internal {{.*}} @_Z3barv.retelim()
9292
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
93-
; IR: define internal {{.*}} @_Z3bazv()
94-
; IR: call {{.*}} @_Z3barv()
95-
; IR: define internal {{.*}} @_Z3foov()
96-
; IR: call {{.*}} @_Z3bazv()
97-
; IR: define internal {{.*}} @_Z3barv.memprof.1()
93+
; IR: define internal {{.*}} @_Z3bazv.retelim()
94+
; IR: call {{.*}} @_Z3barv.retelim()
95+
; IR: define internal {{.*}} @_Z3foov.retelim()
96+
; IR: call {{.*}} @_Z3bazv.retelim()
97+
; IR: define internal {{.*}} @_Z3barv.memprof.1.retelim()
9898
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
99-
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
100-
; IR: call {{.*}} @_Z3barv.memprof.1()
101-
; IR: define internal {{.*}} @_Z3foov.memprof.1()
102-
; IR: call {{.*}} @_Z3bazv.memprof.1()
99+
; IR: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
100+
; IR: call {{.*}} @_Z3barv.memprof.1.retelim()
101+
; IR: define internal {{.*}} @_Z3foov.memprof.1.retelim()
102+
; IR: call {{.*}} @_Z3bazv.memprof.1.retelim()
103103
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
104104
; IR: attributes #[[COLD]] = { "memprof"="cold" }
105105

llvm/test/ThinLTO/X86/memprof-aliased-location2.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,22 +84,22 @@ attributes #0 = { noinline optnone }
8484
;; The first call to foo does not allocate cold memory. It should call the
8585
;; original functions, which ultimately call the original allocation decorated
8686
;; with a "notcold" attribute.
87-
; IR: call {{.*}} @_Z3foov()
87+
; IR: call {{.*}} @_Z3foov.retelim()
8888
;; The second call to foo allocates cold memory. It should call cloned functions
8989
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
90-
; IR: call {{.*}} @_Z3foov.memprof.1()
91-
; IR: define internal {{.*}} @_Z3barv()
90+
; IR: call {{.*}} @_Z3foov.memprof.1.retelim()
91+
; IR: define internal {{.*}} @_Z3barv.retelim()
9292
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
93-
; IR: define internal {{.*}} @_Z3bazv()
94-
; IR: call {{.*}} @_Z3barv()
95-
; IR: define internal {{.*}} @_Z3foov()
96-
; IR: call {{.*}} @_Z3bazv()
97-
; IR: define internal {{.*}} @_Z3barv.memprof.1()
93+
; IR: define internal {{.*}} @_Z3bazv.retelim()
94+
; IR: call {{.*}} @_Z3barv.retelim()
95+
; IR: define internal {{.*}} @_Z3foov.retelim()
96+
; IR: call {{.*}} @_Z3bazv.retelim()
97+
; IR: define internal {{.*}} @_Z3barv.memprof.1.retelim()
9898
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
99-
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
100-
; IR: call {{.*}} @_Z3barv.memprof.1()
101-
; IR: define internal {{.*}} @_Z3foov.memprof.1()
102-
; IR: call {{.*}} @_Z3bazv.memprof.1()
99+
; IR: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
100+
; IR: call {{.*}} @_Z3barv.memprof.1.retelim()
101+
; IR: define internal {{.*}} @_Z3foov.memprof.1.retelim()
102+
; IR: call {{.*}} @_Z3bazv.memprof.1.retelim()
103103
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
104104
; IR: attributes #[[COLD]] = { "memprof"="cold" }
105105

llvm/test/ThinLTO/X86/memprof-basic.ll

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -284,26 +284,25 @@ attributes #0 = { noinline optnone }
284284
;; The first call to foo does not allocate cold memory. It should call the
285285
;; original functions, which ultimately call the original allocation decorated
286286
;; with a "notcold" attribute.
287-
; IR: call {{.*}} @_Z3foov()
287+
; IR: call {{.*}} @_Z3foov{{.*}}()
288288
;; The second call to foo allocates cold memory. It should call cloned functions
289289
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
290-
; IR: call {{.*}} @_Z3foov.memprof.1()
291-
; IR: define internal {{.*}} @_Z3barv()
290+
; IR: call {{.*}} @_Z3foov.memprof.1{{.*}}()
291+
; IR: define internal {{.*}} @_Z3barv{{.*}}()
292292
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
293-
; IR: define internal {{.*}} @_Z3bazv()
294-
; IR: call {{.*}} @_Z3barv()
295-
; IR: define internal {{.*}} @_Z3foov()
296-
; IR: call {{.*}} @_Z3bazv()
297-
; IR: define internal {{.*}} @_Z3barv.memprof.1()
293+
; IR: define internal {{.*}} @_Z3bazv{{.*}}()
294+
; IR: call {{.*}} @_Z3barv{{.*}}()
295+
; IR: define internal {{.*}} @_Z3foov{{.*}}()
296+
; IR: call {{.*}} @_Z3bazv{{.*}}()
297+
; IR: define internal {{.*}} @_Z3barv.memprof.1{{.*}}()
298298
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
299-
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
300-
; IR: call {{.*}} @_Z3barv.memprof.1()
301-
; IR: define internal {{.*}} @_Z3foov.memprof.1()
302-
; IR: call {{.*}} @_Z3bazv.memprof.1()
299+
; IR: define internal {{.*}} @_Z3bazv.memprof.1{{.*}}()
300+
; IR: call {{.*}} @_Z3barv.memprof.1{{.*}}()
301+
; IR: define internal {{.*}} @_Z3foov.memprof.1{{.*}}()
302+
; IR: call {{.*}} @_Z3bazv.memprof.1{{.*}}()
303303
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
304304
; IR: attributes #[[COLD]] = { "memprof"="cold" }
305305

306-
307306
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
308307
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
309308
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)

0 commit comments

Comments
 (0)