Skip to content

Commit 7fa3377

Browse files
authored
[BOLT][RISCV] Handle long tail calls (#67098)
Long tail calls use the following instruction sequence on RISC-V: ``` 1: auipc xi, %pcrel_hi(sym) jalr zero, %pcrel_lo(1b)(xi) ``` Since the second instruction in isolation looks like an indirect branch, this confused BOLT and most functions containing a long tail call got marked with "unknown control flow" and didn't get optimized as a consequence. This patch fixes this by detecting long tail call sequence in `analyzeIndirectBranch`. `FixRISCVCallsPass` also had to be updated to expand long tail calls to `PseudoTAIL` instead of `PseudoCALL`. Besides this, this patch also fixes a minor issue with compressed tail calls (`c.jr`) not being detected. Note that I had to change `BinaryFunction::postProcessIndirectBranches` slightly: the documentation of `MCPlusBuilder::analyzeIndirectBranch` mentions that the [`Begin`, `End`) range contains the instructions immediately preceding `Instruction`. However, in `postProcessIndirectBranches`, *all* the instructions in the BB where passed in the range. This made it difficult to find the preceding instruction so I made sure *only* the preceding instructions are passed.
1 parent d20fbc9 commit 7fa3377

File tree

4 files changed

+53
-4
lines changed

4 files changed

+53
-4
lines changed

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1761,7 +1761,8 @@ bool BinaryFunction::postProcessIndirectBranches(
17611761
uint64_t LastJT = 0;
17621762
uint16_t LastJTIndexReg = BC.MIB->getNoRegister();
17631763
for (BinaryBasicBlock &BB : blocks()) {
1764-
for (MCInst &Instr : BB) {
1764+
for (BinaryBasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
1765+
MCInst &Instr = *II;
17651766
if (!BC.MIB->isIndirectBranch(Instr))
17661767
continue;
17671768

@@ -1789,7 +1790,7 @@ bool BinaryFunction::postProcessIndirectBranches(
17891790
const MCExpr *DispExpr;
17901791
MCInst *PCRelBaseInstr;
17911792
IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
1792-
Instr, BB.begin(), BB.end(), PtrSize, MemLocInstr, BaseRegNum,
1793+
Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
17931794
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
17941795
if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
17951796
continue;

bolt/lib/Passes/FixRISCVCallsPass.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,12 @@ void FixRISCVCallsPass::runOnFunction(BinaryFunction &BF) {
4343

4444
MCInst OldCall = *NextII;
4545
auto L = BC.scopeLock();
46-
MIB->createCall(*II, Target, Ctx);
46+
47+
if (MIB->isTailCall(*NextII))
48+
MIB->createTailCall(*II, Target, Ctx);
49+
else
50+
MIB->createCall(*II, Target, Ctx);
51+
4752
MIB->moveAnnotations(std::move(OldCall), *II);
4853

4954
// The original offset was set on the jalr of the auipc+jalr pair. Since

bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
8787
return false;
8888
case RISCV::JALR:
8989
case RISCV::C_JALR:
90+
case RISCV::C_JR:
9091
return true;
9192
}
9293
}
@@ -158,6 +159,17 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
158159
DispValue = 0;
159160
DispExpr = nullptr;
160161
PCRelBaseOut = nullptr;
162+
163+
// Check for the following long tail call sequence:
164+
// 1: auipc xi, %pcrel_hi(sym)
165+
// jalr zero, %pcrel_lo(1b)(xi)
166+
if (Instruction.getOpcode() == RISCV::JALR && Begin != End) {
167+
MCInst &PrevInst = *std::prev(End);
168+
if (isRISCVCall(PrevInst, Instruction) &&
169+
Instruction.getOperand(0).getReg() == RISCV::X0)
170+
return IndirectBranchType::POSSIBLE_TAIL_CALL;
171+
}
172+
161173
return IndirectBranchType::UNKNOWN;
162174
}
163175

bolt/test/RISCV/call-annotations.s

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// RUN: llvm-mc -triple riscv64 -filetype obj -o %t.o %s
55
// RUN: ld.lld --emit-relocs -o %t %t.o
66
// RUN: llvm-bolt --enable-bat --print-cfg --print-fix-riscv-calls \
7-
// RUN: --print-only=_start -o /dev/null %t | FileCheck %s
7+
// RUN: -o /dev/null %t | FileCheck %s
88

99
.text
1010
.global f
@@ -19,15 +19,46 @@ f:
1919
// CHECK-NEXT: jal ra, f # Offset: 8
2020
// CHECK-NEXT: jal zero, f # TAILCALL # Offset: 12
2121

22+
// CHECK-LABEL: Binary Function "long_tail" after building cfg {
23+
// CHECK: auipc t1, f
24+
// CHECK-NEXT: jalr zero, -24(t1) # TAILCALL # Offset: 8
25+
26+
// CHECK-LABEL: Binary Function "compressed_tail" after building cfg {
27+
// CHECK: jr a0 # TAILCALL # Offset: 0
28+
2229
// CHECK-LABEL: Binary Function "_start" after fix-riscv-calls {
2330
// CHECK: call f # Offset: 0
2431
// CHECK-NEXT: call f # Offset: 8
2532
// CHECK-NEXT: tail f # TAILCALL # Offset: 12
2633

34+
// CHECK-LABEL: Binary Function "long_tail" after fix-riscv-calls {
35+
// CHECK: tail f # TAILCALL # Offset: 4
36+
37+
// CHECK-LABEL: Binary Function "compressed_tail" after fix-riscv-calls {
38+
// CHECK: jr a0 # TAILCALL # Offset: 0
39+
2740
.globl _start
2841
.p2align 1
2942
_start:
3043
call f
3144
jal f
3245
jal zero, f
3346
.size _start, .-_start
47+
48+
.globl long_tail
49+
.p2align 1
50+
long_tail:
51+
// NOTE: BOLT assumes indirect calls in single-BB functions are tail calls
52+
// so artificially introduce a second BB to force RISC-V-specific analysis
53+
// to get triggered.
54+
beq a0, a1, 1f
55+
1:
56+
tail f
57+
.size long_tail, .-long_tail
58+
59+
.globl compressed_tail
60+
.p2align 1
61+
.option rvc
62+
compressed_tail:
63+
c.jr a0
64+
.size compressed_tail, .-compressed_tail

0 commit comments

Comments
 (0)