Skip to content

Commit 3bbc823

Browse files
committed
cmd/internal/obj/riscv,cmd/link/internal/riscv64: add call trampolines for riscv64
CALL and JMP on riscv64 are currently implemented as an AUIPC+JALR pair. This means that every call requires two instructions and makes use of the REG_TMP register, even when the symbol would be directly reachable via a single JAL instruction. Add support for call trampolines - CALL and JMP are now implemented as a single JAL instruction, with the linker generating trampolines in the case where the symbol is not reachable (more than +/-1MiB from the JAL instruction), is an unknown symbol or does not yet have an address assigned. Each trampoline contains an AUIPC+JALR pair, which the relocation is applied to. Due to the limited reachability of the JAL instruction, combined with the way that the Go linker currently assigns symbol addresses, there are cases where a call is to a symbol that has no address currently assigned. In this situation we have to assume that a trampoline will be required; however, we can patch this up during relocation, potentially calling directly instead. This means that we will end up with trampolines that are unused. In the case of the Go binary, there are around 3,500 trampolines of which approximately 2,300 are unused (around 9,200 bytes of machine instructions). Overall, this removes over 72,000 AUIPC instructions from the Go binary. Change-Id: I2d9ecfb85dfc285c7729a3cd0b3a77b6f6c98be0 Reviewed-on: https://go-review.googlesource.com/c/go/+/345051 Trust: Joel Sing <[email protected]> Run-TryBot: Joel Sing <[email protected]> TryBot-Result: Go Bot <[email protected]> Reviewed-by: Cherry Mui <[email protected]>
1 parent a7fe161 commit 3bbc823

File tree

9 files changed

+269
-135
lines changed

9 files changed

+269
-135
lines changed

src/cmd/asm/internal/asm/testdata/riscv64.s

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -357,13 +357,12 @@ start:
357357
JMP (X5) // 67800200
358358
JMP 4(X5) // 67804200
359359

360-
// JMP and CALL to symbol are encoded as:
361-
// AUIPC $0, TMP
362-
// JALR $0, TMP
363-
// with a R_RISCV_PCREL_ITYPE relocation - the linker resolves the
364-
// real address and updates the immediates for both instructions.
365-
CALL asmtest(SB) // 970f0000
366-
JMP asmtest(SB) // 970f0000
360+
// CALL and JMP to symbol are encoded as JAL (using LR or ZERO
361+
// respectively), with a R_RISCV_CALL relocation. The linker resolves
362+
// the real address and updates the immediate, using a trampoline in
363+
// the case where the address is not directly reachable.
364+
CALL asmtest(SB) // ef000000
365+
JMP asmtest(SB) // 6f000000
367366

368367
// Branch pseudo-instructions
369368
BEQZ X5, 2(PC) // 63840200

src/cmd/internal/obj/riscv/cpu.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,10 @@ const (
260260
// corresponding *obj.Prog uses the temporary register.
261261
USES_REG_TMP = 1 << iota
262262

263+
// NEED_CALL_RELOC is set on JAL instructions to indicate that a
264+
// R_RISCV_CALL relocation is needed.
265+
NEED_CALL_RELOC
266+
263267
// NEED_PCREL_ITYPE_RELOC is set on AUIPC instructions to indicate that
264268
// it is the first instruction in an AUIPC + I-type pair that needs a
265269
// R_RISCV_PCREL_ITYPE relocation.
@@ -632,6 +636,10 @@ var unaryDst = map[obj.As]bool{
632636

633637
// Instruction encoding masks.
634638
const (
639+
// JTypeImmMask is a mask including only the immediate portion of
640+
// J-type instructions.
641+
JTypeImmMask = 0xfffff000
642+
635643
// ITypeImmMask is a mask including only the immediate portion of
636644
// I-type instructions.
637645
ITypeImmMask = 0xfff00000
@@ -643,8 +651,4 @@ const (
643651
// UTypeImmMask is a mask including only the immediate portion of
644652
// U-type instructions.
645653
UTypeImmMask = 0xfffff000
646-
647-
// UJTypeImmMask is a mask including only the immediate portion of
648-
// UJ-type instructions.
649-
UJTypeImmMask = UTypeImmMask
650654
)

src/cmd/internal/obj/riscv/obj.go

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -30,41 +30,19 @@ import (
3030

3131
func buildop(ctxt *obj.Link) {}
3232

33-
// jalrToSym replaces p with a set of Progs needed to jump to the Sym in p.
34-
// lr is the link register to use for the JALR.
35-
// p must be a CALL, JMP or RET.
36-
func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog {
37-
if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY {
38-
ctxt.Diag("unexpected Prog in jalrToSym: %v", p)
39-
return p
33+
func jalToSym(ctxt *obj.Link, p *obj.Prog, lr int16) {
34+
switch p.As {
35+
case obj.ACALL, obj.AJMP, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY:
36+
default:
37+
ctxt.Diag("unexpected Prog in jalToSym: %v", p)
38+
return
4039
}
4140

42-
// TODO(jsing): Consider using a single JAL instruction and teaching
43-
// the linker to provide trampolines for the case where the destination
44-
// offset is too large. This would potentially reduce instructions for
45-
// the common case, but would require three instructions to go via the
46-
// trampoline.
47-
48-
to := p.To
49-
50-
p.As = AAUIPC
51-
p.Mark |= NEED_PCREL_ITYPE_RELOC
52-
p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym})
53-
p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
54-
p.Reg = obj.REG_NONE
55-
p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
56-
p = obj.Appendp(p, newprog)
57-
58-
// Leave Sym only for the CALL reloc in assemble.
59-
p.As = AJALR
41+
p.As = AJAL
42+
p.Mark |= NEED_CALL_RELOC
6043
p.From.Type = obj.TYPE_REG
6144
p.From.Reg = lr
6245
p.Reg = obj.REG_NONE
63-
p.To.Type = obj.TYPE_REG
64-
p.To.Reg = REG_TMP
65-
p.To.Sym = to.Sym
66-
67-
return p
6846
}
6947

7048
// progedit is called individually for each *obj.Prog. It normalizes instruction
@@ -531,16 +509,15 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
531509
case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
532510
switch p.To.Type {
533511
case obj.TYPE_MEM:
534-
jalrToSym(ctxt, p, newprog, REG_LR)
512+
jalToSym(ctxt, p, REG_LR)
535513
}
536514

537515
case obj.AJMP:
538516
switch p.To.Type {
539517
case obj.TYPE_MEM:
540518
switch p.To.Name {
541519
case obj.NAME_EXTERN, obj.NAME_STATIC:
542-
// JMP to symbol.
543-
jalrToSym(ctxt, p, newprog, REG_ZERO)
520+
jalToSym(ctxt, p, REG_ZERO)
544521
}
545522
}
546523

@@ -566,7 +543,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
566543
if retJMP != nil {
567544
p.As = obj.ARET
568545
p.To.Sym = retJMP
569-
p = jalrToSym(ctxt, p, newprog, REG_ZERO)
546+
jalToSym(ctxt, p, REG_ZERO)
570547
} else {
571548
p.As = AJALR
572549
p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
@@ -640,8 +617,9 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
640617
rescan = true
641618
}
642619
case AJAL:
620+
// Linker will handle the intersymbol case and trampolines.
643621
if p.To.Target() == nil {
644-
panic("intersymbol jumps should be expressed as AUIPC+JALR")
622+
break
645623
}
646624
offset := p.To.Target().Pc - p.Pc
647625
if offset < -(1<<20) || (1<<20) <= offset {
@@ -676,14 +654,20 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
676654
// instructions will break everything--don't do it!
677655
for p := cursym.Func().Text; p != nil; p = p.Link {
678656
switch p.As {
679-
case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL:
657+
case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
680658
switch p.To.Type {
681659
case obj.TYPE_BRANCH:
682660
p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
683661
case obj.TYPE_MEM:
684662
panic("unhandled type")
685663
}
686664

665+
case AJAL:
666+
// Linker will handle the intersymbol case and trampolines.
667+
if p.To.Target() != nil {
668+
p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
669+
}
670+
687671
case AAUIPC:
688672
if p.From.Type == obj.TYPE_BRANCH {
689673
low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc)
@@ -802,7 +786,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA
802786
if to_more != nil {
803787
to_more.To.SetTarget(p)
804788
}
805-
p = jalrToSym(ctxt, p, newprog, REG_X5)
789+
jalToSym(ctxt, p, REG_X5)
806790

807791
// JMP start
808792
p = obj.Appendp(p, newprog)
@@ -1187,6 +1171,11 @@ func encodeU(ins *instruction) uint32 {
11871171
return imm<<12 | rd<<7 | enc.opcode
11881172
}
11891173

1174+
// encodeJImmediate encodes an immediate for a J-type RISC-V instruction.
1175+
func encodeJImmediate(imm uint32) uint32 {
1176+
return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12
1177+
}
1178+
11901179
// encodeJ encodes a J-type RISC-V instruction.
11911180
func encodeJ(ins *instruction) uint32 {
11921181
imm := immI(ins.as, ins.imm, 21)
@@ -1195,7 +1184,7 @@ func encodeJ(ins *instruction) uint32 {
11951184
if enc == nil {
11961185
panic("encodeJ: could not encode instruction")
11971186
}
1198-
return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12 | rd<<7 | enc.opcode
1187+
return encodeJImmediate(imm) | rd<<7 | enc.opcode
11991188
}
12001189

12011190
func encodeRawIns(ins *instruction) uint32 {
@@ -1207,6 +1196,16 @@ func encodeRawIns(ins *instruction) uint32 {
12071196
return uint32(ins.imm)
12081197
}
12091198

1199+
func EncodeJImmediate(imm int64) (int64, error) {
1200+
if !immIFits(imm, 21) {
1201+
return 0, fmt.Errorf("immediate %#x does not fit in 21 bits", imm)
1202+
}
1203+
if imm&1 != 0 {
1204+
return 0, fmt.Errorf("immediate %#x is not a multiple of two", imm)
1205+
}
1206+
return int64(encodeJImmediate(uint32(imm))), nil
1207+
}
1208+
12101209
func EncodeIImmediate(imm int64) (int64, error) {
12111210
if !immIFits(imm, 12) {
12121211
return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm)
@@ -2035,17 +2034,18 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
20352034

20362035
for p := cursym.Func().Text; p != nil; p = p.Link {
20372036
switch p.As {
2038-
case AJALR:
2039-
if p.To.Sym != nil {
2040-
// This is a CALL/JMP. We add a relocation only
2041-
// for linker stack checking. No actual
2042-
// relocation is needed.
2037+
case AJAL:
2038+
if p.Mark&NEED_CALL_RELOC == NEED_CALL_RELOC {
20432039
rel := obj.Addrel(cursym)
20442040
rel.Off = int32(p.Pc)
20452041
rel.Siz = 4
20462042
rel.Sym = p.To.Sym
20472043
rel.Add = p.To.Offset
2048-
rel.Type = objabi.R_CALLRISCV
2044+
rel.Type = objabi.R_RISCV_CALL
2045+
}
2046+
case AJALR:
2047+
if p.To.Sym != nil {
2048+
ctxt.Diag("%v: unexpected AJALR with to symbol", p)
20492049
}
20502050

20512051
case AAUIPC, AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:

src/cmd/internal/objabi/reloctype.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,6 @@ const (
5959
// R_CALLMIPS (only used on mips64) resolves to non-PC-relative target address
6060
// of a CALL (JAL) instruction, by encoding the address into the instruction.
6161
R_CALLMIPS
62-
// R_CALLRISCV marks RISC-V CALLs for stack checking.
63-
R_CALLRISCV
6462
R_CONST
6563
R_PCREL
6664
// R_TLS_LE, used on 386, amd64, and ARM, resolves to the offset of the
@@ -218,6 +216,15 @@ const (
218216

219217
// RISC-V.
220218

219+
// R_RISCV_CALL relocates a J-type instruction with a 21 bit PC-relative
220+
// address.
221+
R_RISCV_CALL
222+
223+
// R_RISCV_CALL_TRAMP is the same as R_RISCV_CALL but denotes the use of a
224+
// trampoline, which we may be able to avoid during relocation. These are
225+
// only used by the linker and are not emitted by the compiler or assembler.
226+
R_RISCV_CALL_TRAMP
227+
221228
// R_RISCV_PCREL_ITYPE resolves a 32-bit PC-relative address using an
222229
// AUIPC + I-type instruction pair.
223230
R_RISCV_PCREL_ITYPE
@@ -274,7 +281,7 @@ const (
274281
// the target address in register or memory.
275282
func (r RelocType) IsDirectCall() bool {
276283
switch r {
277-
case R_CALL, R_CALLARM, R_CALLARM64, R_CALLMIPS, R_CALLPOWER, R_CALLRISCV:
284+
case R_CALL, R_CALLARM, R_CALLARM64, R_CALLMIPS, R_CALLPOWER, R_RISCV_CALL, R_RISCV_CALL_TRAMP:
278285
return true
279286
}
280287
return false

src/cmd/internal/objabi/reloctype_string.go

Lines changed: 51 additions & 50 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)