Skip to content

Commit 8f4fd3f

Browse files
Zheng-Xu and cherrymui
authored and committed
build: support frame-pointer for arm64
Supporting frame-pointer makes Linux's perf and other profilers much more useful because it lets them gather a stack trace efficiently on profiling events. Major changes include:
1. save FP on the word below where RSP is pointing to (proposed by Cherry and Austin)
2. adjust some specific offsets in runtime assembly and wrapper code
3. add support to FP in goroutine scheduler
4. adjust link stack overflow check to take the extra word into account
5. adjust nosplit test cases to enable frame sizes which are 16 bytes aligned

Performance impacts on go1 benchmarks:

Enable frame-pointer (by default)

name old time/op new time/op delta
BinaryTree17-46 5.94s ± 0% 6.00s ± 0% +1.03% (p=0.029 n=4+4)
Fannkuch11-46 2.84s ± 1% 2.77s ± 0% -2.58% (p=0.008 n=5+5)
FmtFprintfEmpty-46 55.0ns ± 1% 58.9ns ± 1% +7.06% (p=0.008 n=5+5)
FmtFprintfString-46 102ns ± 0% 105ns ± 0% +2.94% (p=0.008 n=5+5)
FmtFprintfInt-46 118ns ± 0% 117ns ± 1% -1.19% (p=0.000 n=4+5)
FmtFprintfIntInt-46 181ns ± 0% 182ns ± 1% ~ (p=0.444 n=5+5)
FmtFprintfPrefixedInt-46 215ns ± 1% 214ns ± 0% ~ (p=0.254 n=5+4)
FmtFprintfFloat-46 292ns ± 0% 296ns ± 0% +1.46% (p=0.029 n=4+4)
FmtManyArgs-46 720ns ± 0% 732ns ± 0% +1.72% (p=0.008 n=5+5)
GobDecode-46 9.82ms ± 1% 10.03ms ± 2% +2.10% (p=0.008 n=5+5)
GobEncode-46 8.14ms ± 0% 8.72ms ± 1% +7.14% (p=0.008 n=5+5)
Gzip-46 420ms ± 0% 424ms ± 0% +0.92% (p=0.008 n=5+5)
Gunzip-46 48.2ms ± 0% 48.4ms ± 0% +0.41% (p=0.008 n=5+5)
HTTPClientServer-46 201µs ± 4% 201µs ± 0% ~ (p=0.730 n=5+4)
JSONEncode-46 17.1ms ± 0% 17.7ms ± 1% +3.80% (p=0.008 n=5+5)
JSONDecode-46 88.0ms ± 0% 90.1ms ± 0% +2.42% (p=0.008 n=5+5)
Mandelbrot200-46 5.06ms ± 0% 5.07ms ± 0% ~ (p=0.310 n=5+5)
GoParse-46 5.04ms ± 0% 5.12ms ± 0% +1.53% (p=0.008 n=5+5)
RegexpMatchEasy0_32-46 117ns ± 0% 117ns ± 0% ~ (all equal)
RegexpMatchEasy0_1K-46 332ns ± 0% 329ns ± 0% -0.78% (p=0.008 n=5+5)
RegexpMatchEasy1_32-46 104ns ± 0% 113ns ± 0% +8.65% (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46 563ns ± 0% 569ns ± 0% +1.10% (p=0.008 n=5+5)
RegexpMatchMedium_32-46 167ns ± 2% 177ns ± 1% +5.74% (p=0.008 n=5+5)
RegexpMatchMedium_1K-46 49.5µs ± 0% 53.4µs ± 0% +7.81% (p=0.008 n=5+5)
RegexpMatchHard_32-46 2.56µs ± 1% 2.72µs ± 0% +6.01% (p=0.008 n=5+5)
RegexpMatchHard_1K-46 77.0µs ± 0% 81.8µs ± 0% +6.24% (p=0.016 n=5+4)
Revcomp-46 631ms ± 1% 627ms ± 1% ~ (p=0.095 n=5+5)
Template-46 81.8ms ± 0% 86.3ms ± 0% +5.55% (p=0.008 n=5+5)
TimeParse-46 423ns ± 0% 432ns ± 0% +2.32% (p=0.008 n=5+5)
TimeFormat-46 478ns ± 2% 497ns ± 1% +3.89% (p=0.008 n=5+5)
[Geo mean] 71.6µs 73.3µs +2.45%

name old speed new speed delta
GobDecode-46 78.1MB/s ± 1% 76.6MB/s ± 2% -2.04% (p=0.008 n=5+5)
GobEncode-46 94.3MB/s ± 0% 88.0MB/s ± 1% -6.67% (p=0.008 n=5+5)
Gzip-46 46.2MB/s ± 0% 45.8MB/s ± 0% -0.91% (p=0.008 n=5+5)
Gunzip-46 403MB/s ± 0% 401MB/s ± 0% -0.41% (p=0.008 n=5+5)
JSONEncode-46 114MB/s ± 0% 109MB/s ± 1% -3.66% (p=0.008 n=5+5)
JSONDecode-46 22.0MB/s ± 0% 21.5MB/s ± 0% -2.35% (p=0.008 n=5+5)
GoParse-46 11.5MB/s ± 0% 11.3MB/s ± 0% -1.51% (p=0.008 n=5+5)
RegexpMatchEasy0_32-46 272MB/s ± 0% 272MB/s ± 1% ~ (p=0.190 n=4+5)
RegexpMatchEasy0_1K-46 3.08GB/s ± 0% 3.11GB/s ± 0% +0.77% (p=0.008 n=5+5)
RegexpMatchEasy1_32-46 306MB/s ± 0% 283MB/s ± 0% -7.63% (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46 1.82GB/s ± 0% 1.80GB/s ± 0% -1.07% (p=0.008 n=5+5)
RegexpMatchMedium_32-46 5.99MB/s ± 0% 5.64MB/s ± 1% -5.77% (p=0.016 n=4+5)
RegexpMatchMedium_1K-46 20.7MB/s ± 0% 19.2MB/s ± 0% -7.25% (p=0.008 n=5+5)
RegexpMatchHard_32-46 12.5MB/s ± 1% 11.8MB/s ± 0% -5.66% (p=0.008 n=5+5)
RegexpMatchHard_1K-46 13.3MB/s ± 0% 12.5MB/s ± 1% -6.01% (p=0.008 n=5+5)
Revcomp-46 402MB/s ± 1% 405MB/s ± 1% ~ (p=0.095 n=5+5)
Template-46 23.7MB/s ± 0% 22.5MB/s ± 0% -5.25% (p=0.008 n=5+5)
[Geo mean] 82.2MB/s 79.6MB/s -3.26%

Disable frame-pointer (GOEXPERIMENT=noframepointer)

name old time/op new time/op delta
BinaryTree17-46 5.94s ± 0% 5.96s ± 0% +0.39% (p=0.029 n=4+4)
Fannkuch11-46 2.84s ± 1% 2.79s ± 1% -1.68% (p=0.008 n=5+5)
FmtFprintfEmpty-46 55.0ns ± 1% 55.2ns ± 3% ~ (p=0.794 n=5+5)
FmtFprintfString-46 102ns ± 0% 103ns ± 0% +0.98% (p=0.016 n=5+4)
FmtFprintfInt-46 118ns ± 0% 115ns ± 0% -2.54% (p=0.029 n=4+4)
FmtFprintfIntInt-46 181ns ± 0% 179ns ± 0% -1.10% (p=0.000 n=5+4)
FmtFprintfPrefixedInt-46 215ns ± 1% 213ns ± 0% ~ (p=0.143 n=5+4)
FmtFprintfFloat-46 292ns ± 0% 300ns ± 0% +2.83% (p=0.029 n=4+4)
FmtManyArgs-46 720ns ± 0% 739ns ± 0% +2.64% (p=0.008 n=5+5)
GobDecode-46 9.82ms ± 1% 9.78ms ± 1% ~ (p=0.151 n=5+5)
GobEncode-46 8.14ms ± 0% 8.12ms ± 1% ~ (p=0.690 n=5+5)
Gzip-46 420ms ± 0% 420ms ± 0% ~ (p=0.548 n=5+5)
Gunzip-46 48.2ms ± 0% 48.0ms ± 0% -0.33% (p=0.032 n=5+5)
HTTPClientServer-46 201µs ± 4% 199µs ± 3% ~ (p=0.548 n=5+5)
JSONEncode-46 17.1ms ± 0% 17.2ms ± 0% ~ (p=0.056 n=5+5)
JSONDecode-46 88.0ms ± 0% 88.6ms ± 0% +0.64% (p=0.008 n=5+5)
Mandelbrot200-46 5.06ms ± 0% 5.07ms ± 0% ~ (p=0.548 n=5+5)
GoParse-46 5.04ms ± 0% 5.07ms ± 0% +0.65% (p=0.008 n=5+5)
RegexpMatchEasy0_32-46 117ns ± 0% 112ns ± 4% -4.27% (p=0.016 n=4+5)
RegexpMatchEasy0_1K-46 332ns ± 0% 330ns ± 1% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32-46 104ns ± 0% 110ns ± 1% +5.29% (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46 563ns ± 0% 567ns ± 2% ~ (p=0.151 n=5+5)
RegexpMatchMedium_32-46 167ns ± 2% 166ns ± 0% ~ (p=0.333 n=5+4)
RegexpMatchMedium_1K-46 49.5µs ± 0% 49.6µs ± 0% ~ (p=0.841 n=5+5)
RegexpMatchHard_32-46 2.56µs ± 1% 2.49µs ± 0% -2.81% (p=0.008 n=5+5)
RegexpMatchHard_1K-46 77.0µs ± 0% 75.8µs ± 0% -1.55% (p=0.008 n=5+5)
Revcomp-46 631ms ± 1% 628ms ± 0% ~ (p=0.095 n=5+5)
Template-46 81.8ms ± 0% 84.3ms ± 1% +3.05% (p=0.008 n=5+5)
TimeParse-46 423ns ± 0% 425ns ± 0% +0.52% (p=0.008 n=5+5)
TimeFormat-46 478ns ± 2% 478ns ± 1% ~ (p=1.000 n=5+5)
[Geo mean] 71.6µs 71.6µs -0.01%

name old speed new speed delta
GobDecode-46 78.1MB/s ± 1% 78.5MB/s ± 1% ~ (p=0.151 n=5+5)
GobEncode-46 94.3MB/s ± 0% 94.5MB/s ± 1% ~ (p=0.690 n=5+5)
Gzip-46 46.2MB/s ± 0% 46.2MB/s ± 0% ~ (p=0.571 n=5+5)
Gunzip-46 403MB/s ± 0% 404MB/s ± 0% +0.33% (p=0.032 n=5+5)
JSONEncode-46 114MB/s ± 0% 113MB/s ± 0% ~ (p=0.056 n=5+5)
JSONDecode-46 22.0MB/s ± 0% 21.9MB/s ± 0% -0.64% (p=0.008 n=5+5)
GoParse-46 11.5MB/s ± 0% 11.4MB/s ± 0% -0.64% (p=0.008 n=5+5)
RegexpMatchEasy0_32-46 272MB/s ± 0% 285MB/s ± 4% +4.74% (p=0.016 n=4+5)
RegexpMatchEasy0_1K-46 3.08GB/s ± 0% 3.10GB/s ± 1% ~ (p=0.151 n=5+5)
RegexpMatchEasy1_32-46 306MB/s ± 0% 290MB/s ± 1% -5.21% (p=0.029 n=4+4)
RegexpMatchEasy1_1K-46 1.82GB/s ± 0% 1.81GB/s ± 2% ~ (p=0.151 n=5+5)
RegexpMatchMedium_32-46 5.99MB/s ± 0% 6.02MB/s ± 1% ~ (p=0.063 n=4+5)
RegexpMatchMedium_1K-46 20.7MB/s ± 0% 20.7MB/s ± 0% ~ (p=0.659 n=5+5)
RegexpMatchHard_32-46 12.5MB/s ± 1% 12.8MB/s ± 0% +2.88% (p=0.008 n=5+5)
RegexpMatchHard_1K-46 13.3MB/s ± 0% 13.5MB/s ± 0% +1.58% (p=0.008 n=5+5)
Revcomp-46 402MB/s ± 1% 405MB/s ± 0% ~ (p=0.095 n=5+5)
Template-46 23.7MB/s ± 0% 23.0MB/s ± 1% -2.95% (p=0.008 n=5+5)
[Geo mean] 82.2MB/s 82.3MB/s +0.04%

Frame-pointer is enabled on Linux by default but can be disabled by setting: GOEXPERIMENT=noframepointer.

Fixes #10110

Change-Id: I1bfaca6dba29a63009d7c6ab04ed7a1413d9479e
Reviewed-on: https://go-review.googlesource.com/61511
Reviewed-by: Cherry Zhang <[email protected]>
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 7b88b22 commit 8f4fd3f

File tree

14 files changed

+243
-57
lines changed

14 files changed

+243
-57
lines changed

src/cmd/compile/internal/arm64/ggen.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ import (
1414
var darwin = objabi.GOOS == "darwin"
1515

1616
func padframe(frame int64) int64 {
17-
// arm64 requires that the frame size (not counting saved LR)
18-
// be empty or be 8 mod 16. If not, pad it.
19-
if frame != 0 && frame%16 != 8 {
20-
frame += 8
17+
// arm64 requires that the frame size (not counting saved FP&LR)
18+
// be 16 bytes aligned. If not, pad it.
19+
if frame%16 != 0 {
20+
frame += 16 - (frame % 16)
2121
}
2222
return frame
2323
}

src/cmd/compile/internal/gc/pgen.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,8 @@ func createSimpleVars(automDecls []*Node) ([]*Node, []*dwarf.Var, map[*Node]bool
427427
if Ctxt.FixedFrameSize() == 0 {
428428
offs -= int64(Widthptr)
429429
}
430-
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
430+
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" {
431+
// A word of stack space is reserved for FP on ARM64 even if the frame pointer is disabled
431432
offs -= int64(Widthptr)
432433
}
433434

@@ -607,7 +608,8 @@ func stackOffset(slot ssa.LocalSlot) int32 {
607608
if Ctxt.FixedFrameSize() == 0 {
608609
base -= int64(Widthptr)
609610
}
610-
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
611+
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) || objabi.GOARCH == "arm64" {
612+
// A word of stack space is reserved for FP on ARM64 even if the frame pointer is disabled
611613
base -= int64(Widthptr)
612614
}
613615
case PPARAM, PPARAMOUT:

src/cmd/internal/obj/arm64/asm7.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ type ctxt7 struct {
4949
blitrl *obj.Prog
5050
elitrl *obj.Prog
5151
autosize int32
52+
extrasize int32
5253
instoffset int64
5354
pc int64
5455
pool struct {
@@ -777,7 +778,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
777778
ctxt.Diag("arm64 ops not initialized, call arm64.buildop first")
778779
}
779780

780-
c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset&0xffffffff) + 8}
781+
c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
782+
p.To.Offset &= 0xffffffff // extrasize is no longer needed
781783

782784
bflag := 1
783785
pc := int64(0)
@@ -1436,7 +1438,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
14361438
// a.Offset is still relative to pseudo-SP.
14371439
a.Reg = obj.REG_NONE
14381440
}
1439-
c.instoffset = int64(c.autosize) + a.Offset
1441+
// The frame top 8 or 16 bytes are for FP
1442+
c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
14401443
return autoclass(c.instoffset)
14411444

14421445
case obj.NAME_PARAM:
@@ -1536,7 +1539,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
15361539
// a.Offset is still relative to pseudo-SP.
15371540
a.Reg = obj.REG_NONE
15381541
}
1539-
c.instoffset = int64(c.autosize) + a.Offset
1542+
// The frame top 8 or 16 bytes are for FP
1543+
c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
15401544

15411545
case obj.NAME_PARAM:
15421546
if a.Reg == REGSP {

src/cmd/internal/obj/arm64/obj7.go

Lines changed: 166 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -542,32 +542,35 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
542542
c.autosize += 8
543543
}
544544

545-
if c.autosize != 0 && c.autosize&(16-1) != 0 {
546-
// The frame includes an LR.
547-
// If the frame size is 8, it's only an LR,
548-
// so there's no potential for breaking references to
549-
// local variables by growing the frame size,
550-
// because there are no local variables.
551-
// But otherwise, if there is a non-empty locals section,
552-
// the author of the code is responsible for making sure
553-
// that the frame size is 8 mod 16.
554-
if c.autosize == 8 {
555-
c.autosize += 8
556-
c.cursym.Func.Locals += 8
545+
if c.autosize != 0 {
546+
extrasize := int32(0)
547+
if c.autosize%16 == 8 {
548+
// Allocate extra 8 bytes on the frame top to save FP
549+
extrasize = 8
550+
} else if c.autosize&(16-1) == 0 {
551+
// Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16
552+
extrasize = 16
557553
} else {
558-
c.ctxt.Diag("%v: unaligned frame size %d - must be 8 mod 16 (or 0)", p, c.autosize-8)
554+
c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8)
559555
}
556+
c.autosize += extrasize
557+
c.cursym.Func.Locals += extrasize
558+
559+
// low 32 bits for autosize
560+
// high 32 bits for extrasize
561+
p.To.Offset = int64(c.autosize) | int64(extrasize)<<32
562+
} else {
563+
// NOFRAME
564+
p.To.Offset = 0
560565
}
566+
561567
if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 {
562568
if c.ctxt.Debugvlog {
563569
c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name)
564570
}
565571
c.cursym.Func.Text.Mark |= LEAF
566572
}
567573

568-
// FP offsets need an updated p.To.Offset.
569-
p.To.Offset = int64(c.autosize) - 8
570-
571574
if cursym.Func.Text.Mark&LEAF != 0 {
572575
cursym.Set(obj.AttrLeaf, true)
573576
if p.From.Sym.NoFrame() {
@@ -631,6 +634,26 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
631634
q1.Spadj = aoffset
632635
}
633636

637+
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
638+
q1 = obj.Appendp(q1, c.newprog)
639+
q1.Pos = p.Pos
640+
q1.As = AMOVD
641+
q1.From.Type = obj.TYPE_REG
642+
q1.From.Reg = REGFP
643+
q1.To.Type = obj.TYPE_MEM
644+
q1.To.Reg = REGSP
645+
q1.To.Offset = -8
646+
647+
q1 = obj.Appendp(q1, c.newprog)
648+
q1.Pos = p.Pos
649+
q1.As = ASUB
650+
q1.From.Type = obj.TYPE_CONST
651+
q1.From.Offset = 8
652+
q1.Reg = REGSP
653+
q1.To.Type = obj.TYPE_REG
654+
q1.To.Reg = REGFP
655+
}
656+
634657
if c.cursym.Func.Text.From.Sym.Wrapper() {
635658
// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
636659
//
@@ -753,9 +776,30 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
753776
p.To.Type = obj.TYPE_REG
754777
p.To.Reg = REGSP
755778
p.Spadj = -c.autosize
779+
780+
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
781+
p = obj.Appendp(p, c.newprog)
782+
p.As = ASUB
783+
p.From.Type = obj.TYPE_CONST
784+
p.From.Offset = 8
785+
p.Reg = REGSP
786+
p.To.Type = obj.TYPE_REG
787+
p.To.Reg = REGFP
788+
}
756789
}
757790
} else {
758791
/* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/
792+
793+
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
794+
p.As = AMOVD
795+
p.From.Type = obj.TYPE_MEM
796+
p.From.Reg = REGSP
797+
p.From.Offset = -8
798+
p.To.Type = obj.TYPE_REG
799+
p.To.Reg = REGFP
800+
p = obj.Appendp(p, c.newprog)
801+
}
802+
759803
aoffset := c.autosize
760804

761805
if aoffset > 0xF0 {
@@ -814,7 +858,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
814858
p.Spadj = int32(+p.From.Offset)
815859
}
816860
}
817-
break
818861

819862
case obj.AGETCALLERPC:
820863
if cursym.Leaf() {
@@ -828,6 +871,112 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
828871
p.From.Type = obj.TYPE_MEM
829872
p.From.Reg = REGSP
830873
}
874+
875+
case obj.ADUFFCOPY:
876+
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
877+
// ADR ret_addr, R27
878+
// STP (FP, R27), -24(SP)
879+
// SUB 24, SP, FP
880+
// DUFFCOPY
881+
// ret_addr:
882+
// SUB 8, SP, FP
883+
884+
q1 := p
885+
// copy DUFFCOPY from q1 to q4
886+
q4 := obj.Appendp(p, c.newprog)
887+
q4.Pos = p.Pos
888+
q4.As = obj.ADUFFCOPY
889+
q4.To = p.To
890+
891+
q1.As = AADR
892+
q1.From.Type = obj.TYPE_BRANCH
893+
q1.To.Type = obj.TYPE_REG
894+
q1.To.Reg = REG_R27
895+
896+
q2 := obj.Appendp(q1, c.newprog)
897+
q2.Pos = p.Pos
898+
q2.As = ASTP
899+
q2.From.Type = obj.TYPE_REGREG
900+
q2.From.Reg = REGFP
901+
q2.From.Offset = int64(REG_R27)
902+
q2.To.Type = obj.TYPE_MEM
903+
q2.To.Reg = REGSP
904+
q2.To.Offset = -24
905+
906+
// maintain FP for DUFFCOPY
907+
q3 := obj.Appendp(q2, c.newprog)
908+
q3.Pos = p.Pos
909+
q3.As = ASUB
910+
q3.From.Type = obj.TYPE_CONST
911+
q3.From.Offset = 24
912+
q3.Reg = REGSP
913+
q3.To.Type = obj.TYPE_REG
914+
q3.To.Reg = REGFP
915+
916+
q5 := obj.Appendp(q4, c.newprog)
917+
q5.Pos = p.Pos
918+
q5.As = ASUB
919+
q5.From.Type = obj.TYPE_CONST
920+
q5.From.Offset = 8
921+
q5.Reg = REGSP
922+
q5.To.Type = obj.TYPE_REG
923+
q5.To.Reg = REGFP
924+
q1.Pcond = q5
925+
p = q5
926+
}
927+
928+
case obj.ADUFFZERO:
929+
if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
930+
// ADR ret_addr, R27
931+
// STP (FP, R27), -24(SP)
932+
// SUB 24, SP, FP
933+
// DUFFZERO
934+
// ret_addr:
935+
// SUB 8, SP, FP
936+
937+
q1 := p
938+
// copy DUFFZERO from q1 to q4
939+
q4 := obj.Appendp(p, c.newprog)
940+
q4.Pos = p.Pos
941+
q4.As = obj.ADUFFZERO
942+
q4.To = p.To
943+
944+
q1.As = AADR
945+
q1.From.Type = obj.TYPE_BRANCH
946+
q1.To.Type = obj.TYPE_REG
947+
q1.To.Reg = REG_R27
948+
949+
q2 := obj.Appendp(q1, c.newprog)
950+
q2.Pos = p.Pos
951+
q2.As = ASTP
952+
q2.From.Type = obj.TYPE_REGREG
953+
q2.From.Reg = REGFP
954+
q2.From.Offset = int64(REG_R27)
955+
q2.To.Type = obj.TYPE_MEM
956+
q2.To.Reg = REGSP
957+
q2.To.Offset = -24
958+
959+
// maintain FP for DUFFZERO
960+
q3 := obj.Appendp(q2, c.newprog)
961+
q3.Pos = p.Pos
962+
q3.As = ASUB
963+
q3.From.Type = obj.TYPE_CONST
964+
q3.From.Offset = 24
965+
q3.Reg = REGSP
966+
q3.To.Type = obj.TYPE_REG
967+
q3.To.Reg = REGFP
968+
969+
q5 := obj.Appendp(q4, c.newprog)
970+
q5.Pos = p.Pos
971+
q5.As = ASUB
972+
q5.From.Type = obj.TYPE_CONST
973+
q5.From.Offset = 8
974+
q5.Reg = REGSP
975+
q5.To.Type = obj.TYPE_REG
976+
q5.To.Reg = REGFP
977+
q1.Pcond = q5
978+
p = q5
979+
}
831980
}
832981
}
833982
}

src/cmd/internal/objabi/util.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func init() {
7676
}
7777

7878
func Framepointer_enabled(goos, goarch string) bool {
79-
return framepointer_enabled != 0 && goarch == "amd64" && goos != "nacl"
79+
return framepointer_enabled != 0 && (goarch == "amd64" && goos != "nacl" || goarch == "arm64" && goos == "linux")
8080
}
8181

8282
func addexp(s string) {

src/cmd/link/internal/ld/lib.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1815,6 +1815,10 @@ func (ctxt *Link) dostkcheck() {
18151815
ch.up = nil
18161816

18171817
ch.limit = objabi.StackLimit - callsize(ctxt)
1818+
if objabi.GOARCH == "arm64" {
1819+
// need extra 8 bytes below SP to save FP
1820+
ch.limit -= 8
1821+
}
18181822

18191823
// Check every function, but do the nosplit functions in a first pass,
18201824
// to make the printed failure chains as short as possible.

0 commit comments

Comments
 (0)