Skip to content

Commit b662e52

Browse files
committed
cmd/compile: use CBZ/CBNZ instructions on ARM64
These are conditional branches that take a register instead of flags as the control value. Reduces binary size by 0.7% and text size by 2.4% (using cmd/go as an example). Change-Id: I0020cfde745f9eab680b8b949ad28c87fe183afd Reviewed-on: https://go-review.googlesource.com/30030 Reviewed-by: David Chase <[email protected]>
1 parent 4c9a372 commit b662e52

File tree

7 files changed

+385
-170
lines changed

7 files changed

+385
-170
lines changed

src/cmd/compile/internal/arm64/prog.go

+19-17
Original file line numberDiff line numberDiff line change
@@ -143,23 +143,25 @@ var progtable = [arm64.ALAST & obj.AMask]gc.ProgInfo{
143143
arm64.ASTLXR & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.Move},
144144

145145
// Jumps
146-
arm64.AB & obj.AMask: {Flags: gc.Jump | gc.Break},
147-
arm64.ABL & obj.AMask: {Flags: gc.Call},
148-
arm64.ABEQ & obj.AMask: {Flags: gc.Cjmp},
149-
arm64.ABNE & obj.AMask: {Flags: gc.Cjmp},
150-
arm64.ABGE & obj.AMask: {Flags: gc.Cjmp},
151-
arm64.ABLT & obj.AMask: {Flags: gc.Cjmp},
152-
arm64.ABGT & obj.AMask: {Flags: gc.Cjmp},
153-
arm64.ABLE & obj.AMask: {Flags: gc.Cjmp},
154-
arm64.ABLO & obj.AMask: {Flags: gc.Cjmp},
155-
arm64.ABLS & obj.AMask: {Flags: gc.Cjmp},
156-
arm64.ABHI & obj.AMask: {Flags: gc.Cjmp},
157-
arm64.ABHS & obj.AMask: {Flags: gc.Cjmp},
158-
arm64.ACBZ & obj.AMask: {Flags: gc.Cjmp},
159-
arm64.ACBNZ & obj.AMask: {Flags: gc.Cjmp},
160-
obj.ARET: {Flags: gc.Break},
161-
obj.ADUFFZERO: {Flags: gc.Call},
162-
obj.ADUFFCOPY: {Flags: gc.Call},
146+
arm64.AB & obj.AMask: {Flags: gc.Jump | gc.Break},
147+
arm64.ABL & obj.AMask: {Flags: gc.Call},
148+
arm64.ABEQ & obj.AMask: {Flags: gc.Cjmp},
149+
arm64.ABNE & obj.AMask: {Flags: gc.Cjmp},
150+
arm64.ABGE & obj.AMask: {Flags: gc.Cjmp},
151+
arm64.ABLT & obj.AMask: {Flags: gc.Cjmp},
152+
arm64.ABGT & obj.AMask: {Flags: gc.Cjmp},
153+
arm64.ABLE & obj.AMask: {Flags: gc.Cjmp},
154+
arm64.ABLO & obj.AMask: {Flags: gc.Cjmp},
155+
arm64.ABLS & obj.AMask: {Flags: gc.Cjmp},
156+
arm64.ABHI & obj.AMask: {Flags: gc.Cjmp},
157+
arm64.ABHS & obj.AMask: {Flags: gc.Cjmp},
158+
arm64.ACBZ & obj.AMask: {Flags: gc.Cjmp},
159+
arm64.ACBNZ & obj.AMask: {Flags: gc.Cjmp},
160+
arm64.ACBZW & obj.AMask: {Flags: gc.Cjmp},
161+
arm64.ACBNZW & obj.AMask: {Flags: gc.Cjmp},
162+
obj.ARET: {Flags: gc.Break},
163+
obj.ADUFFZERO: {Flags: gc.Call},
164+
obj.ADUFFCOPY: {Flags: gc.Call},
163165
}
164166

165167
func proginfo(p *obj.Prog) gc.ProgInfo {

src/cmd/compile/internal/arm64/ssa.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,10 @@ var blockJump = map[ssa.BlockKind]struct {
761761
ssa.BlockARM64UGE: {arm64.ABHS, arm64.ABLO},
762762
ssa.BlockARM64UGT: {arm64.ABHI, arm64.ABLS},
763763
ssa.BlockARM64ULE: {arm64.ABLS, arm64.ABHI},
764+
ssa.BlockARM64Z: {arm64.ACBZ, arm64.ACBNZ},
765+
ssa.BlockARM64NZ: {arm64.ACBNZ, arm64.ACBZ},
766+
ssa.BlockARM64ZW: {arm64.ACBZW, arm64.ACBNZW},
767+
ssa.BlockARM64NZW: {arm64.ACBNZW, arm64.ACBZW},
764768
}
765769

766770
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
@@ -807,7 +811,9 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
807811
ssa.BlockARM64LT, ssa.BlockARM64GE,
808812
ssa.BlockARM64LE, ssa.BlockARM64GT,
809813
ssa.BlockARM64ULT, ssa.BlockARM64UGT,
810-
ssa.BlockARM64ULE, ssa.BlockARM64UGE:
814+
ssa.BlockARM64ULE, ssa.BlockARM64UGE,
815+
ssa.BlockARM64Z, ssa.BlockARM64NZ,
816+
ssa.BlockARM64ZW, ssa.BlockARM64NZW:
811817
jmp := blockJump[b.Kind]
812818
var p *obj.Prog
813819
switch next {
@@ -827,6 +833,10 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
827833
q.To.Type = obj.TYPE_BRANCH
828834
s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
829835
}
836+
if !b.Control.Type.IsFlags() {
837+
p.From.Type = obj.TYPE_REG
838+
p.From.Reg = b.Control.Reg()
839+
}
830840

831841
default:
832842
b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())

src/cmd/compile/internal/ssa/gen/ARM64.rules

+25-11
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@
476476
(If (GreaterEqual cc) yes no) -> (GE cc yes no)
477477
(If (GreaterEqualU cc) yes no) -> (UGE cc yes no)
478478

479-
(If cond yes no) -> (NE (CMPconst [0] cond) yes no)
479+
(If cond yes no) -> (NZ cond yes no)
480480

481481
// atomic intrinsics
482482
// Note: these ops do not accept offset.
@@ -503,16 +503,21 @@
503503
// Optimizations
504504

505505
// Absorb boolean tests into block
506-
(NE (CMPconst [0] (Equal cc)) yes no) -> (EQ cc yes no)
507-
(NE (CMPconst [0] (NotEqual cc)) yes no) -> (NE cc yes no)
508-
(NE (CMPconst [0] (LessThan cc)) yes no) -> (LT cc yes no)
509-
(NE (CMPconst [0] (LessThanU cc)) yes no) -> (ULT cc yes no)
510-
(NE (CMPconst [0] (LessEqual cc)) yes no) -> (LE cc yes no)
511-
(NE (CMPconst [0] (LessEqualU cc)) yes no) -> (ULE cc yes no)
512-
(NE (CMPconst [0] (GreaterThan cc)) yes no) -> (GT cc yes no)
513-
(NE (CMPconst [0] (GreaterThanU cc)) yes no) -> (UGT cc yes no)
514-
(NE (CMPconst [0] (GreaterEqual cc)) yes no) -> (GE cc yes no)
515-
(NE (CMPconst [0] (GreaterEqualU cc)) yes no) -> (UGE cc yes no)
506+
(NZ (Equal cc) yes no) -> (EQ cc yes no)
507+
(NZ (NotEqual cc) yes no) -> (NE cc yes no)
508+
(NZ (LessThan cc) yes no) -> (LT cc yes no)
509+
(NZ (LessThanU cc) yes no) -> (ULT cc yes no)
510+
(NZ (LessEqual cc) yes no) -> (LE cc yes no)
511+
(NZ (LessEqualU cc) yes no) -> (ULE cc yes no)
512+
(NZ (GreaterThan cc) yes no) -> (GT cc yes no)
513+
(NZ (GreaterThanU cc) yes no) -> (UGT cc yes no)
514+
(NZ (GreaterEqual cc) yes no) -> (GE cc yes no)
515+
(NZ (GreaterEqualU cc) yes no) -> (UGE cc yes no)
516+
517+
(EQ (CMPconst [0] x) yes no) -> (Z x yes no)
518+
(NE (CMPconst [0] x) yes no) -> (NZ x yes no)
519+
(EQ (CMPWconst [0] x) yes no) -> (ZW x yes no)
520+
(NE (CMPWconst [0] x) yes no) -> (NZW x yes no)
516521

517522
// fold offset into address
518523
(ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) -> (MOVDaddr [off1+off2] {sym} ptr)
@@ -925,6 +930,15 @@
925930
(UGE (FlagGT_ULT) yes no) -> (First nil no yes)
926931
(UGE (FlagGT_UGT) yes no) -> (First nil yes no)
927932

933+
(Z (MOVDconst [0]) yes no) -> (First nil yes no)
934+
(Z (MOVDconst [c]) yes no) && c != 0 -> (First nil no yes)
935+
(NZ (MOVDconst [0]) yes no) -> (First nil no yes)
936+
(NZ (MOVDconst [c]) yes no) && c != 0 -> (First nil yes no)
937+
(ZW (MOVDconst [c]) yes no) && int32(c) == 0 -> (First nil yes no)
938+
(ZW (MOVDconst [c]) yes no) && int32(c) != 0 -> (First nil no yes)
939+
(NZW (MOVDconst [c]) yes no) && int32(c) == 0 -> (First nil no yes)
940+
(NZW (MOVDconst [c]) yes no) && int32(c) != 0 -> (First nil yes no)
941+
928942
// absorb InvertFlags into branches
929943
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
930944
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)

src/cmd/compile/internal/ssa/gen/ARM64Ops.go

+4
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,10 @@ func init() {
514514
{name: "ULE"},
515515
{name: "UGT"},
516516
{name: "UGE"},
517+
{name: "Z"}, // Control == 0 (take a register instead of flags)
518+
{name: "NZ"}, // Control != 0
519+
{name: "ZW"}, // Control == 0, 32-bit
520+
{name: "NZW"}, // Control != 0, 32-bit
517521
}
518522

519523
archs = append(archs, arch{

src/cmd/compile/internal/ssa/opGen.go

+8
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ const (
6767
BlockARM64ULE
6868
BlockARM64UGT
6969
BlockARM64UGE
70+
BlockARM64Z
71+
BlockARM64NZ
72+
BlockARM64ZW
73+
BlockARM64NZW
7074

7175
BlockMIPS64EQ
7276
BlockMIPS64NE
@@ -160,6 +164,10 @@ var blockString = [...]string{
160164
BlockARM64ULE: "ULE",
161165
BlockARM64UGT: "UGT",
162166
BlockARM64UGE: "UGE",
167+
BlockARM64Z: "Z",
168+
BlockARM64NZ: "NZ",
169+
BlockARM64ZW: "ZW",
170+
BlockARM64NZW: "NZW",
163171

164172
BlockMIPS64EQ: "EQ",
165173
BlockMIPS64NE: "NE",

0 commit comments

Comments
 (0)