Skip to content

Commit 4b00d3f

Browse files
committed
cmd/compile: implement comparisons directly with memory
Allow the compiler to generate code like CMPQ 16(AX), $7 It's tricky because it's difficult to spill such a comparison during flagalloc, because the same memory state might not be available at the restore locations. Solve this problem by decomposing the compare+load back into its parts if it needs to be spilled. The big win is that the write barrier test goes from: MOVL runtime.writeBarrier(SB), CX TESTL CX, CX JNE 60 to CMPL runtime.writeBarrier(SB), $0 JNE 59 It's one instruction and one byte smaller. Fixes #19485 Fixes #15245 Update #22460 Binaries are about 0.15% smaller. Change-Id: I4fd8d1111b6b9924d52f9a0901ca1b2e5cce0836 Reviewed-on: https://go-review.googlesource.com/86035 Reviewed-by: Cherry Zhang <[email protected]> Reviewed-by: Ilya Tocar <[email protected]>
1 parent 3067376 commit 4b00d3f

File tree

9 files changed

+1029
-16
lines changed

9 files changed

+1029
-16
lines changed

src/cmd/compile/internal/amd64/ssa.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,21 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
500500
p.From.Offset = v.AuxInt
501501
p.To.Type = obj.TYPE_REG
502502
p.To.Reg = v.Args[0].Reg()
503+
case ssa.OpAMD64CMPQmem, ssa.OpAMD64CMPLmem, ssa.OpAMD64CMPWmem, ssa.OpAMD64CMPBmem:
504+
p := s.Prog(v.Op.Asm())
505+
p.From.Type = obj.TYPE_MEM
506+
p.From.Reg = v.Args[0].Reg()
507+
gc.AddAux(&p.From, v)
508+
p.To.Type = obj.TYPE_REG
509+
p.To.Reg = v.Args[1].Reg()
510+
case ssa.OpAMD64CMPQconstmem, ssa.OpAMD64CMPLconstmem, ssa.OpAMD64CMPWconstmem, ssa.OpAMD64CMPBconstmem:
511+
sc := v.AuxValAndOff()
512+
p := s.Prog(v.Op.Asm())
513+
p.From.Type = obj.TYPE_MEM
514+
p.From.Reg = v.Args[0].Reg()
515+
gc.AddAux2(&p.From, v, sc.Off())
516+
p.To.Type = obj.TYPE_CONST
517+
p.To.Offset = sc.Val()
503518
case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
504519
x := v.Reg()
505520

src/cmd/compile/internal/gc/asm_test.go

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -923,14 +923,14 @@ var linuxAMD64Tests = []*asmTest{
923923
func f65(a string) bool {
924924
return a == "xx"
925925
}`,
926-
pos: []string{"\tCMPW\t[A-Z]"},
926+
pos: []string{"\tCMPW\t\\(.*\\), [$]"},
927927
},
928928
{
929929
fn: `
930930
func f66(a string) bool {
931931
return a == "xxxx"
932932
}`,
933-
pos: []string{"\tCMPL\t[A-Z]"},
933+
pos: []string{"\tCMPL\t\\(.*\\), [$]"},
934934
},
935935
{
936936
fn: `
@@ -1002,42 +1002,51 @@ var linuxAMD64Tests = []*asmTest{
10021002
func f68(a,b [2]byte) bool {
10031003
return a == b
10041004
}`,
1005-
pos: []string{"\tCMPW\t[A-Z]"},
1005+
pos: []string{"\tCMPW\t\"\"[.+_a-z0-9]+\\(SP\\), [A-Z]"},
10061006
},
10071007
{
10081008
fn: `
10091009
func f69(a,b [3]uint16) bool {
10101010
return a == b
10111011
}`,
1012-
pos: []string{"\tCMPL\t[A-Z]"},
1012+
pos: []string{
1013+
"\tCMPL\t\"\"[.+_a-z0-9]+\\(SP\\), [A-Z]",
1014+
"\tCMPW\t\"\"[.+_a-z0-9]+\\(SP\\), [A-Z]",
1015+
},
10131016
},
10141017
{
10151018
fn: `
10161019
func $(a,b [3]int16) bool {
10171020
return a == b
10181021
}`,
1019-
pos: []string{"\tCMPL\t[A-Z]"},
1022+
pos: []string{
1023+
"\tCMPL\t\"\"[.+_a-z0-9]+\\(SP\\), [A-Z]",
1024+
"\tCMPW\t\"\"[.+_a-z0-9]+\\(SP\\), [A-Z]",
1025+
},
10201026
},
10211027
{
10221028
fn: `
10231029
func $(a,b [12]int8) bool {
10241030
return a == b
10251031
}`,
1026-
pos: []string{"\tCMPQ\t[A-Z]", "\tCMPL\t[A-Z]"},
1032+
pos: []string{
1033+
"\tCMPQ\t\"\"[.+_a-z0-9]+\\(SP\\), [A-Z]",
1034+
"\tCMPL\t\"\"[.+_a-z0-9]+\\(SP\\), [A-Z]",
1035+
},
10271036
},
10281037
{
10291038
fn: `
10301039
func f70(a,b [15]byte) bool {
10311040
return a == b
10321041
}`,
1033-
pos: []string{"\tCMPQ\t[A-Z]"},
1042+
pos: []string{"\tCMPQ\t\"\"[.+_a-z0-9]+\\(SP\\), [A-Z]"},
10341043
},
10351044
{
10361045
fn: `
10371046
func f71(a,b unsafe.Pointer) bool { // This was a TODO in mapaccess1_faststr
10381047
return *((*[4]byte)(a)) != *((*[4]byte)(b))
10391048
}`,
1040-
pos: []string{"\tCMPL\t[A-Z]"},
1049+
pos: []string{"\tCMPL\t\\(.*\\), [A-Z]"},
10411050
},
10421051
{
10431052
// make sure assembly output has matching offset and base register.
@@ -1767,6 +1776,46 @@ var linuxAMD64Tests = []*asmTest{
17671776
`,
17681777
neg: []string{"TESTB"},
17691778
},
1779+
{
1780+
fn: `
1781+
func $(p int, q *int) bool {
1782+
return p < *q
1783+
}
1784+
`,
1785+
pos: []string{"CMPQ\t\\(.*\\), [A-Z]"},
1786+
},
1787+
{
1788+
fn: `
1789+
func $(p *int, q int) bool {
1790+
return *p < q
1791+
}
1792+
`,
1793+
pos: []string{"CMPQ\t\\(.*\\), [A-Z]"},
1794+
},
1795+
{
1796+
fn: `
1797+
func $(p *int) bool {
1798+
return *p < 7
1799+
}
1800+
`,
1801+
pos: []string{"CMPQ\t\\(.*\\), [$]7"},
1802+
},
1803+
{
1804+
fn: `
1805+
func $(p *int) bool {
1806+
return 7 < *p
1807+
}
1808+
`,
1809+
pos: []string{"CMPQ\t\\(.*\\), [$]7"},
1810+
},
1811+
{
1812+
fn: `
1813+
func $(p **int) {
1814+
*p = nil
1815+
}
1816+
`,
1817+
pos: []string{"CMPL\truntime.writeBarrier\\(SB\\), [$]0"},
1818+
},
17701819
}
17711820

17721821
var linux386Tests = []*asmTest{

src/cmd/compile/internal/ssa/flagalloc.go

Lines changed: 103 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,47 @@ func flagalloc(f *Func) {
5959
}
6060
}
6161

62-
// Add flag recomputations where they are needed.
62+
// Compute which flags values will need to be spilled.
63+
spill := map[ID]bool{}
64+
for _, b := range f.Blocks {
65+
var flag *Value
66+
if len(b.Preds) > 0 {
67+
flag = end[b.Preds[0].b.ID]
68+
}
69+
for _, v := range b.Values {
70+
for _, a := range v.Args {
71+
if !a.Type.IsFlags() {
72+
continue
73+
}
74+
if a == flag {
75+
continue
76+
}
77+
// a will need to be restored here.
78+
spill[a.ID] = true
79+
flag = a
80+
}
81+
if v.clobbersFlags() {
82+
flag = nil
83+
}
84+
if v.Type.IsFlags() {
85+
flag = v
86+
}
87+
}
88+
if v := b.Control; v != nil && v != flag && v.Type.IsFlags() {
89+
spill[v.ID] = true
90+
}
91+
if v := end[b.ID]; v != nil && v != flag {
92+
spill[v.ID] = true
93+
}
94+
}
95+
96+
// Add flag spill and recomputation where they are needed.
6397
// TODO: Remove original instructions if they are never used.
6498
var oldSched []*Value
6599
for _, b := range f.Blocks {
66100
oldSched = append(oldSched[:0], b.Values...)
67101
b.Values = b.Values[:0]
68-
// The current live flag value the pre-flagalloc copy).
102+
// The current live flag value (the pre-flagalloc copy).
69103
var flag *Value
70104
if len(b.Preds) > 0 {
71105
flag = end[b.Preds[0].b.ID]
@@ -81,6 +115,72 @@ func flagalloc(f *Func) {
81115
if v.Op == OpPhi && v.Type.IsFlags() {
82116
f.Fatalf("phi of flags not supported: %s", v.LongString())
83117
}
118+
119+
// If v will be spilled, and v uses memory, then we must split it
120+
// into a load + a flag generator.
121+
// TODO: figure out how to do this without arch-dependent code.
122+
if spill[v.ID] && v.MemoryArg() != nil {
123+
switch v.Op {
124+
case OpAMD64CMPQmem:
125+
load := b.NewValue2IA(v.Pos, OpAMD64MOVQload, f.Config.Types.UInt64, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
126+
v.Op = OpAMD64CMPQ
127+
v.AuxInt = 0
128+
v.Aux = nil
129+
v.SetArgs2(load, v.Args[1])
130+
case OpAMD64CMPLmem:
131+
load := b.NewValue2IA(v.Pos, OpAMD64MOVLload, f.Config.Types.UInt32, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
132+
v.Op = OpAMD64CMPL
133+
v.AuxInt = 0
134+
v.Aux = nil
135+
v.SetArgs2(load, v.Args[1])
136+
case OpAMD64CMPWmem:
137+
load := b.NewValue2IA(v.Pos, OpAMD64MOVWload, f.Config.Types.UInt16, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
138+
v.Op = OpAMD64CMPW
139+
v.AuxInt = 0
140+
v.Aux = nil
141+
v.SetArgs2(load, v.Args[1])
142+
case OpAMD64CMPBmem:
143+
load := b.NewValue2IA(v.Pos, OpAMD64MOVBload, f.Config.Types.UInt8, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
144+
v.Op = OpAMD64CMPB
145+
v.AuxInt = 0
146+
v.Aux = nil
147+
v.SetArgs2(load, v.Args[1])
148+
149+
case OpAMD64CMPQconstmem:
150+
vo := v.AuxValAndOff()
151+
load := b.NewValue2IA(v.Pos, OpAMD64MOVQload, f.Config.Types.UInt64, vo.Off(), v.Aux, v.Args[0], v.Args[1])
152+
v.Op = OpAMD64CMPQconst
153+
v.AuxInt = vo.Val()
154+
v.Aux = nil
155+
v.SetArgs1(load)
156+
case OpAMD64CMPLconstmem:
157+
vo := v.AuxValAndOff()
158+
load := b.NewValue2IA(v.Pos, OpAMD64MOVLload, f.Config.Types.UInt32, vo.Off(), v.Aux, v.Args[0], v.Args[1])
159+
v.Op = OpAMD64CMPLconst
160+
v.AuxInt = vo.Val()
161+
v.Aux = nil
162+
v.SetArgs1(load)
163+
case OpAMD64CMPWconstmem:
164+
vo := v.AuxValAndOff()
165+
load := b.NewValue2IA(v.Pos, OpAMD64MOVWload, f.Config.Types.UInt16, vo.Off(), v.Aux, v.Args[0], v.Args[1])
166+
v.Op = OpAMD64CMPWconst
167+
v.AuxInt = vo.Val()
168+
v.Aux = nil
169+
v.SetArgs1(load)
170+
case OpAMD64CMPBconstmem:
171+
vo := v.AuxValAndOff()
172+
load := b.NewValue2IA(v.Pos, OpAMD64MOVBload, f.Config.Types.UInt8, vo.Off(), v.Aux, v.Args[0], v.Args[1])
173+
v.Op = OpAMD64CMPBconst
174+
v.AuxInt = vo.Val()
175+
v.Aux = nil
176+
v.SetArgs1(load)
177+
178+
default:
179+
f.Fatalf("can't split flag generator: %s", v.LongString())
180+
}
181+
182+
}
183+
84184
// Make sure any flag arg of v is in the flags register.
85185
// If not, recompute it.
86186
for i, a := range v.Args {
@@ -108,7 +208,7 @@ func flagalloc(f *Func) {
108208
}
109209
if v := b.Control; v != nil && v != flag && v.Type.IsFlags() {
110210
// Recalculate control value.
111-
c := v.copyInto(b)
211+
c := copyFlags(v, b)
112212
b.SetControl(c)
113213
flag = v
114214
}

src/cmd/compile/internal/ssa/func.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,19 @@ func (b *Block) NewValue2I(pos src.XPos, op Op, t *types.Type, auxint int64, arg
350350
return v
351351
}
352352

353+
// NewValue2IA returns a new value in the block with two arguments and both an auxint and aux values.
354+
func (b *Block) NewValue2IA(pos src.XPos, op Op, t *types.Type, auxint int64, aux interface{}, arg0, arg1 *Value) *Value {
355+
v := b.Func.newValue(op, t, b, pos)
356+
v.AuxInt = auxint
357+
v.Aux = aux
358+
v.Args = v.argstorage[:2]
359+
v.argstorage[0] = arg0
360+
v.argstorage[1] = arg1
361+
arg0.Uses++
362+
arg1.Uses++
363+
return v
364+
}
365+
353366
// NewValue3 returns a new value in the block with three arguments and zero aux values.
354367
func (b *Block) NewValue3(pos src.XPos, op Op, t *types.Type, arg0, arg1, arg2 *Value) *Value {
355368
v := b.Func.newValue(op, t, b, pos)

src/cmd/compile/internal/ssa/gen/AMD64.rules

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2284,3 +2284,26 @@
22842284
// LEAQ is rematerializeable, so this helps to avoid register spill.
22852285
// See isuue 22947 for details
22862286
(ADDQconst [off] x:(SP)) -> (LEAQ [off] x)
2287+
2288+
// Fold loads into compares
2289+
// Note: these may be undone by the flagalloc pass.
2290+
(CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (CMP(Q|L|W|B)mem {sym} [off] ptr x mem)
2291+
(CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (InvertFlags (CMP(Q|L|W|B)mem {sym} [off] ptr x mem))
2292+
2293+
(CMP(Q|L|W|B)const l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) [c])
2294+
&& l.Uses == 1
2295+
&& validValAndOff(c, off)
2296+
&& clobber(l) ->
2297+
@l.Block (CMP(Q|L|W|B)constmem {sym} [makeValAndOff(c,off)] ptr mem)
2298+
2299+
(CMPQmem {sym} [off] ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
2300+
(CMPLmem {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(c,off) -> (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
2301+
(CMPWmem {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),off) -> (CMPWconstmem {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
2302+
(CMPBmem {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstmem {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
2303+
2304+
(TEST(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) l2)
2305+
&& l == l2
2306+
&& l.Uses == 2
2307+
&& validValAndOff(0,off)
2308+
&& clobber(l) ->
2309+
@l.Block (CMP(Q|L|W|B)constmem {sym} [makeValAndOff(0,off)] ptr mem)

src/cmd/compile/internal/ssa/gen/AMD64Ops.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,11 @@ func init() {
118118
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax, dx}}
119119
gp21hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
120120

121-
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}}
122-
gp1flags = regInfo{inputs: []regMask{gpsp}}
123-
flagsgp = regInfo{inputs: nil, outputs: gponly}
121+
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}}
122+
gp1flags = regInfo{inputs: []regMask{gpsp}}
123+
gp0flagsLoad = regInfo{inputs: []regMask{gpspsb, 0}}
124+
gp1flagsLoad = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
125+
flagsgp = regInfo{inputs: nil, outputs: gponly}
124126

125127
gp11flags = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
126128

@@ -246,6 +248,18 @@ func init() {
246248
{name: "CMPWconst", argLength: 1, reg: gp1flags, asm: "CMPW", typ: "Flags", aux: "Int16"}, // arg0 compare to auxint
247249
{name: "CMPBconst", argLength: 1, reg: gp1flags, asm: "CMPB", typ: "Flags", aux: "Int8"}, // arg0 compare to auxint
248250

251+
// compare *(arg0+auxint+aux) to arg1 (in that order). arg2=mem.
252+
{name: "CMPQmem", argLength: 3, reg: gp1flagsLoad, asm: "CMPQ", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
253+
{name: "CMPLmem", argLength: 3, reg: gp1flagsLoad, asm: "CMPL", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
254+
{name: "CMPWmem", argLength: 3, reg: gp1flagsLoad, asm: "CMPW", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
255+
{name: "CMPBmem", argLength: 3, reg: gp1flagsLoad, asm: "CMPB", aux: "SymOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
256+
257+
// compare *(arg0+ValAndOff(AuxInt).Off()+aux) to ValAndOff(AuxInt).Val() (in that order). arg1=mem.
258+
{name: "CMPQconstmem", argLength: 2, reg: gp0flagsLoad, asm: "CMPQ", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
259+
{name: "CMPLconstmem", argLength: 2, reg: gp0flagsLoad, asm: "CMPL", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
260+
{name: "CMPWconstmem", argLength: 2, reg: gp0flagsLoad, asm: "CMPW", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
261+
{name: "CMPBconstmem", argLength: 2, reg: gp0flagsLoad, asm: "CMPB", aux: "SymValAndOff", typ: "Flags", symEffect: "Read", faultOnNilArg0: true},
262+
249263
{name: "UCOMISS", argLength: 2, reg: fp2flags, asm: "UCOMISS", typ: "Flags"}, // arg0 compare to arg1, f32
250264
{name: "UCOMISD", argLength: 2, reg: fp2flags, asm: "UCOMISD", typ: "Flags"}, // arg0 compare to arg1, f64
251265

0 commit comments

Comments
 (0)