Skip to content

Commit 144e0b1

Browse files
author
Martin Möhrmann
committed
cmd/compile: add MOVOstoreconst with offset folding on amd64
Replace MOVOstorezero with new MOVOstoreconst. MOVOstoreconst has similar address folding rules to the other MOVstoreconst operations but only supports zero as the store value. Currently only MOVO stores with zero values are generated. Using MOVOstoreconst with SymValAndOff aux has the advantage that we can just add one more MOVstoreconst variant to the existing rules. The main effect of this CL is converting 16-byte zeroing of a value on the stack from LEAQ+MOVUPS to just MOVUPS, which reduces binary size. old: LEAQ 0x20(SP), DX MOVUPS X15, 0(DX) new: MOVUPS X15, 0x20(SP) file before after Δ % addr2line 3661568 3657472 -4096 -0.112% asm 4566432 4562336 -4096 -0.090% cgo 4305456 4301360 -4096 -0.095% compile 22878528 22874512 -4016 -0.018% cover 4517952 4513856 -4096 -0.091% link 6287248 6283152 -4096 -0.065% nm 3640768 3636672 -4096 -0.113% objdump 4010592 4006496 -4096 -0.102% pack 2188224 2184128 -4096 -0.187% pprof 13429504 13421312 -8192 -0.061% trace 10143968 10135776 -8192 -0.081% vet 6868864 6864768 -4096 -0.060% Change-Id: I08f5dd5ab9251448a4572d3ddd1e0c8cd417f5e3 Reviewed-on: https://go-review.googlesource.com/c/go/+/346249 Run-TryBot: Martin Möhrmann <[email protected]> TryBot-Result: Go Bot <[email protected]> Reviewed-by: Keith Randall <[email protected]> Trust: Martin Möhrmann <[email protected]>
1 parent f27d6a2 commit 144e0b1

File tree

5 files changed

+235
-106
lines changed

5 files changed

+235
-106
lines changed

src/cmd/compile/internal/amd64/ssa.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
822822
p.To.Type = obj.TYPE_MEM
823823
p.To.Reg = v.Args[0].Reg()
824824
ssagen.AddAux2(&p.To, v, sc.Off64())
825-
case ssa.OpAMD64MOVOstorezero:
825+
case ssa.OpAMD64MOVOstoreconst:
826+
sc := v.AuxValAndOff()
827+
if sc.Val() != 0 {
828+
v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString())
829+
}
830+
826831
if s.ABI != obj.ABIInternal {
827832
// zero X15 manually
828833
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
@@ -832,7 +837,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
832837
p.From.Reg = x86.REG_X15
833838
p.To.Type = obj.TYPE_MEM
834839
p.To.Reg = v.Args[0].Reg()
835-
ssagen.AddAux(&p.To, v)
840+
ssagen.AddAux2(&p.To, v, sc.Off64())
841+
836842
case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
837843
ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
838844
ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,

src/cmd/compile/internal/ssa/gen/AMD64.rules

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -362,26 +362,26 @@
362362
// Adjust zeros to be a multiple of 16 bytes.
363363
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE =>
364364
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
365-
(MOVOstorezero destptr mem))
365+
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
366366

367367
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE =>
368368
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
369-
(MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
369+
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
370370

371371
(Zero [16] destptr mem) && config.useSSE =>
372-
(MOVOstorezero destptr mem)
372+
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem)
373373
(Zero [32] destptr mem) && config.useSSE =>
374-
(MOVOstorezero (OffPtr <destptr.Type> destptr [16])
375-
(MOVOstorezero destptr mem))
374+
(MOVOstoreconst [makeValAndOff(0,16)] destptr
375+
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
376376
(Zero [48] destptr mem) && config.useSSE =>
377-
(MOVOstorezero (OffPtr <destptr.Type> destptr [32])
378-
(MOVOstorezero (OffPtr <destptr.Type> destptr [16])
379-
(MOVOstorezero destptr mem)))
377+
(MOVOstoreconst [makeValAndOff(0,32)] destptr
378+
(MOVOstoreconst [makeValAndOff(0,16)] destptr
379+
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem)))
380380
(Zero [64] destptr mem) && config.useSSE =>
381-
(MOVOstorezero (OffPtr <destptr.Type> destptr [48])
382-
(MOVOstorezero (OffPtr <destptr.Type> destptr [32])
383-
(MOVOstorezero (OffPtr <destptr.Type> destptr [16])
384-
(MOVOstorezero destptr mem))))
381+
(MOVOstoreconst [makeValAndOff(0,48)] destptr
382+
(MOVOstoreconst [makeValAndOff(0,32)] destptr
383+
(MOVOstoreconst [makeValAndOff(0,16)] destptr
384+
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))))
385385

386386
// Medium zeroing uses a duff device.
387387
(Zero [s] destptr mem)
@@ -1134,8 +1134,8 @@
11341134
(MOVBstoreconst [makeValAndOff(int32(int8(c)),off)] {sym} ptr mem)
11351135

11361136
// Fold address offsets into constant stores.
1137-
(MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd32(off) =>
1138-
(MOV(Q|L|W|B)storeconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
1137+
(MOV(Q|L|W|B|O)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd32(off) =>
1138+
(MOV(Q|L|W|B|O)storeconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
11391139

11401140
// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
11411141
// what variables are being read/written by the ops.
@@ -1145,8 +1145,8 @@
11451145
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
11461146
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
11471147
(MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
1148-
(MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) =>
1149-
(MOV(Q|L|W|B)storeconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
1148+
(MOV(Q|L|W|B|O)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) =>
1149+
(MOV(Q|L|W|B|O)storeconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
11501150
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
11511151
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
11521152
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
@@ -1897,14 +1897,22 @@
18971897
&& a.Off() + 4 == c.Off()
18981898
&& clobber(x)
18991899
=> (MOVQstore [a.Off()] {s} p (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
1900-
(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
1900+
(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [a] {s} p mem))
1901+
&& config.useSSE
1902+
&& x.Uses == 1
1903+
&& a.Off() + 8 == c.Off()
1904+
&& a.Val() == 0
1905+
&& c.Val() == 0
1906+
&& clobber(x)
1907+
=> (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p mem)
1908+
(MOVQstoreconst [a] {s} p x:(MOVQstoreconst [c] {s} p mem))
19011909
&& config.useSSE
19021910
&& x.Uses == 1
1903-
&& c2.Off() + 8 == c.Off()
1911+
&& a.Off() + 8 == c.Off()
1912+
&& a.Val() == 0
19041913
&& c.Val() == 0
1905-
&& c2.Val() == 0
19061914
&& clobber(x)
1907-
=> (MOVOstorezero [c2.Off()] {s} p mem)
1915+
=> (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p mem)
19081916

19091917
// Combine stores into larger (unaligned) stores. Little endian.
19101918
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
@@ -2031,6 +2039,8 @@
20312039
(MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
20322040
(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
20332041

2042+
(MOVOstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
2043+
(MOVOstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
20342044
(MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
20352045
(MOVQstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
20362046
(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
@@ -2056,6 +2066,8 @@
20562066
(MOVWstoreconst [sc.addOffset32(off)] {s} ptr mem)
20572067
(MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && sc.canAdd32(off) =>
20582068
(MOVBstoreconst [sc.addOffset32(off)] {s} ptr mem)
2069+
(MOVOstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && sc.canAdd32(off) =>
2070+
(MOVOstoreconst [sc.addOffset32(off)] {s} ptr mem)
20592071

20602072
// Merge load and op
20612073
// TODO: add indexed variants?

src/cmd/compile/internal/ssa/gen/AMD64Ops.go

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -679,20 +679,19 @@ func init() {
679679
// Note: LEAx{1,2,4,8} must not have OpSB as either argument.
680680

681681
// auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address
682-
{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load byte from arg0+auxint+aux. arg1=mem. Zero extend.
683-
{name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
684-
{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Zero extend.
685-
{name: "MOVWQSXload", argLength: 2, reg: gpload, asm: "MOVWQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
686-
{name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Zero extend.
687-
{name: "MOVLQSXload", argLength: 2, reg: gpload, asm: "MOVLQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
688-
{name: "MOVQload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem
689-
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
690-
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
691-
{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
692-
{name: "MOVQstore", argLength: 3, reg: gpstore, asm: "MOVQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
693-
{name: "MOVOload", argLength: 2, reg: fpload, asm: "MOVUPS", aux: "SymOff", typ: "Int128", faultOnNilArg0: true, symEffect: "Read"}, // load 16 bytes from arg0+auxint+aux. arg1=mem
694-
{name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem
695-
{name: "MOVOstorezero", argLength: 2, reg: regInfo{inputs: []regMask{gpspsb, 0}}, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0+auxint+aux. arg1=mem
682+
{name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8", faultOnNilArg0: true, symEffect: "Read"}, // load byte from arg0+auxint+aux. arg1=mem. Zero extend.
683+
{name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
684+
{name: "MOVWload", argLength: 2, reg: gpload, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes from arg0+auxint+aux. arg1=mem. Zero extend.
685+
{name: "MOVWQSXload", argLength: 2, reg: gpload, asm: "MOVWQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
686+
{name: "MOVLload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff", typ: "UInt32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes from arg0+auxint+aux. arg1=mem. Zero extend.
687+
{name: "MOVLQSXload", argLength: 2, reg: gpload, asm: "MOVLQSX", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // ditto, sign extend to int64
688+
{name: "MOVQload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes from arg0+auxint+aux. arg1=mem
689+
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
690+
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
691+
{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
692+
{name: "MOVQstore", argLength: 3, reg: gpstore, asm: "MOVQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
693+
{name: "MOVOload", argLength: 2, reg: fpload, asm: "MOVUPS", aux: "SymOff", typ: "Int128", faultOnNilArg0: true, symEffect: "Read"}, // load 16 bytes from arg0+auxint+aux. arg1=mem
694+
{name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem
696695

697696
// indexed loads/stores
698697
{name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", scale: 1, aux: "SymOff", typ: "UInt8", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
@@ -717,10 +716,11 @@ func init() {
717716
// For storeconst ops, the AuxInt field encodes both
718717
// the value to store and an address offset of the store.
719718
// Cast AuxInt to a ValAndOff to extract Val and Off fields.
720-
{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux. arg1=mem
721-
{name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 2 bytes of ...
722-
{name: "MOVLstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVL", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 4 bytes of ...
723-
{name: "MOVQstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVQ", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of ...
719+
{name: "MOVBstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVB", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux. arg1=mem
720+
{name: "MOVWstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVW", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 2 bytes of ...
721+
{name: "MOVLstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVL", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store low 4 bytes of ...
722+
{name: "MOVQstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVQ", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of ...
723+
{name: "MOVOstoreconst", argLength: 2, reg: gpstoreconst, asm: "MOVUPS", aux: "SymValAndOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of ...
724724

725725
{name: "MOVBstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVB", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low byte of ValAndOff(AuxInt).Val() to arg0+1*arg1+ValAndOff(AuxInt).Off()+aux. arg2=mem
726726
{name: "MOVWstoreconstidx1", argLength: 3, reg: gpstoreconstidx, commutative: true, asm: "MOVW", scale: 1, aux: "SymValAndOff", typ: "Mem", symEffect: "Write"}, // store low 2 bytes of ... arg1 ...

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 14 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)