Skip to content

Commit b548eee

Browse files
committed
cmd/compile: fix load-combining rules
CL 33632 reorders args of commutative ops in order to make CSE for commutative ops more robust. Unfortunately, that broke the load-combining rules, which depend on a certain ordering of OR ops' arguments. Introduce some additional rules that order OR ops' arguments consistently so that the load-combining rules fire.

Note: there's also something else wrong with the s390x rules. I've filed #19059 for that.

Fixes #18946

Change-Id: I0a5447196bd88a55ccee683c69a57b943a9972e1
Reviewed-on: https://go-review.googlesource.com/36911
Run-TryBot: Keith Randall <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Josh Bleecher Snyder <[email protected]>
1 parent 76b4b8c commit b548eee

File tree

7 files changed

+129
-0
lines changed

7 files changed

+129
-0
lines changed

src/cmd/compile/internal/gc/asm_test.go

+33
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,39 @@ func f(b []byte, i int) uint32 {
221221
`,
222222
[]string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"},
223223
},
224+
{"s390x", "linux", `
225+
import "encoding/binary"
226+
func f(b []byte) uint32 {
227+
return binary.LittleEndian.Uint32(b)
228+
}
229+
`,
230+
[]string{"\tMOVWBR\t\\(.*\\),"},
231+
},
232+
{"s390x", "linux", `
233+
import "encoding/binary"
234+
func f(b []byte, i int) uint32 {
235+
return binary.LittleEndian.Uint32(b[i:])
236+
}
237+
`,
238+
[]string{"\tMOVWBR\t\\(.*\\)\\(.*\\*1\\),"},
239+
},
240+
{"s390x", "linux", `
241+
import "encoding/binary"
242+
func f(b []byte) uint64 {
243+
return binary.LittleEndian.Uint64(b)
244+
}
245+
`,
246+
[]string{"\tMOVDBR\t\\(.*\\),"},
247+
},
248+
{"s390x", "linux", `
249+
import "encoding/binary"
250+
func f(b []byte, i int) uint64 {
251+
return binary.LittleEndian.Uint64(b[i:])
252+
}
253+
`,
254+
[]string{"\tMOVDBR\t\\(.*\\)\\(.*\\*1\\),"},
255+
},
256+
// TODO: s390x big-endian tests.
224257

225258
// Structure zeroing. See issue #18370.
226259
{"amd64", "linux", `

src/cmd/compile/internal/ssa/gen/386.rules

+3
Original file line numberDiff line numberDiff line change
@@ -1131,6 +1131,9 @@
11311131
(CMPWconst x [0]) -> (TESTW x x)
11321132
(CMPBconst x [0]) -> (TESTB x x)
11331133

1134+
// Move shifts to second argument of ORs. Helps load combining rules below.
1135+
(ORL x:(SHLLconst _) y) && y.Op != Op386SHLLconst -> (ORL y x)
1136+
11341137
// Combining byte loads into larger (unaligned) loads.
11351138
// There are many ways these combinations could occur. This is
11361139
// designed to match the way encoding/binary.LittleEndian does it.

src/cmd/compile/internal/ssa/gen/AMD64.rules

+4
Original file line numberDiff line numberDiff line change
@@ -1383,6 +1383,10 @@
13831383
(CMPWconst x [0]) -> (TESTW x x)
13841384
(CMPBconst x [0]) -> (TESTB x x)
13851385

1386+
// Move shifts to second argument of ORs. Helps load combining rules below.
1387+
(ORQ x:(SHLQconst _) y) && y.Op != OpAMD64SHLQconst -> (ORQ y x)
1388+
(ORL x:(SHLLconst _) y) && y.Op != OpAMD64SHLLconst -> (ORL y x)
1389+
13861390
// Combining byte loads into larger (unaligned) loads.
13871391
// There are many ways these combinations could occur. This is
13881392
// designed to match the way encoding/binary.LittleEndian does it.

src/cmd/compile/internal/ssa/gen/S390X.rules

+4
Original file line numberDiff line numberDiff line change
@@ -1266,6 +1266,10 @@
12661266
&& clobber(x)
12671267
-> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
12681268

1269+
// Move shifts to second argument of ORs. Helps load combining rules below.
1270+
(ORW x:(SLWconst _) y) && y.Op != OpS390XSLWconst -> (ORW y x)
1271+
(OR x:(SLDconst _) y) && y.Op != OpS390XSLDconst -> (OR y x)
1272+
12691273
// Combining byte loads into larger (unaligned) loads.
12701274

12711275
// Little endian loads.

src/cmd/compile/internal/ssa/rewrite386.go

+17
Original file line numberDiff line numberDiff line change
@@ -7465,6 +7465,23 @@ func rewriteValue386_Op386ORL(v *Value, config *Config) bool {
74657465
v.AddArg(x)
74667466
return true
74677467
}
7468+
// match: (ORL x:(SHLLconst _) y)
7469+
// cond: y.Op != Op386SHLLconst
7470+
// result: (ORL y x)
7471+
for {
7472+
x := v.Args[0]
7473+
if x.Op != Op386SHLLconst {
7474+
break
7475+
}
7476+
y := v.Args[1]
7477+
if !(y.Op != Op386SHLLconst) {
7478+
break
7479+
}
7480+
v.reset(Op386ORL)
7481+
v.AddArg(y)
7482+
v.AddArg(x)
7483+
return true
7484+
}
74687485
// match: (ORL x0:(MOVBload [i] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
74697486
// cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
74707487
// result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)

src/cmd/compile/internal/ssa/rewriteAMD64.go

+34
Original file line numberDiff line numberDiff line change
@@ -11282,6 +11282,23 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value, config *Config) bool {
1128211282
v.AddArg(x)
1128311283
return true
1128411284
}
11285+
// match: (ORL x:(SHLLconst _) y)
11286+
// cond: y.Op != OpAMD64SHLLconst
11287+
// result: (ORL y x)
11288+
for {
11289+
x := v.Args[0]
11290+
if x.Op != OpAMD64SHLLconst {
11291+
break
11292+
}
11293+
y := v.Args[1]
11294+
if !(y.Op != OpAMD64SHLLconst) {
11295+
break
11296+
}
11297+
v.reset(OpAMD64ORL)
11298+
v.AddArg(y)
11299+
v.AddArg(x)
11300+
return true
11301+
}
1128511302
// match: (ORL x0:(MOVBload [i] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
1128611303
// cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
1128711304
// result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
@@ -11909,6 +11926,23 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value, config *Config) bool {
1190911926
v.AddArg(x)
1191011927
return true
1191111928
}
11929+
// match: (ORQ x:(SHLQconst _) y)
11930+
// cond: y.Op != OpAMD64SHLQconst
11931+
// result: (ORQ y x)
11932+
for {
11933+
x := v.Args[0]
11934+
if x.Op != OpAMD64SHLQconst {
11935+
break
11936+
}
11937+
y := v.Args[1]
11938+
if !(y.Op != OpAMD64SHLQconst) {
11939+
break
11940+
}
11941+
v.reset(OpAMD64ORQ)
11942+
v.AddArg(y)
11943+
v.AddArg(x)
11944+
return true
11945+
}
1191211946
// match: (ORQ o0:(ORQ o1:(ORQ o2:(ORQ o3:(ORQ o4:(ORQ o5:(ORQ x0:(MOVBload [i] {s} p mem) s0:(SHLQconst [8] x1:(MOVBload [i+1] {s} p mem))) s1:(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem))) s2:(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem))) s3:(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem))) s4:(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem))) s5:(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem))) s6:(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem)))
1191311947
// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)
1191411948
// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)

src/cmd/compile/internal/ssa/rewriteS390X.go

+34
Original file line numberDiff line numberDiff line change
@@ -14324,6 +14324,23 @@ func rewriteValueS390X_OpS390XOR(v *Value, config *Config) bool {
1432414324
v.AddArg(mem)
1432514325
return true
1432614326
}
14327+
// match: (OR x:(SLDconst _) y)
14328+
// cond: y.Op != OpS390XSLDconst
14329+
// result: (OR y x)
14330+
for {
14331+
x := v.Args[0]
14332+
if x.Op != OpS390XSLDconst {
14333+
break
14334+
}
14335+
y := v.Args[1]
14336+
if !(y.Op != OpS390XSLDconst) {
14337+
break
14338+
}
14339+
v.reset(OpS390XOR)
14340+
v.AddArg(y)
14341+
v.AddArg(x)
14342+
return true
14343+
}
1432714344
// match: (OR o0:(OR o1:(OR o2:(OR o3:(OR o4:(OR o5:(OR x0:(MOVBZload [i] {s} p mem) s0:(SLDconst [8] x1:(MOVBZload [i+1] {s} p mem))) s1:(SLDconst [16] x2:(MOVBZload [i+2] {s} p mem))) s2:(SLDconst [24] x3:(MOVBZload [i+3] {s} p mem))) s3:(SLDconst [32] x4:(MOVBZload [i+4] {s} p mem))) s4:(SLDconst [40] x5:(MOVBZload [i+5] {s} p mem))) s5:(SLDconst [48] x6:(MOVBZload [i+6] {s} p mem))) s6:(SLDconst [56] x7:(MOVBZload [i+7] {s} p mem)))
1432814345
// cond: p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3) && clobber(s4) && clobber(s5) && clobber(s6) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5)
1432914346
// result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDBRload [i] {s} p mem)
@@ -15412,6 +15429,23 @@ func rewriteValueS390X_OpS390XORW(v *Value, config *Config) bool {
1541215429
v.AddArg(mem)
1541315430
return true
1541415431
}
15432+
// match: (ORW x:(SLWconst _) y)
15433+
// cond: y.Op != OpS390XSLWconst
15434+
// result: (ORW y x)
15435+
for {
15436+
x := v.Args[0]
15437+
if x.Op != OpS390XSLWconst {
15438+
break
15439+
}
15440+
y := v.Args[1]
15441+
if !(y.Op != OpS390XSLWconst) {
15442+
break
15443+
}
15444+
v.reset(OpS390XORW)
15445+
v.AddArg(y)
15446+
v.AddArg(x)
15447+
return true
15448+
}
1541515449
// match: (ORW x0:(MOVBZload [i] {s} p mem) s0:(SLWconst [8] x1:(MOVBZload [i+1] {s} p mem)))
1541615450
// cond: p.Op != OpSB && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
1541715451
// result: @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i] {s} p mem))

0 commit comments

Comments
 (0)