Skip to content

Commit d99cee7

Browse files
committed
[dev.ssa] cmd/compile, etc.: more ARM64 optimizations, and enable SSA by default
Add more ARM64 optimizations: - use the hardware zero register where possible. - use shifted ops. The assembler supports shifted ops, but they are not documented, and it does not know how to print them. This CL adds them. - enable fast division. This was disabled because it made the old backend generate slower code, but with SSA it generates faster code. Turn on SSA by default, and adjust tests accordingly. Change-Id: I7794479954c83bb65008dcb457bc1e21d7496da6 Reviewed-on: https://go-review.googlesource.com/26950 Run-TryBot: Cherry Zhang <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: David Chase <[email protected]>
1 parent 94c8e59 commit d99cee7

File tree

17 files changed

+3804
-381
lines changed

17 files changed

+3804
-381
lines changed

src/cmd/asm/internal/asm/operand_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717

1818
func setArch(goarch string) (*arch.Arch, *obj.Link) {
1919
os.Setenv("GOOS", "linux") // obj can handle this OS for all architectures.
20+
os.Setenv("GOARCH", goarch)
2021
architecture := arch.Set(goarch)
2122
if architecture == nil {
2223
panic("asm: unrecognized architecture " + goarch)

src/cmd/compile/internal/arm64/prog.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ var progtable = [arm64.ALAST & obj.AMask]obj.ProgInfo{
7171
arm64.ACMPW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead},
7272
arm64.AADC & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.UseCarry},
7373
arm64.AROR & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
74+
arm64.ARORW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
7475
arm64.AADDS & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.SetCarry},
7576
arm64.ACSET & obj.AMask: {Flags: gc.SizeQ | gc.RightWrite},
7677
arm64.ACSEL & obj.AMask: {Flags: gc.SizeQ | gc.RegRead | gc.RightWrite},

src/cmd/compile/internal/arm64/ssa.go

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,24 @@ func storeByType(t ssa.Type) obj.As {
148148
panic("bad store type")
149149
}
150150

151+
// makeshift encodes a register shifted by a constant, used as an Offset in Prog
152+
func makeshift(reg int16, typ int64, s int64) int64 {
153+
return int64(reg&31)<<16 | typ | (s&63)<<10
154+
}
155+
156+
// genshift generates a Prog for r = r0 op (r1 shifted by s)
157+
func genshift(as obj.As, r0, r1, r int16, typ int64, s int64) *obj.Prog {
158+
p := gc.Prog(as)
159+
p.From.Type = obj.TYPE_SHIFT
160+
p.From.Offset = makeshift(r1, typ, s)
161+
p.Reg = r0
162+
if r != 0 {
163+
p.To.Type = obj.TYPE_REG
164+
p.To.Reg = r
165+
}
166+
return p
167+
}
168+
151169
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
152170
s.SetLineno(v.Line)
153171
switch v.Op {
@@ -284,6 +302,27 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
284302
p.Reg = gc.SSARegNum(v.Args[0])
285303
p.To.Type = obj.TYPE_REG
286304
p.To.Reg = gc.SSARegNum(v)
305+
case ssa.OpARM64ADDshiftLL,
306+
ssa.OpARM64SUBshiftLL,
307+
ssa.OpARM64ANDshiftLL,
308+
ssa.OpARM64ORshiftLL,
309+
ssa.OpARM64XORshiftLL,
310+
ssa.OpARM64BICshiftLL:
311+
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LL, v.AuxInt)
312+
case ssa.OpARM64ADDshiftRL,
313+
ssa.OpARM64SUBshiftRL,
314+
ssa.OpARM64ANDshiftRL,
315+
ssa.OpARM64ORshiftRL,
316+
ssa.OpARM64XORshiftRL,
317+
ssa.OpARM64BICshiftRL:
318+
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LR, v.AuxInt)
319+
case ssa.OpARM64ADDshiftRA,
320+
ssa.OpARM64SUBshiftRA,
321+
ssa.OpARM64ANDshiftRA,
322+
ssa.OpARM64ORshiftRA,
323+
ssa.OpARM64XORshiftRA,
324+
ssa.OpARM64BICshiftRA:
325+
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_AR, v.AuxInt)
287326
case ssa.OpARM64MOVDconst:
288327
p := gc.Prog(v.Op.Asm())
289328
p.From.Type = obj.TYPE_CONST
@@ -315,6 +354,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
315354
p.From.Type = obj.TYPE_CONST
316355
p.From.Offset = v.AuxInt
317356
p.Reg = gc.SSARegNum(v.Args[0])
357+
case ssa.OpARM64CMPshiftLL:
358+
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LL, v.AuxInt)
359+
case ssa.OpARM64CMPshiftRL:
360+
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LR, v.AuxInt)
361+
case ssa.OpARM64CMPshiftRA:
362+
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_AR, v.AuxInt)
318363
case ssa.OpARM64MOVDaddr:
319364
p := gc.Prog(arm64.AMOVD)
320365
p.From.Type = obj.TYPE_ADDR
@@ -372,6 +417,16 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
372417
p.To.Type = obj.TYPE_MEM
373418
p.To.Reg = gc.SSARegNum(v.Args[0])
374419
gc.AddAux(&p.To, v)
420+
case ssa.OpARM64MOVBstorezero,
421+
ssa.OpARM64MOVHstorezero,
422+
ssa.OpARM64MOVWstorezero,
423+
ssa.OpARM64MOVDstorezero:
424+
p := gc.Prog(v.Op.Asm())
425+
p.From.Type = obj.TYPE_REG
426+
p.From.Reg = arm64.REGZERO
427+
p.To.Type = obj.TYPE_MEM
428+
p.To.Reg = gc.SSARegNum(v.Args[0])
429+
gc.AddAux(&p.To, v)
375430
case ssa.OpARM64MOVBreg,
376431
ssa.OpARM64MOVBUreg,
377432
ssa.OpARM64MOVHreg,
@@ -433,12 +488,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
433488
p.From.Reg = gc.SSARegNum(v.Args[0])
434489
p.To.Type = obj.TYPE_REG
435490
p.To.Reg = gc.SSARegNum(v)
436-
case ssa.OpARM64CSELULT:
491+
case ssa.OpARM64CSELULT,
492+
ssa.OpARM64CSELULT0:
493+
r1 := int16(arm64.REGZERO)
494+
if v.Op == ssa.OpARM64CSELULT {
495+
r1 = gc.SSARegNum(v.Args[1])
496+
}
437497
p := gc.Prog(v.Op.Asm())
438498
p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
439499
p.From.Reg = arm64.COND_LO
440500
p.Reg = gc.SSARegNum(v.Args[0])
441-
p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: gc.SSARegNum(v.Args[1])}
501+
p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r1}
442502
p.To.Type = obj.TYPE_REG
443503
p.To.Reg = gc.SSARegNum(v)
444504
case ssa.OpARM64DUFFZERO:

src/cmd/compile/internal/gc/ssa.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func shouldssa(fn *Node) bool {
4040
if os.Getenv("SSATEST") == "" {
4141
return false
4242
}
43-
case "amd64", "amd64p32", "arm", "386":
43+
case "amd64", "amd64p32", "arm", "386", "arm64":
4444
// Generally available.
4545
}
4646
if !ssaEnabled {

src/cmd/compile/internal/gc/walk.go

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3336,6 +3336,7 @@ func samecheap(a *Node, b *Node) bool {
33363336
// The result of walkrotate MUST be assigned back to n, e.g.
33373337
// n.Left = walkrotate(n.Left)
33383338
func walkrotate(n *Node) *Node {
3339+
//TODO: enable LROT on ARM64 once the old backend is gone
33393340
if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM64, sys.PPC64) {
33403341
return n
33413342
}
@@ -3529,16 +3530,6 @@ func walkdiv(n *Node, init *Nodes) *Node {
35293530
goto ret
35303531
}
35313532

3532-
// TODO(zhongwei) Test shows that TUINT8, TINT8, TUINT16 and TINT16's "quick division" method
3533-
// on current arm64 backend is slower than hardware div instruction on ARM64 due to unnecessary
3534-
// data movement between registers. It could be enabled when generated code is good enough.
3535-
if Thearch.LinkArch.Family == sys.ARM64 {
3536-
switch Simtype[nl.Type.Etype] {
3537-
case TUINT8, TINT8, TUINT16, TINT16:
3538-
return n
3539-
}
3540-
}
3541-
35423533
switch Simtype[nl.Type.Etype] {
35433534
default:
35443535
return n

src/cmd/compile/internal/ssa/gen/ARM64.rules

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,10 @@
523523
(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVDstore [off1+off2] {sym} ptr val mem)
524524
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
525525
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
526+
(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBstorezero [off1+off2] {sym} ptr mem)
527+
(MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVHstorezero [off1+off2] {sym} ptr mem)
528+
(MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVWstorezero [off1+off2] {sym} ptr mem)
529+
(MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVDstorezero [off1+off2] {sym} ptr mem)
526530

527531
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
528532
(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@@ -555,6 +559,20 @@
555559
(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
556560
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
557561
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
562+
(MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
563+
(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
564+
(MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
565+
(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
566+
(MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
567+
(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
568+
(MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
569+
(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
570+
571+
// store zero
572+
(MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVBstorezero [off] {sym} ptr mem)
573+
(MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVHstorezero [off] {sym} ptr mem)
574+
(MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVWstorezero [off] {sym} ptr mem)
575+
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
558576

559577
// replace load from same location as preceding store with copy
560578
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
@@ -567,6 +585,14 @@
567585
(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
568586
(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
569587

588+
(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
589+
(MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
590+
(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
591+
(MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
592+
(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
593+
(MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
594+
(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
595+
570596
// don't extend after proper load
571597
(MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
572598
(MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
@@ -645,21 +671,46 @@
645671
(MUL _ (MOVDconst [0])) -> (MOVDconst [0])
646672
(MUL x (MOVDconst [1])) -> x
647673
(MUL x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
674+
(MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
675+
(MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
676+
(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
677+
(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
678+
(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
679+
(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
648680

649681
(MUL (MOVDconst [-1]) x) -> (NEG x)
650682
(MUL (MOVDconst [0]) _) -> (MOVDconst [0])
651683
(MUL (MOVDconst [1]) x) -> x
652684
(MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
685+
(MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
686+
(MUL (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
687+
(MUL (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
688+
(MUL (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
689+
(MUL (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
690+
(MUL (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
691+
(MUL (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
653692

654693
(MULW x (MOVDconst [c])) && int32(c)==-1 -> (NEG x)
655694
(MULW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
656695
(MULW x (MOVDconst [c])) && int32(c)==1 -> x
657696
(MULW x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
697+
(MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
698+
(MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
699+
(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
700+
(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
701+
(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
702+
(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
658703

659704
(MULW (MOVDconst [c]) x) && int32(c)==-1 -> (NEG x)
660705
(MULW (MOVDconst [c]) _) && int32(c)==0 -> (MOVDconst [0])
661706
(MULW (MOVDconst [c]) x) && int32(c)==1 -> x
662707
(MULW (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
708+
(MULW (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
709+
(MULW (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
710+
(MULW (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
711+
(MULW (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
712+
(MULW (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
713+
(MULW (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
663714

664715
// div by constant
665716
(UDIV x (MOVDconst [1])) -> x
@@ -680,6 +731,7 @@
680731
(XOR x x) -> (MOVDconst [0])
681732
(BIC x x) -> (MOVDconst [0])
682733
(AND x (MVN y)) -> (BIC x y)
734+
(CSELULT x (MOVDconst [0]) flag) -> (CSELULT0 x flag)
683735

684736
// remove redundant *const ops
685737
(ADDconst [0] x) -> x
@@ -903,3 +955,103 @@
903955
(CSELULT _ y (FlagLT_UGT)) -> y
904956
(CSELULT x _ (FlagGT_ULT)) -> x
905957
(CSELULT _ y (FlagGT_UGT)) -> y
958+
(CSELULT0 _ (FlagEQ)) -> (MOVDconst [0])
959+
(CSELULT0 x (FlagLT_ULT)) -> x
960+
(CSELULT0 _ (FlagLT_UGT)) -> (MOVDconst [0])
961+
(CSELULT0 x (FlagGT_ULT)) -> x
962+
(CSELULT0 _ (FlagGT_UGT)) -> (MOVDconst [0])
963+
964+
// absorb shifts into ops
965+
(ADD x (SLLconst [c] y)) -> (ADDshiftLL x y [c])
966+
(ADD (SLLconst [c] y) x) -> (ADDshiftLL x y [c])
967+
(ADD x (SRLconst [c] y)) -> (ADDshiftRL x y [c])
968+
(ADD (SRLconst [c] y) x) -> (ADDshiftRL x y [c])
969+
(ADD x (SRAconst [c] y)) -> (ADDshiftRA x y [c])
970+
(ADD (SRAconst [c] y) x) -> (ADDshiftRA x y [c])
971+
(SUB x (SLLconst [c] y)) -> (SUBshiftLL x y [c])
972+
(SUB x (SRLconst [c] y)) -> (SUBshiftRL x y [c])
973+
(SUB x (SRAconst [c] y)) -> (SUBshiftRA x y [c])
974+
(AND x (SLLconst [c] y)) -> (ANDshiftLL x y [c])
975+
(AND (SLLconst [c] y) x) -> (ANDshiftLL x y [c])
976+
(AND x (SRLconst [c] y)) -> (ANDshiftRL x y [c])
977+
(AND (SRLconst [c] y) x) -> (ANDshiftRL x y [c])
978+
(AND x (SRAconst [c] y)) -> (ANDshiftRA x y [c])
979+
(AND (SRAconst [c] y) x) -> (ANDshiftRA x y [c])
980+
(OR x (SLLconst [c] y)) -> (ORshiftLL x y [c])
981+
(OR (SLLconst [c] y) x) -> (ORshiftLL x y [c])
982+
(OR x (SRLconst [c] y)) -> (ORshiftRL x y [c])
983+
(OR (SRLconst [c] y) x) -> (ORshiftRL x y [c])
984+
(OR x (SRAconst [c] y)) -> (ORshiftRA x y [c])
985+
(OR (SRAconst [c] y) x) -> (ORshiftRA x y [c])
986+
(XOR x (SLLconst [c] y)) -> (XORshiftLL x y [c])
987+
(XOR (SLLconst [c] y) x) -> (XORshiftLL x y [c])
988+
(XOR x (SRLconst [c] y)) -> (XORshiftRL x y [c])
989+
(XOR (SRLconst [c] y) x) -> (XORshiftRL x y [c])
990+
(XOR x (SRAconst [c] y)) -> (XORshiftRA x y [c])
991+
(XOR (SRAconst [c] y) x) -> (XORshiftRA x y [c])
992+
(BIC x (SLLconst [c] y)) -> (BICshiftLL x y [c])
993+
(BIC x (SRLconst [c] y)) -> (BICshiftRL x y [c])
994+
(BIC x (SRAconst [c] y)) -> (BICshiftRA x y [c])
995+
(CMP x (SLLconst [c] y)) -> (CMPshiftLL x y [c])
996+
(CMP (SLLconst [c] y) x) -> (InvertFlags (CMPshiftLL x y [c]))
997+
(CMP x (SRLconst [c] y)) -> (CMPshiftRL x y [c])
998+
(CMP (SRLconst [c] y) x) -> (InvertFlags (CMPshiftRL x y [c]))
999+
(CMP x (SRAconst [c] y)) -> (CMPshiftRA x y [c])
1000+
(CMP (SRAconst [c] y) x) -> (InvertFlags (CMPshiftRA x y [c]))
1001+
1002+
// prefer *const ops to *shift ops
1003+
(ADDshiftLL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SLLconst <x.Type> x [d]))
1004+
(ADDshiftRL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRLconst <x.Type> x [d]))
1005+
(ADDshiftRA (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRAconst <x.Type> x [d]))
1006+
(ANDshiftLL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SLLconst <x.Type> x [d]))
1007+
(ANDshiftRL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRLconst <x.Type> x [d]))
1008+
(ANDshiftRA (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRAconst <x.Type> x [d]))
1009+
(ORshiftLL (MOVDconst [c]) x [d]) -> (ORconst [c] (SLLconst <x.Type> x [d]))
1010+
(ORshiftRL (MOVDconst [c]) x [d]) -> (ORconst [c] (SRLconst <x.Type> x [d]))
1011+
(ORshiftRA (MOVDconst [c]) x [d]) -> (ORconst [c] (SRAconst <x.Type> x [d]))
1012+
(XORshiftLL (MOVDconst [c]) x [d]) -> (XORconst [c] (SLLconst <x.Type> x [d]))
1013+
(XORshiftRL (MOVDconst [c]) x [d]) -> (XORconst [c] (SRLconst <x.Type> x [d]))
1014+
(XORshiftRA (MOVDconst [c]) x [d]) -> (XORconst [c] (SRAconst <x.Type> x [d]))
1015+
(CMPshiftLL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
1016+
(CMPshiftRL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
1017+
(CMPshiftRA (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
1018+
1019+
// constant folding in *shift ops
1020+
(ADDshiftLL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)<<uint64(d))])
1021+
(ADDshiftRL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)>>uint64(d))])
1022+
(ADDshiftRA x (MOVDconst [c]) [d]) -> (ADDconst x [int64(int64(c)>>uint64(d))])
1023+
(SUBshiftLL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)<<uint64(d))])
1024+
(SUBshiftRL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)>>uint64(d))])
1025+
(SUBshiftRA x (MOVDconst [c]) [d]) -> (SUBconst x [int64(int64(c)>>uint64(d))])
1026+
(ANDshiftLL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)<<uint64(d))])
1027+
(ANDshiftRL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)>>uint64(d))])
1028+
(ANDshiftRA x (MOVDconst [c]) [d]) -> (ANDconst x [int64(int64(c)>>uint64(d))])
1029+
(ORshiftLL x (MOVDconst [c]) [d]) -> (ORconst x [int64(uint64(c)<<uint64(d))])
1030+
(ORshiftRL x (MOVDconst [c]) [d]) -> (ORconst x [int64(uint64(c)>>uint64(d))])
1031+
(ORshiftRA x (MOVDconst [c]) [d]) -> (ORconst x [int64(int64(c)>>uint64(d))])
1032+
(XORshiftLL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)<<uint64(d))])
1033+
(XORshiftRL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)>>uint64(d))])
1034+
(XORshiftRA x (MOVDconst [c]) [d]) -> (XORconst x [int64(int64(c)>>uint64(d))])
1035+
(BICshiftLL x (MOVDconst [c]) [d]) -> (BICconst x [int64(uint64(c)<<uint64(d))])
1036+
(BICshiftRL x (MOVDconst [c]) [d]) -> (BICconst x [int64(uint64(c)>>uint64(d))])
1037+
(BICshiftRA x (MOVDconst [c]) [d]) -> (BICconst x [int64(int64(c)>>uint64(d))])
1038+
(CMPshiftLL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)<<uint64(d))])
1039+
(CMPshiftRL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)>>uint64(d))])
1040+
(CMPshiftRA x (MOVDconst [c]) [d]) -> (CMPconst x [int64(int64(c)>>uint64(d))])
1041+
1042+
// simplification with *shift ops
1043+
(SUBshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
1044+
(SUBshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
1045+
(SUBshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
1046+
(ANDshiftLL x y:(SLLconst x [c]) [d]) && c==d -> y
1047+
(ANDshiftRL x y:(SRLconst x [c]) [d]) && c==d -> y
1048+
(ANDshiftRA x y:(SRAconst x [c]) [d]) && c==d -> y
1049+
(ORshiftLL x y:(SLLconst x [c]) [d]) && c==d -> y
1050+
(ORshiftRL x y:(SRLconst x [c]) [d]) && c==d -> y
1051+
(ORshiftRA x y:(SRAconst x [c]) [d]) && c==d -> y
1052+
(XORshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
1053+
(XORshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
1054+
(XORshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
1055+
(BICshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
1056+
(BICshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
1057+
(BICshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])

0 commit comments

Comments
 (0)