Skip to content

Commit b1df8d6

Browse files
committed
cmd/compile: add some LEAL{1,2,4,8} rewrite rules for AMD64
This should improve some 32 bit arithmetic operations.

During make.bash, this increases the number of rules firing by 15518:

$ wc -l rulelog-*
 13490514 rulelog-head
 13474996 rulelog-master

compress/flate benchmarks:

name                             old time/op  new time/op  delta
Decode/Digits/Huffman/1e4-8      103µs ± 4%   102µs ± 0%   -0.95%  (p=0.000 n=30+27)
Decode/Digits/Huffman/1e5-8      962µs ± 2%   954µs ± 1%   -0.80%  (p=0.000 n=25+25)
Decode/Digits/Huffman/1e6-8      9.55ms ± 1%  9.50ms ± 1%  -0.57%  (p=0.000 n=29+29)
Decode/Digits/Speed/1e4-8        110µs ± 2%   110µs ± 2%   -0.41%  (p=0.003 n=28+30)
Decode/Digits/Speed/1e5-8        1.15ms ± 1%  1.14ms ± 1%  -0.85%  (p=0.000 n=29+28)
Decode/Digits/Speed/1e6-8        11.5ms ± 2%  11.4ms ± 1%  -1.26%  (p=0.000 n=28+27)
Decode/Digits/Default/1e4-8      113µs ± 1%   112µs ± 1%   -0.49%  (p=0.001 n=27+30)
Decode/Digits/Default/1e5-8      1.13ms ± 0%  1.12ms ± 1%  -0.75%  (p=0.000 n=26+24)
Decode/Digits/Default/1e6-8      11.1ms ± 1%  11.1ms ± 1%  -0.47%  (p=0.000 n=28+27)
Decode/Digits/Compression/1e4-8  113µs ± 1%   112µs ± 1%   -0.70%  (p=0.000 n=28+29)
Decode/Digits/Compression/1e5-8  1.13ms ± 2%  1.12ms ± 1%  -1.41%  (p=0.000 n=28+26)
Decode/Digits/Compression/1e6-8  11.1ms ± 1%  11.1ms ± 1%  -0.33%  (p=0.002 n=29+27)
Decode/Twain/Huffman/1e4-8       115µs ± 1%   115µs ± 1%   -0.40%  (p=0.000 n=28+26)
Decode/Twain/Huffman/1e5-8       1.05ms ± 1%  1.04ms ± 0%  -0.41%  (p=0.000 n=27+25)
Decode/Twain/Huffman/1e6-8       10.4ms ± 1%  10.4ms ± 1%  ~       (p=0.993 n=28+24)
Decode/Twain/Speed/1e4-8         118µs ± 2%   116µs ± 1%   -1.08%  (p=0.000 n=27+29)
Decode/Twain/Speed/1e5-8         1.07ms ± 1%  1.07ms ± 1%  -0.23%  (p=0.041 n=26+27)
Decode/Twain/Speed/1e6-8         10.6ms ± 1%  10.5ms ± 0%  -0.68%  (p=0.000 n=29+27)
Decode/Twain/Default/1e4-8       110µs ± 1%   109µs ± 0%   -0.49%  (p=0.000 n=29+26)
Decode/Twain/Default/1e5-8       906µs ± 1%   902µs ± 1%   -0.48%  (p=0.000 n=27+28)
Decode/Twain/Default/1e6-8       8.75ms ± 1%  8.68ms ± 2%  -0.73%  (p=0.000 n=28+28)
Decode/Twain/Compression/1e4-8   110µs ± 1%   109µs ± 1%   -0.80%  (p=0.000 n=27+28)
Decode/Twain/Compression/1e5-8   905µs ± 1%   906µs ± 5%   ~       (p=0.065 n=27+29)
Decode/Twain/Compression/1e6-8   8.75ms ± 2%  8.68ms ± 1%  -0.76%  (p=0.000 n=26+26)
Encode/Digits/Huffman/1e4-8      31.8µs ± 1%  32.3µs ± 2%  +1.43%  (p=0.000 n=28+27)
Encode/Digits/Huffman/1e5-8      299µs ± 2%   296µs ± 1%   -1.05%  (p=0.000 n=29+29)
Encode/Digits/Huffman/1e6-8      2.99ms ± 3%  2.96ms ± 1%  -1.00%  (p=0.000 n=29+28)
Encode/Digits/Speed/1e4-8        149µs ± 1%   152µs ± 4%   +2.18%  (p=0.000 n=30+30)
Encode/Digits/Speed/1e5-8        1.39ms ± 1%  1.40ms ± 2%  +1.02%  (p=0.000 n=27+27)
Encode/Digits/Speed/1e6-8        13.7ms ± 0%  13.8ms ± 1%  +0.81%  (p=0.000 n=27+27)
Encode/Digits/Default/1e4-8      297µs ± 7%   297µs ± 7%   ~       (p=1.000 n=30+30)
Encode/Digits/Default/1e5-8      4.51ms ± 1%  4.42ms ± 1%  -2.06%  (p=0.000 n=29+29)
Encode/Digits/Default/1e6-8      47.5ms ± 1%  46.6ms ± 1%  -1.90%  (p=0.000 n=27+25)
Encode/Digits/Compression/1e4-8  302µs ± 7%   303µs ± 9%   ~       (p=0.854 n=30+30)
Encode/Digits/Compression/1e5-8  4.52ms ± 1%  4.43ms ± 2%  -1.91%  (p=0.000 n=26+25)
Encode/Digits/Compression/1e6-8  47.5ms ± 1%  46.7ms ± 1%  -1.70%  (p=0.000 n=26+27)
Encode/Twain/Huffman/1e4-8       46.6µs ± 2%  46.8µs ± 2%  ~       (p=0.114 n=30+30)
Encode/Twain/Huffman/1e5-8       357µs ± 3%   352µs ± 2%   -1.13%  (p=0.000 n=29+28)
Encode/Twain/Huffman/1e6-8       3.58ms ± 4%  3.52ms ± 1%  -1.43%  (p=0.003 n=30+28)
Encode/Twain/Speed/1e4-8         173µs ± 1%   174µs ± 1%   +0.65%  (p=0.000 n=27+28)
Encode/Twain/Speed/1e5-8         1.39ms ± 1%  1.40ms ± 1%  +0.92%  (p=0.000 n=28+27)
Encode/Twain/Speed/1e6-8         13.6ms ± 1%  13.7ms ± 1%  +0.51%  (p=0.000 n=25+26)
Encode/Twain/Default/1e4-8       364µs ± 5%   361µs ± 5%   ~       (p=0.219 n=30+30)
Encode/Twain/Default/1e5-8       5.41ms ± 1%  5.43ms ± 5%  ~       (p=0.655 n=27+27)
Encode/Twain/Default/1e6-8       57.2ms ± 1%  58.4ms ± 4%  +2.15%  (p=0.000 n=22+28)
Encode/Twain/Compression/1e4-8   371µs ± 9%   373µs ± 6%   ~       (p=0.503 n=30+29)
Encode/Twain/Compression/1e5-8   5.97ms ± 2%  5.92ms ± 1%  -0.75%  (p=0.000 n=28+26)
Encode/Twain/Compression/1e6-8   64.0ms ± 1%  63.8ms ± 1%  -0.36%  (p=0.036 n=27+25)
[Geo mean]                       1.37ms       1.36ms       -0.38%

Change-Id: I3df4de63f06eaf121c38821bd889453a8de1b199
Reviewed-on: https://go-review.googlesource.com/101276
Reviewed-by: Keith Randall <[email protected]>
1 parent 44286b1 commit b1df8d6

File tree

2 files changed

+5554
-4198
lines changed

2 files changed

+5554
-4198
lines changed

src/cmd/compile/internal/ssa/gen/AMD64.rules

Lines changed: 71 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -852,10 +852,8 @@
852852
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
853853

854854
// Using MOVZX instead of AND is cheaper.
855-
(ANDLconst [0xFF] x) -> (MOVBQZX x)
856-
(ANDLconst [0xFFFF] x) -> (MOVWQZX x)
857-
(ANDQconst [0xFF] x) -> (MOVBQZX x)
858-
(ANDQconst [0xFFFF] x) -> (MOVWQZX x)
855+
(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
856+
(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
859857
(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
860858

861859
// strength reduction
@@ -867,75 +865,75 @@
867865
// which can require a register-register move
868866
// to preserve the original value,
869867
// so it must be used with care.
870-
(MULQconst [-9] x) -> (NEGQ (LEAQ8 <v.Type> x x))
871-
(MULQconst [-5] x) -> (NEGQ (LEAQ4 <v.Type> x x))
872-
(MULQconst [-3] x) -> (NEGQ (LEAQ2 <v.Type> x x))
873-
(MULQconst [-1] x) -> (NEGQ x)
874-
(MULQconst [0] _) -> (MOVQconst [0])
875-
(MULQconst [1] x) -> x
876-
(MULQconst [3] x) -> (LEAQ2 x x)
877-
(MULQconst [5] x) -> (LEAQ4 x x)
878-
(MULQconst [7] x) -> (LEAQ2 x (LEAQ2 <v.Type> x x))
879-
(MULQconst [9] x) -> (LEAQ8 x x)
880-
(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x))
881-
(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x))
882-
(MULQconst [19] x) -> (LEAQ2 x (LEAQ8 <v.Type> x x))
883-
(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x))
884-
(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x))
885-
(MULQconst [27] x) -> (LEAQ8 (LEAQ2 <v.Type> x x) (LEAQ2 <v.Type> x x))
886-
(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x))
887-
(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x))
888-
(MULQconst [45] x) -> (LEAQ8 (LEAQ4 <v.Type> x x) (LEAQ4 <v.Type> x x))
889-
(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x))
890-
(MULQconst [81] x) -> (LEAQ8 (LEAQ8 <v.Type> x x) (LEAQ8 <v.Type> x x))
891-
892-
(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
893-
(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
894-
(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
895-
(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
896-
(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
897-
(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
898-
(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
899-
(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
900-
901-
// combine add/shift into LEAQ
902-
(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
903-
(ADDQ x (SHLQconst [2] y)) -> (LEAQ4 x y)
904-
(ADDQ x (SHLQconst [1] y)) -> (LEAQ2 x y)
905-
(ADDQ x (ADDQ y y)) -> (LEAQ2 x y)
906-
(ADDQ x (ADDQ x y)) -> (LEAQ2 y x)
907-
908-
// combine ADDQ/ADDQconst into LEAQ1
909-
(ADDQconst [c] (ADDQ x y)) -> (LEAQ1 [c] x y)
910-
(ADDQ (ADDQconst [c] x) y) -> (LEAQ1 [c] x y)
868+
(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
869+
(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
870+
(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
871+
(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x)
872+
(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0])
873+
(MUL(Q|L)const [ 1] x) -> x
874+
(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x)
875+
(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x)
876+
(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
877+
(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x)
878+
(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
879+
(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
880+
(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
881+
(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
882+
(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
883+
(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
884+
(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
885+
(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
886+
(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
887+
(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
888+
(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
889+
890+
(MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x)
891+
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x)
892+
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x)
893+
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x)
894+
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x)
895+
(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x))
896+
(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x))
897+
(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x))
898+
899+
// combine add/shift into LEAQ/LEAL
900+
(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y)
901+
(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y)
902+
(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y)
903+
(ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y)
904+
(ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x)
905+
906+
// combine ADDQ/ADDQconst into LEAQ1/LEAL1
907+
(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y)
908+
(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y)
911909
(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x)
912910

913-
// fold ADDQ into LEAQ
914-
(ADDQconst [c] (LEAQ [d] {s} x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
915-
(LEAQ [c] {s} (ADDQconst [d] x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
916-
(LEAQ [c] {s} (ADDQ x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
917-
(ADDQ x (LEAQ [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
918-
919-
// fold ADDQconst into LEAQx
920-
(ADDQconst [c] (LEAQ1 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ1 [c+d] {s} x y)
921-
(ADDQconst [c] (LEAQ2 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ2 [c+d] {s} x y)
922-
(ADDQconst [c] (LEAQ4 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ4 [c+d] {s} x y)
923-
(ADDQconst [c] (LEAQ8 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ8 [c+d] {s} x y)
924-
(LEAQ1 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
925-
(LEAQ2 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ2 [c+d] {s} x y)
926-
(LEAQ2 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAQ2 [c+2*d] {s} x y)
927-
(LEAQ4 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ4 [c+d] {s} x y)
928-
(LEAQ4 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEAQ4 [c+4*d] {s} x y)
929-
(LEAQ8 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ8 [c+d] {s} x y)
930-
(LEAQ8 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEAQ8 [c+8*d] {s} x y)
931-
932-
// fold shifts into LEAQx
933-
(LEAQ1 [c] {s} x (SHLQconst [1] y)) -> (LEAQ2 [c] {s} x y)
934-
(LEAQ1 [c] {s} x (SHLQconst [2] y)) -> (LEAQ4 [c] {s} x y)
935-
(LEAQ1 [c] {s} x (SHLQconst [3] y)) -> (LEAQ8 [c] {s} x y)
936-
(LEAQ2 [c] {s} x (SHLQconst [1] y)) -> (LEAQ4 [c] {s} x y)
937-
(LEAQ2 [c] {s} x (SHLQconst [2] y)) -> (LEAQ8 [c] {s} x y)
938-
(LEAQ4 [c] {s} x (SHLQconst [1] y)) -> (LEAQ8 [c] {s} x y)
911+
// fold ADDQ/ADDL into LEAQ/LEAL
912+
(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
913+
(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
914+
(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
915+
(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
916+
917+
// fold ADDQconst/ADDLconst into LEAQx/LEALx
918+
(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y)
919+
(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y)
920+
(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y)
921+
(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y)
922+
(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y)
923+
(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y)
924+
(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y)
925+
(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y)
926+
(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y)
927+
(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y)
928+
(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y)
929+
930+
// fold shifts into LEAQx/LEALx
931+
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y)
932+
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y)
933+
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y)
934+
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y)
935+
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y)
936+
(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y)
939937

940938
// reverse ordering of compare instruction
941939
(SETL (InvertFlags x)) -> (SETG x)
@@ -2219,12 +2217,6 @@
22192217
&& clobber(mem2)
22202218
-> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
22212219

2222-
// amd64p32 rules
2223-
// same as the rules above, but with 32 instead of 64 bit pointer arithmetic.
2224-
// LEAQ,ADDQ -> LEAL,ADDL
2225-
(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
2226-
(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
2227-
22282220
(MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
22292221
(MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
22302222
(MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
@@ -2410,8 +2402,8 @@
24102402
(MOVLi2f <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
24112403

24122404
// LEAQ is rematerializeable, so this helps to avoid register spill.
2413-
// See isuue 22947 for details
2414-
(ADDQconst [off] x:(SP)) -> (LEAQ [off] x)
2405+
// See issue 22947 for details
2406+
(ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x)
24152407

24162408
// Fold loads into compares
24172409
// Note: these may be undone by the flagalloc pass.

0 commit comments

Comments
 (0)