852 | 852 | (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
853 | 853 |
854 | 854 | // Using MOVZX instead of AND is cheaper.
855 |     | -(ANDLconst [0xFF] x) -> (MOVBQZX x)
856 |     | -(ANDLconst [0xFFFF] x) -> (MOVWQZX x)
857 |     | -(ANDQconst [0xFF] x) -> (MOVBQZX x)
858 |     | -(ANDQconst [0xFFFF] x) -> (MOVWQZX x)
    | 855 | +(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
    | 856 | +(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
859 | 857 | (ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
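Note: rulegen's (Q|L) alternation expands one source line into a rule per width, so the two + lines above generate all four of the rules they replace. As a sketch of what the rewrite buys (mine, not part of the commit; the function name is hypothetical), masking with 0xFF is just a zero-extension of the low byte:

    package main

    import "fmt"

    // maskByte keeps the low 8 bits of x. Under the rule above, the
    // amd64 backend may lower the AND with 0xFF to MOVBQZX (a
    // zero-extending byte move) instead of an ANDQ with an immediate.
    func maskByte(x uint64) uint64 {
        return x & 0xFF
    }

    func main() {
        fmt.Printf("%#x\n", maskByte(0x1234)) // prints 0x34
    }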
860 | 858 |
861 | 859 | // strength reduction
867 | 865 | // which can require a register-register move
868 | 866 | // to preserve the original value,
869 | 867 | // so it must be used with care.
870 |     | -(MULQconst [-9] x) -> (NEGQ (LEAQ8 <v.Type> x x))
871 |     | -(MULQconst [-5] x) -> (NEGQ (LEAQ4 <v.Type> x x))
872 |     | -(MULQconst [-3] x) -> (NEGQ (LEAQ2 <v.Type> x x))
873 |     | -(MULQconst [-1] x) -> (NEGQ x)
874 |     | -(MULQconst [0] _) -> (MOVQconst [0])
875 |     | -(MULQconst [1] x) -> x
876 |     | -(MULQconst [3] x) -> (LEAQ2 x x)
877 |     | -(MULQconst [5] x) -> (LEAQ4 x x)
878 |     | -(MULQconst [7] x) -> (LEAQ2 x (LEAQ2 <v.Type> x x))
879 |     | -(MULQconst [9] x) -> (LEAQ8 x x)
880 |     | -(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x))
881 |     | -(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x))
882 |     | -(MULQconst [19] x) -> (LEAQ2 x (LEAQ8 <v.Type> x x))
883 |     | -(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x))
884 |     | -(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x))
885 |     | -(MULQconst [27] x) -> (LEAQ8 (LEAQ2 <v.Type> x x) (LEAQ2 <v.Type> x x))
886 |     | -(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x))
887 |     | -(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x))
888 |     | -(MULQconst [45] x) -> (LEAQ8 (LEAQ4 <v.Type> x x) (LEAQ4 <v.Type> x x))
889 |     | -(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x))
890 |     | -(MULQconst [81] x) -> (LEAQ8 (LEAQ8 <v.Type> x x) (LEAQ8 <v.Type> x x))
891 |     | -
892 |     | -(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
893 |     | -(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
894 |     | -(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
895 |     | -(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
896 |     | -(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
897 |     | -(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
898 |     | -(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
899 |     | -(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
900 |     | -
901 |     | -// combine add/shift into LEAQ
902 |     | -(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
903 |     | -(ADDQ x (SHLQconst [2] y)) -> (LEAQ4 x y)
904 |     | -(ADDQ x (SHLQconst [1] y)) -> (LEAQ2 x y)
905 |     | -(ADDQ x (ADDQ y y)) -> (LEAQ2 x y)
906 |     | -(ADDQ x (ADDQ x y)) -> (LEAQ2 y x)
907 |     | -
908 |     | -// combine ADDQ/ADDQconst into LEAQ1
909 |     | -(ADDQconst [c] (ADDQ x y)) -> (LEAQ1 [c] x y)
910 |     | -(ADDQ (ADDQconst [c] x) y) -> (LEAQ1 [c] x y)
    | 868 | +(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
    | 869 | +(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
    | 870 | +(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
    | 871 | +(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x)
    | 872 | +(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0])
    | 873 | +(MUL(Q|L)const [ 1] x) -> x
    | 874 | +(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x)
    | 875 | +(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x)
    | 876 | +(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
    | 877 | +(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x)
    | 878 | +(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
    | 879 | +(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
    | 880 | +(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
    | 881 | +(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
    | 882 | +(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
    | 883 | +(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
    | 884 | +(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
    | 885 | +(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
    | 886 | +(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
    | 887 | +(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
    | 888 | +(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
    | 889 | +
    | 890 | +(MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x)
    | 891 | +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x)
    | 892 | +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x)
    | 893 | +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x)
    | 894 | +(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x)
    | 895 | +(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x))
    | 896 | +(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x))
    | 897 | +(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x))
    | 898 | +
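A quick sanity check on the LEA arithmetic (my sketch, not part of the commit): LEA(Q|L)2/4/8 compute base + 2/4/8 * index, so every constant rule above is an algebraic identity, e.g. [11] is x + 2*(x + 4*x) = 11*x.

    package main

    import "fmt"

    // lea2, lea4, lea8 model the address arithmetic of the
    // LEAQ2/LEAQ4/LEAQ8 ops: base + scale*index.
    func lea2(x, y int64) int64 { return x + 2*y }
    func lea4(x, y int64) int64 { return x + 4*y }
    func lea8(x, y int64) int64 { return x + 8*y }

    func main() {
        x := int64(7)
        // [11]: (LEAQ2 x (LEAQ4 x x)) = x + 2*(x + 4*x) = 11*x
        fmt.Println(lea2(x, lea4(x, x)) == 11*x) // true
        // [27]: (LEAQ8 (LEAQ2 x x) (LEAQ2 x x)) = 3*x + 8*(3*x) = 27*x
        fmt.Println(lea8(lea2(x, x), lea2(x, x)) == 27*x) // true
    }

The guarded rules are the same idea for near-powers-of-two: for example, the isPowerOfTwo(c+1) rule rewrites c*x as (c+1)*x - x, one shift and one subtract.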
    | 899 | +// combine add/shift into LEAQ/LEAL
    | 900 | +(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y)
    | 901 | +(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y)
    | 902 | +(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y)
    | 903 | +(ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y)
    | 904 | +(ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x)
    | 905 | +
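These are the same identities read from the add side: x + (y<<3) = x + 8*y is exactly (LEA(L|Q)8 x y), and x + (y+y) = x + 2*y. In the last rule the operands swap because x + (x+y) = y + 2*x, i.e. (LEA(L|Q)2 y x).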
    | 906 | +// combine ADDQ/ADDQconst into LEAQ1/LEAL1
    | 907 | +(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y)
    | 908 | +(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y)
911 | 909 | (ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x)
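LEA(Q|L)1 is the scale-1 form with a displacement, computing x + y + c, so all three shapes above collapse an add-plus-constant into one instruction; the unchanged line handles the x == y case, since c + (x<<1) = c + x + x.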
912 | 910 |
913 |     | -// fold ADDQ into LEAQ
914 |     | -(ADDQconst [c] (LEAQ [d] {s} x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
915 |     | -(LEAQ [c] {s} (ADDQconst [d] x)) && is32Bit(c+d) -> (LEAQ [c+d] {s} x)
916 |     | -(LEAQ [c] {s} (ADDQ x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
917 |     | -(ADDQ x (LEAQ [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
918 |     | -
919 |     | -// fold ADDQconst into LEAQx
920 |     | -(ADDQconst [c] (LEAQ1 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ1 [c+d] {s} x y)
921 |     | -(ADDQconst [c] (LEAQ2 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ2 [c+d] {s} x y)
922 |     | -(ADDQconst [c] (LEAQ4 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ4 [c+d] {s} x y)
923 |     | -(ADDQconst [c] (LEAQ8 [d] {s} x y)) && is32Bit(c+d) -> (LEAQ8 [c+d] {s} x y)
924 |     | -(LEAQ1 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
925 |     | -(LEAQ2 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ2 [c+d] {s} x y)
926 |     | -(LEAQ2 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAQ2 [c+2*d] {s} x y)
927 |     | -(LEAQ4 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ4 [c+d] {s} x y)
928 |     | -(LEAQ4 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEAQ4 [c+4*d] {s} x y)
929 |     | -(LEAQ8 [c] {s} (ADDQconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAQ8 [c+d] {s} x y)
930 |     | -(LEAQ8 [c] {s} x (ADDQconst [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEAQ8 [c+8*d] {s} x y)
931 |     | -
932 |     | -// fold shifts into LEAQx
933 |     | -(LEAQ1 [c] {s} x (SHLQconst [1] y)) -> (LEAQ2 [c] {s} x y)
934 |     | -(LEAQ1 [c] {s} x (SHLQconst [2] y)) -> (LEAQ4 [c] {s} x y)
935 |     | -(LEAQ1 [c] {s} x (SHLQconst [3] y)) -> (LEAQ8 [c] {s} x y)
936 |     | -(LEAQ2 [c] {s} x (SHLQconst [1] y)) -> (LEAQ4 [c] {s} x y)
937 |     | -(LEAQ2 [c] {s} x (SHLQconst [2] y)) -> (LEAQ8 [c] {s} x y)
938 |     | -(LEAQ4 [c] {s} x (SHLQconst [1] y)) -> (LEAQ8 [c] {s} x y)
    | 911 | +// fold ADDQ/ADDL into LEAQ/LEAL
    | 912 | +(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
    | 913 | +(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
    | 914 | +(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
    | 915 | +(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
    | 916 | +
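The x.Op != OpSB guards keep the static-base pseudo-register SB out of the two-register LEA forms; as I read it, an SB-relative address is materialized via relocation and only works as a plain base, so it must not be moved into a base/index pair.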
    | 917 | +// fold ADDQconst/ADDLconst into LEAQx/LEALx
    | 918 | +(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y)
    | 919 | +(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y)
    | 920 | +(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y)
    | 921 | +(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y)
    | 922 | +(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y)
    | 923 | +(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y)
    | 924 | +(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y)
    | 925 | +(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y)
    | 926 | +(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y)
    | 927 | +(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y)
    | 928 | +(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y)
    | 929 | +
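When the constant folds into the scaled operand, it is multiplied by the scale: c + x + 2*(y+d) = (c+2*d) + x + 2*y, hence the c+2*d, c+4*d, and c+8*d displacements. The is32Bit guards keep the folded displacement inside the signed 32-bit field that x86 addressing modes allow.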
    | 930 | +// fold shifts into LEAQx/LEALx
    | 931 | +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y)
    | 932 | +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y)
    | 933 | +(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y)
    | 934 | +(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y)
    | 935 | +(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y)
    | 936 | +(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y)
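A shift on the index multiplies the effective scale: c + x + 2*(y<<1) = c + x + 4*y, so a LEA with scale 2 of a doubled index becomes scale 4. The chains stop at scale 8, the largest scale x86 addressing supports.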
939 | 937 |
940 | 938 | // reverse ordering of compare instruction
941 | 939 | (SETL (InvertFlags x)) -> (SETG x)
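InvertFlags stands for the flags of the same comparison with its operands swapped, so a signed less-than test of inverted flags is a greater-than test of the originals. This pairs with the CMPB rule at the top of the hunk, which canonicalizes constant-on-the-left compares by swapping the operands and wrapping the result in InvertFlags.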
2219 | 2217 | && clobber(mem2)
2220 | 2218 | -> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
2221 | 2219 |
2222 |     | -// amd64p32 rules
2223 |     | -// same as the rules above, but with 32 instead of 64 bit pointer arithmetic.
2224 |     | -// LEAQ,ADDQ -> LEAL,ADDL
2225 |     | -(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
2226 |     | -(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
2227 |     | -
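With the combined (Q|L) rules above now generating the LEAL/ADDL variants directly, these hand-written 32-bit duplicates are redundant, which is why the commit drops them.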
2228 | 2220 | (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2229 | 2221 | (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
2230 | 2222 | (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
2410 | 2402 | (MOVLi2f <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
2411 | 2403 |
2412 | 2404 | // LEAQ is rematerializeable, so this helps to avoid register spill.
2413 |      | -// See isuue 22947 for details
2414 |      | -(ADDQconst [off] x:(SP)) -> (LEAQ [off] x)
     | 2405 | +// See issue 22947 for details
     | 2406 | +(ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x)
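The point of the rewrite: LEAQ is marked rematerializeable, so the register allocator can recompute off(SP) at each use instead of spilling it, while the equivalent ADDQconst is not. The combined form extends the same fix to 32-bit pointer arithmetic.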
2415 | 2407 |
2416 | 2408 | // Fold loads into compares
2417 | 2409 | // Note: these may be undone by the flagalloc pass.