Skip to content

Commit cedf500

Browse files
committed
cmd/compile: introduce separate memory op combining pass
Memory op combining is currently done using arch-specific rewrite rules. Instead, do them as an arch-independent rewrite pass. This ensures that all architectures (with unaligned loads & stores) get equal treatment. This removes a lot of rewrite rules. The new pass is a bit more comprehensive. It handles things like out-of-order writes and is careful not to apply partial optimizations that then block further optimizations. Change-Id: I780ff3bb052475cd725a923309616882d25b8d9e Reviewed-on: https://go-review.googlesource.com/c/go/+/478475 Reviewed-by: Keith Randall <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Run-TryBot: Keith Randall <[email protected]> Reviewed-by: David Chase <[email protected]>
1 parent e9c2607 commit cedf500

17 files changed

+8907
-21955
lines changed

src/cmd/compile/internal/ssa/_gen/386.rules

Lines changed: 1 addition & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
(OffPtr [off] ptr) => (ADDLconst [int32(off)] ptr)
5353

5454
(Bswap32 ...) => (BSWAPL ...)
55+
(Bswap16 x) => (ROLWconst [8] x)
5556

5657
(Sqrt ...) => (SQRTSD ...)
5758
(Sqrt32 ...) => (SQRTSS ...)
@@ -918,160 +919,6 @@
918919
// Convert LEAL1 back to ADDL if we can
919920
(LEAL1 [0] {nil} x y) => (ADDL x y)
920921

921-
// Combining byte loads into larger (unaligned) loads.
922-
// There are many ways these combinations could occur. This is
923-
// designed to match the way encoding/binary.LittleEndian does it.
924-
(ORL x0:(MOVBload [i0] {s} p mem)
925-
s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
926-
&& i1 == i0+1
927-
&& x0.Uses == 1
928-
&& x1.Uses == 1
929-
&& s0.Uses == 1
930-
&& mergePoint(b,x0,x1) != nil
931-
&& clobber(x0, x1, s0)
932-
=> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
933-
934-
(ORL x0:(MOVBload [i] {s} p0 mem)
935-
s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
936-
&& x0.Uses == 1
937-
&& x1.Uses == 1
938-
&& s0.Uses == 1
939-
&& sequentialAddresses(p0, p1, 1)
940-
&& mergePoint(b,x0,x1) != nil
941-
&& clobber(x0, x1, s0)
942-
=> @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
943-
944-
(ORL o0:(ORL
945-
x0:(MOVWload [i0] {s} p mem)
946-
s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
947-
s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
948-
&& i2 == i0+2
949-
&& i3 == i0+3
950-
&& x0.Uses == 1
951-
&& x1.Uses == 1
952-
&& x2.Uses == 1
953-
&& s0.Uses == 1
954-
&& s1.Uses == 1
955-
&& o0.Uses == 1
956-
&& mergePoint(b,x0,x1,x2) != nil
957-
&& clobber(x0, x1, x2, s0, s1, o0)
958-
=> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
959-
960-
(ORL o0:(ORL
961-
x0:(MOVWload [i] {s} p0 mem)
962-
s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem)))
963-
s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem)))
964-
&& x0.Uses == 1
965-
&& x1.Uses == 1
966-
&& x2.Uses == 1
967-
&& s0.Uses == 1
968-
&& s1.Uses == 1
969-
&& o0.Uses == 1
970-
&& sequentialAddresses(p0, p1, 2)
971-
&& sequentialAddresses(p1, p2, 1)
972-
&& mergePoint(b,x0,x1,x2) != nil
973-
&& clobber(x0, x1, x2, s0, s1, o0)
974-
=> @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem)
975-
976-
// Combine constant stores into larger (unaligned) stores.
977-
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
978-
&& x.Uses == 1
979-
&& a.Off() + 1 == c.Off()
980-
&& clobber(x)
981-
=> (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
982-
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
983-
&& x.Uses == 1
984-
&& a.Off() + 1 == c.Off()
985-
&& clobber(x)
986-
=> (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
987-
988-
(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
989-
&& x.Uses == 1
990-
&& a.Off() == c.Off()
991-
&& sequentialAddresses(p0, p1, 1)
992-
&& clobber(x)
993-
=> (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
994-
(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
995-
&& x.Uses == 1
996-
&& a.Off() == c.Off()
997-
&& sequentialAddresses(p0, p1, 1)
998-
&& clobber(x)
999-
=> (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
1000-
1001-
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
1002-
&& x.Uses == 1
1003-
&& a.Off() + 2 == c.Off()
1004-
&& clobber(x)
1005-
=> (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
1006-
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
1007-
&& x.Uses == 1
1008-
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
1009-
&& clobber(x)
1010-
=> (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
1011-
1012-
(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
1013-
&& x.Uses == 1
1014-
&& a.Off() == c.Off()
1015-
&& sequentialAddresses(p0, p1, 2)
1016-
&& clobber(x)
1017-
=> (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
1018-
(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
1019-
&& x.Uses == 1
1020-
&& a.Off() == c.Off()
1021-
&& sequentialAddresses(p0, p1, 2)
1022-
&& clobber(x)
1023-
=> (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
1024-
1025-
// Combine stores into larger (unaligned) stores.
1026-
(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
1027-
&& x.Uses == 1
1028-
&& clobber(x)
1029-
=> (MOVWstore [i-1] {s} p w mem)
1030-
(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem))
1031-
&& x.Uses == 1
1032-
&& clobber(x)
1033-
=> (MOVWstore [i] {s} p w mem)
1034-
(MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
1035-
&& x.Uses == 1
1036-
&& clobber(x)
1037-
=> (MOVWstore [i-1] {s} p w0 mem)
1038-
1039-
(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
1040-
&& x.Uses == 1
1041-
&& sequentialAddresses(p0, p1, 1)
1042-
&& clobber(x)
1043-
=> (MOVWstore [i] {s} p0 w mem)
1044-
(MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHR(W|L)const [8] w) mem))
1045-
&& x.Uses == 1
1046-
&& sequentialAddresses(p0, p1, 1)
1047-
&& clobber(x)
1048-
=> (MOVWstore [i] {s} p0 w mem)
1049-
(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
1050-
&& x.Uses == 1
1051-
&& sequentialAddresses(p0, p1, 1)
1052-
&& clobber(x)
1053-
=> (MOVWstore [i] {s} p0 w0 mem)
1054-
1055-
(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
1056-
&& x.Uses == 1
1057-
&& clobber(x)
1058-
=> (MOVLstore [i-2] {s} p w mem)
1059-
(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
1060-
&& x.Uses == 1
1061-
&& clobber(x)
1062-
=> (MOVLstore [i-2] {s} p w0 mem)
1063-
1064-
(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
1065-
&& x.Uses == 1
1066-
&& sequentialAddresses(p0, p1, 2)
1067-
&& clobber(x)
1068-
=> (MOVLstore [i] {s} p0 w mem)
1069-
(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
1070-
&& x.Uses == 1
1071-
&& sequentialAddresses(p0, p1, 2)
1072-
&& clobber(x)
1073-
=> (MOVLstore [i] {s} p0 w0 mem)
1074-
1075922
// For PIC, break floating-point constant loading into two instructions so we have
1076923
// a register to use for holding the address of the constant pool entry.
1077924
(MOVSSconst [c]) && config.ctxt.Flag_shared => (MOVSSconst2 (MOVSSconst1 [c]))

0 commit comments

Comments
 (0)