Skip to content

Commit 4658748

Browse files
randall77thanm
authored andcommitted
[release-branch.go1.22] cmd/compile: fix sign/zero-extension removal
When an opcode generates a known high bit state (typically, a sub-word operation that zeros the high bits), we can remove any subsequent extension operation that would be a no-op. x = (OP ...) y = (ZeroExt32to64 x) If OP zeros the high 32 bits, then we can replace y with x, as the zero extension doesn't do anything. However, x in this situation normally has a sub-word-sized type. The semantics of values in registers is typically that the high bits beyond the value's type size are junk. So although the opcode generating x *currently* zeros the high bits, after x is rewritten to another opcode it may not - rewrites of sub-word-sized values can trash the high bits. To fix, move the extension-removing rules to late lower. That ensures that their arguments won't be rewritten to change their high bits. I am also worried about spilling and restoring. Spilling and restoring doesn't preserve the high bits, but instead sets them to a known value (often 0, but in some cases it could be sign-extended). I am unable to come up with a case that would cause a problem here, so leaving for another time. Update #66076 Change-Id: I3b5c091b3b3278ccbb7f11beda8b56f4b6d3fde7 Reviewed-on: https://go-review.googlesource.com/c/go/+/568616 Reviewed-by: Keith Randall <[email protected]> Reviewed-by: Cherry Mui <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> (cherry picked from commit a46ecdc) Reviewed-on: https://go-review.googlesource.com/c/go/+/573375
1 parent 0a5b33a commit 4658748

10 files changed

+914
-828
lines changed

src/cmd/compile/internal/ssa/_gen/AMD64.rules

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,10 +1020,6 @@
10201020
(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
10211021
(MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
10221022

1023-
(MOVLQZX x) && zeroUpper32Bits(x,3) => x
1024-
(MOVWQZX x) && zeroUpper48Bits(x,3) => x
1025-
(MOVBQZX x) && zeroUpper56Bits(x,3) => x
1026-
10271023
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
10281024
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQZX x)
10291025
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQZX x)

src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,8 @@
66
(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
77
(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y)
88
(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
9+
10+
// See comments in ARM64latelower.rules for why these are here.
11+
(MOVLQZX x) && zeroUpper32Bits(x,3) => x
12+
(MOVWQZX x) && zeroUpper48Bits(x,3) => x
13+
(MOVBQZX x) && zeroUpper56Bits(x,3) => x

src/cmd/compile/internal/ssa/_gen/ARM64.rules

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,61 +1054,6 @@
10541054
(MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
10551055
(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
10561056

1057-
// don't extend after proper load
1058-
(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
1059-
(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
1060-
(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
1061-
(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
1062-
(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
1063-
(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
1064-
(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
1065-
(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
1066-
(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
1067-
(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
1068-
(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
1069-
(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
1070-
(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
1071-
(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
1072-
(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
1073-
(MOVBreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
1074-
(MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
1075-
(MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
1076-
(MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
1077-
(MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
1078-
(MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
1079-
(MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
1080-
(MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
1081-
(MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
1082-
(MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
1083-
(MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
1084-
(MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x)
1085-
(MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
1086-
(MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
1087-
(MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x)
1088-
(MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
1089-
(MOVHUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
1090-
(MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
1091-
(MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
1092-
(MOVWreg x:(MOVWloadidx4 _ _ _)) => (MOVDreg x)
1093-
(MOVWUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
1094-
(MOVWUreg x:(MOVWUloadidx4 _ _ _)) => (MOVDreg x)
1095-
1096-
// fold double extensions
1097-
(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
1098-
(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
1099-
(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
1100-
(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
1101-
(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
1102-
(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
1103-
(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
1104-
(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
1105-
(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
1106-
(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
1107-
(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
1108-
(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
1109-
(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
1110-
(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
1111-
11121057
// don't extend before store
11131058
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
11141059
(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
@@ -1572,18 +1517,11 @@
15721517
(LessThanNoov (InvertFlags x)) => (CSEL0 [OpARM64NotEqual] (GreaterEqualNoov <typ.Bool> x) x)
15731518
(GreaterEqualNoov (InvertFlags x)) => (CSINC [OpARM64NotEqual] (LessThanNoov <typ.Bool> x) (MOVDconst [0]) x)
15741519

1575-
// Boolean-generating instructions (NOTE: NOT all boolean Values) always
1576-
// zero upper bit of the register; no need to zero-extend
1577-
(MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => (MOVDreg x)
1578-
15791520
// Don't bother extending if we're not using the higher bits.
15801521
(MOV(B|BU)reg x) && v.Type.Size() <= 1 => x
15811522
(MOV(H|HU)reg x) && v.Type.Size() <= 2 => x
15821523
(MOV(W|WU)reg x) && v.Type.Size() <= 4 => x
15831524

1584-
// omit unsign extension
1585-
(MOVWUreg x) && zeroUpper32Bits(x, 3) => x
1586-
15871525
// omit sign extension
15881526
(MOVWreg <t> (ANDconst x [c])) && uint64(c) & uint64(0xffffffff80000000) == 0 => (ANDconst <t> x [c])
15891527
(MOVHreg <t> (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffff8000) == 0 => (ANDconst <t> x [c])

src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,69 @@
1919
(CMNWconst [c] x) && !isARM64addcon(int64(c)) => (CMNW x (MOVDconst [int64(c)]))
2020

2121
(ADDSconstflags [c] x) && !isARM64addcon(c) => (ADDSflags x (MOVDconst [c]))
22+
23+
// These rules remove unneeded sign/zero extensions.
24+
// They occur in late lower because they rely on the fact
25+
// that their arguments don't get rewritten to a non-extended opcode instead.
26+
27+
// Boolean-generating instructions (NOTE: NOT all boolean Values) always
28+
// zero upper bit of the register; no need to zero-extend
29+
(MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => x
30+
31+
// omit unsigned extension
32+
(MOVWUreg x) && zeroUpper32Bits(x, 3) => x
33+
34+
// don't extend after proper load
35+
(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
36+
(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
37+
(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
38+
(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
39+
(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
40+
(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
41+
(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
42+
(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
43+
(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
44+
(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
45+
(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
46+
(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
47+
(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
48+
(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
49+
(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
50+
(MOVBreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
51+
(MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
52+
(MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
53+
(MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
54+
(MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
55+
(MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
56+
(MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
57+
(MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
58+
(MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
59+
(MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
60+
(MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
61+
(MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x)
62+
(MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
63+
(MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
64+
(MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x)
65+
(MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
66+
(MOVHUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
67+
(MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
68+
(MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
69+
(MOVWreg x:(MOVWloadidx4 _ _ _)) => (MOVDreg x)
70+
(MOVWUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
71+
(MOVWUreg x:(MOVWUloadidx4 _ _ _)) => (MOVDreg x)
72+
73+
// fold double extensions
74+
(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
75+
(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
76+
(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
77+
(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
78+
(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
79+
(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
80+
(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
81+
(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
82+
(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
83+
(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
84+
(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
85+
(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
86+
(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
87+
(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)

src/cmd/compile/internal/ssa/rewrite.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,7 +1294,7 @@ func zeroUpper32Bits(x *Value, depth int) bool {
12941294
OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
12951295
OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
12961296
return true
1297-
case OpArg:
1297+
case OpArg: // note: but not ArgIntReg
12981298
return x.Type.Size() == 4
12991299
case OpPhi, OpSelect0, OpSelect1:
13001300
// Phis can use each-other as an arguments, instead of tracking visited values,
@@ -1318,7 +1318,7 @@ func zeroUpper48Bits(x *Value, depth int) bool {
13181318
switch x.Op {
13191319
case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
13201320
return true
1321-
case OpArg:
1321+
case OpArg: // note: but not ArgIntReg
13221322
return x.Type.Size() == 2
13231323
case OpPhi, OpSelect0, OpSelect1:
13241324
// Phis can use each-other as an arguments, instead of tracking visited values,
@@ -1342,7 +1342,7 @@ func zeroUpper56Bits(x *Value, depth int) bool {
13421342
switch x.Op {
13431343
case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
13441344
return true
1345-
case OpArg:
1345+
case OpArg: // note: but not ArgIntReg
13461346
return x.Type.Size() == 1
13471347
case OpPhi, OpSelect0, OpSelect1:
13481348
// Phis can use each-other as an arguments, instead of tracking visited values,

src/cmd/compile/internal/ssa/rewriteAMD64.go

Lines changed: 0 additions & 33 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteAMD64latelower.go

Lines changed: 51 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)