Skip to content

Commit deb4177

Browse files
committed
cmd/compile: use masks instead of branches for slicing
When we do

    var x []byte = ...
    y := x[i:]

We can't just use y.ptr = x.ptr + i, as the new pointer may point to the
next object in memory after the backing array.
We used to fix this by doing:

    y.cap = x.cap - i
    delta := i
    if y.cap == 0 {
        delta = 0
    }
    y.ptr = x.ptr + delta

That generates a branch in what is otherwise straight-line code.

Better to do:

    y.cap = x.cap - i
    mask := (y.cap - 1) >> 63 // -1 if y.cap==0, 0 otherwise
    y.ptr = x.ptr + i &^ mask

It's about the same number of instructions (~4, depending on what
parts are constant, and the target architecture), but it is all
inline. It plays nicely with CSE, and the mask can be computed
in parallel with the index (in cases where a multiply is required).

It is a minor win in both speed and space.

Change-Id: Ied60465a0b8abb683c02208402e5bb7ac0e8370f
Reviewed-on: https://go-review.googlesource.com/32022
Run-TryBot: Keith Randall <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
1 parent 50f66fb commit deb4177

21 files changed

+325
-56
lines changed

src/cmd/compile/internal/gc/ssa.go

+20-40
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,6 @@ var (
295295
typVar = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "typ"}}
296296
idataVar = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "idata"}}
297297
okVar = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "ok"}}
298-
deltaVar = Node{Op: ONAME, Class: Pxxx, Sym: &Sym{Name: "delta"}}
299298
)
300299

301300
// startBlock sets the current block we're generating code in to b.
@@ -3516,19 +3515,17 @@ func (s *state) slice(t *Type, v, i, j, k *ssa.Value) (p, l, c *ssa.Value) {
35163515
}
35173516

35183517
// Generate the following code assuming that indexes are in bounds.
3519-
// The conditional is to make sure that we don't generate a slice
3518+
// The masking is to make sure that we don't generate a slice
35203519
// that points to the next object in memory.
3521-
// rlen = j-i
3522-
// rcap = k-i
3523-
// delta = i*elemsize
3524-
// if rcap == 0 {
3525-
// delta = 0
3526-
// }
3527-
// rptr = p+delta
3520+
// rlen = j - i
3521+
// rcap = k - i
3522+
// delta = i * elemsize
3523+
// rptr = p + delta&mask(rcap)
35283524
// result = (SliceMake rptr rlen rcap)
3525+
// where mask(x) is 0 if x==0 and -1 if x>0.
35293526
subOp := s.ssaOp(OSUB, Types[TINT])
3530-
eqOp := s.ssaOp(OEQ, Types[TINT])
35313527
mulOp := s.ssaOp(OMUL, Types[TINT])
3528+
andOp := s.ssaOp(OAND, Types[TINT])
35323529
rlen := s.newValue2(subOp, Types[TINT], j, i)
35333530
var rcap *ssa.Value
35343531
switch {
@@ -3543,38 +3540,21 @@ func (s *state) slice(t *Type, v, i, j, k *ssa.Value) (p, l, c *ssa.Value) {
35433540
rcap = s.newValue2(subOp, Types[TINT], k, i)
35443541
}
35453542

3546-
// delta = # of elements to offset pointer by.
3547-
s.vars[&deltaVar] = i
3548-
3549-
// Generate code to set delta=0 if the resulting capacity is zero.
3550-
if !((i.Op == ssa.OpConst64 && i.AuxInt == 0) ||
3551-
(i.Op == ssa.OpConst32 && int32(i.AuxInt) == 0)) {
3552-
cmp := s.newValue2(eqOp, Types[TBOOL], rcap, zero)
3553-
3554-
b := s.endBlock()
3555-
b.Kind = ssa.BlockIf
3556-
b.Likely = ssa.BranchUnlikely
3557-
b.SetControl(cmp)
3558-
3559-
// Generate block which zeros the delta variable.
3560-
nz := s.f.NewBlock(ssa.BlockPlain)
3561-
b.AddEdgeTo(nz)
3562-
s.startBlock(nz)
3563-
s.vars[&deltaVar] = zero
3564-
s.endBlock()
3565-
3566-
// All done.
3567-
merge := s.f.NewBlock(ssa.BlockPlain)
3568-
b.AddEdgeTo(merge)
3569-
nz.AddEdgeTo(merge)
3570-
s.startBlock(merge)
3571-
3572-
// TODO: use conditional moves somehow?
3543+
var rptr *ssa.Value
3544+
if (i.Op == ssa.OpConst64 || i.Op == ssa.OpConst32) && i.AuxInt == 0 {
3545+
// No pointer arithmetic necessary.
3546+
rptr = ptr
3547+
} else {
3548+
// delta = # of bytes to offset pointer by.
3549+
delta := s.newValue2(mulOp, Types[TINT], i, s.constInt(Types[TINT], elemtype.Width))
3550+
// If we're slicing to the point where the capacity is zero,
3551+
// zero out the delta.
3552+
mask := s.newValue1(ssa.OpSlicemask, Types[TINT], rcap)
3553+
delta = s.newValue2(andOp, Types[TINT], delta, mask)
3554+
// Compute rptr = ptr + delta
3555+
rptr = s.newValue2(ssa.OpAddPtr, ptrtype, ptr, delta)
35733556
}
35743557

3575-
// Compute rptr = ptr + delta * elemsize
3576-
rptr := s.newValue2(ssa.OpAddPtr, ptrtype, ptr, s.newValue2(mulOp, Types[TINT], s.variable(&deltaVar, Types[TINT]), s.constInt(Types[TINT], elemtype.Width)))
3577-
delete(s.vars, &deltaVar)
35783558
return rptr, rlen, rcap
35793559
}
35803560

src/cmd/compile/internal/ssa/gen/386.rules

+2-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@
101101
(ZeroExt16to32 x) -> (MOVWLZX x)
102102

103103
(Signmask x) -> (SARLconst x [31])
104-
(Zeromask <t> x) -> (XORLconst [-1] (SBBLcarrymask <t> (CMPL x (MOVLconst [1]))))
104+
(Zeromask <t> x) -> (XORLconst [-1] (SBBLcarrymask <t> (CMPLconst x [1])))
105+
(Slicemask <t> x) -> (XORLconst [-1] (SARLconst <t> (SUBLconst <t> x [1]) [31]))
105106

106107
// Lowering truncation
107108
// Because we ignore high parts of registers, truncates are just copies.

src/cmd/compile/internal/ssa/gen/AMD64.rules

+2
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@
125125
(ZeroExt16to64 x) -> (MOVWQZX x)
126126
(ZeroExt32to64 x) -> (MOVLQZX x)
127127

128+
(Slicemask <t> x) -> (XORQconst [-1] (SARQconst <t> (SUBQconst <t> x [1]) [63]))
129+
128130
// Lowering truncation
129131
// Because we ignore high parts of registers, truncates are just copies.
130132
(Trunc16to8 x) -> x

src/cmd/compile/internal/ssa/gen/ARM.rules

+1
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@
207207

208208
(Signmask x) -> (SRAconst x [31])
209209
(Zeromask x) -> (SRAconst (RSBshiftRL <config.fe.TypeInt32()> x x [1]) [31]) // sign bit of uint32(x)>>1 - x
210+
(Slicemask <t> x) -> (MVN (SRAconst <t> (SUBconst <t> x [1]) [31]))
210211

211212
// float <-> int conversion
212213
(Cvt32to32F x) -> (MOVWF x)

src/cmd/compile/internal/ssa/gen/ARM64.rules

+2
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@
207207
(ConstNil) -> (MOVDconst [0])
208208
(ConstBool [b]) -> (MOVDconst [b])
209209

210+
(Slicemask <t> x) -> (MVN (SRAconst <t> (SUBconst <t> x [1]) [63]))
211+
210212
// truncations
211213
// Because we ignore high parts of registers, truncates are just copies.
212214
(Trunc16to8 x) -> x

src/cmd/compile/internal/ssa/gen/MIPS64.rules

+3-1
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@
152152
(OrB x y) -> (OR x y)
153153
(EqB x y) -> (XOR (MOVVconst [1]) (XOR <config.fe.TypeBool()> x y))
154154
(NeqB x y) -> (XOR x y)
155-
(Not x) -> (XOR (MOVVconst [1]) x)
155+
(Not x) -> (XORconst [1] x)
156156

157157
// constants
158158
(Const64 [val]) -> (MOVVconst [val])
@@ -164,6 +164,8 @@
164164
(ConstNil) -> (MOVVconst [0])
165165
(ConstBool [b]) -> (MOVVconst [b])
166166

167+
(Slicemask <t> x) -> (NORconst [0] (SRAVconst <t> (SUBVconst <t> x [1]) [63]))
168+
167169
// truncations
168170
// Because we ignore high parts of registers, truncates are just copies.
169171
(Trunc16to8 x) -> x

src/cmd/compile/internal/ssa/gen/PPC64.rules

+2
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,8 @@
790790
(Trunc64to16 x) -> (MOVHreg x)
791791
(Trunc64to32 x) -> (MOVWreg x)
792792

793+
(Slicemask <t> x) -> (XORconst [-1] (SRADconst <t> (ADDconst <t> x [-1]) [63]))
794+
793795
// Note that MOV??reg returns a 64-bit int, x is not necessarily that wide
794796
// This may interact with other patterns in the future. (Compare with arm64)
795797
(MOVBZreg x:(MOVBZload _ _)) -> x

src/cmd/compile/internal/ssa/gen/S390X.rules

+2
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@
152152
(ZeroExt16to64 x) -> (MOVHZreg x)
153153
(ZeroExt32to64 x) -> (MOVWZreg x)
154154

155+
(Slicemask <t> x) -> (XOR (MOVDconst [-1]) (SRADconst <t> (SUBconst <t> x [1]) [63]))
156+
155157
// Lowering truncation
156158
// Because we ignore high parts of registers, truncates are just copies.
157159
(Trunc16to8 x) -> x

src/cmd/compile/internal/ssa/gen/generic.rules

+5
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,11 @@
602602
(Trunc32to16 (And32 (Const32 [y]) x)) && y&0xFFFF == 0xFFFF -> (Trunc32to16 x)
603603
(Trunc16to8 (And16 (Const16 [y]) x)) && y&0xFF == 0xFF -> (Trunc16to8 x)
604604

605+
(Slicemask (Const32 [x])) && x > 0 -> (Const32 [-1])
606+
(Slicemask (Const32 [0])) -> (Const32 [0])
607+
(Slicemask (Const64 [x])) && x > 0 -> (Const64 [-1])
608+
(Slicemask (Const64 [0])) -> (Const64 [0])
609+
605610
// Rewrite AND of consts as shifts if possible, slightly faster for 64 bit operands
606611
// leading zeros can be shifted left, then right
607612
(And64 <t> (Const64 [y]) x) && nlz(y) + nto(y) == 64 && nto(y) >= 32

src/cmd/compile/internal/ssa/gen/genericOps.go

+1
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ var genericOps = []opData{
437437

438438
{name: "Signmask", argLength: 1, typ: "Int32"}, // 0 if arg0 >= 0, -1 if arg0 < 0
439439
{name: "Zeromask", argLength: 1, typ: "UInt32"}, // 0 if arg0 == 0, 0xffffffff if arg0 != 0
440+
{name: "Slicemask", argLength: 1}, // 0 if arg0 == 0, -1 if arg0 > 0, undef if arg0<0. Type is native int size.
440441

441442
{name: "Cvt32Uto32F", argLength: 1}, // uint32 -> float32, only used on 32-bit arch
442443
{name: "Cvt32Uto64F", argLength: 1}, // uint32 -> float64, only used on 32-bit arch

src/cmd/compile/internal/ssa/opGen.go

+6
Original file line numberDiff line numberDiff line change
@@ -1736,6 +1736,7 @@ const (
17361736
OpSub32withcarry
17371737
OpSignmask
17381738
OpZeromask
1739+
OpSlicemask
17391740
OpCvt32Uto32F
17401741
OpCvt32Uto64F
17411742
OpCvt32Fto32U
@@ -19812,6 +19813,11 @@ var opcodeTable = [...]opInfo{
1981219813
argLen: 1,
1981319814
generic: true,
1981419815
},
19816+
{
19817+
name: "Slicemask",
19818+
argLen: 1,
19819+
generic: true,
19820+
},
1981519821
{
1981619822
name: "Cvt32Uto32F",
1981719823
argLen: 1,

src/cmd/compile/internal/ssa/prove.go

+38
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,44 @@ func updateRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r
568568
// simplifyBlock simplifies block b using the restrictions known in ft.
569569
// Returns which branch must always be taken.
570570
func simplifyBlock(ft *factsTable, b *Block) branch {
571+
for _, v := range b.Values {
572+
if v.Op != OpSlicemask {
573+
continue
574+
}
575+
add := v.Args[0]
576+
if add.Op != OpAdd64 && add.Op != OpAdd32 {
577+
continue
578+
}
579+
// Note that the arg of slicemask was originally a sub, but
580+
// was rewritten to an add by generic.rules (if the thing
581+
// being subtracted was a constant).
582+
x := add.Args[0]
583+
y := add.Args[1]
584+
if x.Op == OpConst64 || x.Op == OpConst32 {
585+
x, y = y, x
586+
}
587+
if y.Op != OpConst64 && y.Op != OpConst32 {
588+
continue
589+
}
590+
// slicemask(x + y)
591+
// if x is larger than -y (y is negative), then slicemask is -1.
592+
lim, ok := ft.limits[x.ID]
593+
if !ok {
594+
continue
595+
}
596+
if lim.umin > uint64(-y.AuxInt) {
597+
if v.Args[0].Op == OpAdd64 {
598+
v.reset(OpConst64)
599+
} else {
600+
v.reset(OpConst32)
601+
}
602+
if b.Func.pass.debug > 0 {
603+
b.Func.Config.Warnl(v.Line, "Proved slicemask not needed")
604+
}
605+
v.AuxInt = -1
606+
}
607+
}
608+
571609
if b.Kind != BlockIf {
572610
return unknown
573611
}

src/cmd/compile/internal/ssa/rewrite386.go

+26-5
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,8 @@ func rewriteValue386(v *Value, config *Config) bool {
540540
return rewriteValue386_OpSignExt8to32(v, config)
541541
case OpSignmask:
542542
return rewriteValue386_OpSignmask(v, config)
543+
case OpSlicemask:
544+
return rewriteValue386_OpSlicemask(v, config)
543545
case OpSqrt:
544546
return rewriteValue386_OpSqrt(v, config)
545547
case OpStaticCall:
@@ -12432,6 +12434,27 @@ func rewriteValue386_OpSignmask(v *Value, config *Config) bool {
1243212434
return true
1243312435
}
1243412436
}
12437+
func rewriteValue386_OpSlicemask(v *Value, config *Config) bool {
12438+
b := v.Block
12439+
_ = b
12440+
// match: (Slicemask <t> x)
12441+
// cond:
12442+
// result: (XORLconst [-1] (SARLconst <t> (SUBLconst <t> x [1]) [31]))
12443+
for {
12444+
t := v.Type
12445+
x := v.Args[0]
12446+
v.reset(Op386XORLconst)
12447+
v.AuxInt = -1
12448+
v0 := b.NewValue0(v.Line, Op386SARLconst, t)
12449+
v0.AuxInt = 31
12450+
v1 := b.NewValue0(v.Line, Op386SUBLconst, t)
12451+
v1.AuxInt = 1
12452+
v1.AddArg(x)
12453+
v0.AddArg(v1)
12454+
v.AddArg(v0)
12455+
return true
12456+
}
12457+
}
1243512458
func rewriteValue386_OpSqrt(v *Value, config *Config) bool {
1243612459
b := v.Block
1243712460
_ = b
@@ -13088,18 +13111,16 @@ func rewriteValue386_OpZeromask(v *Value, config *Config) bool {
1308813111
_ = b
1308913112
// match: (Zeromask <t> x)
1309013113
// cond:
13091-
// result: (XORLconst [-1] (SBBLcarrymask <t> (CMPL x (MOVLconst [1]))))
13114+
// result: (XORLconst [-1] (SBBLcarrymask <t> (CMPLconst x [1])))
1309213115
for {
1309313116
t := v.Type
1309413117
x := v.Args[0]
1309513118
v.reset(Op386XORLconst)
1309613119
v.AuxInt = -1
1309713120
v0 := b.NewValue0(v.Line, Op386SBBLcarrymask, t)
13098-
v1 := b.NewValue0(v.Line, Op386CMPL, TypeFlags)
13121+
v1 := b.NewValue0(v.Line, Op386CMPLconst, TypeFlags)
13122+
v1.AuxInt = 1
1309913123
v1.AddArg(x)
13100-
v2 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
13101-
v2.AuxInt = 1
13102-
v1.AddArg(v2)
1310313124
v0.AddArg(v1)
1310413125
v.AddArg(v0)
1310513126
return true

src/cmd/compile/internal/ssa/rewriteAMD64.go

+23
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
742742
return rewriteValueAMD64_OpSignExt8to32(v, config)
743743
case OpSignExt8to64:
744744
return rewriteValueAMD64_OpSignExt8to64(v, config)
745+
case OpSlicemask:
746+
return rewriteValueAMD64_OpSlicemask(v, config)
745747
case OpSqrt:
746748
return rewriteValueAMD64_OpSqrt(v, config)
747749
case OpStaticCall:
@@ -18101,6 +18103,27 @@ func rewriteValueAMD64_OpSignExt8to64(v *Value, config *Config) bool {
1810118103
return true
1810218104
}
1810318105
}
18106+
func rewriteValueAMD64_OpSlicemask(v *Value, config *Config) bool {
18107+
b := v.Block
18108+
_ = b
18109+
// match: (Slicemask <t> x)
18110+
// cond:
18111+
// result: (XORQconst [-1] (SARQconst <t> (SUBQconst <t> x [1]) [63]))
18112+
for {
18113+
t := v.Type
18114+
x := v.Args[0]
18115+
v.reset(OpAMD64XORQconst)
18116+
v.AuxInt = -1
18117+
v0 := b.NewValue0(v.Line, OpAMD64SARQconst, t)
18118+
v0.AuxInt = 63
18119+
v1 := b.NewValue0(v.Line, OpAMD64SUBQconst, t)
18120+
v1.AuxInt = 1
18121+
v1.AddArg(x)
18122+
v0.AddArg(v1)
18123+
v.AddArg(v0)
18124+
return true
18125+
}
18126+
}
1810418127
func rewriteValueAMD64_OpSqrt(v *Value, config *Config) bool {
1810518128
b := v.Block
1810618129
_ = b

src/cmd/compile/internal/ssa/rewriteARM.go

+22
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,8 @@ func rewriteValueARM(v *Value, config *Config) bool {
684684
return rewriteValueARM_OpSignExt8to32(v, config)
685685
case OpSignmask:
686686
return rewriteValueARM_OpSignmask(v, config)
687+
case OpSlicemask:
688+
return rewriteValueARM_OpSlicemask(v, config)
687689
case OpSqrt:
688690
return rewriteValueARM_OpSqrt(v, config)
689691
case OpStaticCall:
@@ -16530,6 +16532,26 @@ func rewriteValueARM_OpSignmask(v *Value, config *Config) bool {
1653016532
return true
1653116533
}
1653216534
}
16535+
func rewriteValueARM_OpSlicemask(v *Value, config *Config) bool {
16536+
b := v.Block
16537+
_ = b
16538+
// match: (Slicemask <t> x)
16539+
// cond:
16540+
// result: (MVN (SRAconst <t> (SUBconst <t> x [1]) [31]))
16541+
for {
16542+
t := v.Type
16543+
x := v.Args[0]
16544+
v.reset(OpARMMVN)
16545+
v0 := b.NewValue0(v.Line, OpARMSRAconst, t)
16546+
v0.AuxInt = 31
16547+
v1 := b.NewValue0(v.Line, OpARMSUBconst, t)
16548+
v1.AuxInt = 1
16549+
v1.AddArg(x)
16550+
v0.AddArg(v1)
16551+
v.AddArg(v0)
16552+
return true
16553+
}
16554+
}
1653316555
func rewriteValueARM_OpSqrt(v *Value, config *Config) bool {
1653416556
b := v.Block
1653516557
_ = b

0 commit comments

Comments
 (0)