Skip to content

Commit e45c125

Browse files
sophie-zhao authored and abner-chenc committed
cmd/compile: add patterns for bitfield opcodes on loong64
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
                 |  bench.old   |              bench.new               |
                 |    sec/op    |    sec/op      vs base               |
LeadingZeros       1.0095n ± 0%   0.8011n ± 0%  -20.64% (p=0.000 n=10)
LeadingZeros8       1.201n ± 0%    1.167n ± 0%   -2.83% (p=0.000 n=10)
LeadingZeros16      1.201n ± 0%    1.167n ± 0%   -2.83% (p=0.000 n=10)
LeadingZeros32      1.201n ± 0%    1.134n ± 0%   -5.58% (p=0.000 n=10)
LeadingZeros64     0.8007n ± 0%   1.0115n ± 0%  +26.32% (p=0.000 n=10)
TrailingZeros      0.8054n ± 0%   0.8106n ± 1%   +0.65% (p=0.000 n=10)
TrailingZeros8      1.067n ± 0%    1.002n ± 1%   -6.09% (p=0.000 n=10)
TrailingZeros16    1.0540n ± 0%   0.8389n ± 0%  -20.40% (p=0.000 n=10)
TrailingZeros32    0.8014n ± 0%   0.8117n ± 0%   +1.29% (p=0.000 n=10)
TrailingZeros64    0.8015n ± 0%   0.8124n ± 1%   +1.36% (p=0.000 n=10)
OnesCount           3.418n ± 0%    3.417n ± 0%        ~ (p=0.911 n=10)
OnesCount8         0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
OnesCount16         1.440n ± 0%    1.299n ± 0%   -9.79% (p=0.000 n=10)
OnesCount32         2.969n ± 0%    2.940n ± 0%   -0.94% (p=0.000 n=10)
OnesCount64         3.563n ± 0%    3.558n ± 0%   -0.14% (p=0.000 n=10)
RotateLeft         0.6677n ± 0%   0.6670n ± 0%        ~ (p=0.055 n=10)
RotateLeft8         1.318n ± 1%    1.321n ± 0%        ~ (p=0.117 n=10)
RotateLeft16       0.8457n ± 1%   0.8442n ± 0%        ~ (p=0.325 n=10)
RotateLeft32       0.8004n ± 0%   0.8004n ± 0%        ~ (p=0.837 n=10)
RotateLeft64       0.6678n ± 0%   0.6670n ± 0%   -0.13% (p=0.000 n=10)
Reverse            0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
Reverse8           0.6989n ± 0%   0.6969n ± 1%        ~ (p=0.138 n=10)
Reverse16          0.6998n ± 1%   0.7004n ± 1%        ~ (p=0.985 n=10)
Reverse32          0.4158n ± 1%   0.4159n ± 1%        ~ (p=0.870 n=10)
Reverse64          0.4165n ± 1%   0.4194n ± 2%        ~ (p=0.093 n=10)
ReverseBytes       0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
ReverseBytes16     0.4183n ± 2%   0.4148n ± 1%        ~ (p=0.055 n=10)
ReverseBytes32     0.4143n ± 2%   0.4153n ± 1%        ~ (p=0.869 n=10)
ReverseBytes64     0.4168n ± 1%   0.4177n ± 1%        ~ (p=0.184 n=10)
Add                 1.201n ± 0%    1.201n ± 0%        ~ (p=0.087 n=10)
Add32               1.603n ± 0%    1.601n ± 0%   -0.12% (p=0.000 n=10)
Add64               1.201n ± 0%    1.201n ± 0%        ~ (p=0.211 n=10)
Add64multiple       1.839n ± 0%    1.835n ± 0%   -0.24% (p=0.001 n=10)
Sub                 1.202n ± 0%    1.201n ± 0%   -0.04% (p=0.033 n=10)
Sub32               2.401n ± 0%    1.601n ± 0%  -33.32% (p=0.000 n=10)
Sub64               1.201n ± 0%    1.201n ± 0%        ~ (p=1.000 n=10)
Sub64multiple       2.105n ± 0%    2.096n ± 0%   -0.40% (p=0.000 n=10)
Mul                0.8008n ± 0%   0.8004n ± 0%   -0.05% (p=0.000 n=10)
Mul32              0.8041n ± 0%   0.8014n ± 0%   -0.34% (p=0.000 n=10)
Mul64              0.8008n ± 0%   0.8004n ± 0%   -0.05% (p=0.000 n=10)
Div                 8.977n ± 0%    8.945n ± 0%   -0.36% (p=0.000 n=10)
Div32               4.084n ± 0%    4.086n ± 0%        ~ (p=0.445 n=10)
Div64               9.316n ± 0%    9.301n ± 0%   -0.17% (p=0.000 n=10)
geomean             1.141n         1.117n        -2.09%

Change-Id: I4dc1eaab6728f771bc722ed331fe5c6429bd1037
Reviewed-on: https://go-review.googlesource.com/c/go/+/618475
Reviewed-by: Dmitri Shuralyov <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Reviewed-by: Meidan Li <[email protected]>
Reviewed-by: abner chenc <[email protected]>
Reviewed-by: Qiqi Huang <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent ef3e1da commit e45c125

File tree

5 files changed

+189
-0
lines changed

5 files changed

+189
-0
lines changed

src/cmd/compile/internal/loong64/ssa.go

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -186,6 +186,21 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
186186
p.To.Type = obj.TYPE_REG
187187
p.To.Reg = v.Reg()
188188

189+
case ssa.OpLOONG64BSTRPICKV,
190+
ssa.OpLOONG64BSTRPICKW:
191+
p := s.Prog(v.Op.Asm())
192+
p.From.Type = obj.TYPE_CONST
193+
if v.Op == ssa.OpLOONG64BSTRPICKW {
194+
p.From.Offset = v.AuxInt >> 5
195+
p.AddRestSourceConst(v.AuxInt & 0x1f)
196+
} else {
197+
p.From.Offset = v.AuxInt >> 6
198+
p.AddRestSourceConst(v.AuxInt & 0x3f)
199+
}
200+
p.Reg = v.Args[0].Reg()
201+
p.To.Type = obj.TYPE_REG
202+
p.To.Reg = v.Reg()
203+
189204
case ssa.OpLOONG64FMINF,
190205
ssa.OpLOONG64FMIND,
191206
ssa.OpLOONG64FMAXF,
@@ -334,6 +349,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
334349
}
335350
p.To.Type = obj.TYPE_REG
336351
p.To.Reg = v.Reg()
352+
337353
case ssa.OpLOONG64MOVBloadidx,
338354
ssa.OpLOONG64MOVBUloadidx,
339355
ssa.OpLOONG64MOVHloadidx,
@@ -350,6 +366,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
350366
p.From.Index = v.Args[1].Reg()
351367
p.To.Type = obj.TYPE_REG
352368
p.To.Reg = v.Reg()
369+
353370
case ssa.OpLOONG64MOVBstoreidx,
354371
ssa.OpLOONG64MOVHstoreidx,
355372
ssa.OpLOONG64MOVWstoreidx,
@@ -363,6 +380,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
363380
p.To.Name = obj.NAME_NONE
364381
p.To.Reg = v.Args[0].Reg()
365382
p.To.Index = v.Args[1].Reg()
383+
366384
case ssa.OpLOONG64MOVBstorezeroidx,
367385
ssa.OpLOONG64MOVHstorezeroidx,
368386
ssa.OpLOONG64MOVWstorezeroidx,
@@ -374,6 +392,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
374392
p.To.Name = obj.NAME_NONE
375393
p.To.Reg = v.Args[0].Reg()
376394
p.To.Index = v.Args[1].Reg()
395+
377396
case ssa.OpLOONG64MOVBload,
378397
ssa.OpLOONG64MOVBUload,
379398
ssa.OpLOONG64MOVHload,

src/cmd/compile/internal/ssa/_gen/LOONG64.rules

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,20 @@
117117
(Rsh8x16 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
118118
(Rsh8x8 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
119119

120+
// bitfield ops
121+
122+
// bstrpickv
123+
// (x << lc) >> rc
124+
(SRLVconst [rc] (SLLVconst [lc] x)) && lc <= rc => (BSTRPICKV [rc-lc + ((64-lc)-1)<<6] x)
125+
// uint64(x) >> rc
126+
(SRLVconst [rc] (MOVWUreg x)) && rc < 32 => (BSTRPICKV [rc + 31<<6] x)
127+
(SRLVconst [rc] (MOVHUreg x)) && rc < 16 => (BSTRPICKV [rc + 15<<6] x)
128+
(SRLVconst [rc] (MOVBUreg x)) && rc < 8 => (BSTRPICKV [rc + 7<<6] x)
129+
// uint64(x >> rc)
130+
(MOVWUreg (SRLVconst [rc] x)) && rc < 32 => (BSTRPICKV [rc + (31+rc)<<6] x)
131+
(MOVHUreg (SRLVconst [rc] x)) && rc < 16 => (BSTRPICKV [rc + (15+rc)<<6] x)
132+
(MOVBUreg (SRLVconst [rc] x)) && rc < 8 => (BSTRPICKV [rc + (7+rc)<<6] x)
133+
120134
// rotates
121135
(RotateLeft8 <t> x (MOVVconst [c])) => (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7])))
122136
(RotateLeft8 <t> x y) => (OR <t> (SLLV <t> x (ANDconst <typ.Int64> [7] y)) (SRLV <t> (ZeroExt8to64 x) (ANDconst <typ.Int64> [7] (NEGV <typ.Int64> y))))

src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,12 @@ func init() {
235235
{name: "CMPGTF", argLength: 2, reg: fp2flags, asm: "CMPGTF", typ: "Flags"}, // flags=true if arg0 > arg1, float32
236236
{name: "CMPGTD", argLength: 2, reg: fp2flags, asm: "CMPGTD", typ: "Flags"}, // flags=true if arg0 > arg1, float64
237237

238+
// bitfield ops
239+
// for bstrpick.w msbw is auxInt>>5, lsbw is auxInt&0x1f
240+
// for bstrpick.d msbd is auxInt>>6, lsbd is auxInt&0x3f
241+
{name: "BSTRPICKW", argLength: 1, reg: gp11, asm: "BSTRPICKW", aux: "Int64"},
242+
{name: "BSTRPICKV", argLength: 1, reg: gp11, asm: "BSTRPICKV", aux: "Int64"},
243+
238244
// moves
239245
{name: "MOVVconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVV", typ: "UInt64", rematerializeable: true}, // auxint
240246
{name: "MOVFconst", argLength: 0, reg: fp01, aux: "Float64", asm: "MOVF", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteLOONG64.go

Lines changed: 120 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)