Skip to content

Commit 3023d7d

Browse files
committed
cmd/compile/internal, cmd/internal/obj/ppc64: generate new count trailing zeros instructions on POWER9
This change adds new POWER9 instructions for counting trailing zeros (CNTTZW/CNTTZD) to the assembler and generates them in SSA when GOPPC64=power9.

name                 old time/op  new time/op  delta
TrailingZeros-160    1.59ns ±20%  1.45ns ±10%  -8.81%  (p=0.000 n=14+13)
TrailingZeros8-160   1.55ns ±23%  1.62ns ±44%    ~     (p=0.593 n=13+15)
TrailingZeros16-160  1.78ns ±23%  1.62ns ±38%  -9.31%  (p=0.003 n=14+14)
TrailingZeros32-160  1.64ns ±10%  1.49ns ± 9%  -9.15%  (p=0.000 n=13+14)
TrailingZeros64-160  1.53ns ± 6%  1.45ns ± 5%  -5.38%  (p=0.000 n=15+13)

Change-Id: I365e6ff79f3ce4d8ebe089a6a86b1771853eb596
Reviewed-on: https://go-review.googlesource.com/c/go/+/167517
Reviewed-by: Lynn Boger <[email protected]>
1 parent 23b476a commit 3023d7d

File tree

9 files changed

+109
-19
lines changed

9 files changed

+109
-19
lines changed

src/cmd/compile/internal/ppc64/ssa.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
620620
p.To.Type = obj.TYPE_REG
621621
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
622622

623-
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, ssa.OpPPC64FROUND:
623+
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS, ssa.OpPPC64FROUND, ssa.OpPPC64CNTTZW, ssa.OpPPC64CNTTZD:
624624
r := v.Reg()
625625
p := s.Prog(v.Op.Asm())
626626
p.To.Type = obj.TYPE_REG

src/cmd/compile/internal/ssa/gen/PPC64.rules

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -303,10 +303,12 @@
303303
(Ctz32NonZero x) -> (Ctz32 x)
304304
(Ctz64NonZero x) -> (Ctz64 x)
305305

306-
(Ctz64 x) -> (POPCNTD (ANDN <typ.Int64> (ADDconst <typ.Int64> [-1] x) x))
307-
(Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN <typ.Int> (ADDconst <typ.Int> [-1] x) x)))
306+
(Ctz64 x) && objabi.GOPPC64<=8 -> (POPCNTD (ANDN <typ.Int64> (ADDconst <typ.Int64> [-1] x) x))
307+
(Ctz64 x) -> (CNTTZD x)
308+
(Ctz32 x) && objabi.GOPPC64<=8 -> (POPCNTW (MOVWZreg (ANDN <typ.Int> (ADDconst <typ.Int> [-1] x) x)))
309+
(Ctz32 x) -> (CNTTZW (MOVWZreg x))
308310
(Ctz16 x) -> (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
309-
(Ctz8 x) -> (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
311+
(Ctz8 x) -> (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
310312

311313
(BitLen64 x) -> (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
312314
(BitLen32 x) -> (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
@@ -339,7 +341,7 @@
339341
// Sign extension dependence on operand sign sets up for sign/zero-extension elision later
340342
(Eq8 x y) && isSigned(x.Type) && isSigned(y.Type) -> (Equal (CMPW (SignExt8to32 x) (SignExt8to32 y)))
341343
(Eq16 x y) && isSigned(x.Type) && isSigned(y.Type) -> (Equal (CMPW (SignExt16to32 x) (SignExt16to32 y)))
342-
(Eq8 x y) -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
344+
(Eq8 x y) -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
343345
(Eq16 x y) -> (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
344346
(Eq32 x y) -> (Equal (CMPW x y))
345347
(Eq64 x y) -> (Equal (CMP x y))

src/cmd/compile/internal/ssa/gen/PPC64Ops.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,9 @@ func init() {
215215
{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
216216
{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
217217

218+
{name: "CNTTZD", argLength: 1, reg: gp11, asm: "CNTTZD"}, // count trailing zeros
219+
{name: "CNTTZW", argLength: 1, reg: gp11, asm: "CNTTZW"}, // count trailing zeros (32 bit)
220+
218221
{name: "POPCNTD", argLength: 1, reg: gp11, asm: "POPCNTD"}, // number of set bits in arg0
219222
{name: "POPCNTW", argLength: 1, reg: gp11, asm: "POPCNTW"}, // number of set bits in each word of arg0 placed in corresponding word
220223
{name: "POPCNTB", argLength: 1, reg: gp11, asm: "POPCNTB"}, // number of set bits in each byte of arg0 placed in corresponding byte

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 28 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewritePPC64.go

Lines changed: 28 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/ppc64/a.out.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,10 @@ const (
749749
APOPCNTD
750750
APOPCNTW
751751
APOPCNTB
752+
ACNTTZW
753+
ACNTTZWCC
754+
ACNTTZD
755+
ACNTTZDCC
752756
ACOPY
753757
APASTECC
754758
ADARN

src/cmd/internal/obj/ppc64/anames.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/ppc64/asm9.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,10 @@ var optab = []Optab{
389389
{AMOVWZ, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsr */
390390

391391
/* Other ISA 2.05+ instructions */
392-
{APOPCNTD, C_REG, C_NONE, C_NONE, C_REG, 93, 4, 0}, /* population count, x-form */
393-
{ACMPB, C_REG, C_REG, C_NONE, C_REG, 92, 4, 0}, /* compare byte, x-form */
394-
{ACMPEQB, C_REG, C_REG, C_NONE, C_CREG, 92, 4, 0}, /* compare equal byte, x-form */
392+
{APOPCNTD, C_REG, C_NONE, C_NONE, C_REG, 93, 4, 0}, /* population count, x-form */
393+
{ACMPB, C_REG, C_REG, C_NONE, C_REG, 92, 4, 0}, /* compare byte, x-form */
394+
{ACMPEQB, C_REG, C_REG, C_NONE, C_CREG, 92, 4, 0}, /* compare equal byte, x-form, ISA 3.0 */
395+
{ACMPEQB, C_REG, C_NONE, C_NONE, C_REG, 70, 4, 0},
395396
{AFTDIV, C_FREG, C_FREG, C_NONE, C_SCON, 92, 4, 0}, /* floating test for sw divide, x-form */
396397
{AFTSQRT, C_FREG, C_NONE, C_NONE, C_SCON, 93, 4, 0}, /* floating test for sw square root, x-form */
397398
{ACOPY, C_REG, C_NONE, C_NONE, C_REG, 92, 4, 0}, /* copy/paste facility, x-form */
@@ -1304,9 +1305,13 @@ func buildop(ctxt *obj.Link) {
13041305
opset(ADIVDUVCC, r0)
13051306
opset(ADIVDUCC, r0)
13061307

1307-
case APOPCNTD:
1308+
case APOPCNTD: /* popcntd, popcntw, popcntb, cnttzw, cnttzd */
13081309
opset(APOPCNTW, r0)
13091310
opset(APOPCNTB, r0)
1311+
opset(ACNTTZW, r0)
1312+
opset(ACNTTZWCC, r0)
1313+
opset(ACNTTZD, r0)
1314+
opset(ACNTTZDCC, r0)
13101315

13111316
case ACOPY: /* copy, paste. */
13121317
opset(APASTECC, r0)
@@ -3760,6 +3765,8 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
37603765
return OPVCC(31, 32, 0, 0)
37613766
case ACMPB:
37623767
return OPVCC(31, 508, 0, 0) /* cmpb - v2.05 */
3768+
case ACMPEQB:
3769+
return OPVCC(31, 224, 0, 0) /* cmpeqb - v3.00 */
37633770

37643771
case ACNTLZW:
37653772
return OPVCC(31, 26, 0, 0)
@@ -4118,6 +4125,14 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
41184125
return OPVCC(31, 378, 0, 0) /* popcntw - v2.06 */
41194126
case APOPCNTB:
41204127
return OPVCC(31, 122, 0, 0) /* popcntb - v2.02 */
4128+
case ACNTTZW:
4129+
return OPVCC(31, 538, 0, 0) /* cnttzw - v3.00 */
4130+
case ACNTTZWCC:
4131+
return OPVCC(31, 538, 0, 1) /* cnttzw. - v3.00 */
4132+
case ACNTTZD:
4133+
return OPVCC(31, 570, 0, 0) /* cnttzd - v3.00 */
4134+
case ACNTTZDCC:
4135+
return OPVCC(31, 570, 0, 1) /* cnttzd. - v3.00 */
41214136

41224137
case ARFI:
41234138
return OPVCC(19, 50, 0, 0)

test/codegen/mathbits.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,10 @@ func TrailingZeros(n uint) int {
261261
// arm:"CLZ"
262262
// arm64:"RBIT","CLZ"
263263
// s390x:"FLOGR"
264-
// ppc64:"ANDN","POPCNTD"
265-
// ppc64le:"ANDN","POPCNTD"
264+
// ppc64/power8:"ANDN","POPCNTD"
265+
// ppc64le/power8:"ANDN","POPCNTD"
266+
// ppc64/power9: "CNTTZD"
267+
// ppc64le/power9: "CNTTZD"
266268
// wasm:"I64Ctz"
267269
return bits.TrailingZeros(n)
268270
}
@@ -271,8 +273,10 @@ func TrailingZeros64(n uint64) int {
271273
// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
272274
// arm64:"RBIT","CLZ"
273275
// s390x:"FLOGR"
274-
// ppc64:"ANDN","POPCNTD"
275-
// ppc64le:"ANDN","POPCNTD"
276+
// ppc64/power8:"ANDN","POPCNTD"
277+
// ppc64le/power8:"ANDN","POPCNTD"
278+
// ppc64/power9: "CNTTZD"
279+
// ppc64le/power9: "CNTTZD"
276280
// wasm:"I64Ctz"
277281
return bits.TrailingZeros64(n)
278282
}
@@ -282,8 +286,10 @@ func TrailingZeros32(n uint32) int {
282286
// arm:"CLZ"
283287
// arm64:"RBITW","CLZW"
284288
// s390x:"FLOGR","MOVWZ"
285-
// ppc64:"ANDN","POPCNTW"
286-
// ppc64le:"ANDN","POPCNTW"
289+
// ppc64/power8:"ANDN","POPCNTW"
290+
// ppc64le/power8:"ANDN","POPCNTW"
291+
// ppc64/power9: "CNTTZW"
292+
// ppc64le/power9: "CNTTZW"
287293
// wasm:"I64Ctz"
288294
return bits.TrailingZeros32(n)
289295
}
@@ -293,8 +299,10 @@ func TrailingZeros16(n uint16) int {
293299
// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
294300
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
295301
// s390x:"FLOGR","OR\t\\$65536"
296-
// ppc64:"POPCNTD","OR\\t\\$65536"
297-
// ppc64le:"POPCNTD","OR\\t\\$65536"
302+
// ppc64/power8:"POPCNTD","OR\\t\\$65536"
303+
// ppc64le/power8:"POPCNTD","OR\\t\\$65536"
304+
// ppc64/power9:"CNTTZD","OR\\t\\$65536"
305+
// ppc64le/power9:"CNTTZD","OR\\t\\$65536"
298306
// wasm:"I64Ctz"
299307
return bits.TrailingZeros16(n)
300308
}

0 commit comments

Comments
 (0)