Skip to content

Commit 4d0151e

Browse files
committed
cmd/compile,cmd/internal/obj/ppc64: make math.Abs,math.Copysign intrinsics on ppc64x
This adds support for math Abs, Copysign to be intrinsics on ppc64x.

New instruction FCPSGN is added to generate fcpsgn. Some new rules are added to improve the int<->float conversions that are generated mainly due to the Float64bits and Float64frombits in the math package. PPC64.rules is also modified as suggested in the review for CL 63290.

Improvements:

benchmark                   old ns/op   new ns/op   delta
BenchmarkAbs-16             1.12        0.69        -38.39%
BenchmarkCopysign-16        1.30        0.93        -28.46%
BenchmarkNextafter32-16     9.34        8.05        -13.81%
BenchmarkFrexp-16           8.81        7.60        -13.73%

Others that used Copysign also saw smaller improvements.

I attempted to make this work using rules since that seems to be preferred, but due to the use of Float64bits and Float64frombits in these functions, several rules had to be added and even then not all cases were matched. Using rules became too complicated and seemed too fragile for these.

Updates #21390

Change-Id: Ia265da9a18355e08000818a4fba1a40e9e031995
Reviewed-on: https://go-review.googlesource.com/67130
Run-TryBot: Lynn Boger <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
1 parent 58de9f3 commit 4d0151e

File tree

12 files changed

+378
-8
lines changed

12 files changed

+378
-8
lines changed

src/cmd/asm/internal/asm/testdata/ppc64.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,9 @@ label1:
734734
POPCNTW R1,R2
735735
POPCNTB R1,R2
736736

737+
// Copysign
738+
FCPSGN F1,F2,F3
739+
737740
// Random number generator, X-form
738741
// DARN L,RT produces
739742
// darn RT,L

src/cmd/compile/internal/gc/asm_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2219,6 +2219,24 @@ var linuxPPC64LETests = []*asmTest{
22192219
pos: []string{"\tROTL\t"},
22202220
},
22212221

2222+
{
2223+
fn: `
2224+
func f12(a, b float64) float64 {
2225+
return math.Copysign(a, b)
2226+
}
2227+
`,
2228+
pos: []string{"\tFCPSGN\t"},
2229+
},
2230+
2231+
{
2232+
fn: `
2233+
func f13(a float64) float64 {
2234+
return math.Abs(a)
2235+
}
2236+
`,
2237+
pos: []string{"\tFABS\t"},
2238+
},
2239+
22222240
{
22232241
// check that stack store is optimized away
22242242
fn: `

src/cmd/compile/internal/gc/ssa.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2806,6 +2806,16 @@ func init() {
28062806
return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
28072807
},
28082808
sys.S390X)
2809+
addF("math", "Abs",
2810+
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
2811+
return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
2812+
},
2813+
sys.PPC64)
2814+
addF("math", "Copysign",
2815+
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
2816+
return s.newValue2(ssa.OpCopysign, types.Types[TFLOAT64], args[0], args[1])
2817+
},
2818+
sys.PPC64)
28092819

28102820
/******** math/bits ********/
28112821
addF("math/bits", "TrailingZeros64",

src/cmd/compile/internal/ppc64/ssa.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
530530
ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
531531
ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
532532
ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
533-
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS,
533+
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
534534
ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
535535
r := v.Reg()
536536
r1 := v.Args[0].Reg()
@@ -581,7 +581,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
581581
p.To.Type = obj.TYPE_REG
582582
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
583583

584-
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD:
584+
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD, ssa.OpPPC64FABS, ssa.OpPPC64FNABS:
585585
r := v.Reg()
586586
p := s.Prog(v.Op.Asm())
587587
p.To.Type = obj.TYPE_REG

src/cmd/compile/internal/ssa/gen/PPC64.rules

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,19 +70,15 @@
7070
(Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64
7171
(Cvt64Fto32F x) -> (FRSP x)
7272

73-
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) -> (MFVSRD x)
74-
(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _)) -> (MTVSRD x)
75-
76-
(FMOVDstore [off] {sym} ptr (MTVSRD x) mem) -> (MOVDstore [off] {sym} ptr x mem)
77-
(MOVDstore [off] {sym} ptr (MFVSRD x) mem) -> (FMOVDstore [off] {sym} ptr x mem)
78-
7973
(Round32F x) -> (LoweredRound32F x)
8074
(Round64F x) -> (LoweredRound64F x)
8175

8276
(Sqrt x) -> (FSQRT x)
8377
(Floor x) -> (FFLOOR x)
8478
(Ceil x) -> (FCEIL x)
8579
(Trunc x) -> (FTRUNC x)
80+
(Copysign x y) -> (FCPSGN y x)
81+
(Abs x) -> (FABS x)
8682

8783
// Lowering constants
8884
(Const8 [val]) -> (MOVDconst [val])
@@ -94,6 +90,13 @@
9490
(ConstNil) -> (MOVDconst [0])
9591
(ConstBool [b]) -> (MOVDconst [b])
9692

93+
// Constant folding
94+
(FABS (FMOVDconst [x])) -> (FMOVDconst [f2i(math.Abs(i2f(x)))])
95+
(FSQRT (FMOVDconst [x])) -> (FMOVDconst [f2i(math.Sqrt(i2f(x)))])
96+
(FFLOOR (FMOVDconst [x])) -> (FMOVDconst [f2i(math.Floor(i2f(x)))])
97+
(FCEIL (FMOVDconst [x])) -> (FMOVDconst [f2i(math.Ceil(i2f(x)))])
98+
(FTRUNC (FMOVDconst [x])) -> (FMOVDconst [f2i(math.Trunc(i2f(x)))])
99+
97100
// Rotate generation with const shift
98101
(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
99102
( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
@@ -734,6 +737,20 @@
734737

735738
(ADDconst [c] (MOVDaddr [d] {sym} x)) -> (MOVDaddr [c+d] {sym} x)
736739

740+
// Use register moves instead of stores and loads to move int<->float values
741+
// Common with math Float64bits, Float64frombits
742+
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) -> (MFVSRD x)
743+
(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _)) -> (MTVSRD x)
744+
745+
(FMOVDstore [off] {sym} ptr (MTVSRD x) mem) -> (MOVDstore [off] {sym} ptr x mem)
746+
(MOVDstore [off] {sym} ptr (MFVSRD x) mem) -> (FMOVDstore [off] {sym} ptr x mem)
747+
748+
(MTVSRD (MOVDconst [c])) -> (FMOVDconst [c])
749+
(MFVSRD (FMOVDconst [c])) -> (MOVDconst [c])
750+
751+
(MTVSRD x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (FMOVDload [off] {sym} ptr mem)
752+
(MFVSRD x:(FMOVDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDload [off] {sym} ptr mem)
753+
737754
// Fold offsets for stores.
738755
(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} x val mem)
739756
(MOVWstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} x val mem)
@@ -896,6 +913,10 @@
896913
(AND (MOVDconst [c]) x:(MOVBZload _ _)) -> (ANDconst [c&0xFF] x)
897914
(AND x:(MOVBZload _ _) (MOVDconst [c])) -> (ANDconst [c&0xFF] x)
898915

916+
// floating point negative abs
917+
(FNEG (FABS x)) -> (FNABS x)
918+
(FNEG (FNABS x)) -> (FABS x)
919+
899920
// floating-point fused multiply-add/sub
900921
(FADD (FMUL x y) z) -> (FMADD x y z)
901922
(FSUB (FMUL x y) z) -> (FMSUB x y z)

src/cmd/compile/internal/ssa/gen/PPC64Ops.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,9 @@ func init() {
249249
{name: "FFLOOR", argLength: 1, reg: fp11, asm: "FRIM"}, // floor(arg0), float64
250250
{name: "FCEIL", argLength: 1, reg: fp11, asm: "FRIP"}, // ceil(arg0), float64
251251
{name: "FTRUNC", argLength: 1, reg: fp11, asm: "FRIZ"}, // trunc(arg0), float64
252+
{name: "FABS", argLength: 1, reg: fp11, asm: "FABS"}, // abs(arg0), float64
253+
{name: "FNABS", argLength: 1, reg: fp11, asm: "FNABS"}, // -abs(arg0), float64
254+
{name: "FCPSGN", argLength: 2, reg: fp21, asm: "FCPSGN"}, // copysign arg0 -> arg1, float64
252255

253256
{name: "ORconst", argLength: 1, reg: gp11, asm: "OR", aux: "Int64"}, // arg0|aux
254257
{name: "XORconst", argLength: 1, reg: gp11, asm: "XOR", aux: "Int64"}, // arg0^aux

src/cmd/compile/internal/ssa/gen/genericOps.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,10 @@ var genericOps = []opData{
273273
{name: "Trunc", argLength: 1}, // round arg0 toward 0
274274
{name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0
275275

276+
// Modify the sign bit
277+
{name: "Abs", argLength: 1}, // absolute value arg0
278+
{name: "Copysign", argLength: 2}, // copy sign from arg1 to arg0
279+
276280
// Data movement, max argument length for Phi is indefinite so just pick
277281
// a really large number
278282
{name: "Phi", argLength: -1}, // select an argument based on which predecessor block we came from

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 55 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)