Skip to content

Commit d08010f

Browse files
committed
[dev.ssa] cmd/compile: PPC64, FP to/from int conversions.
Passes ssa_test. Requires a few new instructions and some scratchpad memory to move data between G and F registers. Also fixed comparisons to be correct in case of NaN. Added missing instructions for run.bash. Removed some FP registers that are apparently "reserved" (but that are apparently also unused except for a gratuitous multiplication by two when y = x+x would work just as well). Currently failing stack splits. Updates #16010. Change-Id: I73b161bfff54445d72bd7b813b1479f89fc72602 Reviewed-on: https://go-review.googlesource.com/26813 Run-TryBot: David Chase <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Cherry Zhang <[email protected]>
1 parent d99cee7 commit d08010f

File tree

13 files changed

+863
-157
lines changed

13 files changed

+863
-157
lines changed

src/cmd/compile/internal/gc/ssa.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3953,8 +3953,9 @@ type SSAGenState struct {
39533953
bstart []*obj.Prog
39543954

39553955
// 387 port: maps from SSE registers (REG_X?) to 387 registers (REG_F?)
3956-
SSEto387 map[int16]int16
3957-
Scratch387 *Node
3956+
SSEto387 map[int16]int16
3957+
// Some architectures require a 64-bit temporary for FP-related register shuffling. Examples include x86-387, PPC, and Sparc V8.
3958+
ScratchFpMem *Node
39583959
}
39593960

39603961
// Pc returns the current Prog.
@@ -3993,7 +3994,9 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) {
39933994

39943995
if Thearch.Use387 {
39953996
s.SSEto387 = map[int16]int16{}
3996-
s.Scratch387 = temp(Types[TUINT64])
3997+
}
3998+
if f.Config.NeedsFpScratch {
3999+
s.ScratchFpMem = temp(Types[TUINT64])
39974000
}
39984001

39994002
// Emit basic blocks

src/cmd/compile/internal/ppc64/prog.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{
8282
ppc64.AFDIV & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite},
8383
ppc64.AFDIVS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
8484
ppc64.AFCTIDZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
85+
ppc64.AFCTIWZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
8586
ppc64.AFCFID & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
8687
ppc64.AFCFIDU & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite},
8788
ppc64.AFCMPU & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightRead},
@@ -104,6 +105,8 @@ var progtable = [ppc64.ALAST & obj.AMask]obj.ProgInfo{
104105
ppc64.AMOVD & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.Move},
105106
ppc64.AMOVDU & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.Move | gc.PostInc},
106107
ppc64.AFMOVS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
108+
ppc64.AFMOVSX & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
109+
ppc64.AFMOVSZ & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | gc.RightWrite | gc.Move | gc.Conv},
107110
ppc64.AFMOVD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Move},
108111

109112
// Jumps

src/cmd/compile/internal/ppc64/ssa.go

Lines changed: 128 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,11 @@ var ssaRegToReg = []int16{
7373
ppc64.REG_F24,
7474
ppc64.REG_F25,
7575
ppc64.REG_F26,
76-
ppc64.REG_F27,
77-
ppc64.REG_F28,
78-
ppc64.REG_F29,
79-
ppc64.REG_F30,
80-
ppc64.REG_F31,
76+
// ppc64.REG_F27, // reserved for "floating conversion constant"
77+
// ppc64.REG_F28, // 0.0
78+
// ppc64.REG_F29, // 0.5
79+
// ppc64.REG_F30, // 1.0
80+
// ppc64.REG_F31, // 2.0
8181

8282
// ppc64.REG_CR0,
8383
// ppc64.REG_CR1,
@@ -88,38 +88,24 @@ var ssaRegToReg = []int16{
8888
// ppc64.REG_CR6,
8989
// ppc64.REG_CR7,
9090

91-
ppc64.REG_CR,
91+
// ppc64.REG_CR,
9292
// ppc64.REG_XER,
9393
// ppc64.REG_LR,
9494
// ppc64.REG_CTR,
9595
}
9696

97-
// Associated condition bit
98-
var condBits = map[ssa.Op]uint8{
99-
ssa.OpPPC64Equal: ppc64.C_COND_EQ,
100-
ssa.OpPPC64NotEqual: ppc64.C_COND_EQ,
101-
ssa.OpPPC64LessThan: ppc64.C_COND_LT,
102-
ssa.OpPPC64GreaterEqual: ppc64.C_COND_LT,
103-
ssa.OpPPC64GreaterThan: ppc64.C_COND_GT,
104-
ssa.OpPPC64LessEqual: ppc64.C_COND_GT,
105-
}
10697
var condOps = map[ssa.Op]obj.As{
10798
ssa.OpPPC64Equal: ppc64.ABEQ,
10899
ssa.OpPPC64NotEqual: ppc64.ABNE,
109100
ssa.OpPPC64LessThan: ppc64.ABLT,
110101
ssa.OpPPC64GreaterEqual: ppc64.ABGE,
111102
ssa.OpPPC64GreaterThan: ppc64.ABGT,
112103
ssa.OpPPC64LessEqual: ppc64.ABLE,
113-
}
114104

115-
// Is the condition bit set? 1=yes 0=no
116-
var condBitSet = map[ssa.Op]uint8{
117-
ssa.OpPPC64Equal: 1,
118-
ssa.OpPPC64NotEqual: 0,
119-
ssa.OpPPC64LessThan: 1,
120-
ssa.OpPPC64GreaterEqual: 0,
121-
ssa.OpPPC64GreaterThan: 1,
122-
ssa.OpPPC64LessEqual: 0,
105+
ssa.OpPPC64FLessThan: ppc64.ABLT, // 1 branch for FCMP
106+
ssa.OpPPC64FGreaterThan: ppc64.ABGT, // 1 branch for FCMP
107+
ssa.OpPPC64FLessEqual: ppc64.ABLT, // 2 branches for FCMP <=, second is BEQ
108+
ssa.OpPPC64FGreaterEqual: ppc64.ABGT, // 2 branches for FCMP >=, second is BEQ
123109
}
124110

125111
// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
@@ -205,38 +191,83 @@ func storeByType(t ssa.Type) obj.As {
205191
panic("bad store type")
206192
}
207193

194+
// scratchFpMem initializes an Addr (field of a Prog)
195+
// to reference the scratchpad memory for movement between
196+
// F and G registers for FP conversions.
197+
func scratchFpMem(s *gc.SSAGenState, a *obj.Addr) {
198+
a.Type = obj.TYPE_MEM
199+
a.Name = obj.NAME_AUTO
200+
a.Node = s.ScratchFpMem
201+
a.Sym = gc.Linksym(s.ScratchFpMem.Sym)
202+
a.Reg = ppc64.REGSP
203+
}
204+
208205
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
209206
s.SetLineno(v.Line)
210207
switch v.Op {
211208
case ssa.OpInitMem:
212209
// memory arg needs no code
213210
case ssa.OpArg:
214211
// input args need no code
215-
case ssa.OpSP, ssa.OpSB:
212+
case ssa.OpSP, ssa.OpSB, ssa.OpGetG:
216213
// nothing to do
217214

218215
case ssa.OpCopy, ssa.OpPPC64MOVDconvert:
219-
// TODO: copy of floats
220-
if v.Type.IsMemory() {
216+
t := v.Type
217+
if t.IsMemory() {
221218
return
222219
}
223220
x := gc.SSARegNum(v.Args[0])
224221
y := gc.SSARegNum(v)
225222
if x != y {
226-
p := gc.Prog(ppc64.AMOVD)
223+
rt := obj.TYPE_REG
224+
op := ppc64.AMOVD
225+
226+
if t.IsFloat() {
227+
op = ppc64.AFMOVD
228+
}
229+
p := gc.Prog(op)
230+
p.From.Type = rt
231+
p.From.Reg = x
232+
p.To.Type = rt
233+
p.To.Reg = y
234+
}
235+
236+
case ssa.OpPPC64Xf2i64:
237+
{
238+
x := gc.SSARegNum(v.Args[0])
239+
y := gc.SSARegNum(v)
240+
p := gc.Prog(ppc64.AFMOVD)
227241
p.From.Type = obj.TYPE_REG
228242
p.From.Reg = x
243+
scratchFpMem(s, &p.To)
244+
p = gc.Prog(ppc64.AMOVD)
245+
p.To.Type = obj.TYPE_REG
229246
p.To.Reg = y
247+
scratchFpMem(s, &p.From)
248+
}
249+
case ssa.OpPPC64Xi2f64:
250+
{
251+
x := gc.SSARegNum(v.Args[0])
252+
y := gc.SSARegNum(v)
253+
p := gc.Prog(ppc64.AMOVD)
254+
p.From.Type = obj.TYPE_REG
255+
p.From.Reg = x
256+
scratchFpMem(s, &p.To)
257+
p = gc.Prog(ppc64.AFMOVD)
230258
p.To.Type = obj.TYPE_REG
259+
p.To.Reg = y
260+
scratchFpMem(s, &p.From)
231261
}
232262

233263
case ssa.OpPPC64LoweredGetClosurePtr:
234264
// Closure pointer is R11 (already)
235265
gc.CheckLoweredGetClosurePtr(v)
236266

237267
case ssa.OpLoadReg:
238-
p := gc.Prog(loadByType(v.Type))
268+
loadOp := loadByType(v.Type)
239269
n, off := gc.AutoVar(v.Args[0])
270+
p := gc.Prog(loadOp)
240271
p.From.Type = obj.TYPE_MEM
241272
p.From.Node = n
242273
p.From.Sym = gc.Linksym(n.Sym)
@@ -251,10 +282,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
251282
p.To.Reg = gc.SSARegNum(v)
252283

253284
case ssa.OpStoreReg:
254-
p := gc.Prog(storeByType(v.Type))
285+
storeOp := storeByType(v.Type)
286+
n, off := gc.AutoVar(v)
287+
p := gc.Prog(storeOp)
255288
p.From.Type = obj.TYPE_REG
256289
p.From.Reg = gc.SSARegNum(v.Args[0])
257-
n, off := gc.AutoVar(v)
258290
p.To.Type = obj.TYPE_MEM
259291
p.To.Node = n
260292
p.To.Sym = gc.Linksym(n.Sym)
@@ -376,7 +408,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
376408
p.To.Type = obj.TYPE_REG
377409
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
378410

379-
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG:
411+
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP:
380412
r := gc.SSARegNum(v)
381413
p := gc.Prog(v.Op.Asm())
382414
p.To.Type = obj.TYPE_REG
@@ -510,8 +542,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
510542
case ssa.OpPPC64Equal,
511543
ssa.OpPPC64NotEqual,
512544
ssa.OpPPC64LessThan,
545+
ssa.OpPPC64FLessThan,
513546
ssa.OpPPC64LessEqual,
514547
ssa.OpPPC64GreaterThan,
548+
ssa.OpPPC64FGreaterThan,
515549
ssa.OpPPC64GreaterEqual:
516550
// On Power7 or later, can use isel instruction:
517551
// for a < b, a > b, a = b:
@@ -549,6 +583,30 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
549583
p = gc.Prog(obj.ANOP)
550584
gc.Patch(pb, p)
551585

586+
case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion
587+
ssa.OpPPC64FGreaterEqual:
588+
589+
p := gc.Prog(ppc64.AMOVW)
590+
p.From.Type = obj.TYPE_CONST
591+
p.From.Offset = 1
592+
p.To.Type = obj.TYPE_REG
593+
p.To.Reg = gc.SSARegNum(v)
594+
595+
pb0 := gc.Prog(condOps[v.Op])
596+
pb0.To.Type = obj.TYPE_BRANCH
597+
pb1 := gc.Prog(ppc64.ABEQ)
598+
pb1.To.Type = obj.TYPE_BRANCH
599+
600+
p = gc.Prog(ppc64.AMOVW)
601+
p.From.Type = obj.TYPE_CONST
602+
p.From.Offset = 0
603+
p.To.Type = obj.TYPE_REG
604+
p.To.Reg = gc.SSARegNum(v)
605+
606+
p = gc.Prog(obj.ANOP)
607+
gc.Patch(pb0, p)
608+
gc.Patch(pb1, p)
609+
552610
case ssa.OpPPC64LoweredZero:
553611
// Similar to how this is done on ARM,
554612
// except that PPC MOVDU x,off(y) is *(y+off) = x; y=y+off
@@ -843,20 +901,22 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
843901
}
844902

845903
var blockJump = [...]struct {
846-
asm, invasm obj.As
904+
asm, invasm obj.As
905+
asmeq, invasmeq bool
847906
}{
848-
ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE},
849-
ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ},
850-
851-
ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE},
852-
ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT},
853-
ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT},
854-
ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE},
855-
856-
ssa.BlockPPC64ULT: {ppc64.ABLT, ppc64.ABGE},
857-
ssa.BlockPPC64UGE: {ppc64.ABGE, ppc64.ABLT},
858-
ssa.BlockPPC64ULE: {ppc64.ABLE, ppc64.ABGT},
859-
ssa.BlockPPC64UGT: {ppc64.ABGT, ppc64.ABLE},
907+
ssa.BlockPPC64EQ: {ppc64.ABEQ, ppc64.ABNE, false, false},
908+
ssa.BlockPPC64NE: {ppc64.ABNE, ppc64.ABEQ, false, false},
909+
910+
ssa.BlockPPC64LT: {ppc64.ABLT, ppc64.ABGE, false, false},
911+
ssa.BlockPPC64GE: {ppc64.ABGE, ppc64.ABLT, false, false},
912+
ssa.BlockPPC64LE: {ppc64.ABLE, ppc64.ABGT, false, false},
913+
ssa.BlockPPC64GT: {ppc64.ABGT, ppc64.ABLE, false, false},
914+
915+
// TODO: need to work FP comparisons into block jumps
916+
ssa.BlockPPC64FLT: {ppc64.ABLT, ppc64.ABGT, false, true},
917+
ssa.BlockPPC64FGE: {ppc64.ABGT, ppc64.ABLT, true, false},
918+
ssa.BlockPPC64FLE: {ppc64.ABLT, ppc64.ABGT, true, false},
919+
ssa.BlockPPC64FGT: {ppc64.ABGT, ppc64.ABLT, false, true},
860920
}
861921

862922
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
@@ -893,12 +953,17 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
893953
gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here
894954
case ssa.BlockRet:
895955
gc.Prog(obj.ARET)
956+
case ssa.BlockRetJmp:
957+
p := gc.Prog(obj.AJMP)
958+
p.To.Type = obj.TYPE_MEM
959+
p.To.Name = obj.NAME_EXTERN
960+
p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym))
896961

897962
case ssa.BlockPPC64EQ, ssa.BlockPPC64NE,
898963
ssa.BlockPPC64LT, ssa.BlockPPC64GE,
899964
ssa.BlockPPC64LE, ssa.BlockPPC64GT,
900-
ssa.BlockPPC64ULT, ssa.BlockPPC64UGT,
901-
ssa.BlockPPC64ULE, ssa.BlockPPC64UGE:
965+
ssa.BlockPPC64FLT, ssa.BlockPPC64FGE,
966+
ssa.BlockPPC64FLE, ssa.BlockPPC64FGT:
902967
jmp := blockJump[b.Kind]
903968
likely := b.Likely
904969
var p *obj.Prog
@@ -908,14 +973,30 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
908973
likely *= -1
909974
p.To.Type = obj.TYPE_BRANCH
910975
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
976+
if jmp.invasmeq {
977+
// TODO: The second branch is probably predict-not-taken since it is for FP equality
978+
q := gc.Prog(ppc64.ABEQ)
979+
q.To.Type = obj.TYPE_BRANCH
980+
s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
981+
}
911982
case b.Succs[1].Block():
912983
p = gc.Prog(jmp.asm)
913984
p.To.Type = obj.TYPE_BRANCH
914985
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
986+
if jmp.asmeq {
987+
q := gc.Prog(ppc64.ABEQ)
988+
q.To.Type = obj.TYPE_BRANCH
989+
s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[0].Block()})
990+
}
915991
default:
916992
p = gc.Prog(jmp.asm)
917993
p.To.Type = obj.TYPE_BRANCH
918994
s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
995+
if jmp.asmeq {
996+
q := gc.Prog(ppc64.ABEQ)
997+
q.To.Type = obj.TYPE_BRANCH
998+
s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[0].Block()})
999+
}
9191000
q := gc.Prog(obj.AJMP)
9201001
q.To.Type = obj.TYPE_BRANCH
9211002
s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})

src/cmd/compile/internal/ssa/config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type Config struct {
3131
noDuffDevice bool // Don't use Duff's device
3232
nacl bool // GOOS=nacl
3333
use387 bool // GO386=387
34+
NeedsFpScratch bool // No direct move between GP and FP register sets
3435
sparsePhiCutoff uint64 // Sparse phi location algorithm used above this #blocks*#variables score
3536
curFunc *Func
3637

@@ -190,6 +191,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
190191
c.fpRegMask = fpRegMaskPPC64
191192
c.FPReg = framepointerRegPPC64
192193
c.noDuffDevice = true // TODO: Resolve PPC64 DuffDevice (has zero, but not copy)
194+
c.NeedsFpScratch = true
193195
c.hasGReg = true
194196
default:
195197
fe.Unimplementedf(0, "arch %s not implemented", arch)
@@ -245,6 +247,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
245247
}
246248

247249
func (c *Config) Set387(b bool) {
250+
c.NeedsFpScratch = b
248251
c.use387 = b
249252
}
250253

0 commit comments

Comments
 (0)