Skip to content

Commit 0a17b2c

Browse files
committed
cmd/internal/obj/arm64: load large constants into vector registers from rodata
Load large constants into vector registers from rodata, instead of placing them in the literal pool. This treats VMOVQ/VMOVD/VMOVS the same as FMOVD/FMOVS and makes use of the existing mechanism for storing values in rodata. Two additional instructions are required for a load; however, these instructions are used infrequently and already have a high latency.

Updates #59615

Change-Id: I54226730267689963d73321e548733ae2d66740e
Reviewed-on: https://go-review.googlesource.com/c/go/+/515617
Reviewed-by: Eric Fang <[email protected]>
Reviewed-by: Carlos Amedee <[email protected]>
Run-TryBot: Joel Sing <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
1 parent a5ab4a9 commit 0a17b2c

File tree

3 files changed

+66
-47
lines changed

3 files changed

+66
-47
lines changed

src/cmd/internal/obj/arm64/asm7.go

+9-44
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,6 @@ func MOVCONST(d int64, s int, rt int) uint32 {
282282
const (
283283
// Optab.flag
284284
LFROM = 1 << iota // p.From uses constant pool
285-
LFROM128 // p.From3<<64+p.From forms a 128-bit constant in literal pool
286285
LTO // p.To uses constant pool
287286
NOTUSETMP // p expands to multiple instructions, but does NOT use REGTMP
288287
BRANCH14BITS // branch instruction encodes 14 bits
@@ -423,10 +422,10 @@ var optab = []Optab{
423422
/* load long effective stack address (load int32 offset and add) */
424423
{AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, C_NONE, 34, 8, REGSP, LFROM, 0},
425424

426-
// Move a large constant to a vector register.
427-
{AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, C_NONE, 101, 4, 0, LFROM128, 0},
428-
{AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, C_NONE, 101, 4, 0, LFROM, 0},
429-
{AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, C_NONE, 101, 4, 0, LFROM, 0},
425+
// Load a large constant into a vector register.
426+
{AVMOVS, C_ADDR, C_NONE, C_NONE, C_VREG, C_NONE, 65, 12, 0, 0, 0},
427+
{AVMOVD, C_ADDR, C_NONE, C_NONE, C_VREG, C_NONE, 65, 12, 0, 0, 0},
428+
{AVMOVQ, C_ADDR, C_NONE, C_NONE, C_VREG, C_NONE, 65, 12, 0, 0, 0},
430429

431430
/* jump operations */
432431
{AB, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0},
@@ -1117,9 +1116,6 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
11171116
if o.flag&LFROM != 0 {
11181117
c.addpool(p, &p.From)
11191118
}
1120-
if o.flag&LFROM128 != 0 {
1121-
c.addpool128(p, &p.From, p.GetFrom3())
1122-
}
11231119
if o.flag&LTO != 0 {
11241120
c.addpool(p, &p.To)
11251121
}
@@ -1321,34 +1317,6 @@ func (c *ctxt7) flushpool(p *obj.Prog) {
13211317
c.pool.start = 0
13221318
}
13231319

1324-
// addpool128 adds a 128-bit constant to literal pool by two consecutive DWORD
1325-
// instructions, the 128-bit constant is formed by ah.Offset<<64+al.Offset.
1326-
func (c *ctxt7) addpool128(p *obj.Prog, al, ah *obj.Addr) {
1327-
q := c.newprog()
1328-
q.As = ADWORD
1329-
q.To.Type = obj.TYPE_CONST
1330-
q.To.Offset = al.Offset // q.Pc is lower than t.Pc, so al.Offset is stored in q.
1331-
1332-
t := c.newprog()
1333-
t.As = ADWORD
1334-
t.To.Type = obj.TYPE_CONST
1335-
t.To.Offset = ah.Offset
1336-
1337-
q.Link = t
1338-
1339-
if c.blitrl == nil {
1340-
c.blitrl = q
1341-
c.pool.start = uint32(p.Pc)
1342-
} else {
1343-
c.elitrl.Link = q
1344-
}
1345-
1346-
c.elitrl = t
1347-
c.pool.size = roundUp(c.pool.size, 16)
1348-
c.pool.size += 16
1349-
p.Pool = q
1350-
}
1351-
13521320
/*
13531321
* MOVD foo(SB), R is actually
13541322
* MOVD addr, REGTMP
@@ -1365,8 +1333,8 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) {
13651333
sz := 4
13661334

13671335
if a.Type == obj.TYPE_CONST {
1368-
if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD {
1369-
// out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit.
1336+
if lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit)) {
1337+
// out of range -0x80000000 ~ 0xffffffff, must store 64-bit.
13701338
t.As = ADWORD
13711339
sz = 8
13721340
} // else store 32-bit
@@ -5660,9 +5628,6 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
56605628
o1 = q<<30 | 0xe<<24 | len<<13 | op<<12
56615629
o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31)
56625630

5663-
case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool.
5664-
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
5665-
56665631
case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
56675632
o1 = c.opirr(p, p.As)
56685633
rf := p.Reg
@@ -7187,13 +7152,13 @@ func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 {
71877152
case AMOVBU:
71887153
return LDSTR(0, 0, 1)
71897154

7190-
case AFMOVS:
7155+
case AFMOVS, AVMOVS:
71917156
return LDSTR(2, 1, 1)
71927157

7193-
case AFMOVD:
7158+
case AFMOVD, AVMOVD:
71947159
return LDSTR(3, 1, 1)
71957160

7196-
case AFMOVQ:
7161+
case AFMOVQ, AVMOVQ:
71977162
return LDSTR(0, 1, 3)
71987163
}
71997164

src/cmd/internal/obj/arm64/obj7.go

+26-3
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,33 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
329329
break
330330
}
331331

332-
// Rewrite float constants to values stored in memory.
332+
// Rewrite float and vector constants to values stored in memory.
333333
switch p.As {
334+
case AVMOVS:
335+
if p.From.Type == obj.TYPE_CONST {
336+
p.From.Type = obj.TYPE_MEM
337+
p.From.Sym = c.ctxt.Int32Sym(p.From.Offset)
338+
p.From.Name = obj.NAME_EXTERN
339+
p.From.Offset = 0
340+
}
341+
342+
case AVMOVD:
343+
if p.From.Type == obj.TYPE_CONST {
344+
p.From.Type = obj.TYPE_MEM
345+
p.From.Sym = c.ctxt.Int64Sym(p.From.Offset)
346+
p.From.Name = obj.NAME_EXTERN
347+
p.From.Offset = 0
348+
}
349+
350+
case AVMOVQ:
351+
if p.From.Type == obj.TYPE_CONST {
352+
p.From.Type = obj.TYPE_MEM
353+
p.From.Sym = c.ctxt.Int128Sym(p.GetFrom3().Offset, p.From.Offset)
354+
p.From.Name = obj.NAME_EXTERN
355+
p.From.Offset = 0
356+
p.RestArgs = nil
357+
}
358+
334359
case AFMOVS:
335360
if p.From.Type == obj.TYPE_FCONST {
336361
f64 := p.From.Val.(float64)
@@ -365,8 +390,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
365390
p.From.Name = obj.NAME_EXTERN
366391
p.From.Offset = 0
367392
}
368-
369-
break
370393
}
371394

372395
if c.ctxt.Flag_dynlink {

src/cmd/internal/obj/sym.go

+31
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
"cmd/internal/notsha256"
3737
"cmd/internal/objabi"
3838
"encoding/base64"
39+
"encoding/binary"
3940
"fmt"
4041
"internal/buildcfg"
4142
"log"
@@ -162,6 +163,18 @@ func (ctxt *Link) Float64Sym(f float64) *LSym {
162163
})
163164
}
164165

166+
func (ctxt *Link) Int32Sym(i int64) *LSym {
167+
name := fmt.Sprintf("$i32.%08x", uint64(i))
168+
return ctxt.LookupInit(name, func(s *LSym) {
169+
s.Size = 4
170+
s.WriteInt(ctxt, 0, 4, i)
171+
s.Type = objabi.SRODATA
172+
s.Set(AttrLocal, true)
173+
s.Set(AttrContentAddressable, true)
174+
ctxt.constSyms = append(ctxt.constSyms, s)
175+
})
176+
}
177+
165178
func (ctxt *Link) Int64Sym(i int64) *LSym {
166179
name := fmt.Sprintf("$i64.%016x", uint64(i))
167180
return ctxt.LookupInit(name, func(s *LSym) {
@@ -174,6 +187,24 @@ func (ctxt *Link) Int64Sym(i int64) *LSym {
174187
})
175188
}
176189

190+
func (ctxt *Link) Int128Sym(hi, lo int64) *LSym {
191+
name := fmt.Sprintf("$i128.%016x%016x", uint64(hi), uint64(lo))
192+
return ctxt.LookupInit(name, func(s *LSym) {
193+
s.Size = 16
194+
if ctxt.Arch.ByteOrder == binary.LittleEndian {
195+
s.WriteInt(ctxt, 0, 8, lo)
196+
s.WriteInt(ctxt, 8, 8, hi)
197+
} else {
198+
s.WriteInt(ctxt, 0, 8, hi)
199+
s.WriteInt(ctxt, 8, 8, lo)
200+
}
201+
s.Type = objabi.SRODATA
202+
s.Set(AttrLocal, true)
203+
s.Set(AttrContentAddressable, true)
204+
ctxt.constSyms = append(ctxt.constSyms, s)
205+
})
206+
}
207+
177208
// GCLocalsSym generates a content-addressable sym containing data.
178209
func (ctxt *Link) GCLocalsSym(data []byte) *LSym {
179210
sum := notsha256.Sum256(data)

0 commit comments

Comments
 (0)