Skip to content

Commit fa04d48

Browse files
committed
cmd/asm: fix the issue of moving 128-bit integers to vector registers on arm64
CL 249758 added the `FMOVQ $vcon, Vd` instruction, and the assembler used 128-bit SIMD literal loading to load `$vcon` from the pool into the 128-bit vector register `Vd`. Because Go does not have 128-bit integers for now, the assembler reports an `immediate out of range` error when assembling the `FMOVQ $0x123456789abcdef0123456789abcdef, V0` instruction. This patch lets 128-bit integers take two 64-bit operands, for the high and low parts separately, and adds the `VMOVQ $hi, $lo, Vd` instruction to move `$hi<<64+$lo` into the 128-bit register `Vd`. In addition, this patch renames the `FMOVQ/FMOVD/FMOVS` ops to `VMOVQ/VMOVD/VMOVS` and uses them to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively. Update the Go doc. Fixes #40725 Change-Id: Ia3c83bb6463f104d2bee960905053a97299e0a3a Reviewed-on: https://go-review.googlesource.com/c/go/+/255900 Trust: fannie zhang <[email protected]> Reviewed-by: Cherry Zhang <[email protected]>
1 parent ea106cc commit fa04d48

File tree

7 files changed

+60
-41
lines changed

7 files changed

+60
-41
lines changed

src/cmd/asm/internal/arch/arm64.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,17 @@ func IsARM64STLXR(op obj.As) bool {
8282
return false
8383
}
8484

85+
// IsARM64TBL reports whether the op (as defined by an arm64.A*
86+
// constant) is one of the TBL-like instructions and one of its
87+
// inputs does not fit into prog.Reg, so it requires special handling.
88+
func IsARM64TBL(op obj.As) bool {
89+
switch op {
90+
case arm64.AVTBL, arm64.AVMOVQ:
91+
return true
92+
}
93+
return false
94+
}
95+
8596
// ARM64Suffix handles the special suffix for the ARM64.
8697
// It returns a boolean to indicate success; failure means
8798
// cond was unrecognized.
@@ -125,13 +136,6 @@ func arm64RegisterNumber(name string, n int16) (int16, bool) {
125136
return 0, false
126137
}
127138

128-
// IsARM64TBL reports whether the op (as defined by an arm64.A*
129-
// constant) is one of the table lookup instructions that require special
130-
// handling.
131-
func IsARM64TBL(op obj.As) bool {
132-
return op == arm64.AVTBL
133-
}
134-
135139
// ARM64RegisterExtension parses an ARM64 register with extension or arrangement.
136140
func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error {
137141
Rnum := (reg & 31) + int16(num<<5)

src/cmd/asm/internal/asm/asm.go

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -622,29 +622,26 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
622622
prog.SetFrom3(a[1])
623623
prog.To = a[2]
624624
case sys.ARM64:
625-
// ARM64 instructions with one input and two outputs.
626-
if arch.IsARM64STLXR(op) {
625+
switch {
626+
case arch.IsARM64STLXR(op):
627+
// ARM64 instructions with one input and two outputs.
627628
prog.From = a[0]
628629
prog.To = a[1]
629630
if a[2].Type != obj.TYPE_REG {
630631
p.errorf("invalid addressing modes for third operand to %s instruction, must be register", op)
631632
return
632633
}
633634
prog.RegTo2 = a[2].Reg
634-
break
635-
}
636-
if arch.IsARM64TBL(op) {
635+
case arch.IsARM64TBL(op):
636+
// one of its inputs does not fit into prog.Reg.
637637
prog.From = a[0]
638-
if a[1].Type != obj.TYPE_REGLIST {
639-
p.errorf("%s: expected list; found %s", op, obj.Dconv(prog, &a[1]))
640-
}
641638
prog.SetFrom3(a[1])
642639
prog.To = a[2]
643-
break
640+
default:
641+
prog.From = a[0]
642+
prog.Reg = p.getRegister(prog, op, &a[1])
643+
prog.To = a[2]
644644
}
645-
prog.From = a[0]
646-
prog.Reg = p.getRegister(prog, op, &a[1])
647-
prog.To = a[2]
648645
case sys.I386:
649646
prog.From = a[0]
650647
prog.SetFrom3(a[1])

src/cmd/asm/internal/asm/testdata/arm64.s

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
218218
FMOVD $(28.0), F4 // 0490671e
219219

220220
// move a large constant to a Vd.
221-
FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20
222-
FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29
221+
VMOVS $0x80402010, V11 // VMOVS $2151686160, V11
222+
VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20
223+
VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10
224+
VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20
223225

224226
FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
225227
FMOVS (R2)(R6<<2), F4 // 447866bc

src/cmd/internal/obj/arm64/a.out.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,7 +875,9 @@ const (
875875
AFLDPS
876876
AFMOVD
877877
AFMOVS
878-
AFMOVQ
878+
AVMOVQ
879+
AVMOVD
880+
AVMOVS
879881
AFMULD
880882
AFMULS
881883
AFNEGD

src/cmd/internal/obj/arm64/anames.go

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/arm64/asm7.go

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,9 @@ func MOVCONST(d int64, s int, rt int) uint32 {
260260
const (
261261
// Optab.flag
262262
LFROM = 1 << 0 // p.From uses constant pool
263-
LTO = 1 << 1 // p.To uses constant pool
264-
NOTUSETMP = 1 << 2 // p expands to multiple instructions, but does NOT use REGTMP
263+
LFROM3 = 1 << 1 // p.From3 uses constant pool
264+
LTO = 1 << 2 // p.To uses constant pool
265+
NOTUSETMP = 1 << 3 // p expands to multiple instructions, but does NOT use REGTMP
265266
)
266267

267268
var optab = []Optab{
@@ -397,10 +398,10 @@ var optab = []Optab{
397398
/* load long effective stack address (load int32 offset and add) */
398399
{AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0},
399400

400-
// Move a large constant to a Vn.
401-
{AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
402-
{AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
403-
{AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
401+
// Move a large constant to a vector register.
402+
{AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM | LFROM3, 0},
403+
{AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
404+
{AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
404405

405406
/* jump operations */
406407
{AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
@@ -950,13 +951,14 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
950951
c.ctxt.Diag("zero-width instruction\n%v", p)
951952
}
952953
}
953-
switch o.flag & (LFROM | LTO) {
954-
case LFROM:
954+
if o.flag&LFROM != 0 {
955955
c.addpool(p, &p.From)
956-
957-
case LTO:
956+
}
957+
if o.flag&LFROM3 != 0 {
958+
c.addpool(p, p.GetFrom3())
959+
}
960+
if o.flag&LTO != 0 {
958961
c.addpool(p, &p.To)
959-
break
960962
}
961963

962964
if p.As == AB || p.As == obj.ARET || p.As == AERET { /* TODO: other unconditional operations */
@@ -1174,8 +1176,8 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) {
11741176
sz := 4
11751177

11761178
if a.Type == obj.TYPE_CONST {
1177-
if lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit)) {
1178-
// out of range -0x80000000 ~ 0xffffffff, must store 64-bit
1179+
if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD {
1180+
// out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit.
11791181
t.As = ADWORD
11801182
sz = 8
11811183
} // else store 32-bit
@@ -2675,7 +2677,7 @@ func buildop(ctxt *obj.Link) {
26752677
case AFCSELD:
26762678
oprangeset(AFCSELS, t)
26772679

2678-
case AFMOVS, AFMOVD, AFMOVQ:
2680+
case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS:
26792681
break
26802682

26812683
case AFCVTZSD:
@@ -5142,7 +5144,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
51425144
o1 = q<<30 | 0xe<<24 | len<<13
51435145
o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31)
51445146

5145-
case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool.
5147+
case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool.
51465148
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
51475149

51485150
case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
@@ -6672,15 +6674,15 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 {
66726674
} else {
66736675
fp, w := 0, 0
66746676
switch as {
6675-
case AFMOVS:
6677+
case AFMOVS, AVMOVS:
66766678
fp = 1
66776679
w = 0 /* 32-bit SIMD/FP */
66786680

6679-
case AFMOVD:
6681+
case AFMOVD, AVMOVD:
66806682
fp = 1
66816683
w = 1 /* 64-bit SIMD/FP */
66826684

6683-
case AFMOVQ:
6685+
case AVMOVQ:
66846686
fp = 1
66856687
w = 2 /* 128-bit SIMD/FP */
66866688

src/cmd/internal/obj/arm64/doc.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ In the following example, PCALIGN at the entry of the function Add will align it
8686
MOVD $1, R1
8787
RET
8888
89+
7. Move large constants to vector registers.
90+
91+
Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively.
92+
For a 128-bit integer, it takes two 64-bit operands, for the high and low parts separately.
93+
94+
Examples:
95+
VMOVS $0x11223344, V0
96+
VMOVD $0x1122334455667788, V1
97+
VMOVQ $0x1122334455667788, $0x8877665544332211, V2 // V2=0x11223344556677888877665544332211
98+
8999
Special Cases.
90100
91101
(1) umov is written as VMOV.

0 commit comments

Comments
 (0)