Skip to content

Commit dfdc388

Browse files
committed
cmd/internal/obj/arm64: enable some SIMD instructions
Enable the VBSL, VBIT, VCMTST, VUXTL, VUXTL2 and FMOVQ SIMD instructions required by issue #40725. The FMOVQ instruction is used to move a large constant to a Vn register. Add test cases. Fixes #40725 Change-Id: I1cac1922a0a0165d698a4b73a41f7a5f0a0ad549 Reviewed-on: https://go-review.googlesource.com/c/go/+/249758 Reviewed-by: Cherry Zhang <[email protected]>
1 parent aa476ba commit dfdc388

File tree

5 files changed

+139
-14
lines changed

5 files changed

+139
-14
lines changed

src/cmd/asm/internal/asm/testdata/arm64.s

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,17 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
145145
VZIP2 V10.D2, V13.D2, V3.D2 // a379ca4e
146146
VZIP1 V17.S2, V4.S2, V26.S2 // 9a38910e
147147
VZIP2 V25.S2, V14.S2, V25.S2 // d979990e
148+
VUXTL V30.B8, V30.H8 // dea7082f
149+
VUXTL V30.H4, V29.S4 // dda7102f
150+
VUXTL V29.S2, V2.D2 // a2a7202f
151+
VUXTL2 V30.H8, V30.S4 // dea7106f
152+
VUXTL2 V29.S4, V2.D2 // a2a7206f
153+
VUXTL2 V30.B16, V2.H8 // c2a7086f
154+
VBIT V21.B16, V25.B16, V4.B16 // 241fb56e
155+
VBSL V23.B16, V3.B16, V7.B16 // 671c776e
156+
VCMTST V2.B8, V29.B8, V2.B8 // a28f220e
157+
VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e
158+
VSUB V2.B8, V30.B8, V30.B8 // de87222e
148159
MOVD (R2)(R6.SXTW), R4 // 44c866f8
149160
MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8
150161
MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8
@@ -186,6 +197,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
186197
FMOVS $(0.96875), F3 // 03f02d1e
187198
FMOVD $(28.0), F4 // 0490671e
188199

200+
// move a large constant to a Vd.
201+
FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20
202+
FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29
203+
189204
FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
190205
FMOVS (R2)(R6<<2), F4 // 447866bc
191206
FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc

src/cmd/asm/internal/asm/testdata/arm64error.s

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,4 +340,9 @@ TEXT errors(SB),$0
340340
MRS PMSWINC_EL0, R3 // ERROR "system register is not readable"
341341
MRS OSLAR_EL1, R3 // ERROR "system register is not readable"
342342
VLD3R.P 24(R15), [V15.H4,V16.H4,V17.H4] // ERROR "invalid post-increment offset"
343+
VBIT V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement"
344+
VBSL V1.D2, V12.D2, V3.D2 // ERROR "invalid arrangement"
345+
VUXTL V30.D2, V30.H8 // ERROR "operand mismatch"
346+
VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch"
347+
VUXTL V3.D2, V4.B8 // ERROR "operand mismatch"
343348
RET

src/cmd/internal/obj/arm64/a.out.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,7 @@ const (
874874
AFLDPS
875875
AFMOVD
876876
AFMOVS
877+
AFMOVQ
877878
AFMULD
878879
AFMULS
879880
AFNEGD
@@ -987,9 +988,14 @@ const (
987988
AVUSHR
988989
AVSHL
989990
AVSRI
991+
AVBSL
992+
AVBIT
990993
AVTBL
991994
AVZIP1
992995
AVZIP2
996+
AVCMTST
997+
AVUXTL
998+
AVUXTL2
993999
ALAST
9941000
AB = obj.AJMP
9951001
ABL = obj.ACALL

src/cmd/internal/obj/arm64/anames.go

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/arm64/asm7.go

Lines changed: 107 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,11 @@ var optab = []Optab{
393393
{AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0},
394394
{AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0},
395395

396+
// Move a large constant to a Vn.
397+
{AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
398+
{AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
399+
{AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
400+
396401
/* jump operations */
397402
{AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
398403
{ABL, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
@@ -403,12 +408,14 @@ var optab = []Optab{
403408
{obj.ARET, C_NONE, C_NONE, C_NONE, C_REG, 6, 4, 0, 0, 0},
404409
{obj.ARET, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0},
405410
{ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 7, 4, 0, 0, 0},
406-
{AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0},
407-
{AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0},
408411
{ACBZ, C_REG, C_NONE, C_NONE, C_SBRA, 39, 4, 0, 0, 0},
409412
{ATBZ, C_VCON, C_REG, C_NONE, C_SBRA, 40, 4, 0, 0, 0},
410413
{AERET, C_NONE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0},
411414

415+
// get a PC-relative address
416+
{AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0},
417+
{AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0},
418+
412419
{ACLREX, C_NONE, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0},
413420
{ACLREX, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0},
414421
{ABFM, C_VCON, C_REG, C_VCON, C_REG, 42, 4, 0, 0, 0},
@@ -473,6 +480,7 @@ var optab = []Optab{
473480
{AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0},
474481
{AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0},
475482
{AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0},
483+
{AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0},
476484

477485
/* conditional operations */
478486
{ACSEL, C_COND, C_REG, C_REG, C_REG, 18, 4, 0, 0, 0},
@@ -2657,7 +2665,7 @@ func buildop(ctxt *obj.Link) {
26572665
case AFCSELD:
26582666
oprangeset(AFCSELS, t)
26592667

2660-
case AFMOVS, AFMOVD:
2668+
case AFMOVS, AFMOVD, AFMOVQ:
26612669
break
26622670

26632671
case AFCVTZSD:
@@ -2740,6 +2748,9 @@ func buildop(ctxt *obj.Link) {
27402748
oprangeset(AVCMEQ, t)
27412749
oprangeset(AVORR, t)
27422750
oprangeset(AVEOR, t)
2751+
oprangeset(AVBSL, t)
2752+
oprangeset(AVBIT, t)
2753+
oprangeset(AVCMTST, t)
27432754

27442755
case AVADD:
27452756
oprangeset(AVSUB, t)
@@ -2787,6 +2798,9 @@ func buildop(ctxt *obj.Link) {
27872798
case AVZIP1:
27882799
oprangeset(AVZIP2, t)
27892800

2801+
case AVUXTL:
2802+
oprangeset(AVUXTL2, t)
2803+
27902804
case AVLD1R:
27912805
oprangeset(AVLD2, t)
27922806
oprangeset(AVLD2R, t)
@@ -4163,7 +4177,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
41634177
rel.Add = 0
41644178
rel.Type = objabi.R_ARM64_GOTPCREL
41654179

4166-
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls Vm.<T>, Vn.<T>, Vd.<T> */
4180+
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm.<T>, Vn.<T>, Vd.<T> */
41674181
af := int((p.From.Reg >> 5) & 15)
41684182
af3 := int((p.Reg >> 5) & 15)
41694183
at := int((p.To.Reg >> 5) & 15)
@@ -4204,17 +4218,24 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
42044218
c.ctxt.Diag("invalid arrangement: %v", p)
42054219
}
42064220

4207-
if (p.As == AVORR || p.As == AVAND || p.As == AVEOR) &&
4208-
(af != ARNG_16B && af != ARNG_8B) {
4209-
c.ctxt.Diag("invalid arrangement: %v", p)
4210-
} else if (p.As == AVFMLA || p.As == AVFMLS) &&
4211-
(af != ARNG_2D && af != ARNG_2S && af != ARNG_4S) {
4212-
c.ctxt.Diag("invalid arrangement: %v", p)
4213-
} else if p.As == AVORR {
4214-
size = 2
4215-
} else if p.As == AVAND || p.As == AVEOR {
4221+
switch p.As {
4222+
case AVORR, AVAND, AVEOR, AVBIT, AVBSL:
4223+
if af != ARNG_16B && af != ARNG_8B {
4224+
c.ctxt.Diag("invalid arrangement: %v", p)
4225+
}
4226+
case AVFMLA, AVFMLS:
4227+
if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S {
4228+
c.ctxt.Diag("invalid arrangement: %v", p)
4229+
}
4230+
}
4231+
switch p.As {
4232+
case AVAND, AVEOR:
42164233
size = 0
4217-
} else if p.As == AVFMLA || p.As == AVFMLS {
4234+
case AVBSL:
4235+
size = 1
4236+
case AVORR, AVBIT:
4237+
size = 2
4238+
case AVFMLA, AVFMLS:
42184239
if af == ARNG_2D {
42194240
size = 1
42204241
} else {
@@ -5096,6 +5117,59 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
50965117
o1 = q<<30 | 0xe<<24 | len<<13
50975118
o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31)
50985119

5120+
case 101: // FMOVQ/FMOVD $vcon, Vd -> load from constant pool.
5121+
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
5122+
5123+
case 102: // VUXTL{2} Vn.<Tb>, Vd.<Ta>
5124+
af := int((p.From.Reg >> 5) & 15)
5125+
at := int((p.To.Reg >> 5) & 15)
5126+
var Q, immh uint32
5127+
switch at {
5128+
case ARNG_8H:
5129+
if af == ARNG_8B {
5130+
immh = 1
5131+
Q = 0
5132+
} else if af == ARNG_16B {
5133+
immh = 1
5134+
Q = 1
5135+
} else {
5136+
c.ctxt.Diag("operand mismatch: %v\n", p)
5137+
}
5138+
case ARNG_4S:
5139+
if af == ARNG_4H {
5140+
immh = 2
5141+
Q = 0
5142+
} else if af == ARNG_8H {
5143+
immh = 2
5144+
Q = 1
5145+
} else {
5146+
c.ctxt.Diag("operand mismatch: %v\n", p)
5147+
}
5148+
case ARNG_2D:
5149+
if af == ARNG_2S {
5150+
immh = 4
5151+
Q = 0
5152+
} else if af == ARNG_4S {
5153+
immh = 4
5154+
Q = 1
5155+
} else {
5156+
c.ctxt.Diag("operand mismatch: %v\n", p)
5157+
}
5158+
default:
5159+
c.ctxt.Diag("operand mismatch: %v\n", p)
5160+
}
5161+
5162+
if p.As == AVUXTL && Q == 1 {
5163+
c.ctxt.Diag("operand mismatch: %v\n", p)
5164+
}
5165+
if p.As == AVUXTL2 && Q == 0 {
5166+
c.ctxt.Diag("operand mismatch: %v\n", p)
5167+
}
5168+
5169+
o1 = c.oprrr(p, p.As)
5170+
rf := int((p.From.Reg) & 31)
5171+
rt := int((p.To.Reg) & 31)
5172+
o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31)
50995173
}
51005174
out[0] = o1
51015175
out[1] = o2
@@ -5662,6 +5736,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
56625736
case AVADD:
56635737
return 7<<25 | 1<<21 | 1<<15 | 1<<10
56645738

5739+
case AVSUB:
5740+
return 0x17<<25 | 1<<21 | 1<<15 | 1<<10
5741+
56655742
case AVADDP:
56665743
return 7<<25 | 1<<21 | 1<<15 | 15<<10
56675744

@@ -5724,6 +5801,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
57245801

57255802
case AVLD2R, AVLD4R:
57265803
return 0xD<<24 | 3<<21
5804+
5805+
case AVBIT:
5806+
return 1<<29 | 0x75<<21 | 7<<10
5807+
5808+
case AVBSL:
5809+
return 1<<29 | 0x73<<21 | 7<<10
5810+
5811+
case AVCMTST:
5812+
return 0xE<<24 | 1<<21 | 0x23<<10
5813+
5814+
case AVUXTL, AVUXTL2:
5815+
return 0x5e<<23 | 0x29<<10
57275816
}
57285817

57295818
c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
@@ -6566,6 +6655,10 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 {
65666655
fp = 1
65676656
w = 1 /* 64-bit SIMD/FP */
65686657

6658+
case AFMOVQ:
6659+
fp = 1
6660+
w = 2 /* 128-bit SIMD/FP */
6661+
65696662
case AMOVD:
65706663
if p.Pool.As == ADWORD {
65716664
w = 1 /* 64-bit */

0 commit comments

Comments
 (0)