Skip to content

Commit d7ab277

Browse files
lijunchen authored and cherrymui committed
cmd/asm: add more SIMD instructions on arm64
This CL adds USHLL, USHLL2, UZP1, UZP2, and BIF instructions requested by #40725. And since UXTL* are aliases of USHLL*, this CL also merges them into one case. Updates #40725 Change-Id: I404a4fdaf953319f72eea548175bec1097a2a816 Reviewed-on: https://go-review.googlesource.com/c/go/+/253659 Reviewed-by: Cherry Zhang <[email protected]> Run-TryBot: Cherry Zhang <[email protected]> TryBot-Result: Gobot Gobot <[email protected]>
1 parent 9b2df72 commit d7ab277

File tree

5 files changed

+100
-55
lines changed

5 files changed

+100
-55
lines changed

src/cmd/asm/internal/asm/testdata/arm64.s

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,26 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
156156
VCMTST V2.B8, V29.B8, V2.B8 // a28f220e
157157
VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e
158158
VSUB V2.B8, V30.B8, V30.B8 // de87222e
159+
VUZP1 V0.B8, V30.B8, V1.B8 // c11b000e
160+
VUZP1 V1.B16, V29.B16, V2.B16 // a21b014e
161+
VUZP1 V2.H4, V28.H4, V3.H4 // 831b420e
162+
VUZP1 V3.H8, V27.H8, V4.H8 // 641b434e
163+
VUZP1 V28.S2, V2.S2, V5.S2 // 45189c0e
164+
VUZP1 V29.S4, V1.S4, V6.S4 // 26189d4e
165+
VUZP1 V30.D2, V0.D2, V7.D2 // 0718de4e
166+
VUZP2 V0.D2, V30.D2, V1.D2 // c15bc04e
167+
VUZP2 V30.D2, V0.D2, V29.D2 // 1d58de4e
168+
VUSHLL $0, V30.B8, V30.H8 // dea7082f
169+
VUSHLL $0, V30.H4, V29.S4 // dda7102f
170+
VUSHLL $0, V29.S2, V2.D2 // a2a7202f
171+
VUSHLL2 $0, V30.B16, V2.H8 // c2a7086f
172+
VUSHLL2 $0, V30.H8, V30.S4 // dea7106f
173+
VUSHLL2 $0, V29.S4, V2.D2 // a2a7206f
174+
VUSHLL $7, V30.B8, V30.H8 // dea70f2f
175+
VUSHLL $15, V30.H4, V29.S4 // dda71f2f
176+
VUSHLL2 $31, V30.S4, V2.D2 // c2a73f6f
177+
VBIF V0.B8, V30.B8, V1.B8 // c11fe02e
178+
VBIF V30.B16, V0.B16, V2.B16 // 021cfe6e
159179
MOVD (R2)(R6.SXTW), R4 // 44c866f8
160180
MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8
161181
MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8

src/cmd/asm/internal/asm/testdata/arm64error.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,4 +345,12 @@ TEXT errors(SB),$0
345345
VUXTL V30.D2, V30.H8 // ERROR "operand mismatch"
346346
VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch"
347347
VUXTL V3.D2, V4.B8 // ERROR "operand mismatch"
348+
VUZP1 V0.B8, V30.B8, V1.B16 // ERROR "operand mismatch"
349+
VUZP2 V0.Q1, V30.Q1, V1.Q1 // ERROR "invalid arrangement"
350+
VUSHLL $0, V30.D2, V30.H8 // ERROR "operand mismatch"
351+
VUSHLL2 $0, V20.B8, V21.H8 // ERROR "operand mismatch"
352+
VUSHLL $8, V30.B8, V30.H8 // ERROR "shift amount out of range"
353+
VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range"
354+
VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch"
355+
VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement"
348356
RET

src/cmd/internal/obj/arm64/a.out.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,7 @@ const (
954954
AVADD
955955
AVADDP
956956
AVAND
957+
AVBIF
957958
AVCMEQ
958959
AVCNT
959960
AVEOR
@@ -986,6 +987,12 @@ const (
986987
AVEXT
987988
AVRBIT
988989
AVUSHR
990+
AVUSHLL
991+
AVUSHLL2
992+
AVUXTL
993+
AVUXTL2
994+
AVUZP1
995+
AVUZP2
989996
AVSHL
990997
AVSRI
991998
AVBSL
@@ -994,8 +1001,6 @@ const (
9941001
AVZIP1
9951002
AVZIP2
9961003
AVCMTST
997-
AVUXTL
998-
AVUXTL2
9991004
ALAST
10001005
AB = obj.AJMP
10011006
ABL = obj.ACALL

src/cmd/internal/obj/arm64/anames.go

Lines changed: 7 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/arm64/asm7.go

Lines changed: 58 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ var optab = []Optab{
480480
{AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0},
481481
{AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0},
482482
{AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0},
483+
{AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0},
483484
{AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0},
484485

485486
/* conditional operations */
@@ -2751,6 +2752,9 @@ func buildop(ctxt *obj.Link) {
27512752
oprangeset(AVBSL, t)
27522753
oprangeset(AVBIT, t)
27532754
oprangeset(AVCMTST, t)
2755+
oprangeset(AVUZP1, t)
2756+
oprangeset(AVUZP2, t)
2757+
oprangeset(AVBIF, t)
27542758

27552759
case AVADD:
27562760
oprangeset(AVSUB, t)
@@ -2801,6 +2805,9 @@ func buildop(ctxt *obj.Link) {
28012805
case AVUXTL:
28022806
oprangeset(AVUXTL2, t)
28032807

2808+
case AVUSHLL:
2809+
oprangeset(AVUSHLL2, t)
2810+
28042811
case AVLD1R:
28052812
oprangeset(AVLD2, t)
28062813
oprangeset(AVLD2R, t)
@@ -4177,7 +4184,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
41774184
rel.Add = 0
41784185
rel.Type = objabi.R_ARM64_GOTPCREL
41794186

4180-
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm.<T>, Vn.<T>, Vd.<T> */
4187+
case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzp1/vuzp2 Vm.<T>, Vn.<T>, Vd.<T> */
41814188
af := int((p.From.Reg >> 5) & 15)
41824189
af3 := int((p.Reg >> 5) & 15)
41834190
at := int((p.To.Reg >> 5) & 15)
@@ -4219,7 +4226,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
42194226
}
42204227

42214228
switch p.As {
4222-
case AVORR, AVAND, AVEOR, AVBIT, AVBSL:
4229+
case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF:
42234230
if af != ARNG_16B && af != ARNG_8B {
42244231
c.ctxt.Diag("invalid arrangement: %v", p)
42254232
}
@@ -4233,7 +4240,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
42334240
size = 0
42344241
case AVBSL:
42354242
size = 1
4236-
case AVORR, AVBIT:
4243+
case AVORR, AVBIT, AVBIF:
42374244
size = 2
42384245
case AVFMLA, AVFMLS:
42394246
if af == ARNG_2D {
@@ -5120,56 +5127,44 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
51205127
case 101: // FMOVQ/FMOVD $vcon, Vd -> load from constant pool.
51215128
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
51225129

5123-
case 102: // VUXTL{2} Vn.<Tb>, Vd.<Ta>
5124-
af := int((p.From.Reg >> 5) & 15)
5125-
at := int((p.To.Reg >> 5) & 15)
5126-
var Q, immh uint32
5127-
switch at {
5128-
case ARNG_8H:
5129-
if af == ARNG_8B {
5130-
immh = 1
5131-
Q = 0
5132-
} else if af == ARNG_16B {
5133-
immh = 1
5134-
Q = 1
5135-
} else {
5136-
c.ctxt.Diag("operand mismatch: %v\n", p)
5137-
}
5138-
case ARNG_4S:
5139-
if af == ARNG_4H {
5140-
immh = 2
5141-
Q = 0
5142-
} else if af == ARNG_8H {
5143-
immh = 2
5144-
Q = 1
5145-
} else {
5146-
c.ctxt.Diag("operand mismatch: %v\n", p)
5147-
}
5148-
case ARNG_2D:
5149-
if af == ARNG_2S {
5150-
immh = 4
5151-
Q = 0
5152-
} else if af == ARNG_4S {
5153-
immh = 4
5154-
Q = 1
5155-
} else {
5156-
c.ctxt.Diag("operand mismatch: %v\n", p)
5157-
}
5130+
case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
5131+
o1 = c.opirr(p, p.As)
5132+
rf := p.Reg
5133+
af := uint8((p.Reg >> 5) & 15)
5134+
at := uint8((p.To.Reg >> 5) & 15)
5135+
shift := int(p.From.Offset)
5136+
if p.As == AVUXTL || p.As == AVUXTL2 {
5137+
rf = p.From.Reg
5138+
af = uint8((p.From.Reg >> 5) & 15)
5139+
shift = 0
5140+
}
5141+
5142+
pack := func(q, x, y uint8) uint32 {
5143+
return uint32(q)<<16 | uint32(x)<<8 | uint32(y)
5144+
}
5145+
5146+
var Q uint8 = uint8(o1>>30) & 1
5147+
var immh, width uint8
5148+
switch pack(Q, af, at) {
5149+
case pack(0, ARNG_8B, ARNG_8H):
5150+
immh, width = 1, 8
5151+
case pack(1, ARNG_16B, ARNG_8H):
5152+
immh, width = 1, 8
5153+
case pack(0, ARNG_4H, ARNG_4S):
5154+
immh, width = 2, 16
5155+
case pack(1, ARNG_8H, ARNG_4S):
5156+
immh, width = 2, 16
5157+
case pack(0, ARNG_2S, ARNG_2D):
5158+
immh, width = 4, 32
5159+
case pack(1, ARNG_4S, ARNG_2D):
5160+
immh, width = 4, 32
51585161
default:
51595162
c.ctxt.Diag("operand mismatch: %v\n", p)
51605163
}
5161-
5162-
if p.As == AVUXTL && Q == 1 {
5163-
c.ctxt.Diag("operand mismatch: %v\n", p)
5164+
if !(0 <= shift && shift <= int(width-1)) {
5165+
c.ctxt.Diag("shift amount out of range: %v\n", p)
51645166
}
5165-
if p.As == AVUXTL2 && Q == 0 {
5166-
c.ctxt.Diag("operand mismatch: %v\n", p)
5167-
}
5168-
5169-
o1 = c.oprrr(p, p.As)
5170-
rf := int((p.From.Reg) & 31)
5171-
rt := int((p.To.Reg) & 31)
5172-
o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31)
5167+
o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31)
51735168
}
51745169
out[0] = o1
51755170
out[1] = o2
@@ -5802,6 +5797,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
58025797
case AVLD2R, AVLD4R:
58035798
return 0xD<<24 | 3<<21
58045799

5800+
case AVBIF:
5801+
return 1<<29 | 7<<25 | 7<<21 | 7<<10
5802+
58055803
case AVBIT:
58065804
return 1<<29 | 0x75<<21 | 7<<10
58075805

@@ -5811,8 +5809,11 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
58115809
case AVCMTST:
58125810
return 0xE<<24 | 1<<21 | 0x23<<10
58135811

5814-
case AVUXTL, AVUXTL2:
5815-
return 0x5e<<23 | 0x29<<10
5812+
case AVUZP1:
5813+
return 7<<25 | 3<<11
5814+
5815+
case AVUZP2:
5816+
return 7<<25 | 1<<14 | 3<<11
58165817
}
58175818

58185819
c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
@@ -6011,6 +6012,12 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
60116012

60126013
case AVSRI:
60136014
return 0x5E<<23 | 17<<10
6015+
6016+
case AVUSHLL, AVUXTL:
6017+
return 1<<29 | 15<<24 | 0x29<<10
6018+
6019+
case AVUSHLL2, AVUXTL2:
6020+
return 3<<29 | 15<<24 | 0x29<<10
60146021
}
60156022

60166023
c.ctxt.Diag("%v: bad irr %v", p, a)

0 commit comments

Comments
 (0)