Skip to content

Commit 1d1f2fb

Browse files
TocarIP authored and rsc committed
cmd/internal/obj/x86: add new instructions, cleanup.
Add several instructions that were used via BYTE and use them.
Instructions added: PEXTRB, PEXTRD, PEXTRQ, PINSRB, XGETBV, POPCNT.

Change-Id: I5a80cd390dc01f3555dbbe856a475f74b5e6df65
Reviewed-on: https://go-review.googlesource.com/18593
Run-TryBot: Ilya Tocar <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Russ Cox <[email protected]>
1 parent ceeb52d commit 1d1f2fb

File tree

7 files changed

+49
-18
lines changed

7 files changed

+49
-18
lines changed

src/cmd/internal/obj/x86/a.out.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ const (
181181
APAUSE
182182
APOPAL
183183
APOPAW
184+
APOPCNT
184185
APOPFL
185186
APOPFW
186187
APOPL
@@ -500,6 +501,7 @@ const (
500501
AXADDQ
501502
AXCHGQ
502503
AXORQ
504+
AXGETBV
503505

504506
// media
505507
AADDPD
@@ -614,6 +616,9 @@ const (
614616
APCMPGTL
615617
APCMPGTW
616618
APEXTRW
619+
APEXTRB
620+
APEXTRD
621+
APEXTRQ
617622
APFACC
618623
APFADD
619624
APFCMPEQ
@@ -632,6 +637,7 @@ const (
632637
APFSUB
633638
APFSUBR
634639
APINSRW
640+
APINSRB
635641
APINSRD
636642
APINSRQ
637643
APMADDWL

src/cmd/internal/obj/x86/anames.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ var Anames = []string{
149149
"PAUSE",
150150
"POPAL",
151151
"POPAW",
152+
"POPCNT",
152153
"POPFL",
153154
"POPFW",
154155
"POPL",
@@ -451,6 +452,7 @@ var Anames = []string{
451452
"XADDQ",
452453
"XCHGQ",
453454
"XORQ",
455+
"XGETBV",
454456
"ADDPD",
455457
"ADDPS",
456458
"ADDSD",
@@ -563,6 +565,9 @@ var Anames = []string{
563565
"PCMPGTL",
564566
"PCMPGTW",
565567
"PEXTRW",
568+
"PEXTRB",
569+
"PEXTRD",
570+
"PEXTRQ",
566571
"PFACC",
567572
"PFADD",
568573
"PFCMPEQ",
@@ -581,6 +586,7 @@ var Anames = []string{
581586
"PFSUB",
582587
"PFSUBR",
583588
"PINSRW",
589+
"PINSRB",
584590
"PINSRD",
585591
"PINSRQ",
586592
"PMADDWL",

src/cmd/internal/obj/x86/asm6.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ const (
187187
Zm_r_xm_nr
188188
Zr_m_xm_nr
189189
Zibm_r /* mmx1,mmx2/mem64,imm8 */
190+
Zibr_m
190191
Zmb_r
191192
Zaut_r
192193
Zo_m
@@ -219,6 +220,7 @@ const (
219220
Pf2 = 0xf2 /* xmm escape 1: f2 0f */
220221
Pf3 = 0xf3 /* xmm escape 2: f3 0f */
221222
Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
223+
Pfw = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
222224
Pvex1 = 0xc5 /* 66.0f escape, vex encoding */
223225
Pvex2 = 0xc6 /* f3.0f escape, vex encoding */
224226
Pvex3 = 0xc7 /* 66.0f38 escape, vex encoding */
@@ -720,6 +722,10 @@ var yextrw = []ytab{
720722
{Yu8, Yxr, Yrl, Zibm_r, 2},
721723
}
722724

725+
var yextr = []ytab{
726+
{Yu8, Yxr, Ymm, Zibr_m, 3},
727+
}
728+
723729
var yinsrw = []ytab{
724730
{Yu8, Yml, Yxr, Zibm_r, 2},
725731
}
@@ -1162,6 +1168,9 @@ var optab =
11621168
{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
11631169
{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
11641170
{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
1171+
{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
1172+
{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
1173+
{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
11651174
{APF2IL, ymfp, Px, [23]uint8{0x1d}},
11661175
{APF2IW, ymfp, Px, [23]uint8{0x1c}},
11671176
{API2FL, ymfp, Px, [23]uint8{0x0d}},
@@ -1183,6 +1192,7 @@ var optab =
11831192
{APFSUB, ymfp, Px, [23]uint8{0x9a}},
11841193
{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
11851194
{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
1195+
{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
11861196
{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
11871197
{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
11881198
{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
@@ -1198,6 +1208,7 @@ var optab =
11981208
{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
11991209
{APOPAL, ynone, P32, [23]uint8{0x61}},
12001210
{APOPAW, ynone, Pe, [23]uint8{0x61}},
1211+
{APOPCNT, yml_rl, Pfw, [23]uint8{0xb8}},
12011212
{APOPFL, ynone, P32, [23]uint8{0x9d}},
12021213
{APOPFQ, ynone, Py, [23]uint8{0x9d}},
12031214
{APOPFW, ynone, Pe, [23]uint8{0x9d}},
@@ -1533,6 +1544,7 @@ var optab =
15331544
{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
15341545
{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
15351546
{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
1547+
{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
15361548
{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
15371549
{obj.ATYPE, nil, 0, [23]uint8{}},
15381550
{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
@@ -3194,6 +3206,15 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
31943206
ctxt.Andptr[0] = Pm
31953207
ctxt.Andptr = ctxt.Andptr[1:]
31963208

3209+
case Pfw: /* first escape, Rex.w, and second escape */
3210+
ctxt.Andptr[0] = Pf3
3211+
ctxt.Andptr = ctxt.Andptr[1:]
3212+
3213+
ctxt.Andptr[0] = Pw
3214+
ctxt.Andptr = ctxt.Andptr[1:]
3215+
ctxt.Andptr[0] = Pm
3216+
ctxt.Andptr = ctxt.Andptr[1:]
3217+
31973218
case Pm: /* opcode escape */
31983219
ctxt.Andptr[0] = Pm
31993220
ctxt.Andptr = ctxt.Andptr[1:]
@@ -3343,7 +3364,7 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
33433364
ctxt.Andptr[0] = byte(op)
33443365
ctxt.Andptr = ctxt.Andptr[1:]
33453366

3346-
case Zibm_r:
3367+
case Zibm_r, Zibr_m:
33473368
for {
33483369
tmp1 := z
33493370
z++
@@ -3354,7 +3375,11 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
33543375
ctxt.Andptr[0] = byte(op)
33553376
ctxt.Andptr = ctxt.Andptr[1:]
33563377
}
3357-
asmand(ctxt, p, p.From3, &p.To)
3378+
if yt.zcase == Zibr_m {
3379+
asmand(ctxt, p, &p.To, p.From3)
3380+
} else {
3381+
asmand(ctxt, p, p.From3, &p.To)
3382+
}
33583383
ctxt.Andptr[0] = byte(p.From.Offset)
33593384
ctxt.Andptr = ctxt.Andptr[1:]
33603385

src/crypto/aes/asm_amd64.s

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,6 @@ Lexp_dec_loop:
217217
MOVUPS X0, 16(DX)
218218
RET
219219

220-
#define PSHUFD_X0_X0_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc0
221-
#define PSHUFD_X1_X1_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xc9
222220
TEXT _expand_key_128<>(SB),NOSPLIT,$0
223221
PSHUFD $0xff, X1, X1
224222
SHUFPS $0x10, X0, X4
@@ -230,8 +228,6 @@ TEXT _expand_key_128<>(SB),NOSPLIT,$0
230228
ADDQ $16, BX
231229
RET
232230

233-
#define PSLLDQ_X5_ BYTE $0x66; BYTE $0x0f; BYTE $0x73; BYTE $0xfd
234-
#define PSHUFD_X0_X3_ BYTE $0x66; BYTE $0x0f; BYTE $0x70; BYTE $0xd8
235231
TEXT _expand_key_192a<>(SB),NOSPLIT,$0
236232
PSHUFD $0x55, X1, X1
237233
SHUFPS $0x10, X0, X4
@@ -242,7 +238,7 @@ TEXT _expand_key_192a<>(SB),NOSPLIT,$0
242238

243239
MOVAPS X2, X5
244240
MOVAPS X2, X6
245-
PSLLDQ_X5_; BYTE $0x4
241+
PSLLDQ $0x4, X5
246242
PSHUFD $0xff, X0, X3
247243
PXOR X3, X2
248244
PXOR X5, X2
@@ -264,7 +260,7 @@ TEXT _expand_key_192b<>(SB),NOSPLIT,$0
264260
PXOR X1, X0
265261

266262
MOVAPS X2, X5
267-
PSLLDQ_X5_; BYTE $0x4
263+
PSLLDQ $0x4, X5
268264
PSHUFD $0xff, X0, X3
269265
PXOR X3, X2
270266
PXOR X5, X2

src/crypto/aes/gcm_amd64.s

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ TEXT ·gcmAesData(SB),NOSPLIT,$0
345345
PXOR B0, B0
346346
MOVQ (aut), B0
347347
PINSRD $2, 8(aut), B0
348-
BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x20; BYTE $0x46; BYTE $0x0c; BYTE $0x0c //PINSRB $12, 12(aut), B0
348+
PINSRB $12, 12(aut), B0
349349
XORQ autLen, autLen
350350
JMP dataMul
351351

@@ -404,7 +404,7 @@ dataEnd:
404404
dataLoadLoop:
405405

406406
PSLLDQ $1, B0
407-
BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x20; BYTE $0x06; BYTE $0x00 //PINSRB $0, (aut), B0
407+
PINSRB $0, (aut), B0
408408

409409
LEAQ -1(aut), aut
410410
DECQ autLen
@@ -892,7 +892,7 @@ encLast4:
892892
PXOR B0, B0
893893
ptxLoadLoop:
894894
PSLLDQ $1, B0
895-
BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x20; BYTE $0x06; BYTE $0x00 //PINSRB $0, (ptx), B0
895+
PINSRB $0, (ptx), B0
896896
LEAQ -1(ptx), ptx
897897
DECQ ptxLen
898898
JNE ptxLoadLoop
@@ -1264,7 +1264,7 @@ decLast3:
12641264
PXOR T1, B0
12651265

12661266
ptxStoreLoop:
1267-
BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x14; BYTE $0x06; BYTE $0x00 // PEXTRB $0, B0, (ptx)
1267+
PEXTRB $0, B0, (ptx)
12681268
PSRLDQ $1, B0
12691269
LEAQ 1(ptx), ptx
12701270
DECQ ptxLen

src/hash/crc32/crc32_amd64.s

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,7 @@ finish:
225225
PCLMULQDQ $0, X0, X1
226226
PXOR X2, X1
227227

228-
/* PEXTRD $1, X1, AX (SSE 4.1) */
229-
BYTE $0x66; BYTE $0x0f; BYTE $0x3a;
230-
BYTE $0x16; BYTE $0xc8; BYTE $0x01;
228+
PEXTRD $1, X1, AX
231229
MOVL AX, ret+32(FP)
232230

233231
RET

src/runtime/asm_amd64.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ notintel:
5656
JNE noavx
5757
MOVL $0, CX
5858
// For XGETBV, OSXSAVE bit is required and sufficient
59-
BYTE $0x0F; BYTE $0x01; BYTE $0xD0
59+
XGETBV
6060
ANDL $6, AX
6161
CMPL AX, $6 // Check for OS support of YMM registers
6262
JNE noavx
@@ -822,10 +822,10 @@ TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
822822
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
823823
CMPB runtime·lfenceBeforeRdtsc(SB), $1
824824
JNE mfence
825-
BYTE $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
825+
LFENCE
826826
JMP done
827827
mfence:
828-
BYTE $0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
828+
MFENCE
829829
done:
830830
RDTSC
831831
SHLQ $32, DX

0 commit comments

Comments
 (0)