@@ -181,6 +181,7 @@ const (
181
181
Zm_r
182
182
Zm2_r
183
183
Zm_r_xm
184
+ Zm_r_xm_vex
184
185
Zm_r_i_xm
185
186
Zm_r_3d
186
187
Zm_r_xm_nr
@@ -193,6 +194,7 @@ const (
193
194
Zpseudo
194
195
Zr_m
195
196
Zr_m_xm
197
+ Zr_m_xm_vex
196
198
Zrp_
197
199
Z_ib
198
200
Z_il
@@ -206,21 +208,23 @@ const (
206
208
)
207
209
208
210
const (
209
- Px = 0
210
- Px1 = 1 // symbolic; exact value doesn't matter
211
- P32 = 0x32 /* 32-bit only */
212
- Pe = 0x66 /* operand escape */
213
- Pm = 0x0f /* 2byte opcode escape */
214
- Pq = 0xff /* both escapes: 66 0f */
215
- Pb = 0xfe /* byte operands */
216
- Pf2 = 0xf2 /* xmm escape 1: f2 0f */
217
- Pf3 = 0xf3 /* xmm escape 2: f3 0f */
218
- Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
219
- Pw = 0x48 /* Rex.w */
220
- Pw8 = 0x90 // symbolic; exact value doesn't matter
221
- Py = 0x80 /* defaults to 64-bit mode */
222
- Py1 = 0x81 // symbolic; exact value doesn't matter
223
- Py3 = 0x83 // symbolic; exact value doesn't matter
211
+ Px = 0
212
+ Px1 = 1 // symbolic; exact value doesn't matter
213
+ P32 = 0x32 /* 32-bit only */
214
+ Pe = 0x66 /* operand escape */
215
+ Pm = 0x0f /* 2byte opcode escape */
216
+ Pq = 0xff /* both escapes: 66 0f */
217
+ Pb = 0xfe /* byte operands */
218
+ Pf2 = 0xf2 /* xmm escape 1: f2 0f */
219
+ Pf3 = 0xf3 /* xmm escape 2: f3 0f */
220
+ Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
221
+ Pvex1 = 0xc5 /* 66 escape, vex encoding */
222
+ Pvex2 = 0xc6 /* f3 escape, vex encoding */
223
+ Pw = 0x48 /* Rex.w */
224
+ Pw8 = 0x90 // symbolic; exact value doesn't matter
225
+ Py = 0x80 /* defaults to 64-bit mode */
226
+ Py1 = 0x81 // symbolic; exact value doesn't matter
227
+ Py3 = 0x83 // symbolic; exact value doesn't matter
224
228
225
229
Rxw = 1 << 3 /* =1, 64-bit operand size */
226
230
Rxr = 1 << 2 /* extend modrm reg */
@@ -622,6 +626,10 @@ var yxr_ml = []ytab{
622
626
{Yxr , Ynone , Yml , Zr_m_xm , 1 },
623
627
}
624
628
629
// yxr_ml_vex is the VEX variant of yxr_ml: it has the same single
// (Yxr, Ynone, Yml) row, but selects the Zr_m_xm_vex encoding case instead of
// Zr_m_xm, so doasm emits the prefix through vexprefix. optab references this
// table for AMOVNTHD.
// NOTE(review): the three operand classes are presumably from / from3 / to,
// and the trailing 1 presumably the row's opcode-byte count - confirm against
// the ytab definition.
+ var yxr_ml_vex = []ytab {
630
+ {Yxr , Ynone , Yml , Zr_m_xm_vex , 1 },
631
+ }
632
+
625
633
// ymr is a one-row ytab table: operand classes (Ymr, Ynone, Ymr) with the
// Zm_r encoding case.
// NOTE(review): the classes are presumably from / from3 / to - confirm
// against the ytab definition.
var ymr = []ytab {
626
634
{Ymr , Ynone , Ymr , Zm_r , 1 },
627
635
}
@@ -638,6 +646,11 @@ var yxcmpi = []ytab{
638
646
{Yxm , Yxr , Yi8 , Zm_r_i_xm , 2 },
639
647
}
640
648
649
// yxmov_vex lists the same two operand-class rows as yxmov, but with the VEX
// encoding cases: the (Yxm, Ynone, Yxr) row selects Zm_r_xm_vex and the
// (Yxr, Ynone, Yxm) row selects Zr_m_xm_vex. optab references this table for
// AMOVHDU and AMOVHDA, each of which supplies the opcode bytes 0x6f, 0x7f.
// NOTE(review): presumably the first row is the load form and the second the
// store form, each consuming one opcode byte - confirm against the ytab
// definition and the row-matching loop in doasm.
+ var yxmov_vex = []ytab {
650
+ {Yxm , Ynone , Yxr , Zm_r_xm_vex , 1 },
651
+ {Yxr , Ynone , Yxm , Zr_m_xm_vex , 1 },
652
+ }
653
+
641
654
var yxmov = []ytab {
642
655
{Yxm , Ynone , Yxr , Zm_r_xm , 1 },
643
656
{Yxr , Ynone , Yxm , Zr_m_xm , 1 },
@@ -1480,6 +1493,10 @@ var optab =
1480
1493
{AROUNDSS , yaes2 , Pq , [23 ]uint8 {0x3a , 0x0a , 0 }},
1481
1494
{APSHUFD , yxshuf , Pq , [23 ]uint8 {0x70 , 0 }},
1482
1495
{APCLMULQDQ , yxshuf , Pq , [23 ]uint8 {0x3a , 0x44 , 0 }},
1496
+ {AVZEROUPPER , ynone , Px , [23 ]uint8 {0xc5 , 0xf8 , 0x77 }},
1497
+ {AMOVHDU , yxmov_vex , Pvex2 , [23 ]uint8 {0x6f , 0x7f }},
1498
+ {AMOVNTHD , yxr_ml_vex , Pvex1 , [23 ]uint8 {0xe7 }},
1499
+ {AMOVHDA , yxmov_vex , Pvex1 , [23 ]uint8 {0x6f , 0x7f }},
1483
1500
{obj .AUSEFIELD , ynop , Px , [23 ]uint8 {0 , 0 }},
1484
1501
{obj .ATYPE , nil , 0 , [23 ]uint8 {}},
1485
1502
{obj .AFUNCDATA , yfuncdata , Px , [23 ]uint8 {0 , 0 }},
@@ -2911,6 +2928,50 @@ var bpduff2 = []byte{
2911
2928
0x48 , 0x8b , 0x6d , 0x00 , // MOVQ 0(BP), BP
2912
2929
}
2913
2930
2931
// vexprefix writes a VEX prefix at ctxt.Andptr and advances ctxt.Andptr past
// the bytes it wrote. It writes two bytes (0xc5, ...) when the regrex entries
// for from.Reg and from.Index are both zero, and three bytes (0xc4, ...)
// otherwise.
//
// to.Reg, from.Reg and from.Index are looked up in regrex to obtain the R, B
// and X flags respectively. pref is the optab prefix value; only Pvex1 and
// Pvex2 affect the output. The opcode and operand bytes are not written here:
// the callers in doasm store the opcode byte and call asmand afterwards.
+ func vexprefix (ctxt * obj.Link , to * obj.Addr , from * obj.Addr , pref uint8 ) {
2932
// A nonzero regrex entry is treated below as "this flag is needed".
// NOTE(review): presumably nonzero for the extended registers (8-15) -
// confirm against the regrex table.
+ rexR := regrex [to .Reg ]
2933
+ rexB := regrex [from .Reg ]
2934
+ rexX := regrex [from .Index ]
2935
// prefBit is OR-ed into the low bits of the last prefix byte: 1 for Pvex1
// (the 66 escape), 2 for Pvex2 (the f3 escape), 0 for any other pref.
+ var prefBit uint8
2936
+ if pref == Pvex1 {
2937
+ prefBit = 1
2938
+ } else if pref == Pvex2 {
2939
+ prefBit = 2
2940
+ } // TODO add Pvex0,Pvex3
2941
+
2942
// The short form written here carries only the R flag, so it is used only
// when neither the X nor the B flag is needed.
+ if rexX == 0 && rexB == 0 { // 2-byte vex prefix
2943
+ ctxt .Andptr [0 ] = 0xc5
2944
+ ctxt .Andptr = ctxt .Andptr [1 :]
2945
+
2946
// 0x7c and 0xfc differ only in bit 7: the R flag is stored inverted, so the
// bit is left clear when rexR is nonzero.
// NOTE(review): per the VEX layout the remaining 0x7c bits are presumably
// vvvv=1111 (no extra register operand) and L=1 (256-bit) - confirm.
+ if rexR != 0 {
2947
+ ctxt .Andptr [0 ] = 0x7c
2948
+ } else {
2949
+ ctxt .Andptr [0 ] = 0xfc
2950
+ }
2951
+ ctxt .Andptr [0 ] |= prefBit
2952
+ ctxt .Andptr = ctxt .Andptr [1 :]
2953
+ } else {
2954
// Long form: 0xc4, then a byte holding the inverted R/X/B flags, then the
// final byte carrying prefBit.
+ ctxt .Andptr [0 ] = 0xc4
2955
+ ctxt .Andptr = ctxt .Andptr [1 :]
2956
+
2957
// The low bits are fixed at 0x1 (presumably the 0f opcode-map selector -
// confirm); bits 0x80, 0x40 and 0x20 are set when the R, X and B flags are
// NOT needed.
+ ctxt .Andptr [0 ] = 0x1 // TODO handle different prefix
2958
+ if rexR == 0 {
2959
+ ctxt .Andptr [0 ] |= 0x80
2960
+ }
2961
+ if rexX == 0 {
2962
+ ctxt .Andptr [0 ] |= 0x40
2963
+ }
2964
+ if rexB == 0 {
2965
+ ctxt .Andptr [0 ] |= 0x20
2966
+ }
2967
+ ctxt .Andptr = ctxt .Andptr [1 :]
2968
+
2969
// Final byte: the same 0x7c base as the short form with bit 7 always clear,
// plus prefBit.
+ ctxt .Andptr [0 ] = 0x7c
2970
+ ctxt .Andptr [0 ] |= prefBit
2971
+ ctxt .Andptr = ctxt .Andptr [1 :]
2972
+ }
2973
+ }
2974
+
2914
2975
func doasm (ctxt * obj.Link , p * obj.Prog ) {
2915
2976
ctxt .Curp = p // TODO
2916
2977
@@ -3144,6 +3205,13 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
3144
3205
mediaop (ctxt , o , op , int (yt .zoffset ), z )
3145
3206
asmand (ctxt , p , & p .From , & p .To )
3146
3207
3208
+ case Zm_r_xm_vex :
3209
+ ctxt .Vexflag = 1
3210
+ vexprefix (ctxt , & p .To , & p .From , o .prefix )
3211
+ ctxt .Andptr [0 ] = byte (op )
3212
+ ctxt .Andptr = ctxt .Andptr [1 :]
3213
+ asmand (ctxt , p , & p .From , & p .To )
3214
+
3147
3215
case Zm_r_xm_nr :
3148
3216
ctxt .Rexflag = 0
3149
3217
mediaop (ctxt , o , op , int (yt .zoffset ), z )
@@ -3199,6 +3267,13 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
3199
3267
ctxt .Andptr = ctxt .Andptr [1 :]
3200
3268
asmand (ctxt , p , & p .To , & p .From )
3201
3269
3270
+ case Zr_m_xm_vex :
3271
+ ctxt .Vexflag = 1
3272
+ vexprefix (ctxt , & p .From , & p .To , o .prefix )
3273
+ ctxt .Andptr [0 ] = byte (op )
3274
+ ctxt .Andptr = ctxt .Andptr [1 :]
3275
+ asmand (ctxt , p , & p .To , & p .From )
3276
+
3202
3277
case Zr_m_xm :
3203
3278
mediaop (ctxt , o , op , int (yt .zoffset ), z )
3204
3279
asmand (ctxt , p , & p .To , & p .From )
@@ -4307,10 +4382,11 @@ func asmins(ctxt *obj.Link, p *obj.Prog) {
4307
4382
}
4308
4383
4309
4384
ctxt .Rexflag = 0
4385
+ ctxt .Vexflag = 0
4310
4386
and0 := ctxt .Andptr
4311
4387
ctxt .Asmode = int (p .Mode )
4312
4388
doasm (ctxt , p )
4313
- if ctxt .Rexflag != 0 {
4389
+ if ctxt .Rexflag != 0 && ctxt . Vexflag == 0 {
4314
4390
/*
4315
4391
* as befits the whole approach of the architecture,
4316
4392
* the rex prefix must appear before the first opcode byte
0 commit comments