Skip to content

Commit 2672037

Browse files
authored
[AMDGPU][True16][MC] Support VOP3 only instructions with true16 and fake16 (#109891)
Update VOP3-only instructions with true16 and fake16 formats. This patch includes the following instructions: V_MUL_LO_U16, V_MAX_U16, V_MAX_I16, V_MIN_U16, V_MIN_I16, V_LSHLREV_B16, V_LSHRREV_B16, and V_ASHRREV_I16.
1 parent 936142e commit 2672037

20 files changed

+4384
-1256
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -917,7 +917,7 @@ multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
917917
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
918918
VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
919919

920-
multiclass VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<bits<9> op, string asmName,
920+
multiclass VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<bits<9> op, string asmName,
921921
string opName = NAME> {
922922
defm opName#"_t16" :
923923
VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
@@ -965,10 +965,10 @@ defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16"
965965
defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
966966
defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
967967

968-
defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
969-
defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
970-
defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
971-
defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
968+
defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
969+
defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
970+
defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
971+
defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
972972
defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
973973
defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
974974
defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
@@ -980,7 +980,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
980980
defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
981981
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
982982
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
983-
defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
983+
defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
984984
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
985985
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
986986
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
@@ -991,11 +991,11 @@ defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f1
991991
defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
992992
defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
993993
defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
994-
defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
995-
defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
994+
defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
995+
defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
996996

997-
defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
998-
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
997+
defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
998+
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
999999

10001000
//===----------------------------------------------------------------------===//
10011001
// GFX10.

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,11 @@ multiclass VOP2Inst_e64_t16<string opName,
209209
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
210210
defm NAME : VOP2Inst<opName, P, node, revOp>;
211211
}
212-
let SubtargetPredicate = HasTrue16BitInsts in {
213-
defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_Fake16<P>, node, revOp#"_t16">;
212+
let SubtargetPredicate = UseRealTrue16Insts in {
213+
defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16">;
214+
}
215+
let SubtargetPredicate = UseFakeTrue16Insts in {
216+
defm _fake16 : VOP2Inst_e64<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16">;
214217
}
215218
}
216219

@@ -1657,9 +1660,14 @@ multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> :
16571660
multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
16581661
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
16591662

1660-
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName> :
1661-
VOP3Only_Realtriple_t16<GFX11Gen, op, asmName>,
1662-
VOP3Only_Realtriple_t16<GFX12Gen, op, asmName>;
1663+
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> :
1664+
VOP3Only_Realtriple_t16<GFX11Gen, op, asmName, OpName>,
1665+
VOP3Only_Realtriple_t16<GFX12Gen, op, asmName, OpName>;
1666+
1667+
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
1668+
defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
1669+
defm OpName#"_fake16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">;
1670+
}
16631671

16641672
multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
16651673
VOP3beOnly_Realtriple<GFX11Gen, op>, VOP3beOnly_Realtriple<GFX12Gen, op>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,13 +1191,13 @@ defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_
11911191
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
11921192
defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x303>;
11931193
defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x304>;
1194-
defm V_MUL_LO_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
1194+
defm V_MUL_LO_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
11951195
defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
11961196
defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
1197-
defm V_MAX_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x309, "v_max_u16">;
1198-
defm V_MAX_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30a, "v_max_i16">;
1199-
defm V_MIN_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30b, "v_min_u16">;
1200-
defm V_MIN_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30c, "v_min_i16">;
1197+
defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x309, "v_max_u16">;
1198+
defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
1199+
defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
1200+
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
12011201
defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
12021202
defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
12031203
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
@@ -1214,9 +1214,9 @@ defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12<0x32c>;
12141214
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12<0x32d>;
12151215
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12<0x32e>;
12161216
defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32f>;
1217-
defm V_LSHLREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x338, "v_lshlrev_b16">;
1218-
defm V_LSHRREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
1219-
defm V_ASHRREV_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x33a, "v_ashrrev_i16">;
1217+
defm V_LSHLREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x338, "v_lshlrev_b16">;
1218+
defm V_LSHRREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
1219+
defm V_ASHRREV_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33a, "v_ashrrev_i16">;
12201220
defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>;
12211221
defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12<0x33d>;
12221222
defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12<0x33e>;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ body: |
107107
; GFX11-NEXT: {{ $}}
108108
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
109109
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
110-
; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
111-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]]
110+
; GFX11-NEXT: [[V_ASHRREV_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
111+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_fake16_e64_]]
112112
%0:vgpr(s32) = COPY $vgpr0
113113
%1:sgpr(s32) = COPY $sgpr0
114114
%2:vgpr(s16) = G_TRUNC %0
@@ -200,8 +200,8 @@ body: |
200200
; GFX11-NEXT: {{ $}}
201201
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
202202
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
203-
; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
204-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]]
203+
; GFX11-NEXT: [[V_ASHRREV_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
204+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_fake16_e64_]]
205205
%0:vgpr(s32) = COPY $vgpr0
206206
%1:vgpr(s32) = COPY $vgpr1
207207
%2:vgpr(s16) = G_TRUNC %0
@@ -247,9 +247,9 @@ body: |
247247
; GFX11-NEXT: {{ $}}
248248
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
249249
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
250-
; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
250+
; GFX11-NEXT: [[V_ASHRREV_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
251251
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
252-
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ASHRREV_I16_t16_e64_]], implicit $exec
252+
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ASHRREV_I16_fake16_e64_]], implicit $exec
253253
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
254254
%0:vgpr(s32) = COPY $vgpr0
255255
%1:vgpr(s32) = COPY $vgpr1
@@ -310,10 +310,10 @@ body: |
310310
; GFX11-NEXT: {{ $}}
311311
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
312312
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
313-
; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
313+
; GFX11-NEXT: [[V_ASHRREV_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
314314
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
315315
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
316-
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_t16_e64_]], implicit $exec
316+
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_fake16_e64_]], implicit $exec
317317
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
318318
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
319319
; GFX11-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
@@ -455,8 +455,8 @@ body: |
455455
; GFX11-NEXT: {{ $}}
456456
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
457457
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
458-
; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
459-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_t16_e64_]]
458+
; GFX11-NEXT: [[V_ASHRREV_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
459+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ASHRREV_I16_fake16_e64_]]
460460
%0:sgpr(s32) = COPY $sgpr0
461461
%1:vgpr(s32) = COPY $vgpr0
462462
%2:sgpr(s16) = G_TRUNC %0

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@ body: |
105105
; GFX11-NEXT: {{ $}}
106106
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
107107
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
108-
; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
109-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]]
108+
; GFX11-NEXT: [[V_LSHRREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
109+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_fake16_e64_]]
110110
%0:vgpr(s32) = COPY $vgpr0
111111
%1:sgpr(s32) = COPY $sgpr0
112112
%2:vgpr(s16) = G_TRUNC %0
@@ -198,8 +198,8 @@ body: |
198198
; GFX11-NEXT: {{ $}}
199199
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
200200
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
201-
; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
202-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]]
201+
; GFX11-NEXT: [[V_LSHRREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
202+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_fake16_e64_]]
203203
%0:vgpr(s32) = COPY $vgpr0
204204
%1:vgpr(s32) = COPY $vgpr1
205205
%2:vgpr(s16) = G_TRUNC %0
@@ -245,9 +245,9 @@ body: |
245245
; GFX11-NEXT: {{ $}}
246246
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
247247
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
248-
; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
248+
; GFX11-NEXT: [[V_LSHRREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
249249
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
250-
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHRREV_B16_t16_e64_]], implicit $exec
250+
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHRREV_B16_fake16_e64_]], implicit $exec
251251
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
252252
%0:vgpr(s32) = COPY $vgpr0
253253
%1:vgpr(s32) = COPY $vgpr1
@@ -308,10 +308,10 @@ body: |
308308
; GFX11-NEXT: {{ $}}
309309
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
310310
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
311-
; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
311+
; GFX11-NEXT: [[V_LSHRREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
312312
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
313313
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
314-
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_t16_e64_]], implicit $exec
314+
; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_fake16_e64_]], implicit $exec
315315
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
316316
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
317317
; GFX11-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
@@ -453,8 +453,8 @@ body: |
453453
; GFX11-NEXT: {{ $}}
454454
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
455455
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
456-
; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
457-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_t16_e64_]]
456+
; GFX11-NEXT: [[V_LSHRREV_B16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_fake16_e64 [[COPY1]], [[COPY]], implicit $exec
457+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B16_fake16_e64_]]
458458
%0:sgpr(s32) = COPY $sgpr0
459459
%1:vgpr(s32) = COPY $vgpr0
460460
%2:sgpr(s16) = G_TRUNC %0

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ body: |
9090
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
9191
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
9292
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
93-
; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
93+
; GFX11-NEXT: [[V_MAX_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
9494
; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
95-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_t16_e64_]]
95+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
9696
%0:vgpr(s32) = COPY $vgpr0
9797
%1:vgpr(s32) = COPY $vgpr1
9898
%2:vgpr(s32) = COPY $vgpr2
@@ -143,9 +143,9 @@ body: |
143143
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
144144
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
145145
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
146-
; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
146+
; GFX11-NEXT: [[V_MIN_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
147147
; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
148-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_t16_e64_]]
148+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_fake16_e64_]]
149149
%0:vgpr(s32) = COPY $vgpr0
150150
%1:vgpr(s32) = COPY $vgpr1
151151
%2:vgpr(s32) = COPY $vgpr2
@@ -197,10 +197,10 @@ body: |
197197
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
198198
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
199199
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
200-
; GFX11-NEXT: [[V_MIN_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec
201-
; GFX11-NEXT: [[V_MAX_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_t16_e64 [[V_MIN_I16_t16_e64_]], [[COPY2]], implicit $exec
200+
; GFX11-NEXT: [[V_MIN_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
201+
; GFX11-NEXT: [[V_MAX_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_fake16_e64 [[V_MIN_I16_fake16_e64_]], [[COPY2]], implicit $exec
202202
; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
203-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_t16_e64_]]
203+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
204204
%0:vgpr(s32) = COPY $vgpr0
205205
%1:vgpr(s32) = COPY $vgpr1
206206
%2:vgpr(s32) = COPY $vgpr2

0 commit comments

Comments
 (0)