@@ -973,11 +973,16 @@ class VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOPProfile P> : VOP3_Profil
973
973
let HasOMod = 0;
974
974
}
975
975
976
// Profile derived from VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> whose
// only override is setting HasFP8DstByteSel to 1. It is the profile passed to
// VOP3Inst for V_CVT_SCALEF32_PK_FP4_F32.
// NOTE(review): the definition and consumers of HasFP8DstByteSel are not
// visible in this hunk; presumably it enables destination byte selection in
// the VOP3 encoding/assembly handling -- confirm against the VOPProfile
// definition.
+ class VOP3_CVT_SCALE_FP4_F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
977
+ let HasFP8DstByteSel = 1;
978
+ }
979
+
976
980
// Profile derived from VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> that
// replaces InsVOP3OpSel with an explicit operand list: a modifiers/source pair
// for each of src0, src1 and src2 (FP32 input modifiers on src0 and src2,
// Int32 input modifiers on src1), followed by a VGPR_32 $vdst_in operand and
// the $op_sel operand. It also sets HasFP8DstByteSel to 1.
// NOTE(review): the meaning of HasFP8DstByteSel is defined outside this hunk;
// presumably it enables destination byte selection -- confirm.
class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
977
981
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
978
982
Int32InputMods:$src1_modifiers, Src1RC64:$src1,
979
983
FP32InputMods:$src2_modifiers, Src2RC64:$src2,
980
984
VGPR_32:$vdst_in, op_sel0:$op_sel);
985
+ let HasFP8DstByteSel = 1;
981
986
}
982
987
983
988
@@ -995,6 +1000,7 @@ class VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<
995
1000
HasSrc0FloatMods, HasSrc1FloatMods,
996
1001
HasSrc2FloatMods>.ret);
997
1002
let HasExtVOP3DPP = 0;
1003
+ let HasFP8DstByteSel = 1;
998
1004
}
999
1005
1000
1006
class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
@@ -1007,6 +1013,7 @@ class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
1007
1013
let HasExtVOP3DPP = 0;
1008
1014
let HasOpSel = 1;
1009
1015
let HasOMod = 0;
1016
+ let HasFP8DstByteSel = 1;
1010
1017
}
1011
1018
1012
1019
def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
@@ -1018,6 +1025,7 @@ def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32
1018
1025
let HasExtVOP3DPP = 0;
1019
1026
let HasOpSel = 1;
1020
1027
let HasOMod = 0;
1028
+ let HasFP8DstByteSel = 1;
1021
1029
}
1022
1030
1023
1031
class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
@@ -1093,7 +1101,7 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
1093
1101
let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
1094
1102
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
1095
1103
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
1096
- defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile <VOP_I32_F32_F32_F32>>;
1104
+ defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4_F32_TiedInput_Profile <VOP_I32_F32_F32_F32>>;
1097
1105
let Constraints = "@earlyclobber $vdst" in {
1098
1106
defm V_CVT_SCALEF32_SR_PK_FP4_F16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
1099
1107
defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
@@ -2027,6 +2035,7 @@ multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0>
2027
2035
}
2028
2036
}
2029
2037
}
2038
+
2030
2039
} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
2031
2040
2032
2041
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
0 commit comments