Skip to content

Commit 6a2bace

Browse files
committed
[RFC][WIP][AMDGPU] Use bf16 instead of i16 for bfloat
Currently, we generally use `i16` to represent `bf16` in the AMDGPU TableGen files. I'm not sure of the reason behind this; my guess is that the `bf16` type was not yet available when bfloat support was first added. This patch uses `bf16` directly in those TableGen files, with the aim of fixing #79369. A possible workaround for #79369 would be to treat all `INT16` operand variants as `BFloat` in `getOpFltSemantics`, but that does not look like a good solution IMHO. Since I'm fairly new to the AMDGPU backend, I'd appreciate it if you could point out anything I have misunderstood.
1 parent 05091aa commit 6a2bace

18 files changed

+217
-74
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

+2-2
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,8 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomi
246246

247247
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot10-insts")
248248
TARGET_BUILTIN(__builtin_amdgcn_fdot2_f16_f16, "hV2hV2hh", "nc", "dot9-insts")
249-
TARGET_BUILTIN(__builtin_amdgcn_fdot2_bf16_bf16, "sV2sV2ss", "nc", "dot9-insts")
250-
TARGET_BUILTIN(__builtin_amdgcn_fdot2_f32_bf16, "fV2sV2sfIb", "nc", "dot9-insts")
249+
TARGET_BUILTIN(__builtin_amdgcn_fdot2_bf16_bf16, "yV2yV2yy", "nc", "dot9-insts")
250+
TARGET_BUILTIN(__builtin_amdgcn_fdot2_f32_bf16, "fV2yV2yfIb", "nc", "dot9-insts")
251251
TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dot2-insts")
252252
TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dot2-insts")
253253
TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dot1-insts")

clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl

+6-3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
typedef unsigned int uint;
77
typedef half __attribute__((ext_vector_type(2))) half2;
8+
typedef __bf16 bfloat;
9+
typedef bfloat __attribute__((ext_vector_type(2))) bfloat2;
810
typedef short __attribute__((ext_vector_type(2))) short2;
911
typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
1012

@@ -15,16 +17,17 @@ kernel void builtins_amdgcn_dl_insts_err(
1517
half2 v2hA, half2 v2hB, float fC, half hC,
1618
short2 v2ssA, short2 v2ssB, short sC, int siA, int siB, int siC,
1719
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC,
20+
bfloat2 v2bfsA, bfloat2 v2bfsB, bfloat bfC,
1821
int A, int B, int C) {
1922
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot10-insts}}
2023
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot10-insts}}
2124

2225
hOut[0] = __builtin_amdgcn_fdot2_f16_f16(v2hA, v2hB, hC); // expected-error {{'__builtin_amdgcn_fdot2_f16_f16' needs target feature dot9-insts}}
2326

24-
sOut[0] = __builtin_amdgcn_fdot2_bf16_bf16(v2ssA, v2ssB, sC); // expected-error {{'__builtin_amdgcn_fdot2_bf16_bf16' needs target feature dot9-insts}}
27+
sOut[0] = __builtin_amdgcn_fdot2_bf16_bf16(v2bfsA, v2bfsB, bfC); // expected-error {{'__builtin_amdgcn_fdot2_bf16_bf16' needs target feature dot9-insts}}
2528

26-
fOut[3] = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2_f32_bf16' needs target feature dot9-insts}}
27-
fOut[4] = __builtin_amdgcn_fdot2_f32_bf16(v2ssA, v2ssB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2_f32_bf16' needs target feature dot9-insts}}
29+
fOut[3] = __builtin_amdgcn_fdot2_f32_bf16(v2bfsA, v2bfsB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2_f32_bf16' needs target feature dot9-insts}}
30+
fOut[4] = __builtin_amdgcn_fdot2_f32_bf16(v2bfsA, v2bfsB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2_f32_bf16' needs target feature dot9-insts}}
2831

2932
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
3033
siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}

clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx11.cl

+7-6
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44

55
typedef unsigned int uint;
66
typedef half __attribute__((ext_vector_type(2))) half2;
7-
typedef short __attribute__((ext_vector_type(2))) short2;
7+
typedef __bf16 bfloat;
8+
typedef bfloat __attribute__((ext_vector_type(2))) bfloat2;
89
typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
910

1011
// CHECK-LABEL: @builtins_amdgcn_dl_insts
1112
// CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 false)
1213
// CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 true)
1314
// CHECK: call half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %v2hA, <2 x half> %v2hB, half %hC)
14-
// CHECK: call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i16 %sC)
15-
// CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> %v2ssB, float %fC, i1 false)
16-
// CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %v2ssA, <2 x i16> %v2ssB, float %fC, i1 true)
15+
// CHECK: call bfloat @llvm.amdgcn.fdot2.bf16.bf16(<2 x bfloat> %v2ssA, <2 x bfloat> %v2ssB, bfloat %sC)
16+
// CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %v2ssA, <2 x bfloat> %v2ssB, float %fC, i1 false)
17+
// CHECK: call float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %v2ssA, <2 x bfloat> %v2ssB, float %fC, i1 true)
1718
// CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
1819
// CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
1920
// CHECK: call i32 @llvm.amdgcn.sudot4(i1 true, i32 %A, i1 false, i32 %B, i32 %C, i1 false)
@@ -25,9 +26,9 @@ typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
2526
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2627
kernel void builtins_amdgcn_dl_insts_err(
2728
global float *fOut, global int *siOut, global uint *uiOut,
28-
global short *sOut, global int *iOut, global half *hOut,
29+
global bfloat *sOut, global int *iOut, global half *hOut,
2930
half2 v2hA, half2 v2hB, float fC, half hC,
30-
short2 v2ssA, short2 v2ssB, short sC, int siA, int siB, int siC,
31+
bfloat2 v2ssA, bfloat2 v2ssB, bfloat sC, int siA, int siB, int siC,
3132
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC,
3233
int A, int B, int C) {
3334
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false);

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

+6-6
Original file line numberDiff line numberDiff line change
@@ -2819,11 +2819,11 @@ def int_amdgcn_fdot2_f16_f16 :
28192819
def int_amdgcn_fdot2_bf16_bf16 :
28202820
ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
28212821
DefaultAttrsIntrinsic<
2822-
[llvm_i16_ty], // %r
2822+
[llvm_bfloat_ty], // %r
28232823
[
2824-
llvm_v2i16_ty, // %a
2825-
llvm_v2i16_ty, // %b
2826-
llvm_i16_ty // %c
2824+
llvm_v2bf16_ty, // %a
2825+
llvm_v2bf16_ty, // %b
2826+
llvm_bfloat_ty // %c
28272827
],
28282828
[IntrNoMem, IntrSpeculatable]
28292829
>;
@@ -2835,8 +2835,8 @@ def int_amdgcn_fdot2_f32_bf16 :
28352835
DefaultAttrsIntrinsic<
28362836
[llvm_float_ty], // %r
28372837
[
2838-
llvm_v2i16_ty, // %a
2839-
llvm_v2i16_ty, // %b
2838+
llvm_v2bf16_ty, // %a
2839+
llvm_v2bf16_ty, // %b
28402840
llvm_float_ty, // %c
28412841
llvm_i1_ty // %clamp
28422842
],

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1562,8 +1562,9 @@ bool IRTranslator::translateBitCast(const User &U,
15621562

15631563
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
15641564
MachineIRBuilder &MIRBuilder) {
1565-
if (U.getType()->getScalarType()->isBFloatTy() ||
1566-
U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
1565+
if (Opcode != TargetOpcode::G_BITCAST &&
1566+
(U.getType()->getScalarType()->isBFloatTy() ||
1567+
U.getOperand(0)->getType()->getScalarType()->isBFloatTy()))
15671568
return false;
15681569
Register Op = getOrCreateVReg(*U.getOperand(0));
15691570
Register Res = getOrCreateVReg(U);

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

+66
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
474474

475475
bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
476476

477+
bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
478+
477479
bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
478480

479481
bool isSSrcV2F16() const {
@@ -540,22 +542,40 @@ class AMDGPUOperand : public MCParsedAsmOperand {
540542
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
541543
}
542544

545+
bool isVCSrcTBF16() const {
546+
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
547+
}
548+
543549
bool isVCSrcTF16() const {
544550
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
545551
}
546552

553+
bool isVCSrcTBF16_Lo128() const {
554+
return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
555+
}
556+
547557
bool isVCSrcTF16_Lo128() const {
548558
return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
549559
}
550560

561+
bool isVCSrcFake16BF16_Lo128() const {
562+
return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
563+
}
564+
551565
bool isVCSrcFake16F16_Lo128() const {
552566
return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
553567
}
554568

569+
bool isVCSrc_bf16() const {
570+
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
571+
}
572+
555573
bool isVCSrc_f16() const {
556574
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
557575
}
558576

577+
bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
578+
559579
bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
560580

561581
bool isVSrc_b32() const {
@@ -596,18 +616,34 @@ class AMDGPUOperand : public MCParsedAsmOperand {
596616

597617
bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
598618

619+
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
620+
599621
bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
600622

623+
bool isVSrcT_bf16_Lo128() const {
624+
return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
625+
}
626+
601627
bool isVSrcT_f16_Lo128() const {
602628
return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
603629
}
604630

631+
bool isVSrcFake16_bf16_Lo128() const {
632+
return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
633+
}
634+
605635
bool isVSrcFake16_f16_Lo128() const {
606636
return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
607637
}
608638

639+
bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
640+
609641
bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
610642

643+
bool isVSrc_v2bf16() const {
644+
return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
645+
}
646+
611647
bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
612648

613649
bool isVISrcB32() const {
@@ -634,6 +670,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
634670
return isVISrcF16() || isVISrcB32();
635671
}
636672

673+
bool isVISrc_64_bf16() const {
674+
return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
675+
}
676+
637677
bool isVISrc_64_f16() const {
638678
return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
639679
}
@@ -802,6 +842,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
802842
return isAISrc_128F16() || isAISrc_128_b32();
803843
}
804844

845+
bool isVISrc_128_bf16() const {
846+
return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
847+
}
848+
805849
bool isVISrc_128_f16() const {
806850
return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
807851
}
@@ -1889,6 +1933,14 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
18891933
case AMDGPU::OPERAND_REG_IMM_V2FP16:
18901934
case AMDGPU::OPERAND_KIMM16:
18911935
return &APFloat::IEEEhalf();
1936+
case AMDGPU::OPERAND_REG_IMM_BF16:
1937+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1938+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1939+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1940+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1941+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1942+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
1943+
return &APFloat::BFloat();
18921944
default:
18931945
llvm_unreachable("unsupported fp type");
18941946
}
@@ -2185,17 +2237,24 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
21852237
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
21862238
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
21872239
case AMDGPU::OPERAND_REG_IMM_INT16:
2240+
case AMDGPU::OPERAND_REG_IMM_BF16:
21882241
case AMDGPU::OPERAND_REG_IMM_FP16:
2242+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
21892243
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
21902244
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2245+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
21912246
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
21922247
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2248+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
21932249
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
21942250
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2251+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
21952252
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
21962253
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2254+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
21972255
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
21982256
case AMDGPU::OPERAND_REG_IMM_V2INT16:
2257+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
21992258
case AMDGPU::OPERAND_REG_IMM_V2FP16:
22002259
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
22012260
case AMDGPU::OPERAND_REG_IMM_V2FP32:
@@ -2239,6 +2298,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22392298
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
22402299
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
22412300
case AMDGPU::OPERAND_REG_IMM_V2INT16:
2301+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
22422302
case AMDGPU::OPERAND_REG_IMM_V2FP16:
22432303
case AMDGPU::OPERAND_REG_IMM_V2FP32:
22442304
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
@@ -2276,11 +2336,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22762336
return;
22772337

22782338
case AMDGPU::OPERAND_REG_IMM_INT16:
2339+
case AMDGPU::OPERAND_REG_IMM_BF16:
22792340
case AMDGPU::OPERAND_REG_IMM_FP16:
2341+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
22802342
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
22812343
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2344+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
22822345
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
22832346
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2347+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
22842348
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
22852349
if (isSafeTruncation(Val, 16) &&
22862350
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
@@ -2295,8 +2359,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22952359
return;
22962360

22972361
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2362+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
22982363
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
22992364
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2365+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
23002366
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
23012367
assert(isSafeTruncation(Val, 16));
23022368
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -521,8 +521,11 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
521521
if (printImmediateFloat32(Imm, STI, O))
522522
return;
523523
break;
524+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
524525
case AMDGPU::OPERAND_REG_IMM_V2FP16:
526+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
525527
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
528+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
526529
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
527530
if (isUInt<16>(Imm) &&
528531
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
@@ -792,17 +795,24 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
792795
case AMDGPU::OPERAND_REG_IMM_INT16:
793796
printImmediateInt16(Op.getImm(), STI, O);
794797
break;
798+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
795799
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
800+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
796801
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
802+
case AMDGPU::OPERAND_REG_IMM_BF16:
797803
case AMDGPU::OPERAND_REG_IMM_FP16:
804+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
798805
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
799806
printImmediate16(Op.getImm(), STI, O);
800807
break;
801808
case AMDGPU::OPERAND_REG_IMM_V2INT16:
809+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
802810
case AMDGPU::OPERAND_REG_IMM_V2FP16:
803811
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
804812
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
813+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
805814
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
815+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
806816
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
807817
printImmediateV216(Op.getImm(), OpTy, STI, O);
808818
break;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,13 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
276276
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
277277
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
278278
return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
279+
case AMDGPU::OPERAND_REG_IMM_BF16:
279280
case AMDGPU::OPERAND_REG_IMM_FP16:
281+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
280282
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
283+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
281284
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
285+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
282286
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
283287
// FIXME Is this correct? What do inline immediates do on SI for f16 src
284288
// which does not have f16 support?
@@ -288,8 +292,11 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
288292
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
289293
return AMDGPU::getInlineEncodingV2I16(static_cast<uint32_t>(Imm))
290294
.value_or(255);
295+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
291296
case AMDGPU::OPERAND_REG_IMM_V2FP16:
297+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
292298
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
299+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
293300
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
294301
return AMDGPU::getInlineEncodingV2F16(static_cast<uint32_t>(Imm))
295302
.value_or(255);

llvm/lib/Target/AMDGPU/SIDefines.h

+7
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,12 @@ enum OperandType : unsigned {
196196
OPERAND_REG_IMM_INT16,
197197
OPERAND_REG_IMM_FP32,
198198
OPERAND_REG_IMM_FP64,
199+
OPERAND_REG_IMM_BF16,
199200
OPERAND_REG_IMM_FP16,
201+
OPERAND_REG_IMM_BF16_DEFERRED,
200202
OPERAND_REG_IMM_FP16_DEFERRED,
201203
OPERAND_REG_IMM_FP32_DEFERRED,
204+
OPERAND_REG_IMM_V2BF16,
202205
OPERAND_REG_IMM_V2FP16,
203206
OPERAND_REG_IMM_V2INT16,
204207
OPERAND_REG_IMM_V2INT32,
@@ -208,10 +211,12 @@ enum OperandType : unsigned {
208211
OPERAND_REG_INLINE_C_INT16,
209212
OPERAND_REG_INLINE_C_INT32,
210213
OPERAND_REG_INLINE_C_INT64,
214+
OPERAND_REG_INLINE_C_BF16,
211215
OPERAND_REG_INLINE_C_FP16,
212216
OPERAND_REG_INLINE_C_FP32,
213217
OPERAND_REG_INLINE_C_FP64,
214218
OPERAND_REG_INLINE_C_V2INT16,
219+
OPERAND_REG_INLINE_C_V2BF16,
215220
OPERAND_REG_INLINE_C_V2FP16,
216221
OPERAND_REG_INLINE_C_V2INT32,
217222
OPERAND_REG_INLINE_C_V2FP32,
@@ -226,10 +231,12 @@ enum OperandType : unsigned {
226231
/// Operands with an AccVGPR register or inline constant
227232
OPERAND_REG_INLINE_AC_INT16,
228233
OPERAND_REG_INLINE_AC_INT32,
234+
OPERAND_REG_INLINE_AC_BF16,
229235
OPERAND_REG_INLINE_AC_FP16,
230236
OPERAND_REG_INLINE_AC_FP32,
231237
OPERAND_REG_INLINE_AC_FP64,
232238
OPERAND_REG_INLINE_AC_V2INT16,
239+
OPERAND_REG_INLINE_AC_V2BF16,
233240
OPERAND_REG_INLINE_AC_V2FP16,
234241
OPERAND_REG_INLINE_AC_V2INT32,
235242
OPERAND_REG_INLINE_AC_V2FP32,

0 commit comments

Comments
 (0)