Skip to content

Commit 7c6e461

Browse files
committed
[SelectionDAG] Add STRICT_BF16_TO_FP and STRICT_FP_TO_BF16
This patch adds support for `STRICT_BF16_TO_FP` and `STRICT_FP_TO_BF16`. Fixes #78540.
1 parent 0ec318e commit 7c6e461

14 files changed

+288
-24
lines changed

compiler-rt/lib/builtins/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ set(GENERIC_SOURCES
190190

191191
# We only build BF16 files when "__bf16" is available.
192192
set(BF16_SOURCES
193+
extendbfsf2.c
193194
truncdfbf2.c
194195
truncsfbf2.c
195196
)
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//===-- lib/extendbfsf2.c - bfloat -> single conversion -----------*- C -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#define SRC_BFLOAT
10+
#define DST_SINGLE
11+
#include "fp_extend_impl.inc"
12+
13+
// Extends a bfloat value to single precision. The work is done by the generic
// __extendXfYf2__ routine pulled in from fp_extend_impl.inc, specialised by the
// SRC_BFLOAT / DST_SINGLE macros defined before the include.
COMPILER_RT_ABI float __extendbfsf2(src_t a) {
  return __extendXfYf2__(a);
}

compiler-rt/lib/builtins/fp_extend.h

+7
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ static inline int src_rep_t_clz_impl(src_rep_t a) {
8181

8282
#define src_rep_t_clz src_rep_t_clz_impl
8383

84+
#elif defined SRC_BFLOAT
85+
typedef __bf16 src_t;
86+
typedef uint16_t src_rep_t;
87+
#define SRC_REP_C UINT16_C
88+
static const int srcSigBits = 7;
89+
#define src_rep_t_clz __builtin_clz
90+
8491
#else
8592
#error Source should be half, single, or double precision!
8693
#endif // end source precision

llvm/include/llvm/CodeGen/ISDOpcodes.h

+2
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,8 @@ enum NodeType {
921921
/// has native conversions.
922922
BF16_TO_FP,
923923
FP_TO_BF16,
924+
STRICT_BF16_TO_FP,
925+
STRICT_FP_TO_BF16,
924926

925927
/// Perform various unary floating-point operations inspired by libm. For
926928
/// FPOWI, the result is undefined if the integer operand doesn't fit into

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

+2
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,8 @@ END_TWO_BYTE_PACK()
698698
return false;
699699
case ISD::STRICT_FP16_TO_FP:
700700
case ISD::STRICT_FP_TO_FP16:
701+
case ISD::STRICT_BF16_TO_FP:
702+
case ISD::STRICT_FP_TO_BF16:
701703
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
702704
case ISD::STRICT_##DAGN:
703705
#include "llvm/IR/ConstrainedOps.def"

llvm/include/llvm/IR/RuntimeLibcalls.def

+1
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ HANDLE_LIBCALL(FEGETMODE, "fegetmode")
304304
HANDLE_LIBCALL(FESETMODE, "fesetmode")
305305

306306
// Conversion
307+
HANDLE_LIBCALL(FPEXT_BF16_F32, "__extendbfsf2")
307308
HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
308309
HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq")
309310
HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")

llvm/include/llvm/Target/TargetSelectionDAG.td

+13
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,8 @@ def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
541541
def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
542542
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
543543
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
544+
def bf16_to_fp : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>;
545+
def fp_to_bf16 : SDNode<"ISD::FP_TO_BF16" , SDTFPToIntOp>;
544546

545547
def strict_fadd : SDNode<"ISD::STRICT_FADD",
546548
SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
@@ -620,6 +622,11 @@ def strict_f16_to_fp : SDNode<"ISD::STRICT_FP16_TO_FP",
620622
def strict_fp_to_f16 : SDNode<"ISD::STRICT_FP_TO_FP16",
621623
SDTFPToIntOp, [SDNPHasChain]>;
622624

625+
def strict_bf16_to_fp : SDNode<"ISD::STRICT_BF16_TO_FP",
626+
SDTIntToFPOp, [SDNPHasChain]>;
627+
def strict_fp_to_bf16 : SDNode<"ISD::STRICT_FP_TO_BF16",
628+
SDTFPToIntOp, [SDNPHasChain]>;
629+
623630
def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>;
624631
def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>;
625632

@@ -1591,6 +1598,12 @@ def any_f16_to_fp : PatFrags<(ops node:$src),
15911598
def any_fp_to_f16 : PatFrags<(ops node:$src),
15921599
[(fp_to_f16 node:$src),
15931600
(strict_fp_to_f16 node:$src)]>;
1601+
def any_bf16_to_fp : PatFrags<(ops node:$src),
1602+
[(bf16_to_fp node:$src),
1603+
(strict_bf16_to_fp node:$src)]>;
1604+
def any_fp_to_bf16 : PatFrags<(ops node:$src),
1605+
[(fp_to_bf16 node:$src),
1606+
(strict_fp_to_bf16 node:$src)]>;
15941607

15951608
multiclass binary_atomic_op_ord {
15961609
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

+28-9
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
10471047
Node->getOperand(0).getValueType());
10481048
break;
10491049
case ISD::STRICT_FP_TO_FP16:
1050+
case ISD::STRICT_FP_TO_BF16:
10501051
case ISD::STRICT_SINT_TO_FP:
10511052
case ISD::STRICT_UINT_TO_FP:
10521053
case ISD::STRICT_LRINT:
@@ -3286,6 +3287,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
32863287
Results.push_back(Op);
32873288
break;
32883289
}
3290+
case ISD::STRICT_FP_TO_BF16:
3291+
// We don't support this expansion for now.
3292+
break;
32893293
case ISD::FP_TO_BF16: {
32903294
SDValue Op = Node->getOperand(0);
32913295
if (Op.getValueType() != MVT::f32)
@@ -3645,14 +3649,14 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
36453649
DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
36463650
}
36473651
break;
3652+
case ISD::STRICT_BF16_TO_FP:
36483653
case ISD::STRICT_FP16_TO_FP:
36493654
if (Node->getValueType(0) != MVT::f32) {
36503655
// We can extend to types bigger than f32 in two steps without changing
36513656
// the result. Since "f16 -> f32" (or "bf16 -> f32") is much more commonly available, give
36523657
// CodeGen the option of emitting that before resorting to a libcall.
3653-
SDValue Res =
3654-
DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other},
3655-
{Node->getOperand(0), Node->getOperand(1)});
3658+
SDValue Res = DAG.getNode(Node->getOpcode(), dl, {MVT::f32, MVT::Other},
3659+
{Node->getOperand(0), Node->getOperand(1)});
36563660
Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
36573661
{Node->getValueType(0), MVT::Other},
36583662
{Res.getValue(1), Res});
@@ -4651,6 +4655,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
46514655
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
46524656
}
46534657
break;
4658+
case ISD::STRICT_BF16_TO_FP:
4659+
if (Node->getValueType(0) == MVT::f32) {
4660+
TargetLowering::MakeLibCallOptions CallOptions;
4661+
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
4662+
DAG, RTLIB::FPEXT_BF16_F32, MVT::f32, Node->getOperand(1),
4663+
CallOptions, SDLoc(Node), Node->getOperand(0));
4664+
Results.push_back(Tmp.first);
4665+
Results.push_back(Tmp.second);
4666+
}
4667+
break;
46544668
case ISD::STRICT_FP16_TO_FP: {
46554669
if (Node->getValueType(0) == MVT::f32) {
46564670
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4792,12 +4806,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
47924806
break;
47934807
}
47944808
case ISD::STRICT_FP_EXTEND:
4795-
case ISD::STRICT_FP_TO_FP16: {
4796-
RTLIB::Libcall LC =
4797-
Node->getOpcode() == ISD::STRICT_FP_TO_FP16
4798-
? RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16)
4799-
: RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
4800-
Node->getValueType(0));
4809+
case ISD::STRICT_FP_TO_FP16:
4810+
case ISD::STRICT_FP_TO_BF16: {
4811+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
4812+
if (Node->getOpcode() == ISD::STRICT_FP_TO_FP16)
4813+
LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
4814+
else if (Node->getOpcode() == ISD::STRICT_FP_TO_BF16)
4815+
LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::bf16);
4816+
else
4817+
LC = RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
4818+
Node->getValueType(0));
4819+
48014820
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
48024821

48034822
TargetLowering::MakeLibCallOptions CallOptions;

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

+27-14
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
918918
case ISD::STRICT_FP_TO_FP16:
919919
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
920920
case ISD::FP_TO_BF16:
921+
case ISD::STRICT_FP_TO_BF16:
921922
case ISD::STRICT_FP_ROUND:
922923
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
923924
case ISD::STRICT_FP_TO_SINT:
@@ -970,6 +971,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
970971
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
971972
N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
972973
N->getOpcode() == ISD::FP_TO_BF16 ||
974+
N->getOpcode() == ISD::STRICT_FP_TO_BF16 ||
973975
N->getOpcode() == ISD::STRICT_FP_ROUND);
974976

975977
bool IsStrict = N->isStrictFPOpcode();
@@ -980,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
980982
if (N->getOpcode() == ISD::FP_TO_FP16 ||
981983
N->getOpcode() == ISD::STRICT_FP_TO_FP16)
982984
FloatRVT = MVT::f16;
983-
else if (N->getOpcode() == ISD::FP_TO_BF16)
985+
else if (N->getOpcode() == ISD::FP_TO_BF16 ||
986+
N->getOpcode() == ISD::STRICT_FP_TO_BF16)
984987
FloatRVT = MVT::bf16;
985988

986989
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
@@ -2193,13 +2196,11 @@ static ISD::NodeType GetPromotionOpcodeStrict(EVT OpVT, EVT RetVT) {
21932196
if (RetVT == MVT::f16)
21942197
return ISD::STRICT_FP_TO_FP16;
21952198

2196-
if (OpVT == MVT::bf16) {
2197-
// TODO: return ISD::STRICT_BF16_TO_FP;
2198-
}
2199+
if (OpVT == MVT::bf16)
2200+
return ISD::STRICT_BF16_TO_FP;
21992201

2200-
if (RetVT == MVT::bf16) {
2201-
// TODO: return ISD::STRICT_FP_TO_BF16;
2202-
}
2202+
if (RetVT == MVT::bf16)
2203+
return ISD::STRICT_FP_TO_BF16;
22032204

22042205
report_fatal_error("Attempt at an invalid promotion-related conversion");
22052206
}
@@ -2999,10 +3000,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
29993000
EVT SVT = N->getOperand(0).getValueType();
30003001

30013002
if (N->isStrictFPOpcode()) {
3002-
assert(RVT == MVT::f16);
3003-
SDValue Res =
3004-
DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
3005-
{N->getOperand(0), N->getOperand(1)});
3003+
// FIXME: Assume the only 16-bit float types are f16 and bf16 for now.
3004+
unsigned Opcode;
3005+
if (RVT == MVT::f16)
3006+
Opcode = ISD::STRICT_FP_TO_FP16;
3007+
else if (RVT == MVT::bf16)
3008+
Opcode = ISD::STRICT_FP_TO_BF16;
3009+
else
3010+
llvm_unreachable("unknown half type");
3011+
SDValue Res = DAG.getNode(Opcode, SDLoc(N), {MVT::i16, MVT::Other},
3012+
{N->getOperand(0), N->getOperand(1)});
30063013
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
30073014
return Res;
30083015
}
@@ -3192,10 +3199,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
31923199
Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
31933200

31943201
if (IsStrict) {
3195-
assert(SVT == MVT::f16);
3202+
unsigned Opcode;
3203+
if (SVT == MVT::f16)
3204+
Opcode = ISD::STRICT_FP16_TO_FP;
3205+
else if (SVT == MVT::bf16)
3206+
Opcode = ISD::STRICT_BF16_TO_FP;
3207+
else
3208+
llvm_unreachable("unknown half type");
31963209
SDValue Res =
3197-
DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
3198-
{N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
3210+
DAG.getNode(Opcode, SDLoc(N), {N->getValueType(0), MVT::Other},
3211+
{N->getOperand(0), Op});
31993212
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
32003213
ReplaceValueWith(SDValue(N, 0), Res);
32013214
return SDValue();

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
165165
case ISD::FP_TO_FP16:
166166
Res = PromoteIntRes_FP_TO_FP16_BF16(N);
167167
break;
168+
case ISD::STRICT_FP_TO_BF16:
168169
case ISD::STRICT_FP_TO_FP16:
169170
Res = PromoteIntRes_STRICT_FP_TO_FP16_BF16(N);
170171
break;

llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
380380
case ISD::FP_TO_FP16: return "fp_to_fp16";
381381
case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
382382
case ISD::BF16_TO_FP: return "bf16_to_fp";
383+
case ISD::STRICT_BF16_TO_FP: return "strict_bf16_to_fp";
383384
case ISD::FP_TO_BF16: return "fp_to_bf16";
385+
case ISD::STRICT_FP_TO_BF16: return "strict_fp_to_bf16";
384386
case ISD::LROUND: return "lround";
385387
case ISD::STRICT_LROUND: return "strict_lround";
386388
case ISD::LLROUND: return "llround";

llvm/lib/CodeGen/TargetLoweringBase.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,9 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
307307
} else if (OpVT == MVT::f80) {
308308
if (RetVT == MVT::f128)
309309
return FPEXT_F80_F128;
310+
} else if (OpVT == MVT::bf16) {
311+
if (RetVT == MVT::f32)
312+
return FPEXT_BF16_F32;
310313
}
311314

312315
return UNKNOWN_LIBCALL;

llvm/lib/Target/X86/X86ISelLowering.cpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
424424
}
425425

426426
for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
427-
ISD::STRICT_FP_TO_FP16}) {
427+
ISD::STRICT_FP_TO_FP16, ISD::STRICT_FP_TO_BF16}) {
428428
// Special handling for half-precision floating point conversions.
429429
// If we don't have F16C support, then lower half float conversions
430430
// into library calls.
@@ -437,6 +437,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
437437
setOperationAction(Op, MVT::f128, Expand);
438438
}
439439

440+
// FIXME: If a target has F16C, it needs to be Custom.
441+
setOperationAction(ISD::STRICT_FP_TO_BF16, MVT::f32, Expand);
442+
setOperationAction(ISD::STRICT_FP_TO_BF16, MVT::f64, Expand);
443+
444+
setOperationAction(ISD::STRICT_BF16_TO_FP, MVT::f32, Expand);
445+
setOperationAction(ISD::STRICT_BF16_TO_FP, MVT::f64, Expand);
446+
440447
for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
441448
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
442449
setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);

0 commit comments

Comments
 (0)