Skip to content

Commit 639ca43

Browse files
committed
Add f8E4M3 IEEE 754 type to llvm
1 parent 2628a5f commit 639ca43

File tree

5 files changed

+96
-3
lines changed

5 files changed

+96
-3
lines changed

clang/include/clang/AST/Stmt.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -460,10 +460,10 @@ class alignas(void *) Stmt {
460460
unsigned : NumExprBits;
461461

462462
static_assert(
463-
llvm::APFloat::S_MaxSemantics < 16,
464-
"Too many Semantics enum values to fit in bitfield of size 4");
463+
llvm::APFloat::S_MaxSemantics < 32,
464+
"Too many Semantics enum values to fit in bitfield of size 5");
465465
LLVM_PREFERRED_TYPE(llvm::APFloat::Semantics)
466-
unsigned Semantics : 4; // Provides semantics for APFloat construction
466+
unsigned Semantics : 5; // Provides semantics for APFloat construction
467467
LLVM_PREFERRED_TYPE(bool)
468468
unsigned IsExact : 1;
469469
};

clang/lib/AST/MicrosoftMangle.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,7 @@ void MicrosoftCXXNameMangler::mangleFloat(llvm::APFloat Number) {
946946
case APFloat::S_IEEEquad: Out << 'Y'; break;
947947
case APFloat::S_PPCDoubleDouble: Out << 'Z'; break;
948948
case APFloat::S_Float8E5M2:
949+
case APFloat::S_Float8E4M3:
949950
case APFloat::S_Float8E4M3FN:
950951
case APFloat::S_Float8E5M2FNUZ:
951952
case APFloat::S_Float8E4M3FNUZ:

llvm/include/llvm/ADT/APFloat.h

+6
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,9 @@ struct APFloatBase {
166166
// This format's exponent bias is 16, instead of the 15 (2 ** (5 - 1) - 1)
167167
// that IEEE precedent would imply.
168168
S_Float8E5M2FNUZ,
169+
// 8-bit floating point number following IEEE-754 conventions with bit
170+
// layout S1E4M3.
171+
S_Float8E4M3,
169172
// 8-bit floating point number mostly following IEEE-754 conventions with
170173
// bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
171174
// Unlike IEEE-754 types, there are no infinity values, and NaN is
@@ -217,6 +220,7 @@ struct APFloatBase {
217220
static const fltSemantics &PPCDoubleDouble() LLVM_READNONE;
218221
static const fltSemantics &Float8E5M2() LLVM_READNONE;
219222
static const fltSemantics &Float8E5M2FNUZ() LLVM_READNONE;
223+
static const fltSemantics &Float8E4M3() LLVM_READNONE;
220224
static const fltSemantics &Float8E4M3FN() LLVM_READNONE;
221225
static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE;
222226
static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE;
@@ -638,6 +642,7 @@ class IEEEFloat final : public APFloatBase {
638642
APInt convertPPCDoubleDoubleAPFloatToAPInt() const;
639643
APInt convertFloat8E5M2APFloatToAPInt() const;
640644
APInt convertFloat8E5M2FNUZAPFloatToAPInt() const;
645+
APInt convertFloat8E4M3APFloatToAPInt() const;
641646
APInt convertFloat8E4M3FNAPFloatToAPInt() const;
642647
APInt convertFloat8E4M3FNUZAPFloatToAPInt() const;
643648
APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const;
@@ -656,6 +661,7 @@ class IEEEFloat final : public APFloatBase {
656661
void initFromPPCDoubleDoubleAPInt(const APInt &api);
657662
void initFromFloat8E5M2APInt(const APInt &api);
658663
void initFromFloat8E5M2FNUZAPInt(const APInt &api);
664+
void initFromFloat8E4M3APInt(const APInt &api);
659665
void initFromFloat8E4M3FNAPInt(const APInt &api);
660666
void initFromFloat8E4M3FNUZAPInt(const APInt &api);
661667
void initFromFloat8E4M3B11FNUZAPInt(const APInt &api);

llvm/lib/Support/APFloat.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
136136
// File-local semantics descriptors for the 8-bit floating point formats.
// NOTE(review): the four leading initializers are presumably
// {maxExponent, minExponent, precision, sizeInBits} -- confirm against the
// fltSemantics definition, which is not visible here.
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
137137
static constexpr fltSemantics semFloat8E5M2FNUZ = {
138138
15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
139+
// Descriptor for the S1E4M3 format that follows IEEE-754 conventions. Unlike
// the FN/FNUZ variants around it, it passes no explicit fltNonfiniteBehavior
// or fltNanEncoding argument.
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
139140
static constexpr fltSemantics semFloat8E4M3FN = {
140141
8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
141142
static constexpr fltSemantics semFloat8E4M3FNUZ = {
@@ -208,6 +209,8 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
208209
return Float8E5M2();
209210
case S_Float8E5M2FNUZ:
210211
return Float8E5M2FNUZ();
212+
case S_Float8E4M3:
213+
return Float8E4M3();
211214
case S_Float8E4M3FN:
212215
return Float8E4M3FN();
213216
case S_Float8E4M3FNUZ:
@@ -246,6 +249,8 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
246249
return S_Float8E5M2;
247250
else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
248251
return S_Float8E5M2FNUZ;
252+
else if (&Sem == &llvm::APFloat::Float8E4M3())
253+
return S_Float8E4M3;
249254
else if (&Sem == &llvm::APFloat::Float8E4M3FN())
250255
return S_Float8E4M3FN;
251256
else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
@@ -276,6 +281,7 @@ const fltSemantics &APFloatBase::PPCDoubleDouble() {
276281
}
277282
// Accessors that expose the file-local semantics constants of the 8-bit
// formats by reference.
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
278283
const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
284+
// Semantics of the S1E4M3 format that follows IEEE-754 conventions; this is
// the object EnumToSemantics() returns for S_Float8E4M3.
const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
279285
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
280286
const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
281287
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
@@ -3617,6 +3623,11 @@ APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
36173623
return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
36183624
}
36193625

3626+
// Produces the APInt bit pattern of a value whose semantics are
// semFloat8E4M3; bitcastToAPInt() dispatches here for that semantics object.
APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3627+
// Checks that partCount() is 1 before converting.
assert(partCount() == 1);
3628+
// Delegates to the templated convertIEEEFloatToAPInt helper that the sibling
// 8-bit formats also use, instantiated for semFloat8E4M3.
return convertIEEEFloatToAPInt<semFloat8E4M3>();
3629+
}
3630+
36203631
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
36213632
assert(partCount() == 1);
36223633
return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
@@ -3681,6 +3692,9 @@ APInt IEEEFloat::bitcastToAPInt() const {
36813692
if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
36823693
return convertFloat8E5M2FNUZAPFloatToAPInt();
36833694

3695+
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3696+
return convertFloat8E4M3APFloatToAPInt();
3697+
36843698
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
36853699
return convertFloat8E4M3FNAPFloatToAPInt();
36863700

@@ -3902,6 +3916,10 @@ void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
39023916
initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
39033917
}
39043918

3919+
// Initializes this value from the bit pattern in `api` using the
// semFloat8E4M3 semantics; initFromAPInt() calls this when it is handed
// &semFloat8E4M3.
void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3920+
// All work is done by the templated initFromIEEEAPInt helper.
initFromIEEEAPInt<semFloat8E4M3>(api);
3921+
}
3922+
39053923
// Initializes this value from the bit pattern in `api` using the
// semFloat8E4M3FN semantics; initFromAPInt() calls this when it is handed
// &semFloat8E4M3FN.
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
39063924
// All work is done by the templated initFromIEEEAPInt helper.
initFromIEEEAPInt<semFloat8E4M3FN>(api);
39073925
}
@@ -3951,6 +3969,8 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
39513969
return initFromFloat8E5M2APInt(api);
39523970
if (Sem == &semFloat8E5M2FNUZ)
39533971
return initFromFloat8E5M2FNUZAPInt(api);
3972+
if (Sem == &semFloat8E4M3)
3973+
return initFromFloat8E4M3APInt(api);
39543974
if (Sem == &semFloat8E4M3FN)
39553975
return initFromFloat8E4M3FNAPInt(api);
39563976
if (Sem == &semFloat8E4M3FNUZ)

llvm/unittests/ADT/APFloatTest.cpp

+66
Original file line numberDiff line numberDiff line change
@@ -2133,6 +2133,8 @@ TEST(APFloatTest, getZero) {
21332133
{&APFloat::Float8E5M2(), true, true, {0x80ULL, 0}, 1},
21342134
{&APFloat::Float8E5M2FNUZ(), false, false, {0, 0}, 1},
21352135
{&APFloat::Float8E5M2FNUZ(), true, false, {0, 0}, 1},
2136+
{&APFloat::Float8E4M3(), false, true, {0, 0}, 1},
2137+
{&APFloat::Float8E4M3(), true, true, {0x80ULL, 0}, 1},
21362138
{&APFloat::Float8E4M3FN(), false, true, {0, 0}, 1},
21372139
{&APFloat::Float8E4M3FN(), true, true, {0x80ULL, 0}, 1},
21382140
{&APFloat::Float8E4M3FNUZ(), false, false, {0, 0}, 1},
@@ -6532,6 +6534,34 @@ TEST(APFloatTest, Float8E5M2ToDouble) {
65326534
EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
65336535
}
65346536

6537+
// Verifies that Float8E4M3 values convert to the expected doubles: 1 and 2,
// the largest finite magnitude (240), the smallest normal (2^-6), the
// smallest denormal (2^-9), both infinities, and a quiet NaN.
TEST(APFloatTest, Float8E4M3ToDouble) {
  APFloat One(APFloat::Float8E4M3(), "1.0");
  EXPECT_EQ(1.0, One.convertToDouble());
  APFloat Two(APFloat::Float8E4M3(), "2.0");
  EXPECT_EQ(2.0, Two.convertToDouble());

  // Expected values are double literals so that both operands of EXPECT_EQ
  // have the type convertToDouble() returns.
  APFloat PosLargest = APFloat::getLargest(APFloat::Float8E4M3(), false);
  EXPECT_EQ(240.0, PosLargest.convertToDouble());
  APFloat NegLargest = APFloat::getLargest(APFloat::Float8E4M3(), true);
  EXPECT_EQ(-240.0, NegLargest.convertToDouble());

  APFloat PosSmallest =
      APFloat::getSmallestNormalized(APFloat::Float8E4M3(), false);
  EXPECT_EQ(0x1.p-6, PosSmallest.convertToDouble());
  APFloat NegSmallest =
      APFloat::getSmallestNormalized(APFloat::Float8E4M3(), true);
  EXPECT_EQ(-0x1.p-6, NegSmallest.convertToDouble());

  APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E4M3(), false);
  EXPECT_TRUE(SmallestDenorm.isDenormal());
  EXPECT_EQ(0x1.p-9, SmallestDenorm.convertToDouble());

  // This format has infinities and NaNs, and they must survive the widening.
  APFloat PosInf = APFloat::getInf(APFloat::Float8E4M3());
  EXPECT_EQ(std::numeric_limits<double>::infinity(), PosInf.convertToDouble());
  APFloat NegInf = APFloat::getInf(APFloat::Float8E4M3(), true);
  EXPECT_EQ(-std::numeric_limits<double>::infinity(), NegInf.convertToDouble());
  APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3());
  EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
}
6564+
65356565
TEST(APFloatTest, Float8E4M3FNToDouble) {
65366566
APFloat One(APFloat::Float8E4M3FN(), "1.0");
65376567
EXPECT_EQ(1.0, One.convertToDouble());
@@ -6846,6 +6876,42 @@ TEST(APFloatTest, Float8E5M2ToFloat) {
68466876
EXPECT_TRUE(std::isnan(QNaN.convertToFloat()));
68476877
}
68486878

6879+
// Converts representative Float8E4M3 values with convertToFloat() and checks
// the results: signed zeros, 1 and 2, the extreme normal magnitudes, the
// smallest denormal, both infinities, and a quiet NaN.
TEST(APFloatTest, Float8E4M3ToFloat) {
  const auto &Sem = APFloat::Float8E4M3();

  // Signed zeros: the sign must be preserved by the conversion.
  APFloat ZeroPos = APFloat::getZero(Sem);
  APFloat ZeroPosAsFloat(ZeroPos.convertToFloat());
  EXPECT_TRUE(ZeroPosAsFloat.isPosZero());
  APFloat ZeroNeg = APFloat::getZero(Sem, true);
  APFloat ZeroNegAsFloat(ZeroNeg.convertToFloat());
  EXPECT_TRUE(ZeroNegAsFloat.isNegZero());

  // Small integers parsed from strings.
  APFloat ValOne(Sem, "1.0");
  EXPECT_EQ(1.0F, ValOne.convertToFloat());
  APFloat ValTwo(Sem, "2.0");
  EXPECT_EQ(2.0F, ValTwo.convertToFloat());

  // Largest finite magnitude, both signs.
  APFloat MaxPos = APFloat::getLargest(Sem, false);
  EXPECT_EQ(240.0F, MaxPos.convertToFloat());
  APFloat MaxNeg = APFloat::getLargest(Sem, true);
  EXPECT_EQ(-240.0F, MaxNeg.convertToFloat());

  // Smallest normalized magnitude, both signs.
  APFloat MinNormPos = APFloat::getSmallestNormalized(Sem, false);
  EXPECT_EQ(0x1.p-6, MinNormPos.convertToFloat());
  APFloat MinNormNeg = APFloat::getSmallestNormalized(Sem, true);
  EXPECT_EQ(-0x1.p-6, MinNormNeg.convertToFloat());

  // Smallest positive value, which must be a denormal.
  APFloat MinDenorm = APFloat::getSmallest(Sem, false);
  EXPECT_TRUE(MinDenorm.isDenormal());
  EXPECT_EQ(0x1.p-9, MinDenorm.convertToFloat());

  // Non-finite values.
  APFloat InfPos = APFloat::getInf(Sem);
  EXPECT_EQ(std::numeric_limits<float>::infinity(), InfPos.convertToFloat());
  APFloat InfNeg = APFloat::getInf(Sem, true);
  EXPECT_EQ(-std::numeric_limits<float>::infinity(), InfNeg.convertToFloat());
  APFloat QuietNaN = APFloat::getQNaN(Sem);
  EXPECT_TRUE(std::isnan(QuietNaN.convertToFloat()));
}
6914+
68496915
TEST(APFloatTest, Float8E4M3FNToFloat) {
68506916
APFloat PosZero = APFloat::getZero(APFloat::Float8E4M3FN());
68516917
APFloat PosZeroToFloat(PosZero.convertToFloat());

0 commit comments

Comments
 (0)