@@ -7307,7 +7307,6 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
7307
7307
};
7308
7308
7309
7309
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7310
- NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
7311
7310
NEONMAP0(splat_lane_v),
7312
7311
NEONMAP0(splat_laneq_v),
7313
7312
NEONMAP0(splatq_lane_v),
@@ -7407,7 +7406,8 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7407
7406
NEONMAP0(vcvtq_f16_s16),
7408
7407
NEONMAP0(vcvtq_f16_u16),
7409
7408
NEONMAP0(vcvtq_f32_v),
7410
- NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
7409
+ NEONMAP0(vcvtq_high_bf16_f32),
7410
+ NEONMAP0(vcvtq_low_bf16_f32),
7411
7411
NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7412
7412
NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7413
7413
NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
@@ -7616,7 +7616,7 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7616
7616
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7617
7617
NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7618
7618
NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7619
- NEONMAP1 (vcvth_bf16_f32, aarch64_neon_bfcvt, 0 ),
7619
+ NEONMAP0 (vcvth_bf16_f32),
7620
7620
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7621
7621
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7622
7622
NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
@@ -12083,6 +12083,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
12083
12083
return ConstantInt::get(Builder.getInt32Ty(), 0);
12084
12084
}
12085
12085
12086
// Scalar vcvth_bf16_f32: evaluate the single argument, bitcast it to the
// float type, and return it truncated to bfloat via CreateFPTrunc. This
// returns before the MSVC-intrinsic handling that follows.
+ if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
12087
+ return Builder.CreateFPTrunc(
12088
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
12089
+ Builder.getFloatTy()),
12090
+ Builder.getBFloatTy());
12091
+
12086
12092
// Handle MSVC intrinsics before argument evaluation to prevent double
12087
12093
// evaluation.
12088
12094
if (std::optional<MSVCIntrin> MsvcIntId =
@@ -12808,6 +12814,35 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
12808
12814
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12809
12815
"vgetq_lane");
12810
12816
}
12817
// vcvt_bf16_f32: reinterpret Ops[0] as a 4 x float vector and truncate it
// element-wise to a 4 x bfloat vector with CreateFPTrunc.
+ case NEON::BI__builtin_neon_vcvt_bf16_f32: {
12818
+ llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12819
+ llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12820
+ return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12821
+ }
12822
// vcvtq_low_bf16_f32: truncate Ops[0] (viewed as 4 x float) to 4 x bfloat,
// then widen the result to eight lanes by shuffling it together with an
// all-zero 4 x bfloat vector.
+ case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
12823
// Shuffle mask holding the indices 0, 1, ..., 7.
+ SmallVector<int, 16> ConcatMask(8);
12824
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12825
+ llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12826
+ llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12827
+ llvm::Value *Trunc =
12828
+ Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12829
// With the 0..7 mask the four lanes of Trunc come first, followed by the
// four lanes of the zero vector.
+ return Builder.CreateShuffleVector(
12830
+ Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
12831
+ }
12832
// vcvtq_high_bf16_f32: build an eight-lane bfloat result from the first four
// lanes of Ops[0] (viewed as 8 x bfloat) followed by Ops[1] (viewed as
// 4 x float) truncated to 4 x bfloat.
+ case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
12833
// Mask with indices 0..7, used for the final two-operand shuffle.
+ SmallVector<int, 16> ConcatMask(8);
12834
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12835
// Mask with indices 0..3, used to pick the first four lanes of Ops[0].
+ SmallVector<int, 16> LoMask(4);
12836
+ std::iota(LoMask.begin(), LoMask.end(), 0);
12837
+ llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12838
+ llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12839
+ llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
12840
// Lanes 0..3 of the first operand are carried through without conversion.
+ llvm::Value *Inactive = Builder.CreateShuffleVector(
12841
+ Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
12842
+ llvm::Value *Trunc =
12843
+ Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
12844
// Concatenate: the four Inactive lanes first, then the four Trunc lanes.
+ return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
12845
+ }
12811
12846
12812
12847
case clang::AArch64::BI_InterlockedAdd:
12813
12848
case clang::AArch64::BI_InterlockedAdd64: {
0 commit comments