diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index da496e30fbb52..90b1ec242e6ba 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -943,11 +943,6 @@ defm SVFCVTZS_S64_F16 : SInstCvtMXZ<"svcvt_s64[_f16]", "ddPO", "dPO", "l", "aar
 defm SVFCVTZS_S32_F32 : SInstCvtMXZ<"svcvt_s32[_f32]", "ddPM", "dPM", "i", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
 defm SVFCVTZS_S64_F32 : SInstCvtMXZ<"svcvt_s64[_f32]", "ddPM", "dPM", "l", "aarch64_sve_fcvtzs_i64f32">;
 
-let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
-  defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "ddPM", "dPM", "b", "aarch64_sve_fcvt_bf16f32">;
-  def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone, VerifyRuntimeMode]>;
-}
-
 // svcvt_s##_f64
 defm SVFCVTZS_S32_F64 : SInstCvtMXZ<"svcvt_s32[_f64]", "ttPd", "tPd", "d", "aarch64_sve_fcvtzs_i32f64">;
 defm SVFCVTZS_S64_F64 : SInstCvtMXZ<"svcvt_s64[_f64]", "ddPN", "dPN", "l", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
@@ -1003,19 +998,26 @@ defm SVFCVT_F32_F64   : SInstCvtMXZ<"svcvt_f32[_f64]", "MMPd", "MPd", "d", "aarc
 defm SVFCVT_F64_F16   : SInstCvtMXZ<"svcvt_f64[_f16]", "ddPO", "dPO", "d", "aarch64_sve_fcvt_f64f16">;
 defm SVFCVT_F64_F32   : SInstCvtMXZ<"svcvt_f64[_f32]", "ddPM", "dPM", "d", "aarch64_sve_fcvt_f64f32">;
 
-let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
-defm SVCVTLT_F32    : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
-defm SVCVTLT_F64    : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
+let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
+defm SVCVT_BF16_F32    : SInstCvtMXZ<"svcvt_bf16[_f32]", "$$Pd", "$Pd", "f", "aarch64_sve_fcvt_bf16f32_v2">;
+
+def SVCVTNT_BF16_F32   : SInst<"svcvtnt_bf16[_f32]", "$$Pd", "f", MergeOp1, "aarch64_sve_fcvtnt_bf16f32_v2", [IsOverloadNone, VerifyRuntimeMode]>;
+// SVCVTNT_X_BF16_F32  : Implemented as macro by SveEmitter.cpp
+}
 
-defm SVCVTX_F32     : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">;
+let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
+defm SVCVTLT_F32_F16   : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
+defm SVCVTLT_F64_F32   : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
 
-def SVCVTNT_F32     : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTNT_F64     : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
-// SVCVTNT_X        : Implemented as macro by SveEmitter.cpp
+defm SVCVTX_F32_F64    : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">;
 
-def SVCVTXNT_F32    : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
-// SVCVTXNT_X_F32   : Implemented as macro by SveEmitter.cpp
+def SVCVTNT_F16_F32    : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTNT_F32_F64    : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
+// SVCVTNT_X_F16_F32   : Implemented as macro by SveEmitter.cpp
+// SVCVTNT_X_F32_F64   : Implemented as macro by SveEmitter.cpp
+
+def SVCVTXNT_F32_F64   : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>;
+// SVCVTXNT_X_F32_F64  : Implemented as macro by SveEmitter.cpp
 }
 
 ////////////////////////////////////////////////////////////////////////////////
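Note: with the new prototype strings above ("$$Pd" / "$Pd" with default type "f"), these builtins now lower to the _v2 intrinsics, whose predicate matches the 4-lane f32 operand rather than the 8-lane bf16 result. A minimal IR sketch of the new contract (the declarations mirror those added to sve-intrinsics-bfloat.ll below; @cvt_example itself is illustrative only):

  ; The predicate is now <vscale x 4 x i1>, matching the <vscale x 4 x float>
  ; source rather than the <vscale x 8 x bfloat> result.
  declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat>, <vscale x 4 x i1>, <vscale x 4 x float>)
  declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat>, <vscale x 4 x i1>, <vscale x 4 x float>)

  define <vscale x 8 x bfloat> @cvt_example(<vscale x 8 x bfloat> %inactive, <vscale x 4 x i1> %pg, <vscale x 4 x float> %op) {
    %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %inactive, <vscale x 4 x i1> %pg, <vscale x 4 x float> %op)
    ret <vscale x 8 x bfloat> %r
  }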
"aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>; +// SVCVTXNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp } //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c index 145d60db6eda3..cbeac2f384f9a 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c @@ -24,14 +24,14 @@ // CHECK-LABEL: @test_svcvt_bf16_f32_x( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( undef, [[TMP0]], [[OP:%.*]]) // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_xu10__SVBool_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( undef, [[TMP0]], [[OP:%.*]]) // CPP-CHECK-NEXT: ret [[TMP1]] // svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR { @@ -40,14 +40,14 @@ svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR { // CHECK-LABEL: @test_svcvt_bf16_f32_z( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( zeroinitializer, [[TMP0]], [[OP:%.*]]) // CHECK-NEXT: ret [[TMP1]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_bf16_f32_zu10__SVBool_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( zeroinitializer, [[TMP0]], [[OP:%.*]]) // CPP-CHECK-NEXT: ret [[TMP1]] // svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR { @@ -56,14 +56,14 @@ svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR { // CHECK-LABEL: @test_svcvt_bf16_f32_m( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) // 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
index ce719f92674c0..9a92ddc448f19 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
@@ -24,14 +24,14 @@
 
 // CHECK-LABEL: @test_svcvtnt_bf16_f32_x(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_xu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
 svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {
@@ -40,14 +40,14 @@ svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t
 
 // CHECK-LABEL: @test_svcvtnt_bf16_f32_m(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_mu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
 svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index cd9e66b10d1de..594069c619ceb 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2184,8 +2184,8 @@
 def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
 def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
 
-def int_aarch64_sve_fcvt_bf16f32   : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_bf16f32_v2   : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
 
 def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
 def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 6f833acd6dbc0..247e17625e386 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -846,6 +846,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
         return false; // No other 'aarch64.sve.bf*'.
       }
 
+      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
+      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
+        NewFn = nullptr;
+        return true;
+      }
+
       if (Name.consume_front("addqv")) {
         // 'aarch64.sve.addqv'.
         if (!F->getReturnType()->isFPOrFPVectorTy())
@@ -4072,6 +4078,35 @@ static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
   return Rep;
 }
 
+static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
+                                          Function *F, IRBuilder<> &Builder) {
+  Intrinsic::ID NewID =
+      StringSwitch<Intrinsic::ID>(Name)
+          .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
+          .Case("sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
+          .Default(Intrinsic::not_intrinsic);
+  if (NewID == Intrinsic::not_intrinsic)
+    llvm_unreachable("Unhandled Intrinsic!");
+
+  SmallVector<Value *> Args(CI->args());
+
+  // The original intrinsics incorrectly used a predicate based on the smallest
+  // element type rather than the largest.
+  Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
+  Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
+
+  if (Args[1]->getType() != BadPredTy)
+    llvm_unreachable("Unexpected predicate type!");
+
+  Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
+                                    BadPredTy, Args[1]);
+  Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
+                                    GoodPredTy, Args[1]);
+
+  Function *NewF = Intrinsic::getDeclaration(CI->getModule(), NewID);
+  return Builder.CreateCall(NewF, Args, CI->getName());
+}
+
 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                       IRBuilder<> &Builder) {
   if (Name == "mve.vctp64.old") {
@@ -4325,6 +4360,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
 
   bool IsX86 = Name.consume_front("x86.");
   bool IsNVVM = Name.consume_front("nvvm.");
+  bool IsAArch64 = Name.consume_front("aarch64.");
   bool IsARM = Name.consume_front("arm.");
   bool IsAMDGCN = Name.consume_front("amdgcn.");
   bool IsDbg = Name.consume_front("dbg.");
@@ -4336,6 +4372,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
     Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
   } else if (IsX86) {
     Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
+  } else if (IsAArch64) {
+    Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
   } else if (IsARM) {
     Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
   } else if (IsAMDGCN) {
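The net effect of the upgrade path, sketched as IR (value names hypothetical): a call to a retired intrinsic is rewritten to the _v2 form, with the old <vscale x 8 x i1> predicate rewired through a to-svbool/from-svbool pair exactly as the code above constructs it:

  ; Old bitcode:
  ;   %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
  ; After UpgradeIntrinsicCall:
  %pg.b = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
  %pg.s = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %pg.s, <vscale x 4 x float> %b)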
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8c89fb991b0b9..fa79bda9ea3f9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5549,10 +5549,17 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
          "Only expect to cast between legal scalable predicate types!");
 
   // Return the operand if the cast isn't changing type,
-  // e.g. <vscale x 16 x i1> -> <vscale x 16 x i1>
   if (InVT == VT)
     return Op;
 
+  // Look through casts to <vscale x 16 x i1> when their input has more lanes
+  // than VT. This will increase the chances of removing casts that introduce
+  // new lanes, which have to be explicitly zero'd.
+  if (Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+      Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
+      Op.getOperand(1).getValueType().bitsGT(VT))
+    Op = Op.getOperand(1);
+
   SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
 
   // We only have to zero the lanes if new lanes are being defined, e.g. when
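The upgrade sequence above is exactly the pattern this fold targets. A sketch of the IR that now selects cleanly (function and value names are illustrative, not from this patch): during selection the convert.to/convert.from pair becomes a single predicate reinterpret taken directly from the nxv8i1 value, so no new lanes are defined and no explicit zeroing is emitted:

  define <vscale x 8 x bfloat> @upgraded_call(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) {
    ; nxv8i1 -> nxv16i1 would define (and zero) new lanes; nxv16i1 -> nxv4i1
    ; only reads existing ones. Looking through the widening cast leaves a
    ; plain nxv8i1 -> nxv4i1 reinterpret.
    %widen = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
    %narrow = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %widen)
    %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %narrow, <vscale x 4 x float> %b)
    ret <vscale x 8 x bfloat> %r
  }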
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7240f6a22a87b..76362768e0aa6 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2425,8 +2425,8 @@ let Predicates = [HasBF16, HasSVEorSME] in {
   defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
   defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
   defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
-  defm BFCVT_ZPmZ   : sve_bfloat_convert<0b1, "bfcvt",   int_aarch64_sve_fcvt_bf16f32, AArch64fcvtr_mt>;
-  defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
+  defm BFCVT_ZPmZ   : sve_bfloat_convert<0b1, "bfcvt",   int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
+  defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
 } // End HasBF16, HasSVEorSME
 
 let Predicates = [HasSVEorSME] in {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 84aea83424e4d..80d5168ae961a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2157,7 +2157,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   switch (IID) {
   default:
     break;
-  case Intrinsic::aarch64_sve_fcvt_bf16f32:
+  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
   case Intrinsic::aarch64_sve_fcvt_f16f32:
   case Intrinsic::aarch64_sve_fcvt_f16f64:
   case Intrinsic::aarch64_sve_fcvt_f32f16:
@@ -2188,7 +2188,7 @@
   case Intrinsic::aarch64_sve_ucvtf_f32i64:
   case Intrinsic::aarch64_sve_ucvtf_f64i32:
     return instCombineSVEAllOrNoActiveUnary(IC, II);
-  case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
+  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
   case Intrinsic::aarch64_sve_fcvtnt_f16f32:
   case Intrinsic::aarch64_sve_fcvtnt_f32f64:
   case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
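The swapped case labels keep the renamed intrinsics enrolled in the existing all-active/no-active InstCombine folds, which the tests further down verify. Roughly, in IR (a sketch; @all_active is illustrative):

  define <vscale x 8 x bfloat> @all_active(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
    ; ptrue(31) makes every f32 lane active, so the passthru %a is dead and
    ; InstCombine rewrites it to undef. With a zeroinitializer predicate the
    ; whole call instead folds to %a.
    %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
    %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
    ret <vscale x 8 x bfloat> %out
  }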
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 0bfac6465a1f3..13c2a90a963f8 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -8811,7 +8811,7 @@ multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
                               SDPatternOperator ir_op = null_frag> {
   def NAME : sve_bfloat_convert<N, asm>;
 
-  def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i1, nxv4f32, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
   def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
   def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
 }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
index 7d1e63e23a2ce..7f352041ec587 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
@@ -237,10 +237,19 @@ define <vscale x 4 x float> @bfmmla_f32(<vscale x 4 x float> %a, <vscale x 8 x
 ; BFCVT
 ;
 
-define <vscale x 8 x bfloat> @fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) nounwind {
+define <vscale x 8 x bfloat> @fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: fcvt_bf16_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfcvt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x bfloat> @fcvt_bf16_f32_deprecated(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) nounwind {
+; CHECK-LABEL: fcvt_bf16_f32_deprecated:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfcvt z0.h, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
@@ -250,10 +259,19 @@ define <vscale x 8 x bfloat> @fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x
 ; BFCVTNT
 ;
 
-define <vscale x 8 x bfloat> @fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) nounwind {
+define <vscale x 8 x bfloat> @fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) nounwind {
 ; CHECK-LABEL: fcvtnt_bf16_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfcvtnt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x bfloat> @fcvtnt_bf16_f32_deprecated(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) nounwind {
+; CHECK-LABEL: fcvtnt_bf16_f32_deprecated:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfcvtnt z0.h, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
@@ -267,4 +285,6 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float>, <v
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat>, <vscale x 4 x i1>, <vscale x 4 x float>)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat>, <vscale x 4 x i1>, <vscale x 4 x float>)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
index 04550156be30b..930fdfc136565 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
@@ -5,36 +5,36 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_undef(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_undef(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
 ; CHECK-NEXT:    ret <vscale x 8 x bfloat> [[OUT]]
 ;
-  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
 }
 
 define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
 ; CHECK-NEXT:    ret <vscale x 8 x bfloat> [[OUT]]
 ;
-  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
 }
 
 define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
 ; CHECK-NEXT:    ret <vscale x 8 x bfloat> [[OUT]]
 ;
-  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
 }
 
@@ -137,12 +137,12 @@ define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vsc
 
 define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[A]], <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
 ; CHECK-NEXT:    ret <vscale x 8 x bfloat> [[OUT]]
 ;
-  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
 }
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
index 9b1528eda8ffd..4f6f839b3d64f 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
@@ -7,7 +7,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vsc
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
 ; CHECK-NEXT:    ret <vscale x 8 x bfloat> [[A]]
 ;
-  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
 }
 
@@ -88,7 +88,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vsc
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
 ; CHECK-NEXT:    ret <vscale x 8 x bfloat> [[A]]
 ;
-  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
   ret <vscale x 8 x bfloat> %out
 }