diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 68f4ec5ef49f3..10c097e62b52c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1084,6 +1084,23 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
+  // TODO: Could we merge some code with zvfhmin?
+  if (Subtarget.hasVInstructionsBF16()) {
+    for (MVT VT : BF16VecVTs) {
+      if (!isTypeLegal(VT))
+        continue;
+      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+      setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+      setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+                         Custom);
+      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+                          ISD::EXTRACT_SUBVECTOR},
+                         VT, Custom);
+      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+      // TODO: Promote to fp32.
+    }
+  }
+
   if (Subtarget.hasVInstructionsF32()) {
     for (MVT VT : F32VecVTs) {
       if (!isTypeLegal(VT))
         continue;
@@ -1299,6 +1316,19 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         continue;
       }
 
+      if (VT.getVectorElementType() == MVT::bf16) {
+        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+                           Custom);
+        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+                            ISD::EXTRACT_SUBVECTOR},
+                           VT, Custom);
+        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+        // TODO: Promote to fp32.
+        continue;
+      }
+
       // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
       setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                          Custom);
@@ -2558,6 +2588,10 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
     if (!Subtarget.hasVInstructionsF16Minimal())
       return false;
     break;
+  case MVT::bf16:
+    if (!Subtarget.hasVInstructionsBF16())
+      return false;
+    break;
   case MVT::f32:
     if (!Subtarget.hasVInstructionsF32())
       return false;
@@ -2609,6 +2643,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
   case MVT::i16:
   case MVT::i32:
   case MVT::i64:
+  case MVT::bf16:
   case MVT::f16:
   case MVT::f32:
   case MVT::f64: {
@@ -8057,8 +8092,10 @@ RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
 
   // RVV can only widen/truncate fp to types double/half the size as the source.
   if ((VT.getVectorElementType() == MVT::f64 &&
-       SrcVT.getVectorElementType() == MVT::f16) ||
-      (VT.getVectorElementType() == MVT::f16 &&
+       (SrcVT.getVectorElementType() == MVT::f16 ||
+        SrcVT.getVectorElementType() == MVT::bf16)) ||
+      ((VT.getVectorElementType() == MVT::f16 ||
+        VT.getVectorElementType() == MVT::bf16) &&
        SrcVT.getVectorElementType() == MVT::f64)) {
     // For double rounding, the intermediate rounding should be round-to-odd.
     unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
@@ -8102,9 +8139,12 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
   SDValue Src = Op.getOperand(0);
   MVT SrcVT = Src.getSimpleValueType();
 
-  bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
-                                     SrcVT.getVectorElementType() != MVT::f16);
-  bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
+  bool IsDirectExtend =
+      IsExtend && (VT.getVectorElementType() != MVT::f64 ||
+                   (SrcVT.getVectorElementType() != MVT::f16 &&
+                    SrcVT.getVectorElementType() != MVT::bf16));
+  bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
+                                      VT.getVectorElementType() != MVT::bf16) ||
                                      SrcVT.getVectorElementType() != MVT::f64);
   bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index fc60a9cc7cd30..cbd77f5c48bc3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -355,24 +355,24 @@ defset list AllVectors = {
                                    V_M8, f64, FPR64>;
     }
   }
-}
 
-defset list AllBFloatVectors = {
-  defset list NoGroupBFloatVectors = {
-    defset list FractionalGroupBFloatVectors = {
-      def VBF16MF4: VTypeInfo;
-      def VBF16MF2: VTypeInfo;
+  defset list AllBFloatVectors = {
+    defset list NoGroupBFloatVectors = {
+      defset list FractionalGroupBFloatVectors = {
+        def VBF16MF4: VTypeInfo;
+        def VBF16MF2: VTypeInfo;
+      }
+      def VBF16M1: VTypeInfo;
+    }
+
+    defset list GroupBFloatVectors = {
+      def VBF16M2: GroupVTypeInfo;
+      def VBF16M4: GroupVTypeInfo;
+      def VBF16M8: GroupVTypeInfo;
     }
-    def VBF16M1: VTypeInfo;
-  }
-
-  defset list GroupBFloatVectors = {
-    def VBF16M2: GroupVTypeInfo;
-    def VBF16M4: GroupVTypeInfo;
-    def VBF16M8: GroupVTypeInfo;
   }
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b4af83a3cbf67..714f8cff7b637 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1495,6 +1495,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
                      fvti.AVL, fvti.Log2SEW, TA_MA)>;
 }
 
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+  defvar fvti = fvtiToFWti.Vti;
+  defvar fwti = fvtiToFWti.Wti;
+  let Predicates = [HasVInstructionsBF16] in
+  def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
+            (!cast("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW)
+                (fvti.Vector (IMPLICIT_DEF)),
+                fwti.RegClass:$rs1,
+                // Value to indicate no rounding mode change in
+                // RISCVInsertReadWriteCSR
+                FRM_DYN,
+                fvti.AVL, fvti.Log2SEW, TA_MA)>;
+}
+
 //===----------------------------------------------------------------------===//
 // Vector Splats
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 6c6ecb604fd03..e10b8bf2767b8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2670,6 +2670,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
                      GPR:$vl, fvti.Log2SEW, TA_MA)>;
 }
 
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+  defvar fvti = fvtiToFWti.Vti;
+  defvar fwti = fvtiToFWti.Wti;
+  let Predicates = [HasVInstructionsBF16] in
+  def : Pat<(fwti.Vector (any_riscv_fpextend_vl
+                             (fvti.Vector fvti.RegClass:$rs1),
+                             (fvti.Mask V0),
+                             VLOpFrag)),
(!cast("PseudoVFWCVTBF16_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") + (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, + (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW, TA_MA)>; +} + // 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions defm : VPatNConvertFP2IVL_W_RM; defm : VPatNConvertFP2IVL_W_RM; @@ -2714,6 +2728,22 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { } } +foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { + defvar fvti = fvtiToFWti.Vti; + defvar fwti = fvtiToFWti.Wti; + let Predicates = [HasVInstructionsBF16] in + def : Pat<(fvti.Vector (any_riscv_fpround_vl + (fwti.Vector fwti.RegClass:$rs1), + (fwti.Mask V0), VLOpFrag)), + (!cast("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, + (fwti.Mask V0), + // Value to indicate no rounding mode change in + // RISCVInsertReadWriteCSR + FRM_DYN, + GPR:$vl, fvti.Log2SEW, TA_MA)>; +} + // 14. Vector Reduction Operations // 14.1. Vector Single-Width Integer Reduction Instructions diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll index e15e6452163b1..4f1fcfbe8cc5d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s define @extract_nxv8i32_nxv4i32_0( %vec) { ; CHECK-LABEL: extract_nxv8i32_nxv4i32_0: @@ -481,6 +481,60 @@ define @extract_nxv6f16_nxv12f16_6( %in) ret %res } +define @extract_nxv2bf16_nxv16bf16_0( %vec) { +; CHECK-LABEL: extract_nxv2bf16_nxv16bf16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %c = call @llvm.vector.extract.nxv2bf16.nxv16bf16( %vec, i64 0) + ret %c +} + +define @extract_nxv2bf16_nxv16bf16_2( %vec) { +; CHECK-LABEL: extract_nxv2bf16_nxv16bf16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: ret + %c = call @llvm.vector.extract.nxv2bf16.nxv16bf16( %vec, i64 2) + ret %c +} + +define @extract_nxv2bf16_nxv16bf16_4( %vec) { +; CHECK-LABEL: extract_nxv2bf16_nxv16bf16_4: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %c = call @llvm.vector.extract.nxv2bf16.nxv16bf16( %vec, i64 4) + ret %c +} + +define @extract_nxv6bf16_nxv12bf16_0( %in) { +; CHECK-LABEL: extract_nxv6bf16_nxv12bf16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.vector.extract.nxv6bf16.nxv12bf16( %in, i64 0) + ret %res +} + +define @extract_nxv6bf16_nxv12bf16_6( %in) { +; CHECK-LABEL: extract_nxv6bf16_nxv12bf16_6: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v13, v10, a0 +; CHECK-NEXT: vslidedown.vx v12, v9, a0 +; CHECK-NEXT: add a1, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v12, v10, a0 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %res = call @llvm.vector.extract.nxv6bf16.nxv12bf16( %in, i64 6) 
+ ret %res +} + declare @llvm.vector.extract.nxv6f16.nxv12f16(, i64) declare @llvm.vector.extract.nxv1i8.nxv4i8( %vec, i64 %idx) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll index 51ac27acaf470..48cc3f17a6269 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s declare <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half>, <2 x i1>, i32) @@ -120,3 +120,53 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze %v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl) ret <32 x double> %v } + +declare <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat>, <2 x i1>, i32) + +define <2 x float> @vfpext_v2bf16_v2f32(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfpext_v2bf16_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl) + ret <2 x float> %v +} + +define <2 x float> @vfpext_v2bf16_v2f32_unmasked(<2 x bfloat> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfpext_v2bf16_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + ret <2 x float> %v +} + +declare <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat>, <2 x i1>, i32) + +define <2 x double> @vfpext_v2bf16_v2f64(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfpext_v2bf16_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl) + ret <2 x double> %v +} + +define <2 x double> @vfpext_v2bf16_v2f64_unmasked(<2 x bfloat> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfpext_v2bf16_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v9 
+; CHECK-NEXT: ret + %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + ret <2 x double> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll index de11f9e8a9fa2..d890bf5412f9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32) @@ -122,3 +122,53 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl) ret <32 x float> %v } + +declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float>, <2 x i1>, i32) + +define <2 x bfloat> @vfptrunc_v2bf16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2bf16_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfptrunc_v2bf16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2bf16_v2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + ret <2 x bfloat> %v +} + +declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double>, <2 x i1>, i32) + +define <2 x bfloat> @vfptrunc_v2bf16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2bf16_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfptrunc_v2bf16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_v2bf16_v2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + ret <2 x bfloat> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load-store.ll index 38aee567e2b5c..fbe8bcbc0d3c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s define void @v2i8(ptr %p, ptr %q) { ; CHECK-LABEL: v2i8: @@ -301,3 +301,15 @@ define void @v2i8_volatile_store(ptr %p, ptr %q) { store volatile <2 x i8> %v, ptr %q ret void } + +define void @v8bf16(ptr %p, ptr %q) { +; CHECK-LABEL: v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: ret + %v = load <8 x bfloat>, ptr %p + store <8 x bfloat> %v, ptr %q + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll index 791e6eb5ff300..d80d75d3d5d06 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s define <5 x i8> @load_v5i8(ptr %p) { ; CHECK-LABEL: load_v5i8: @@ -181,3 +181,13 @@ define <16 x i64> @exact_vlen_i64_m8(ptr %p) vscale_range(2,2) { %v = load <16 x i64>, ptr %p ret <16 x i64> %v } + +define <8 x bfloat> @load_v8bf16(ptr %p) { +; CHECK-LABEL: load_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %x = load <8 x bfloat>, ptr %p + ret <8 x bfloat> %x +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll index b747d73ce353e..6317a4977562a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: 
--version 2 -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s define void @store_v5i8(ptr %p, <5 x i8> %v) { ; CHECK-LABEL: store_v5i8: @@ -294,6 +294,16 @@ define void @exact_vlen_i64_m8(ptr %p) vscale_range(2,2) { ret void } +define void @store_v8bf16(ptr %p, <8 x bfloat> %v) { +; CHECK-LABEL: store_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + store <8 x bfloat> %v, ptr %p + ret void +} + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; RV32: {{.*}} ; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll index b0e6a6a56051e..5d9076208988b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfpext-constrained-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) @@ -114,3 +114,78 @@ define <8 x double> @vfpext_v8f32_v8f64(<8 x float> %va) strictfp { %evec = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float> %va, metadata !"fpexcept.strict") ret <8 x double> %evec } + +declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2bf16(<2 x bfloat>, metadata) +define <2 x float> @vfpext_v2bf16_v2f32(<2 x bfloat> %va) strictfp { +; CHECK-LABEL: vfpext_v2bf16_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2bf16(<2 x bfloat> %va, metadata !"fpexcept.strict") + ret <2 x float> %evec +} + +declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2bf16(<2 x bfloat>, metadata) +define <2 x double> @vfpext_v2bf16_v2f64(<2 x bfloat> %va) strictfp { +; CHECK-LABEL: vfpext_v2bf16_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v9 +; CHECK-NEXT: ret + %evec = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2bf16(<2 x bfloat> %va, metadata !"fpexcept.strict") + ret <2 x double> %evec +} + +declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4bf16(<4 x bfloat>, metadata) +define <4 x float> @vfpext_v4bf16_v4f32(<4 x bfloat> %va) strictfp { +; 
CHECK-LABEL: vfpext_v4bf16_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4bf16(<4 x bfloat> %va, metadata !"fpexcept.strict") + ret <4 x float> %evec +} + +declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4bf16(<4 x bfloat>, metadata) +define <4 x double> @vfpext_v4bf16_v4f64(<4 x bfloat> %va) strictfp { +; CHECK-LABEL: vfpext_v4bf16_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v10 +; CHECK-NEXT: ret + %evec = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4bf16(<4 x bfloat> %va, metadata !"fpexcept.strict") + ret <4 x double> %evec +} + +declare <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8bf16(<8 x bfloat>, metadata) +define <8 x float> @vfpext_v8bf16_v8f32(<8 x bfloat> %va) strictfp { +; CHECK-LABEL: vfpext_v8bf16_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %evec = call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8bf16(<8 x bfloat> %va, metadata !"fpexcept.strict") + ret <8 x float> %evec +} + +declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8bf16(<8 x bfloat>, metadata) +define <8 x double> @vfpext_v8bf16_v8f64(<8 x bfloat> %va) strictfp { +; CHECK-LABEL: vfpext_v8bf16_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v12 +; CHECK-NEXT: ret + %evec = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8bf16(<8 x bfloat> %va, metadata !"fpexcept.strict") + ret <8 x double> %evec +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll index fd53113741de0..5781223a5326f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptrunc-constrained-sdnode.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) @@ -118,3 +118,78 @@ define <8 x half> @vfptrunc_v8f32_v8f16(<8 x float> %va) strictfp { %evec = call 
<8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(<8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x half> %evec } + +declare <2 x bfloat> @llvm.experimental.constrained.fptrunc.v2bf16.v2f64(<2 x double>, metadata, metadata) +define <2 x bfloat> @vfptrunc_v2f64_v2bf16(<2 x double> %va) strictfp { +; CHECK-LABEL: vfptrunc_v2f64_v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %evec = call <2 x bfloat> @llvm.experimental.constrained.fptrunc.v2bf16.v2f64(<2 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <2 x bfloat> %evec +} + +declare <2 x bfloat> @llvm.experimental.constrained.fptrunc.v2bf16.v2f32(<2 x float>, metadata, metadata) +define <2 x bfloat> @vfptrunc_v2f32_v2bf16(<2 x float> %va) strictfp { +; CHECK-LABEL: vfptrunc_v2f32_v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = call <2 x bfloat> @llvm.experimental.constrained.fptrunc.v2bf16.v2f32(<2 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <2 x bfloat> %evec +} + +declare <4 x bfloat> @llvm.experimental.constrained.fptrunc.v4bf16.v4f64(<4 x double>, metadata, metadata) +define <4 x bfloat> @vfptrunc_v4f64_v4bf16(<4 x double> %va) strictfp { +; CHECK-LABEL: vfptrunc_v4f64_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %evec = call <4 x bfloat> @llvm.experimental.constrained.fptrunc.v4bf16.v4f64(<4 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <4 x bfloat> %evec +} + +declare <4 x bfloat> @llvm.experimental.constrained.fptrunc.v4bf16.v4f32(<4 x float>, metadata, metadata) +define <4 x bfloat> @vfptrunc_v4f32_v4bf16(<4 x float> %va) strictfp { +; CHECK-LABEL: vfptrunc_v4f32_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = call <4 x bfloat> @llvm.experimental.constrained.fptrunc.v4bf16.v4f32(<4 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <4 x bfloat> %evec +} + +declare <8 x bfloat> @llvm.experimental.constrained.fptrunc.v8bf16.v8f64(<8 x double>, metadata, metadata) +define <8 x bfloat> @vfptrunc_v8f64_v8bf16(<8 x double> %va) strictfp { +; CHECK-LABEL: vfptrunc_v8f64_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %evec = call <8 x bfloat> @llvm.experimental.constrained.fptrunc.v8bf16.v8f64(<8 x double> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <8 x bfloat> %evec +} + +declare <8 x bfloat> @llvm.experimental.constrained.fptrunc.v8bf16.v8f32(<8 x float>, metadata, metadata) +define <8 x bfloat> @vfptrunc_v8f32_v8bf16(<8 x float> %va) strictfp { +; CHECK-LABEL: vfptrunc_v8f32_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %evec = call <8 x bfloat> 
@llvm.experimental.constrained.fptrunc.v8bf16.v8f32(<8 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <8 x bfloat> %evec +} diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll index b15896580d425..0cd4f423a9df6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s define @insert_nxv8i32_nxv4i32_0( %vec, %subvec) { ; CHECK-LABEL: insert_nxv8i32_nxv4i32_0: @@ -531,6 +531,65 @@ define @insert_insert_combine2( %subvec) { ret %outer } +define @insert_nxv32bf16_nxv2bf16_0( %vec, %subvec) { +; CHECK-LABEL: insert_nxv32bf16_nxv2bf16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vector.insert.nxv2bf16.nxv32bf16( %vec, %subvec, i64 0) + ret %v +} + +define @insert_nxv32bf16_nxv2bf16_2( %vec, %subvec) { +; CHECK-LABEL: insert_nxv32bf16_nxv2bf16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a1, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %v = call @llvm.vector.insert.nxv2bf16.nxv32bf16( %vec, %subvec, i64 2) + ret %v +} + +define @insert_nxv32bf16_nxv2bf16_26( %vec, %subvec) { +; CHECK-LABEL: insert_nxv32bf16_nxv2bf16_26: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a1, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v14, v16, a0 +; CHECK-NEXT: ret + %v = call @llvm.vector.insert.nxv2bf16.nxv32bf16( %vec, %subvec, i64 26) + ret %v +} + +define @insert_nxv32bf16_undef_nxv1bf16_0( %subvec) { +; CHECK-LABEL: insert_nxv32bf16_undef_nxv1bf16_0: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %v = call @llvm.vector.insert.nxv1bf16.nxv32bf16( undef, %subvec, i64 0) + ret %v +} + +define @insert_nxv32bf16_undef_nxv1bf16_26( %subvec) { +; CHECK-LABEL: insert_nxv32bf16_undef_nxv1bf16_26: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a1, a0, 3 +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v14, v8, a0 +; CHECK-NEXT: ret + %v = call @llvm.vector.insert.nxv1bf16.nxv32bf16( undef, %subvec, i64 26) + ret %v +} attributes #0 = { vscale_range(2,1024) } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll index 5de309757c6dc..8b49b720e851f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-constrained-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin 
-target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.experimental.constrained.fpext.nxv1f32.nxv1f16(, metadata) @@ -151,3 +151,103 @@ define @vfpext_nxv8f32_nxv8f64( %va) s %evec = call @llvm.experimental.constrained.fpext.nxv8f64.nxv8f32( %va, metadata !"fpexcept.strict") ret %evec } + +declare @llvm.experimental.constrained.fpext.nxv1f32.nxv1bf16(, metadata) +define @vfpext_nxv1bf16_nxv1f32( %va) strictfp { +; CHECK-LABEL: vfpext_nxv1bf16_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fpext.nxv1f32.nxv1bf16( %va, metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fpext.nxv1f64.nxv1bf16(, metadata) +define @vfpext_nxv1bf16_nxv1f64( %va) strictfp { +; CHECK-LABEL: vfpext_nxv1bf16_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v9 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fpext.nxv1f64.nxv1bf16( %va, metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fpext.nxv2f32.nxv2bf16(, metadata) +define @vfpext_nxv2bf16_nxv2f32( %va) strictfp { +; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fpext.nxv2f32.nxv2bf16( %va, metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fpext.nxv2f64.nxv2bf16(, metadata) +define @vfpext_nxv2bf16_nxv2f64( %va) strictfp { +; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v10 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fpext.nxv2f64.nxv2bf16( %va, metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fpext.nxv4f32.nxv4bf16(, metadata) +define @vfpext_nxv4bf16_nxv4f32( %va) strictfp { +; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fpext.nxv4f32.nxv4bf16( %va, metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fpext.nxv4f64.nxv4bf16(, metadata) +define @vfpext_nxv4bf16_nxv4f64( %va) strictfp { +; CHECK-LABEL: vfpext_nxv4bf16_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v12 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fpext.nxv4f64.nxv4bf16( %va, metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fpext.nxv8f32.nxv8bf16(, metadata) +define @vfpext_nxv8bf16_nxv8f32( %va) strictfp { +; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fpext.nxv8f32.nxv8bf16( %va, metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fpext.nxv8f64.nxv8bf16(, metadata) +define @vfpext_nxv8bf16_nxv8f64( %va) strictfp { +; CHECK-LABEL: vfpext_nxv8bf16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v16 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fpext.nxv8f64.nxv8bf16( %va, metadata !"fpexcept.strict") + ret %evec +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll index d805a103aafd3..b002b8e765668 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfpext_nxv1f16_nxv1f32( %va) { @@ -167,3 +167,115 @@ define @vfpext_nxv8f32_nxv8f64( %va) { %evec = fpext %va to ret %evec } + +define @vfpext_nxv1bf16_nxv1f32( %va) { +; +; CHECK-LABEL: vfpext_nxv1bf16_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} + +define @vfpext_nxv1bf16_nxv1f64( %va) { +; +; CHECK-LABEL: vfpext_nxv1bf16_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v9 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} + +define @vfpext_nxv2bf16_nxv2f32( %va) { +; +; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} + +define @vfpext_nxv2bf16_nxv2f64( %va) { +; +; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v10 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} + +define @vfpext_nxv4bf16_nxv4f32( %va) { +; +; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: 
vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} + +define @vfpext_nxv4bf16_nxv4f64( %va) { +; +; CHECK-LABEL: vfpext_nxv4bf16_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v12 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} + +define @vfpext_nxv8bf16_nxv8f32( %va) { +; +; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} + +define @vfpext_nxv8bf16_nxv8f64( %va) { +; +; CHECK-LABEL: vfpext_nxv8bf16_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v16 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} + +define @vfpext_nxv16bf16_nxv16f32( %va) { +; +; CHECK-LABEL: vfpext_nxv16bf16_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %evec = fpext %va to + ret %evec +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll index 5cfa98916a2de..aaaf4ad460711 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fpext.nxv2f32.nxv2f16(, , i32) @@ -120,3 +120,54 @@ define @vfpext_nxv32f16_nxv32f32( %a, %v = call @llvm.vp.fpext.nxv32f32.nxv32f16( %a, %m, i32 %vl) ret %v } + +declare @llvm.vp.fpext.nxv2f32.nxv2bf16(, , i32) + +define @vfpext_nxv2bf16_nxv2f32( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.fpext.nxv2f32.nxv2bf16( %a, %m, i32 %vl) + ret %v +} + +define @vfpext_nxv2bf16_nxv2f32_unmasked( %a, i32 zeroext %vl) { +; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.fpext.nxv2f32.nxv2bf16( %a, splat (i1 true), i32 %vl) + ret %v +} + +declare @llvm.vp.fpext.nxv2f64.nxv2bf16(, , i32) + +define 
@vfpext_nxv2bf16_nxv2f64( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.fpext.nxv2f64.nxv2bf16( %a, %m, i32 %vl) + ret %v +} + +define @vfpext_nxv2bf16_nxv2f64_unmasked( %a, i32 zeroext %vl) { +; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.fpext.nxv2f64.nxv2bf16( %a, splat (i1 true), i32 %vl) + ret %v +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll index 4404a275858f2..4341f45dd6c73 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-constrained-sdnode.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.experimental.constrained.fptrunc.nxv1f32.nxv1f64(, metadata, metadata) @@ -155,3 +155,103 @@ define @vfptrunc_nxv8f32_nxv8f16( %va) s %evec = call @llvm.experimental.constrained.fptrunc.nxv8f16.nxv8f32( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %evec } + +declare @llvm.experimental.constrained.fptrunc.nxv1bf16.nxv1f64(, metadata, metadata) +define @vfptrunc_nxv1f64_nxv1bf16( %va) strictfp { +; CHECK-LABEL: vfptrunc_nxv1f64_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fptrunc.nxv1bf16.nxv1f64( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fptrunc.nxv1bf16.nxv1f32(, metadata, metadata) +define @vfptrunc_nxv1f32_nxv1bf16( %va) strictfp { +; CHECK-LABEL: vfptrunc_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fptrunc.nxv1bf16.nxv1f32( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fptrunc.nxv2bf16.nxv2f64(, metadata, metadata) +define 
@vfptrunc_nxv2f64_nxv2bf16( %va) strictfp { +; CHECK-LABEL: vfptrunc_nxv2f64_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fptrunc.nxv2bf16.nxv2f64( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fptrunc.nxv2bf16.nxv2f32(, metadata, metadata) +define @vfptrunc_nxv2f32_nxv2bf16( %va) strictfp { +; CHECK-LABEL: vfptrunc_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fptrunc.nxv2bf16.nxv2f32( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fptrunc.nxv4bf16.nxv4f64(, metadata, metadata) +define @vfptrunc_nxv4f64_nxv4bf16( %va) strictfp { +; CHECK-LABEL: vfptrunc_nxv4f64_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fptrunc.nxv4bf16.nxv4f64( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fptrunc.nxv4bf16.nxv4f32(, metadata, metadata) +define @vfptrunc_nxv4f32_nxv4bf16( %va) strictfp { +; CHECK-LABEL: vfptrunc_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fptrunc.nxv4bf16.nxv4f32( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fptrunc.nxv8bf16.nxv8f64(, metadata, metadata) +define @vfptrunc_nxv8f64_nxv8bf16( %va) strictfp { +; CHECK-LABEL: vfptrunc_nxv8f64_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v16, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fptrunc.nxv8bf16.nxv8f64( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret %evec +} + +declare @llvm.experimental.constrained.fptrunc.nxv8bf16.nxv8f32(, metadata, metadata) +define @vfptrunc_nxv8f32_nxv8bf16( %va) strictfp { +; CHECK-LABEL: vfptrunc_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %evec = call @llvm.experimental.constrained.fptrunc.nxv8bf16.nxv8f32( %va, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret %evec +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll index d715b46e95feb..9148a79cb7407 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: 
-verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfptrunc_nxv1f32_nxv1f16( %va) { @@ -167,3 +167,76 @@ define @vfptrunc_nxv8f64_nxv8f32( %va) %evec = fptrunc %va to ret %evec } + +define @vfptrunc_nxv1f32_nxv1bf16( %va) { +; +; CHECK-LABEL: vfptrunc_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = fptrunc %va to + ret %evec +} + +define @vfptrunc_nxv2f32_nxv2bf16( %va) { +; +; CHECK-LABEL: vfptrunc_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %evec = fptrunc %va to + ret %evec +} + +define @vfptrunc_nxv4f32_nxv4bf16( %va) { +; +; CHECK-LABEL: vfptrunc_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %evec = fptrunc %va to + ret %evec +} + +define @vfptrunc_nxv8f32_nxv8bf16( %va) { +; +; CHECK-LABEL: vfptrunc_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %evec = fptrunc %va to + ret %evec +} + +define @vfptrunc_nxv16f32_nxv16bf16( %va) { +; +; CHECK-LABEL: vfptrunc_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v16, v8 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %evec = fptrunc %va to + ret %evec +} + +define @vfptrunc_nxv1f64_nxv1bf16( %va) { +; +; CHECK-LABEL: vfptrunc_nxv1f64_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: ret + %evec = fptrunc %va to + ret %evec +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index dd122f1f25110..0c3abe37af27a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 
-mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fptrunc.nxv2f16.nxv2f32(, , i32) @@ -218,3 +218,53 @@ define @vfptrunc_nxv32f32_nxv32f64( %v = call @llvm.vp.fptrunc.nxv32f64.nxv32f32( %a, %m, i32 %vl) ret %v } + +declare @llvm.vp.fptrunc.nxv2bf16.nxv2f32(, , i32) + +define @vfptrunc_nxv2bf16_nxv2f32( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.fptrunc.nxv2bf16.nxv2f32( %a, %m, i32 %vl) + ret %v +} + +define @vfptrunc_nxv2bf16_nxv2f32_unmasked( %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.fptrunc.nxv2bf16.nxv2f32( %a, splat (i1 true), i32 %vl) + ret %v +} + +declare @llvm.vp.fptrunc.nxv2bf16.nxv2f64(, , i32) + +define @vfptrunc_nxv2bf16_nxv2f64( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.fptrunc.nxv2bf16.nxv2f64( %a, %m, i32 %vl) + ret %v +} + +define @vfptrunc_nxv2bf16_nxv2f64_unmasked( %a, i32 zeroext %vl) { +; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.fptrunc.nxv2bf16.nxv2f64( %a, splat (i1 true), i32 %vl) + ret %v +}