From a8e525d83671087ab3c17f62ff9d635b65e73e5a Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Fri, 27 Sep 2024 16:52:07 +0800
Subject: [PATCH 1/3] [RISCV] Lower insert_vector_elt on zvfhmin/zvfbfmin

This is the dual of #110144, but it doesn't handle the case where the
scalar type is illegal, i.e. no zfhmin/zfbfmin. Softening doesn't look
to be implemented yet for insert_vector_elt operands, and that
configuration crashes during type legalization, so I've left it out of
the tests.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  22 +-
 llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll | 584 ++++++++++++++++----
 2 files changed, 497 insertions(+), 109 deletions(-)
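For reference, the new path is roughly equivalent to the following
hand-written IR sketch (hypothetical, not autogenerated output; the real
lowering stays in the DAG and any-extends the scalar to XLEN via
RISCVISD::FMV_X_ANYEXTH rather than using the plain scalar bitcast shown
here):

  ; Hypothetical sketch only; the name and the plain bitcasts are
  ; illustrative, not the actual nodes the lowering emits.
  define <vscale x 4 x bfloat> @sketch(<vscale x 4 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
    %iv = bitcast <vscale x 4 x bfloat> %v to <vscale x 4 x i16>
    %ie = bitcast bfloat %elt to i16
    %ii = insertelement <vscale x 4 x i16> %iv, i16 %ie, i32 %idx
    %r = bitcast <vscale x 4 x i16> %ii to <vscale x 4 x bfloat>
    ret <vscale x 4 x bfloat> %r
  }

Moving the scalar into a GPR with fmv.x.h lets the insert use vmv.s.x,
which is available under zvfhmin/zvfbfmin, instead of vfmv.s.f, which is
not.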
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b70884d49642c..41c55f69e3388 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1076,9 +1076,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::VP_SINT_TO_FP,
                             ISD::VP_UINT_TO_FP},
                            VT, Custom);
-        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
-                            ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_DEINTERLEAVE,
-                            ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE},
+        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+                            ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+                            ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+                            ISD::VECTOR_REVERSE},
                            VT, Custom);
         MVT EltVT = VT.getVectorElementType();
         if (isTypeLegal(EltVT))
@@ -8756,8 +8757,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
   SDLoc DL(Op);
   MVT VecVT = Op.getSimpleValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
   SDValue Vec = Op.getOperand(0);
   SDValue Val = Op.getOperand(1);
+  MVT ValVT = Val.getSimpleValueType();
   SDValue Idx = Op.getOperand(2);
 
   if (VecVT.getVectorElementType() == MVT::i1) {
@@ -8769,6 +8772,17 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
   }
 
+  if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
+      ValVT == MVT::bf16) {
+    // If we don't have vfmv.s.f for f16/bf16, insert into fmv.x.h first
+    MVT IntVT = VecVT.changeTypeToInteger();
+    // SDValue IntVal = DAG.getBitcast(IntVT.getVectorElementType(), Val);
+    SDValue IntInsert = DAG.getNode(
+        ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
+        DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
+    return DAG.getBitcast(VecVT, IntInsert);
+  }
+
   MVT ContainerVT = VecVT;
   // If the operand is a fixed-length vector, convert to a scalable one.
   if (VecVT.isFixedLengthVector()) {
@@ -8812,8 +8826,6 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                       AlignedIdx);
   }
 
-  MVT XLenVT = Subtarget.getXLenVT();
-
   bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
   // Even i64-element vectors on RV32 can be lowered without scalar
   // legalization if the most-significant 32 bits of the value are not affected
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
index 8cfa88e6f9569..607e0085c3f46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
@@ -1,209 +1,585 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zfbfmin,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
-define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv1f16_0:
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_0(<vscale x 1 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv1bf16_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 1 x half> %v, half %elt, i32 0
-  ret <vscale x 1 x half> %r
+  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 1 x bfloat> %r
 }
 
-define <vscale x 1 x half> @insertelt_nxv1f16_imm(<vscale x 1 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv1f16_imm:
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv1bf16_imm:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vmv.s.x v9, a0
 ; CHECK-NEXT:    vslideup.vi v8, v9, 3
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 1 x half> %v, half %elt, i32 3
-  ret <vscale x 1 x half> %r
+  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 1 x bfloat> %r
 }
 
-define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv1f16_idx:
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv1bf16_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.x.h a2, fa0
+; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 1 x half> %v, half %elt, i32 %idx
-  ret <vscale x 1 x half> %r
+  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 1 x bfloat> %r
 }
 
-define <vscale x 2 x half> @insertelt_nxv2f16_0(<vscale x 2 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv2f16_0:
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_0(<vscale x 2 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv2bf16_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 2 x half> %v, half %elt, i32 0
-  ret <vscale x 2 x half> %r
+  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 2 x bfloat> %r
 }
 
-define <vscale x 2 x half> @insertelt_nxv2f16_imm(<vscale x 2 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv2f16_imm:
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv2bf16_imm:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vmv.s.x v9, a0
 ; CHECK-NEXT:    vslideup.vi v8, v9, 3
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 2 x half> %v, half %elt, i32 3
-  ret <vscale x 2 x half> %r
+  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 2 x bfloat> %r
 }
 
-define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv2f16_idx:
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv2bf16_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.x.h a2, fa0
+; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 2 x half> %v, half %elt, i32 %idx
-  ret <vscale x 2 x half> %r
+  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 2 x bfloat> %r
 }
 
-define <vscale x 4 x half> @insertelt_nxv4f16_0(<vscale x 4 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv4f16_0:
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_0(<vscale x 4 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv4bf16_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 4 x half> %v, half %elt, i32 0
-  ret <vscale x 4 x half> %r
+  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 4 x bfloat> %r
 }
 
-define <vscale x 4 x half> @insertelt_nxv4f16_imm(<vscale x 4 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv4f16_imm:
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv4bf16_imm:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vmv.s.x v9, a0
 ; CHECK-NEXT:    vslideup.vi v8, v9, 3
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 4 x half> %v, half %elt, i32 3
-  ret <vscale x 4 x half> %r
+  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 4 x bfloat> %r
 }
 
-define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv4f16_idx:
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv4bf16_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.x.h a2, fa0
+; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v9, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 4 x half> %v, half %elt, i32 %idx
-  ret <vscale x 4 x half> %r
+  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 4 x bfloat> %r
 }
 
-define <vscale x 8 x half> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv8f16_0:
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_0(<vscale x 8 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv8bf16_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 8 x half> %v, half %elt, i32 0
-  ret <vscale x 8 x half> %r
+  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 8 x bfloat> %r
 }
 
-define <vscale x 8 x half> @insertelt_nxv8f16_imm(<vscale x 8 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv8f16_imm:
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv8bf16_imm:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    vmv.s.x v10, a0
 ; CHECK-NEXT:    vslideup.vi v8, v10, 3
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 8 x half> %v, half %elt, i32 3
-  ret <vscale x 8 x half> %r
+  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 8 x bfloat> %r
 }
 
-define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv8f16_idx:
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv8bf16_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a1
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v10, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 8 x half> %v, half %elt, i32 %idx
-  ret <vscale x 8 x half> %r
+  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 8 x bfloat> %r
 }
 
-define <vscale x 16 x half> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv16f16_0:
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_0(<vscale x 16 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv16bf16_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 16 x half> %v, half %elt, i32 0
-  ret <vscale x 16 x half> %r
+  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 16 x bfloat> %r
 }
 
-define <vscale x 16 x half> @insertelt_nxv16f16_imm(<vscale x 16 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv16f16_imm:
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv16bf16_imm:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    vmv.s.x v12, a0
 ; CHECK-NEXT:    vslideup.vi v8, v12, 3
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 16 x half> %v, half %elt, i32 3
-  ret <vscale x 16 x half> %r
+  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 16 x bfloat> %r
 }
 
-define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv16f16_idx:
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv16bf16_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, a1
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v12, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 16 x half> %v, half %elt, i32 %idx
-  ret <vscale x 16 x half> %r
+  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 16 x bfloat> %r
 }
 
-define <vscale x 32 x half> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv32f16_0:
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_0(<vscale x 32 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv32bf16_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 32 x half> %v, half %elt, i32 0
-  ret <vscale x 32 x half> %r
+  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 32 x bfloat> %r
 }
 
-define <vscale x 32 x half> @insertelt_nxv32f16_imm(<vscale x 32 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv32f16_imm:
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv32bf16_imm:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
 ; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    vmv.s.x v16, a0
 ; CHECK-NEXT:    vslideup.vi v8, v16, 3
 ; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 32 x half> %v, half %elt, i32 3
-  ret <vscale x 32 x half> %r
+  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 32 x bfloat> %r
 }
 
-define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv32f16_idx:
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv32bf16_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v16, a1
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v16, a0
 ; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 32 x bfloat> %r
+}
+
+define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv1f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v8, fa0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 1 x half> %v, half %elt, i32 0
+  ret <vscale x 1 x half> %r
+}
+
+define <vscale x 1 x half> @insertelt_nxv1f16_imm(<vscale x 1 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv1f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vslideup.vi v8, v9, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 1 x half> %v, half %elt, i32 3
+  ret <vscale x 1 x half> %r
+}
+
+define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv1f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v9, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
+; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 1 x half> %v, half %elt, i32 %idx
+  ret <vscale x 1 x half> %r
+}
+
+define <vscale x 2 x half> @insertelt_nxv2f16_0(<vscale x 2 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv2f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v8, fa0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 2 x half> %v, half %elt, i32 0
+  ret <vscale x 2 x half> %r
+}
+
+define <vscale x 2 x half> @insertelt_nxv2f16_imm(<vscale x 2 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv2f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vslideup.vi v8, v9, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 2 x half> %v, half %elt, i32 3
+  ret <vscale x 2 x half> %r
+}
+
+define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv2f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v9, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
+; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 2 x half> %v, half %elt, i32 %idx
+  ret <vscale x 2 x half> %r
+}
+
+define <vscale x 4 x half> @insertelt_nxv4f16_0(<vscale x 4 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv4f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v8, fa0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 4 x half> %v, half %elt, i32 0
+  ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @insertelt_nxv4f16_imm(<vscale x 4 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv4f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vslideup.vi v8, v9, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 4 x half> %v, half %elt, i32 3
+  ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv4f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v9, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
+; ZVFHMIN-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 4 x half> %v, half %elt, i32 %idx
+  ret <vscale x 4 x half> %r
+}
+
+define <vscale x 8 x half> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv8f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v8, fa0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 8 x half> %v, half %elt, i32 0
+  ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @insertelt_nxv8f16_imm(<vscale x 8 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv8f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v10, fa0
+; ZVFH-NEXT:    vslideup.vi v8, v10, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v10, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v10, 3
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 8 x half> %v, half %elt, i32 3
+  ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv8f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v10, fa0
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v10, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v10, a1
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v10, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 8 x half> %v, half %elt, i32 %idx
+  ret <vscale x 8 x half> %r
+}
+
+define <vscale x 16 x half> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv16f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v8, fa0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 16 x half> %v, half %elt, i32 0
+  ret <vscale x 16 x half> %r
+}
+
+define <vscale x 16 x half> @insertelt_nxv16f16_imm(<vscale x 16 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv16f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v12, fa0
+; ZVFH-NEXT:    vslideup.vi v8, v12, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v12, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v12, 3
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 16 x half> %v, half %elt, i32 3
+  ret <vscale x 16 x half> %r
+}
+
+define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv16f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v12, fa0
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v12, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v12, a1
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v12, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 16 x half> %v, half %elt, i32 %idx
+  ret <vscale x 16 x half> %r
+}
+
+define <vscale x 32 x half> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv32f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v8, fa0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 32 x half> %v, half %elt, i32 0
+  ret <vscale x 32 x half> %r
+}
+
+define <vscale x 32 x half> @insertelt_nxv32f16_imm(<vscale x 32 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv32f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT:    vfmv.s.f v16, fa0
+; ZVFH-NEXT:    vslideup.vi v8, v16, 3
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v16, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v16, 3
+; ZVFHMIN-NEXT:    ret
+  %r = insertelement <vscale x 32 x half> %v, half %elt, i32 3
+  ret <vscale x 32 x half> %r
+}
+
+define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv32f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v16, fa0
+; ZVFH-NEXT:    addi a1, a0, 1
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; ZVFH-NEXT:    vslideup.vx v8, v16, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; ZVFHMIN-NEXT:    addi a1, a0, 1
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; ZVFHMIN-NEXT:    vslideup.vx v8, v16, a0
+; ZVFHMIN-NEXT:    ret
   %r = insertelement <vscale x 32 x half> %v, half %elt, i32 %idx
   ret <vscale x 32 x half> %r
 }

From 1112dd87019b3351d021f021fd63a3de029867b4 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Mon, 30 Sep 2024 12:00:15 +0800
Subject: [PATCH 2/3] Remove extraneous comment, adjust other comment

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 41c55f69e3388..4f77bba296898 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8774,9 +8774,8 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
 
   if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
       ValVT == MVT::bf16) {
-    // If we don't have vfmv.s.f for f16/bf16, insert into fmv.x.h first
+    // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
     MVT IntVT = VecVT.changeTypeToInteger();
-    // SDValue IntVal = DAG.getBitcast(IntVT.getVectorElementType(), Val);
     SDValue IntInsert = DAG.getNode(
         ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
         DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);

From b4716f505901b402aa36a69200aa1aa548f2b1dc Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 2 Oct 2024 14:34:01 +0800
Subject: [PATCH 3/3] Move bfloat below half to tame diff

---
 llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll | 444 ++++++++++----------
 1 file changed, 222 insertions(+), 222 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
index 607e0085c3f46..9b7a2600bcf0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
@@ -8,228 +8,6 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
-define <vscale x 1 x bfloat> @insertelt_nxv1bf16_0(<vscale x 1 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv1bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 0
-  ret <vscale x 1 x bfloat> %r
-}
-
-define <vscale x 1 x bfloat> @insertelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv1bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vslideup.vi v8, v9, 3
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 3
-  ret <vscale x 1 x bfloat> %r
-}
-
-define <vscale x 1 x bfloat> @insertelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv1bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    fmv.x.h a2, fa0
-; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a2
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v9, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 %idx
-  ret <vscale x 1 x bfloat> %r
-}
-
-define <vscale x 2 x bfloat> @insertelt_nxv2bf16_0(<vscale x 2 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv2bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 0
-  ret <vscale x 2 x bfloat> %r
-}
-
-define <vscale x 2 x bfloat> @insertelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv2bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vslideup.vi v8, v9, 3
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 3
-  ret <vscale x 2 x bfloat> %r
-}
-
-define <vscale x 2 x bfloat> @insertelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv2bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    fmv.x.h a2, fa0
-; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a2
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v9, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 %idx
-  ret <vscale x 2 x bfloat> %r
-}
-
-define <vscale x 4 x bfloat> @insertelt_nxv4bf16_0(<vscale x 4 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv4bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 0
-  ret <vscale x 4 x bfloat> %r
-}
-
-define <vscale x 4 x bfloat> @insertelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv4bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v9, a0
-; CHECK-NEXT:    vslideup.vi v8, v9, 3
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 3
-  ret <vscale x 4 x bfloat> %r
-}
-
-define <vscale x 4 x bfloat> @insertelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv4bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    fmv.x.h a2, fa0
-; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a2
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v9, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 %idx
-  ret <vscale x 4 x bfloat> %r
-}
-
-define <vscale x 8 x bfloat> @insertelt_nxv8bf16_0(<vscale x 8 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv8bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 0
-  ret <vscale x 8 x bfloat> %r
-}
-
-define <vscale x 8 x bfloat> @insertelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv8bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v10, a0
-; CHECK-NEXT:    vslideup.vi v8, v10, 3
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 3
-  ret <vscale x 8 x bfloat> %r
-}
-
-define <vscale x 8 x bfloat> @insertelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv8bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v10, a1
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v10, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 %idx
-  ret <vscale x 8 x bfloat> %r
-}
-
-define <vscale x 16 x bfloat> @insertelt_nxv16bf16_0(<vscale x 16 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv16bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 0
-  ret <vscale x 16 x bfloat> %r
-}
-
-define <vscale x 16 x bfloat> @insertelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv16bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v12, a0
-; CHECK-NEXT:    vslideup.vi v8, v12, 3
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 3
-  ret <vscale x 16 x bfloat> %r
-}
-
-define <vscale x 16 x bfloat> @insertelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv16bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v12, a1
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v12, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 %idx
-  ret <vscale x 16 x bfloat> %r
-}
-
-define <vscale x 32 x bfloat> @insertelt_nxv32bf16_0(<vscale x 32 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv32bf16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 0
-  ret <vscale x 32 x bfloat> %r
-}
-
-define <vscale x 32 x bfloat> @insertelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv32bf16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.s.x v16, a0
-; CHECK-NEXT:    vslideup.vi v8, v16, 3
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 3
-  ret <vscale x 32 x bfloat> %r
-}
-
-define <vscale x 32 x bfloat> @insertelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv32bf16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v16, a1
-; CHECK-NEXT:    addi a1, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
-; CHECK-NEXT:    vslideup.vx v8, v16, a0
-; CHECK-NEXT:    ret
-  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 %idx
-  ret <vscale x 32 x bfloat> %r
-}
-
 define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %elt) {
 ; ZVFH-LABEL: insertelt_nxv1f16_0:
 ; ZVFH:       # %bb.0:
@@ -584,6 +362,228 @@ define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 zeroext %idx) {
   ret <vscale x 32 x half> %r
 }
 
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_0(<vscale x 1 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv1bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv1bf16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vslideup.vi v8, v9, 3
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv1bf16_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    fmv.x.h a2, fa0
+; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_0(<vscale x 2 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv2bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv2bf16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vslideup.vi v8, v9, 3
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv2bf16_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    fmv.x.h a2, fa0
+; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_0(<vscale x 4 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv4bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv4bf16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vslideup.vi v8, v9, 3
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv4bf16_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    fmv.x.h a2, fa0
+; CHECK-NEXT:    vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_0(<vscale x 8 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv8bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv8bf16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vslideup.vi v8, v10, 3
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv8bf16_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a1
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_0(<vscale x 16 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv16bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv16bf16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v12, a0
+; CHECK-NEXT:    vslideup.vi v8, v12, 3
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv16bf16_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, a1
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v12, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_0(<vscale x 32 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv32bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 0
+  ret <vscale x 32 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv32bf16_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v16, a0
+; CHECK-NEXT:    vslideup.vi v8, v16, 3
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 3
+  ret <vscale x 32 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv32bf16_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v16, a1
+; CHECK-NEXT:    addi a1, a0, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v16, a0
+; CHECK-NEXT:    ret
+  %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 %idx
+  ret <vscale x 32 x bfloat> %r
+}
+
 define <vscale x 1 x float> @insertelt_nxv1f32_0(<vscale x 1 x float> %v, float %elt) {
 ; CHECK-LABEL: insertelt_nxv1f32_0:
 ; CHECK:       # %bb.0: