diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0a22f06271984..e7ae989fcc349 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4441,7 +4441,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { // libcalls on the undef elements. EVT VT = N->getValueType(0); EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && + if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) && TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); return true; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index ea7829f2d6c65..297afd9fc96f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1192,259 +1192,18 @@ define void @sqrt_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; RV32-ZVFHMIN-LABEL: sqrt_v6f16: -; RV32-ZVFHMIN: # %bb.0: -; RV32-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV32-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV32-ZVFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill -; RV32-ZVFHMIN-NEXT: .cfi_offset ra, -4 -; RV32-ZVFHMIN-NEXT: .cfi_offset s0, -8 -; RV32-ZVFHMIN-NEXT: .cfi_offset s1, -12 -; RV32-ZVFHMIN-NEXT: .cfi_offset fs0, -24 -; RV32-ZVFHMIN-NEXT: csrr a1, vlenb -; RV32-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV32-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV32-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV32-ZVFHMIN-NEXT: mv s0, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fmv.s fs0, fa0 -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fsqrt.s fa5, fs0 -; RV32-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV32-ZVFHMIN-NEXT: fmv.s fa0, fa5 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; 
RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vse16.v v8, (s0) -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV32-ZVFHMIN-NEXT: add sp, sp, a0 -; RV32-ZVFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload -; RV32-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV32-ZVFHMIN-NEXT: ret -; -; RV64-ZVFHMIN-LABEL: sqrt_v6f16: -; RV64-ZVFHMIN: # %bb.0: -; RV64-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV64-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV64-ZVFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s0, 32(sp) # 
8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: .cfi_offset ra, -8 -; RV64-ZVFHMIN-NEXT: .cfi_offset s0, -16 -; RV64-ZVFHMIN-NEXT: .cfi_offset s1, -24 -; RV64-ZVFHMIN-NEXT: .cfi_offset fs0, -32 -; RV64-ZVFHMIN-NEXT: csrr a1, vlenb -; RV64-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV64-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV64-ZVFHMIN-NEXT: mv s0, a0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fmv.s fs0, fa0 -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fsqrt.s fa5, fs0 -; RV64-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV64-ZVFHMIN-NEXT: fmv.s fa0, fa5 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; 
RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fsqrt.s fa0, fa0 -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64-ZVFHMIN-NEXT: vse16.v v8, (s0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV64-ZVFHMIN-NEXT: add sp, sp, a0 -; RV64-ZVFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV64-ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: sqrt_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v10 +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -3264,337 +3023,25 @@ define void @trunc_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; RV32-ZVFHMIN-LABEL: trunc_v6f16: -; RV32-ZVFHMIN: # %bb.0: -; RV32-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV32-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV32-ZVFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill -; RV32-ZVFHMIN-NEXT: .cfi_offset ra, -4 -; RV32-ZVFHMIN-NEXT: .cfi_offset s0, -8 -; RV32-ZVFHMIN-NEXT: .cfi_offset s1, -12 -; RV32-ZVFHMIN-NEXT: .cfi_offset fs0, -24 -; RV32-ZVFHMIN-NEXT: csrr a1, vlenb -; RV32-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV32-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV32-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV32-ZVFHMIN-NEXT: mv s0, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV32-ZVFHMIN-NEXT: csrr a0, 
vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: lui a0, 307200 -; RV32-ZVFHMIN-NEXT: fmv.w.x fs0, a0 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB116_2 -; RV32-ZVFHMIN-NEXT: # %bb.1: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB116_2: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: lh a0, 16(a0) # 8-byte Folded Reload -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB116_4 -; RV32-ZVFHMIN-NEXT: # %bb.3: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB116_4: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB116_6 -; RV32-ZVFHMIN-NEXT: # %bb.5: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB116_6: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB116_8 -; RV32-ZVFHMIN-NEXT: # %bb.7: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB116_8: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 
8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB116_10 -; RV32-ZVFHMIN-NEXT: # %bb.9: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB116_10: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB116_12 -; RV32-ZVFHMIN-NEXT: # %bb.11: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB116_12: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vse16.v v8, (s0) -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV32-ZVFHMIN-NEXT: add sp, sp, a0 -; RV32-ZVFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload -; RV32-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV32-ZVFHMIN-NEXT: ret -; -; RV64-ZVFHMIN-LABEL: trunc_v6f16: -; RV64-ZVFHMIN: # %bb.0: -; RV64-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV64-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV64-ZVFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: .cfi_offset ra, -8 -; RV64-ZVFHMIN-NEXT: .cfi_offset s0, -16 -; RV64-ZVFHMIN-NEXT: .cfi_offset s1, -24 -; RV64-ZVFHMIN-NEXT: .cfi_offset fs0, -32 -; RV64-ZVFHMIN-NEXT: csrr a1, vlenb -; RV64-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV64-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV64-ZVFHMIN-NEXT: mv 
s0, a0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: lui a0, 307200 -; RV64-ZVFHMIN-NEXT: fmv.w.x fs0, a0 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB116_2 -; RV64-ZVFHMIN-NEXT: # %bb.1: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB116_2: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: lh a0, 16(a0) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB116_4 -; RV64-ZVFHMIN-NEXT: # %bb.3: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB116_4: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB116_6 -; RV64-ZVFHMIN-NEXT: # %bb.5: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB116_6: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB116_8 -; RV64-ZVFHMIN-NEXT: # %bb.7: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB116_8: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; 
RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB116_10 -; RV64-ZVFHMIN-NEXT: # %bb.9: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB116_10: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB116_12 -; RV64-ZVFHMIN-NEXT: # %bb.11: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rtz -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rtz -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB116_12: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64-ZVFHMIN-NEXT: vse16.v v8, (s0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV64-ZVFHMIN-NEXT: add sp, sp, a0 -; RV64-ZVFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV64-ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: trunc_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 
+; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -3710,337 +3157,27 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; RV32-ZVFHMIN-LABEL: ceil_v6f16: -; RV32-ZVFHMIN: # %bb.0: -; RV32-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV32-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV32-ZVFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill -; RV32-ZVFHMIN-NEXT: .cfi_offset ra, -4 -; RV32-ZVFHMIN-NEXT: .cfi_offset s0, -8 -; RV32-ZVFHMIN-NEXT: .cfi_offset s1, -12 -; RV32-ZVFHMIN-NEXT: .cfi_offset fs0, -24 -; RV32-ZVFHMIN-NEXT: csrr a1, vlenb -; RV32-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV32-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV32-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV32-ZVFHMIN-NEXT: mv s0, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: lui a0, 307200 -; RV32-ZVFHMIN-NEXT: fmv.w.x fs0, a0 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB120_2 -; RV32-ZVFHMIN-NEXT: # %bb.1: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB120_2: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: lh a0, 16(a0) # 8-byte Folded Reload -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB120_4 -; RV32-ZVFHMIN-NEXT: # %bb.3: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB120_4: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB120_6 -; RV32-ZVFHMIN-NEXT: # %bb.5: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB120_6: -; RV32-ZVFHMIN-NEXT: call 
__truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB120_8 -; RV32-ZVFHMIN-NEXT: # %bb.7: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB120_8: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB120_10 -; RV32-ZVFHMIN-NEXT: # %bb.9: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB120_10: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB120_12 -; RV32-ZVFHMIN-NEXT: # %bb.11: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB120_12: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vse16.v v8, (s0) -; 
RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV32-ZVFHMIN-NEXT: add sp, sp, a0 -; RV32-ZVFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload -; RV32-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV32-ZVFHMIN-NEXT: ret -; -; RV64-ZVFHMIN-LABEL: ceil_v6f16: -; RV64-ZVFHMIN: # %bb.0: -; RV64-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV64-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV64-ZVFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: .cfi_offset ra, -8 -; RV64-ZVFHMIN-NEXT: .cfi_offset s0, -16 -; RV64-ZVFHMIN-NEXT: .cfi_offset s1, -24 -; RV64-ZVFHMIN-NEXT: .cfi_offset fs0, -32 -; RV64-ZVFHMIN-NEXT: csrr a1, vlenb -; RV64-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV64-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV64-ZVFHMIN-NEXT: mv s0, a0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: lui a0, 307200 -; RV64-ZVFHMIN-NEXT: fmv.w.x fs0, a0 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB120_2 -; RV64-ZVFHMIN-NEXT: # %bb.1: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB120_2: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: lh a0, 16(a0) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB120_4 -; RV64-ZVFHMIN-NEXT: # %bb.3: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB120_4: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB120_6 -; RV64-ZVFHMIN-NEXT: # %bb.5: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; 
RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB120_6: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB120_8 -; RV64-ZVFHMIN-NEXT: # %bb.7: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB120_8: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB120_10 -; RV64-ZVFHMIN-NEXT: # %bb.9: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB120_10: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV64-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB120_12 -; RV64-ZVFHMIN-NEXT: # %bb.11: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rup -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rup -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB120_12: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV64-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64-ZVFHMIN-NEXT: vse16.v v8, (s0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV64-ZVFHMIN-NEXT: add sp, sp, a0 -; RV64-ZVFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV64-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV64-ZVFHMIN-NEXT: ret +; ZVFHMIN-LABEL: ceil_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a1, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -4160,337 +3297,27 @@ define void @floor_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; RV32-ZVFHMIN-LABEL: floor_v6f16: -; RV32-ZVFHMIN: # %bb.0: -; RV32-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV32-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV32-ZVFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-ZVFHMIN-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill -; RV32-ZVFHMIN-NEXT: .cfi_offset ra, -4 -; RV32-ZVFHMIN-NEXT: .cfi_offset s0, -8 -; RV32-ZVFHMIN-NEXT: .cfi_offset s1, -12 -; RV32-ZVFHMIN-NEXT: .cfi_offset fs0, -24 -; RV32-ZVFHMIN-NEXT: csrr a1, vlenb -; RV32-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV32-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV32-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV32-ZVFHMIN-NEXT: mv s0, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: lui a0, 307200 -; RV32-ZVFHMIN-NEXT: fmv.w.x fs0, a0 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB124_2 -; RV32-ZVFHMIN-NEXT: # %bb.1: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB124_2: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: lh a0, 16(a0) # 8-byte Folded Reload -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, 
fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB124_4 -; RV32-ZVFHMIN-NEXT: # %bb.3: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB124_4: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vmv.v.x v8, a0 -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB124_6 -; RV32-ZVFHMIN-NEXT: # %bb.5: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB124_6: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB124_8 -; RV32-ZVFHMIN-NEXT: # %bb.7: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB124_8: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB124_10 -; RV32-ZVFHMIN-NEXT: # %bb.9: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB124_10: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; 
RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: addi a0, sp, 16 -; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: add a0, sp, a0 -; RV32-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5 -; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV32-ZVFHMIN-NEXT: call __extendhfsf2 -; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV32-ZVFHMIN-NEXT: beqz a0, .LBB124_12 -; RV32-ZVFHMIN-NEXT: # %bb.11: -; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn -; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn -; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV32-ZVFHMIN-NEXT: .LBB124_12: -; RV32-ZVFHMIN-NEXT: call __truncsfhf2 -; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0 -; RV32-ZVFHMIN-NEXT: addi a1, sp, 16 -; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 -; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV32-ZVFHMIN-NEXT: vse16.v v8, (s0) -; RV32-ZVFHMIN-NEXT: csrr a0, vlenb -; RV32-ZVFHMIN-NEXT: slli a0, a0, 1 -; RV32-ZVFHMIN-NEXT: add sp, sp, a0 -; RV32-ZVFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32-ZVFHMIN-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload -; RV32-ZVFHMIN-NEXT: addi sp, sp, 48 -; RV32-ZVFHMIN-NEXT: ret -; -; RV64-ZVFHMIN-LABEL: floor_v6f16: -; RV64-ZVFHMIN: # %bb.0: -; RV64-ZVFHMIN-NEXT: addi sp, sp, -48 -; RV64-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48 -; RV64-ZVFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV64-ZVFHMIN-NEXT: .cfi_offset ra, -8 -; RV64-ZVFHMIN-NEXT: .cfi_offset s0, -16 -; RV64-ZVFHMIN-NEXT: .cfi_offset s1, -24 -; RV64-ZVFHMIN-NEXT: .cfi_offset fs0, -32 -; RV64-ZVFHMIN-NEXT: csrr a1, vlenb -; RV64-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64-ZVFHMIN-NEXT: sub sp, sp, a1 -; RV64-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; RV64-ZVFHMIN-NEXT: mv s0, a0 -; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV64-ZVFHMIN-NEXT: vle16.v v8, (a0) -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: addi a0, a0, 16 -; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8 -; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0 -; RV64-ZVFHMIN-NEXT: call __extendhfsf2 -; RV64-ZVFHMIN-NEXT: lui a0, 307200 -; RV64-ZVFHMIN-NEXT: fmv.w.x fs0, a0 -; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0 -; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0 -; RV64-ZVFHMIN-NEXT: beqz a0, .LBB124_2 -; RV64-ZVFHMIN-NEXT: # %bb.1: -; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn -; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn -; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0 -; RV64-ZVFHMIN-NEXT: .LBB124_2: -; RV64-ZVFHMIN-NEXT: call __truncsfhf2 -; RV64-ZVFHMIN-NEXT: fmv.x.w s1, fa0 -; RV64-ZVFHMIN-NEXT: csrr a0, vlenb -; RV64-ZVFHMIN-NEXT: add a0, sp, a0 -; RV64-ZVFHMIN-NEXT: lh a0, 
16(a0) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB124_4
-; RV64-ZVFHMIN-NEXT: # %bb.3:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB124_4:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB124_6
-; RV64-ZVFHMIN-NEXT: # %bb.5:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB124_6:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB124_8
-; RV64-ZVFHMIN-NEXT: # %bb.7:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB124_8:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB124_10
-; RV64-ZVFHMIN-NEXT: # %bb.9:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB124_10:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB124_12
-; RV64-ZVFHMIN-NEXT: # %bb.11:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rdn
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rdn
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB124_12:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT: vse16.v v8, (s0)
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: slli a0, a0, 1
-; RV64-ZVFHMIN-NEXT: add sp, sp, a0
-; RV64-ZVFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: addi sp, sp, 48
-; RV64-ZVFHMIN-NEXT: ret
+; ZVFHMIN-LABEL: floor_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
 store <6 x half> %b, ptr %x
@@ -4610,337 +3437,27 @@ define void @round_v6f16(ptr %x) {
 ; ZVFH-NEXT: vse16.v v8, (a0)
 ; ZVFH-NEXT: ret
 ;
-; RV32-ZVFHMIN-LABEL: round_v6f16:
-; RV32-ZVFHMIN: # %bb.0:
-; RV32-ZVFHMIN-NEXT: addi sp, sp, -48
-; RV32-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV32-ZVFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-ZVFHMIN-NEXT: fsd fs0, 24(sp) # 8-byte Folded Spill
-; RV32-ZVFHMIN-NEXT: .cfi_offset ra, -4
-; RV32-ZVFHMIN-NEXT: .cfi_offset s0, -8
-; RV32-ZVFHMIN-NEXT: .cfi_offset s1, -12
-; RV32-ZVFHMIN-NEXT: .cfi_offset fs0, -24
-; RV32-ZVFHMIN-NEXT: csrr a1, vlenb
-; RV32-ZVFHMIN-NEXT: slli a1, a1, 1
-; RV32-ZVFHMIN-NEXT: sub sp, sp, a1
-; RV32-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV32-ZVFHMIN-NEXT: mv s0, a0
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vle16.v v8, (a0)
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: lui a0, 307200
-; RV32-ZVFHMIN-NEXT: fmv.w.x fs0, a0
-; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT: beqz a0, .LBB128_2
-; RV32-ZVFHMIN-NEXT: # %bb.1:
-; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT: .LBB128_2:
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w s1, fa0
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: lh a0, 16(a0) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT: beqz a0, .LBB128_4
-; RV32-ZVFHMIN-NEXT: # %bb.3:
-; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT: .LBB128_4:
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vmv.v.x v8, a0
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1
-; RV32-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT: beqz a0, .LBB128_6
-; RV32-ZVFHMIN-NEXT: # %bb.5:
-; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT: .LBB128_6:
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT: beqz a0, .LBB128_8
-; RV32-ZVFHMIN-NEXT: # %bb.7:
-; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT: .LBB128_8:
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT: beqz a0, .LBB128_10
-; RV32-ZVFHMIN-NEXT: # %bb.9:
-; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT: .LBB128_10:
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV32-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: add a0, sp, a0
-; RV32-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5
-; RV32-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV32-ZVFHMIN-NEXT: call __extendhfsf2
-; RV32-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV32-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV32-ZVFHMIN-NEXT: beqz a0, .LBB128_12
-; RV32-ZVFHMIN-NEXT: # %bb.11:
-; RV32-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV32-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV32-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV32-ZVFHMIN-NEXT: .LBB128_12:
-; RV32-ZVFHMIN-NEXT: call __truncsfhf2
-; RV32-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV32-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV32-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV32-ZVFHMIN-NEXT: vse16.v v8, (s0)
-; RV32-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZVFHMIN-NEXT: slli a0, a0, 1
-; RV32-ZVFHMIN-NEXT: add sp, sp, a0
-; RV32-ZVFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
-; RV32-ZVFHMIN-NEXT: addi sp, sp, 48
-; RV32-ZVFHMIN-NEXT: ret
-;
-; RV64-ZVFHMIN-LABEL: round_v6f16:
-; RV64-ZVFHMIN: # %bb.0:
-; RV64-ZVFHMIN-NEXT: addi sp, sp, -48
-; RV64-ZVFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV64-ZVFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV64-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64-ZVFHMIN-NEXT: .cfi_offset s1, -24
-; RV64-ZVFHMIN-NEXT: .cfi_offset fs0, -32
-; RV64-ZVFHMIN-NEXT: csrr a1, vlenb
-; RV64-ZVFHMIN-NEXT: slli a1, a1, 1
-; RV64-ZVFHMIN-NEXT: sub sp, sp, a1
-; RV64-ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
-; RV64-ZVFHMIN-NEXT: mv s0, a0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vle16.v v8, (a0)
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: lui a0, 307200
-; RV64-ZVFHMIN-NEXT: fmv.w.x fs0, a0
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB128_2
-; RV64-ZVFHMIN-NEXT: # %bb.1:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB128_2:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w s1, fa0
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: lh a0, 16(a0) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB128_4
-; RV64-ZVFHMIN-NEXT: # %bb.3:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB128_4:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, s1
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB128_6
-; RV64-ZVFHMIN-NEXT: # %bb.5:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB128_6:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB128_8
-; RV64-ZVFHMIN-NEXT: # %bb.7:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB128_8:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 4
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB128_10
-; RV64-ZVFHMIN-NEXT: # %bb.9:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB128_10:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: addi a0, sp, 16
-; RV64-ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: add a0, sp, a0
-; RV64-ZVFHMIN-NEXT: addi a0, a0, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 5
-; RV64-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64-ZVFHMIN-NEXT: fmv.w.x fa0, a0
-; RV64-ZVFHMIN-NEXT: call __extendhfsf2
-; RV64-ZVFHMIN-NEXT: fabs.s fa5, fa0
-; RV64-ZVFHMIN-NEXT: flt.s a0, fa5, fs0
-; RV64-ZVFHMIN-NEXT: beqz a0, .LBB128_12
-; RV64-ZVFHMIN-NEXT: # %bb.11:
-; RV64-ZVFHMIN-NEXT: fcvt.w.s a0, fa0, rmm
-; RV64-ZVFHMIN-NEXT: fcvt.s.w fa5, a0, rmm
-; RV64-ZVFHMIN-NEXT: fsgnj.s fa0, fa5, fa0
-; RV64-ZVFHMIN-NEXT: .LBB128_12:
-; RV64-ZVFHMIN-NEXT: call __truncsfhf2
-; RV64-ZVFHMIN-NEXT: fmv.x.w a0, fa0
-; RV64-ZVFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZVFHMIN-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
-; RV64-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV64-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
-; RV64-ZVFHMIN-NEXT: vse16.v v8, (s0)
-; RV64-ZVFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZVFHMIN-NEXT: slli a0, a0, 1
-; RV64-ZVFHMIN-NEXT: add sp, sp, a0
-; RV64-ZVFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; RV64-ZVFHMIN-NEXT: addi sp, sp, 48
-; RV64-ZVFHMIN-NEXT: ret
+; ZVFHMIN-LABEL: round_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
 store <6 x half> %b, ptr %x