From 7f5a128d9c3f9468f7a4c6bd2c08dfa478f3bf5c Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Sat, 30 Nov 2024 14:19:20 +0800 Subject: [PATCH 1/7] [llvm][CodeGen] Intrinsic `llvm.powi.*` code gen for vector arguments In some backends, the i32 type is illegal and will be promoted. This causes the exponent type check to fail when an ISD::FPOWI node generates a libcall. --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 18 + .../CodeGen/LoongArch/lasx/intrinsic-fpowi.ll | 142 ++ .../CodeGen/LoongArch/lsx/intrinsic-fpowi.ll | 88 + .../CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll | 1427 +++++++++++++++++ 4 files changed, 1675 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-fpowi.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-fpowi.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 63536336e9622..2829bbaef8310 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4648,6 +4648,24 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { bool ExponentHasSizeOfInt = DAG.getLibInfo().getIntSize() == Node->getOperand(1 + Offset).getValueType().getSizeInBits(); + if (!ExponentHasSizeOfInt) { + // In some backends, such as RISCV64 and LoongArch64, the i32 type is + // illegal and is promoted by a previous process. For such cases, the + // exponent actually matches sizeof(int) and a libcall should be + // generated. + SDNode *ExponentNode = Node->getOperand(1 + Offset).getNode(); + unsigned LibIntSize = DAG.getLibInfo().getIntSize(); + if (ExponentNode->getOpcode() == ISD::SIGN_EXTEND_INREG || + ExponentNode->getOpcode() == ISD::AssertSext || + ExponentNode->getOpcode() == ISD::AssertZext) { + EVT InnerType = cast<VTSDNode>(ExponentNode->getOperand(1))->getVT(); + ExponentHasSizeOfInt = LibIntSize == InnerType.getSizeInBits(); + } else if (ISD::isExtOpcode(ExponentNode->getOpcode())) { + ExponentHasSizeOfInt = + LibIntSize == + ExponentNode->getOperand(0).getValueType().getSizeInBits(); + } + } if (!ExponentHasSizeOfInt) { // If the exponent does not match with sizeof(int) a libcall to // RTLIB::POWI would use the wrong type for the argument.
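For reference, the failing pattern is IR of the following shape (an illustrative sketch only; the function name is made up, and the real coverage is the tests added below): when the vector powi is scalarized on an LP64 target such as loongarch64 or riscv64, the i32 exponent reaches ConvertNodeToLibcall already promoted to i64, so the plain size comparison above did not recognize it as matching sizeof(int).

declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)

define <2 x double> @powi_repro(<2 x double> %v, i32 %exp) nounwind {
  ; The exponent is declared i32, but LP64 targets promote it to i64 in the DAG.
  %r = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %v, i32 %exp)
  ret <2 x double> %r
}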
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fpowi.ll new file mode 100644 index 0000000000000..f6b14a9bb000f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fpowi.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32) + +define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind { +; CHECK-LABEL: powi_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: 
xvpickve2gr.w $a0, $xr0, 7 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> %va, i32 %b) + ret <8 x float> %res +} + +declare <4 x double> @llvm.powi.v4f64.i32(<4 x double>, i32) + +define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind { +; CHECK-LABEL: powi_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3 +; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> %va, i32 %b) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fpowi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fpowi.ll new file mode 100644 index 0000000000000..b0f54e78c7a44 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fpowi.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) + +define <4 x float> @powi_v4f32(<4 x float> %va, i32 %b) nounwind { +; CHECK-LABEL: powi_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: # kill: def $f0 killed 
$f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %va, i32 %b) + ret <4 x float> %res +} + +declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) + +define <2 x double> @powi_v2f64(<2 x double> %va, i32 %b) nounwind { +; CHECK-LABEL: powi_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %va, i32 %b) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll new file mode 100644 index 0000000000000..d99feb5fdd921 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll @@ -0,0 +1,1427 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d 
-verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV64 + +define <1 x float> @powi_v1f32(<1 x float> %x, i32 %y) { +; RV32-LABEL: powi_v1f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.s.f v8, fa0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v1f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.s.f v8, fa0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <1 x float> @llvm.powi.v1f32.i32(<1 x float> %x, i32 %y) + ret <1 x float> %a +} +declare <1 x float> @llvm.powi.v1f32.i32(<1 x float>, i32) + +define <2 x float> @powi_v2f32(<2 x float> %x, i32 %y) { +; RV32-LABEL: powi_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v2f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb +; RV64-NEXT: 
addi a1, sp, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %x, i32 %y) + ret <2 x float> %a +} +declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) + +define <3 x float> @powi_v3f32(<3 x float> %x, i32 %y) { +; RV32-LABEL: powi_v3f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: flw fa0, 16(a0) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v3f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) 
# 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: flw fa0, 32(a0) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <3 x float> @llvm.powi.v3f32.i32(<3 x float> %x, i32 %y) + ret <3 x float> %a +} +declare <3 x float> @llvm.powi.v3f32.i32(<3 x float>, i32) + +define <4 x float> @powi_v4f32(<4 x float> %x, i32 %y) { +; RV32-LABEL: powi_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: flw fa0, 16(a0) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: 
vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v4f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: flw fa0, 32(a0) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, 
v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %x, i32 %y) + ret <4 x float> %a +} +declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) + +define <8 x float> @powi_v8f32(<8 x float> %x, i32 %y) { +; RV32-LABEL: powi_v8f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v10 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) 
# Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 4 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 5 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 6 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 7 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v8f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v10, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v10 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: 
fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 4 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 5 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 6 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 7 +; RV64-NEXT: vfmv.f.s fa0, v8 +; 
RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> %x, i32 %y) + ret <8 x float> %a +} +declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32) + +define <16 x float> @powi_v16f32(<16 x float> %x, i32 %y) { +; RV32-LABEL: powi_v16f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -272 +; RV32-NEXT: .cfi_def_cfa_offset 272 +; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: addi s0, sp, 272 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: flw fa0, 124(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 188(sp) +; RV32-NEXT: flw fa0, 120(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 184(sp) +; RV32-NEXT: flw fa0, 116(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 180(sp) +; RV32-NEXT: flw fa0, 112(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 176(sp) +; RV32-NEXT: flw fa0, 108(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 172(sp) +; RV32-NEXT: flw fa0, 104(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 168(sp) +; RV32-NEXT: flw fa0, 100(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 164(sp) +; RV32-NEXT: flw fa0, 96(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 160(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 128(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 140(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 136(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, 
ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 132(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 7 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 156(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 6 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 152(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 5 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 148(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 4 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 144(sp) +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: addi sp, s0, -272 +; RV32-NEXT: .cfi_def_cfa sp, 272 +; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore s2 +; RV32-NEXT: addi sp, sp, 272 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v16f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -272 +; RV64-NEXT: .cfi_def_cfa_offset 272 +; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s2, -24 +; RV64-NEXT: addi s0, sp, 272 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: addi a1, sp, 240 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s2, a0 +; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: flw fa0, 124(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 188(sp) +; RV64-NEXT: flw fa0, 120(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 184(sp) +; RV64-NEXT: flw fa0, 116(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 180(sp) +; RV64-NEXT: flw fa0, 112(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 176(sp) +; RV64-NEXT: flw fa0, 108(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 172(sp) +; RV64-NEXT: flw fa0, 104(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 168(sp) +; RV64-NEXT: flw fa0, 100(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 164(sp) +; RV64-NEXT: flw fa0, 96(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 160(sp) +; RV64-NEXT: addi 
a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 128(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 140(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 136(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 132(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 7 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 156(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 6 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 152(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 5 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 148(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 4 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 144(sp) +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: addi sp, s0, -272 +; RV64-NEXT: .cfi_def_cfa sp, 272 +; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore s2 +; RV64-NEXT: addi sp, sp, 272 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> %x, i32 %y) + ret <16 x float> %a +} +declare <16 x float> @llvm.powi.v16f32.i32(<16 x float>, i32) + +define <1 x double> @powi_v1f64(<1 x double> %x, i32 %y) { +; RV32-LABEL: powi_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.s.f v8, fa0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; 
+; RV64-LABEL: powi_v1f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.s.f v8, fa0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <1 x double> @llvm.powi.v1f64.i32(<1 x double> %x, i32 %y) + ret <1 x double> %a +} +declare <1 x double> @llvm.powi.v1f64.i32(<1 x double>, i32) + +define <2 x double> @powi_v2f64(<2 x double> %x, i32 %y) { +; RV32-LABEL: powi_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fmv.d fs0, fa0 +; RV32-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb +; RV64-NEXT: addi a1, sp, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fmv.d fs0, fa0 +; RV64-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; 
RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %x, i32 %y) + ret <2 x double> %a +} +declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) + +define <4 x double> @powi_v4f64(<4 x double> %x, i32 %y) { +; RV32-LABEL: powi_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v10 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fmv.d fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 
64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v10, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v10 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fmv.d fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> %x, i32 %y) + ret <4 x double> %a +} +declare <4 x double> @llvm.powi.v4f64.i32(<4 x double>, i32) + +define <8 x double> @powi_v8f64(<8 x double> %x, i32 %y) { +; RV32-LABEL: powi_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -272 +; RV32-NEXT: .cfi_def_cfa_offset 272 +; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: addi s0, sp, 272 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: csrr a1, vlenb +; 
RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: fld fa0, 120(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 184(sp) +; RV32-NEXT: fld fa0, 112(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 176(sp) +; RV32-NEXT: fld fa0, 104(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 168(sp) +; RV32-NEXT: fld fa0, 96(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 160(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 128(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 136(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 152(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 144(sp) +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi sp, s0, -272 +; RV32-NEXT: .cfi_def_cfa sp, 272 +; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore s2 +; RV32-NEXT: addi sp, sp, 272 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -272 +; RV64-NEXT: .cfi_def_cfa_offset 272 +; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s2, -24 +; RV64-NEXT: addi s0, sp, 272 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: addi a1, sp, 240 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s2, a0 +; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: fld fa0, 120(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 184(sp) +; RV64-NEXT: fld fa0, 112(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 176(sp) +; RV64-NEXT: fld fa0, 104(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 168(sp) +; RV64-NEXT: fld fa0, 
96(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 160(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 128(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 136(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 152(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 144(sp) +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi sp, s0, -272 +; RV64-NEXT: .cfi_def_cfa sp, 272 +; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore s2 +; RV64-NEXT: addi sp, sp, 272 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> %x, i32 %y) + ret <8 x double> %a +} +declare <8 x double> @llvm.powi.v8f64.i32(<8 x double>, i32) From 73281bcee717f14c6f11ad019db76b9e16f72ba5 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Mon, 2 Dec 2024 10:44:58 +0800 Subject: [PATCH 2/7] Modify test file name. NFC --- llvm/test/CodeGen/LoongArch/lasx/{intrinsic-fpowi.ll => fpowi.ll} | 0 llvm/test/CodeGen/LoongArch/lsx/{intrinsic-fpowi.ll => fpowi.ll} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/LoongArch/lasx/{intrinsic-fpowi.ll => fpowi.ll} (100%) rename llvm/test/CodeGen/LoongArch/lsx/{intrinsic-fpowi.ll => fpowi.ll} (100%) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll similarity index 100% rename from llvm/test/CodeGen/LoongArch/lasx/intrinsic-fpowi.ll rename to llvm/test/CodeGen/LoongArch/lasx/fpowi.ll diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fpowi.ll b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll similarity index 100% rename from llvm/test/CodeGen/LoongArch/lsx/intrinsic-fpowi.ll rename to llvm/test/CodeGen/LoongArch/lsx/fpowi.ll From e2c155d3a0dec1006d7c3b0badd7a07d45611679 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Wed, 4 Dec 2024 09:57:41 +0800 Subject: [PATCH 3/7] Fixes for reviews. For nodes such as `ISD::FPOWI`, `ISD::FLDEXP`, if the first operand is a vector operand, since the corresponding library functions do not have vector-type signatures, the vector will be unroll during the type legalization, without promoting the second operand. 
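As a rough illustration only (not part of this patch, and written at the IR
level rather than the SelectionDAG level where `DAG.UnrollVectorOp` actually
operates), the effect on a `<2 x float>` `llvm.ldexp` call is element-wise
scalarization, with each scalar call subsequently lowered to the
`ldexpf(float, int)` libcall. The value names below are made up for
exposition:

    ; Vector form before type legalization:
    %res = call <2 x float> @llvm.ldexp.v2f32.i32(<2 x float> %v, i32 %e)

    ; Roughly equivalent unrolled form; each scalar call is then lowered
    ; to the ldexpf libcall with the i32 exponent passed as int:
    %v0 = extractelement <2 x float> %v, i32 0
    %r0 = call float @llvm.ldexp.f32.i32(float %v0, i32 %e)
    %v1 = extractelement <2 x float> %v, i32 1
    %r1 = call float @llvm.ldexp.f32.i32(float %v1, i32 %e)
    %t0 = insertelement <2 x float> poison, float %r0, i32 0
    %res.unrolled = insertelement <2 x float> %t0, float %r1, i32 1
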
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 18 - .../SelectionDAG/LegalizeIntegerTypes.cpp | 3 + llvm/test/CodeGen/LoongArch/lasx/fldexp.ll | 142 ++ llvm/test/CodeGen/LoongArch/lsx/fldexp.ll | 88 + .../CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll | 1427 +++++++++++++++++ 5 files changed, 1660 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/fldexp.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/fldexp.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2829bbaef8310..63536336e9622 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4648,24 +4648,6 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { bool ExponentHasSizeOfInt = DAG.getLibInfo().getIntSize() == Node->getOperand(1 + Offset).getValueType().getSizeInBits(); - if (!ExponentHasSizeOfInt) { - // In some backends, such as RISCV64 and LoongArch64, the i32 type is - // illegal and is promoted by previous process. For such cases, the - // exponent actually matches with sizeof(int) and a libcall should be - // generated. - SDNode *ExponentNode = Node->getOperand(1 + Offset).getNode(); - unsigned LibIntSize = DAG.getLibInfo().getIntSize(); - if (ExponentNode->getOpcode() == ISD::SIGN_EXTEND_INREG || - ExponentNode->getOpcode() == ISD::AssertSext || - ExponentNode->getOpcode() == ISD::AssertZext) { - EVT InnerType = cast(ExponentNode->getOperand(1))->getVT(); - ExponentHasSizeOfInt = LibIntSize == InnerType.getSizeInBits(); - } else if (ISD::isExtOpcode(ExponentNode->getOpcode())) { - ExponentHasSizeOfInt = - LibIntSize == - ExponentNode->getOperand(0).getValueType().getSizeInBits(); - } - } if (!ExponentHasSizeOfInt) { // If the exponent does not match with sizeof(int) a libcall to // RTLIB::POWI would use the wrong type for the argument. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 7b9f544a5f9a4..df3b655351689 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2572,6 +2572,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { + if (N->getValueType(0).isVector()) + return DAG.UnrollVectorOp(N); + bool IsStrict = N->isStrictFPOpcode(); SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); diff --git a/llvm/test/CodeGen/LoongArch/lasx/fldexp.ll b/llvm/test/CodeGen/LoongArch/lasx/fldexp.ll new file mode 100644 index 0000000000000..e63812dda11f6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fldexp.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.ldexp.v8f32.i32(<8 x float>, i32) + +define <8 x float> @ldexp_v8f32(<8 x float> %va, i32 %b) nounwind { +; CHECK-LABEL: ldexp_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 
7 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.ldexp.v8f32.i32(<8 x float> %va, i32 %b) + ret <8 x float> %res +} + +declare <4 x double> @llvm.ldexp.v4f64.i32(<4 x double>, i32) + +define <4 x double> @ldexp_v4f64(<4 x double> %va, i32 %b) nounwind { +; CHECK-LABEL: ldexp_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexp) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexp) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexp) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexp) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3 +; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.ldexp.v4f64.i32(<4 x double> %va, i32 %b) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/fldexp.ll b/llvm/test/CodeGen/LoongArch/lsx/fldexp.ll new file mode 100644 index 0000000000000..84e53c5b9bc0f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/fldexp.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.ldexp.v4f32.i32(<4 x float>, i32) + +define <4 x float> @ldexp_v4f32(<4 x float> %va, i32 %b) nounwind { +; CHECK-LABEL: ldexp_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl 
%plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexpf) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.ldexp.v4f32.i32(<4 x float> %va, i32 %b) + ret <4 x float> %res +} + +declare <2 x double> @llvm.ldexp.v2f64.i32(<2 x double>, i32) + +define <2 x double> @ldexp_v2f64(<2 x double> %va, i32 %b) nounwind { +; CHECK-LABEL: ldexp_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexp) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(ldexp) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.ldexp.v2f64.i32(<2 x double> %va, i32 %b) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll new file mode 100644 index 0000000000000..8be0105a21288 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll @@ -0,0 +1,1427 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV64 + +define <1 
x float> @ldexp_v1f32(<1 x float> %x, i32 %y) { +; RV32-LABEL: ldexp_v1f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: call ldexpf +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.s.f v8, fa0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v1f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: call ldexpf +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.s.f v8, fa0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <1 x float> @llvm.ldexp.v1f32.i32(<1 x float> %x, i32 %y) + ret <1 x float> %a +} +declare <1 x float> @llvm.ldexp.v1f32.i32(<1 x float>, i32) + +define <2 x float> @ldexp_v2f32(<2 x float> %x, i32 %y) { +; RV32-LABEL: ldexp_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v2f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb +; RV64-NEXT: addi a1, sp, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: 
sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <2 x float> @llvm.ldexp.v2f32.i32(<2 x float> %x, i32 %y) + ret <2 x float> %a +} +declare <2 x float> @llvm.ldexp.v2f32.i32(<2 x float>, i32) + +define <3 x float> @ldexp_v3f32(<3 x float> %x, i32 %y) { +; RV32-LABEL: ldexp_v3f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: flw fa0, 16(a0) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v3f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 
40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: flw fa0, 32(a0) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <3 x float> @llvm.ldexp.v3f32.i32(<3 x float> %x, i32 %y) + ret <3 x float> %a +} +declare <3 x float> @llvm.ldexp.v3f32.i32(<3 x float>, i32) + +define <4 x float> @ldexp_v4f32(<4 x float> %x, i32 %y) { +; RV32-LABEL: ldexp_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: flw fa0, 16(a0) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill 
+; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v4f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: flw fa0, 32(a0) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: 
vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <4 x float> @llvm.ldexp.v4f32.i32(<4 x float> %x, i32 %y) + ret <4 x float> %a +} +declare <4 x float> @llvm.ldexp.v4f32.i32(<4 x float>, i32) + +define <8 x float> @ldexp_v8f32(<8 x float> %x, i32 %y) { +; RV32-LABEL: ldexp_v8f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v10 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 
16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 4 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 5 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 6 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 7 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexpf +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v8f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v10, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v10 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, 
(a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 4 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 5 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 6 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 7 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexpf +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, 
ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <8 x float> @llvm.ldexp.v8f32.i32(<8 x float> %x, i32 %y) + ret <8 x float> %a +} +declare <8 x float> @llvm.ldexp.v8f32.i32(<8 x float>, i32) + +define <16 x float> @ldexp_v16f32(<16 x float> %x, i32 %y) { +; RV32-LABEL: ldexp_v16f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -272 +; RV32-NEXT: .cfi_def_cfa_offset 272 +; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: addi s0, sp, 272 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: flw fa0, 124(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 188(sp) +; RV32-NEXT: flw fa0, 120(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 184(sp) +; RV32-NEXT: flw fa0, 116(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 180(sp) +; RV32-NEXT: flw fa0, 112(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 176(sp) +; RV32-NEXT: flw fa0, 108(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 172(sp) +; RV32-NEXT: flw fa0, 104(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 168(sp) +; RV32-NEXT: flw fa0, 100(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 164(sp) +; RV32-NEXT: flw fa0, 96(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 160(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 128(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 140(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 136(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 132(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v 
v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 7 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 156(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 6 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 152(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 5 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 148(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 4 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexpf +; RV32-NEXT: fsw fa0, 144(sp) +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: addi sp, s0, -272 +; RV32-NEXT: .cfi_def_cfa sp, 272 +; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore s2 +; RV32-NEXT: addi sp, sp, 272 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v16f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -272 +; RV64-NEXT: .cfi_def_cfa_offset 272 +; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s2, -24 +; RV64-NEXT: addi s0, sp, 272 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: addi a1, sp, 240 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s2, a0 +; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: flw fa0, 124(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 188(sp) +; RV64-NEXT: flw fa0, 120(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 184(sp) +; RV64-NEXT: flw fa0, 116(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 180(sp) +; RV64-NEXT: flw fa0, 112(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 176(sp) +; RV64-NEXT: flw fa0, 108(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 172(sp) +; RV64-NEXT: flw fa0, 104(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 168(sp) +; RV64-NEXT: flw fa0, 100(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 164(sp) +; RV64-NEXT: flw fa0, 96(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 160(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 128(sp) +; RV64-NEXT: 
addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 140(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 136(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 132(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 7 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 156(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 6 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 152(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 5 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 148(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 4 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexpf +; RV64-NEXT: fsw fa0, 144(sp) +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: addi sp, s0, -272 +; RV64-NEXT: .cfi_def_cfa sp, 272 +; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore s2 +; RV64-NEXT: addi sp, sp, 272 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <16 x float> @llvm.ldexp.v16f32.i32(<16 x float> %x, i32 %y) + ret <16 x float> %a +} +declare <16 x float> @llvm.ldexp.v16f32.i32(<16 x float>, i32) + +define <1 x double> @ldexp_v1f64(<1 x double> %x, i32 %y) { +; RV32-LABEL: ldexp_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: call ldexp +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.s.f v8, fa0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v1f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, 
ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: call ldexp +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.s.f v8, fa0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <1 x double> @llvm.ldexp.v1f64.i32(<1 x double> %x, i32 %y) + ret <1 x double> %a +} +declare <1 x double> @llvm.ldexp.v1f64.i32(<1 x double>, i32) + +define <2 x double> @ldexp_v2f64(<2 x double> %x, i32 %y) { +; RV32-LABEL: ldexp_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call ldexp +; RV32-NEXT: fmv.d fs0, fa0 +; RV32-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexp +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb +; RV64-NEXT: addi a1, sp, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexp +; RV64-NEXT: fmv.d fs0, fa0 +; RV64-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexp +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call 
<2 x double> @llvm.ldexp.v2f64.i32(<2 x double> %x, i32 %y) + ret <2 x double> %a +} +declare <2 x double> @llvm.ldexp.v2f64.i32(<2 x double>, i32) + +define <4 x double> @ldexp_v4f64(<4 x double> %x, i32 %y) { +; RV32-LABEL: ldexp_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset fs0, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v10 +; RV32-NEXT: call ldexp +; RV32-NEXT: fmv.d fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexp +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexp +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call ldexp +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore fs0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: .cfi_def_cfa_offset 64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset fs0, -24 +; RV64-NEXT: csrr a1, vlenb +; 
RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v10, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v10 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexp +; RV64-NEXT: fmv.d fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexp +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexp +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call ldexp +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 64 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore fs0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <4 x double> @llvm.ldexp.v4f64.i32(<4 x double> %x, i32 %y) + ret <4 x double> %a +} +declare <4 x double> @llvm.ldexp.v4f64.i32(<4 x double>, i32) + +define <8 x double> @ldexp_v8f64(<8 x double> %x, i32 %y) { +; RV32-LABEL: ldexp_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -272 +; RV32-NEXT: .cfi_def_cfa_offset 272 +; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: addi s0, sp, 272 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vse64.v 
v8, (a0) +; RV32-NEXT: fld fa0, 120(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexp +; RV32-NEXT: fsd fa0, 184(sp) +; RV32-NEXT: fld fa0, 112(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexp +; RV32-NEXT: fsd fa0, 176(sp) +; RV32-NEXT: fld fa0, 104(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexp +; RV32-NEXT: fsd fa0, 168(sp) +; RV32-NEXT: fld fa0, 96(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexp +; RV32-NEXT: fsd fa0, 160(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexp +; RV32-NEXT: fsd fa0, 128(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexp +; RV32-NEXT: fsd fa0, 136(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexp +; RV32-NEXT: fsd fa0, 152(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call ldexp +; RV32-NEXT: fsd fa0, 144(sp) +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi sp, s0, -272 +; RV32-NEXT: .cfi_def_cfa sp, 272 +; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: .cfi_restore s2 +; RV32-NEXT: addi sp, sp, 272 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: ldexp_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -272 +; RV64-NEXT: .cfi_def_cfa_offset 272 +; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: .cfi_offset s2, -24 +; RV64-NEXT: addi s0, sp, 272 +; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: addi a1, sp, 240 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s2, a0 +; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: fld fa0, 120(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexp +; RV64-NEXT: fsd fa0, 184(sp) +; RV64-NEXT: fld fa0, 112(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexp +; RV64-NEXT: fsd fa0, 176(sp) +; RV64-NEXT: fld fa0, 104(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexp +; RV64-NEXT: fsd fa0, 168(sp) +; RV64-NEXT: fld fa0, 96(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexp +; RV64-NEXT: fsd fa0, 160(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexp +; RV64-NEXT: fsd fa0, 128(sp) +; 
RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexp +; RV64-NEXT: fsd fa0, 136(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexp +; RV64-NEXT: fsd fa0, 152(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call ldexp +; RV64-NEXT: fsd fa0, 144(sp) +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi sp, s0, -272 +; RV64-NEXT: .cfi_def_cfa sp, 272 +; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: .cfi_restore s2 +; RV64-NEXT: addi sp, sp, 272 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %a = call <8 x double> @llvm.ldexp.v8f64.i32(<8 x double> %x, i32 %y) + ret <8 x double> %a +} +declare <8 x double> @llvm.ldexp.v8f64.i32(<8 x double>, i32) From 685192d6c4181cfc3ec9865ae1b53636222e3da9 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Wed, 4 Dec 2024 15:22:30 +0800 Subject: [PATCH 4/7] Fixes for review. --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 5 +- .../CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll | 212 ++---------------- .../CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll | 212 ++---------------- 3 files changed, 38 insertions(+), 391 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index df3b655351689..07fc3d23b9c38 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2572,9 +2572,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { - if (N->getValueType(0).isVector()) - return DAG.UnrollVectorOp(N); - bool IsStrict = N->isStrictFPOpcode(); SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); @@ -2588,6 +2585,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { : RTLIB::getLDEXP(N->getValueType(0)); if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + if (N->getValueType(0).isVector()) + return DAG.UnrollVectorOp(N); SmallVector<SDValue, 3> NewOps(N->ops()); NewOps[1 + OpOffset] = SExtPromotedInteger(N->getOperand(1 + OpOffset)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll index 8be0105a21288..6949875172596 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll @@ -4,60 +4,47 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV64 -define <1 x float> @ldexp_v1f32(<1 x float> %x, i32 %y) { +define <1 x float> @ldexp_v1f32(<1 x float> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v1f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call ldexpf ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.s.f v8, fa0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v1f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: call ldexpf ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vfmv.s.f v8, fa0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra ; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <1 x float> @llvm.ldexp.v1f32.i32(<1 x float> %x, i32 %y) ret <1 x float> %a } declare <1 x float> @llvm.ldexp.v1f32.i32(<1 x float>, i32) -define <2 x float> @ldexp_v2f32(<2 x float> %x, i32 %y) { +define <2 x float> @ldexp_v2f32(<2 x float> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v2f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill @@ -74,30 +61,20 @@ define <2 x float> @ldexp_v2f32(<2 x float> %x, i32 %y) { ; RV32-NEXT: vfslide1down.vf v8, v8, fs0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT:
.cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v2f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; RV64-NEXT: addi a1, sp, 32 ; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: sext.w s0, a0 @@ -115,36 +92,26 @@ define <2 x float> @ldexp_v2f32(<2 x float> %x, i32 %y) { ; RV64-NEXT: vfslide1down.vf v8, v8, fs0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <2 x float> @llvm.ldexp.v2f32.i32(<2 x float> %x, i32 %y) ret <2 x float> %a } declare <2 x float> @llvm.ldexp.v2f32.i32(<2 x float>, i32) -define <3 x float> @ldexp_v3f32(<3 x float> %x, i32 %y) { +define <3 x float> @ldexp_v3f32(<3 x float> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v3f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: add a1, sp, a1 @@ -181,31 +148,21 @@ define <3 x float> @ldexp_v3f32(<3 x float> %x, i32 %y) { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v3f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 @@ -243,36 +200,26 @@ define <3 x float> @ldexp_v3f32(<3 x float> %x, i32 %y) { ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 -; 
RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <3 x float> @llvm.ldexp.v3f32.i32(<3 x float> %x, i32 %y) ret <3 x float> %a } declare <3 x float> @llvm.ldexp.v3f32.i32(<3 x float>, i32) -define <4 x float> @ldexp_v4f32(<4 x float> %x, i32 %y) { +define <4 x float> @ldexp_v4f32(<4 x float> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v4f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: add a1, sp, a1 @@ -321,31 +268,21 @@ define <4 x float> @ldexp_v4f32(<4 x float> %x, i32 %y) { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v4f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 @@ -395,36 +332,26 @@ define <4 x float> @ldexp_v4f32(<4 x float> %x, i32 %y) { ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <4 x float> @llvm.ldexp.v4f32.i32(<4 x float> %x, i32 %y) ret <4 x float> %a } declare <4 x float> @llvm.ldexp.v4f32.i32(<4 x float>, i32) -define <8 x float> @ldexp_v8f32(<8 x float> %x, i32 %y) { +define <8 x float> @ldexp_v8f32(<8 x float> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v8f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; 
RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 1 @@ -538,31 +465,21 @@ define <8 x float> @ldexp_v8f32(<8 x float> %x, i32 %y) { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v8f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add a1, sp, a1 @@ -677,34 +594,24 @@ define <8 x float> @ldexp_v8f32(<8 x float> %x, i32 %y) { ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 2 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <8 x float> @llvm.ldexp.v8f32.i32(<8 x float> %x, i32 %y) ret <8 x float> %a } declare <8 x float> @llvm.ldexp.v8f32.i32(<8 x float>, i32) -define <16 x float> @ldexp_v16f32(<16 x float> %x, i32 %y) { +define <16 x float> @ldexp_v16f32(<16 x float> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v16f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -272 -; RV32-NEXT: .cfi_def_cfa_offset 272 ; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: addi s0, sp, 272 -; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub sp, sp, a1 @@ -814,40 +721,30 @@ define <16 x float> @ldexp_v16f32(<16 x float> %x, i32 %y) { ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: addi sp, s0, -272 -; RV32-NEXT: .cfi_def_cfa sp, 272 ; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore s2 ; RV32-NEXT: addi sp, sp, 272 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v16f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -272 -; 
RV64-NEXT: .cfi_def_cfa_offset 272 ; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s2, -24 ; RV64-NEXT: addi s0, sp, 272 -; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub sp, sp, a1 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: addi a1, sp, 240 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s2, a0 -; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: addi a1, sp, 64 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: vse32.v v8, (a1) ; RV64-NEXT: flw fa0, 124(sp) +; RV64-NEXT: sext.w s2, a0 ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call ldexpf ; RV64-NEXT: fsw fa0, 188(sp) @@ -946,75 +843,57 @@ define <16 x float> @ldexp_v16f32(<16 x float> %x, i32 %y) { ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: addi sp, s0, -272 -; RV64-NEXT: .cfi_def_cfa sp, 272 ; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore s2 ; RV64-NEXT: addi sp, sp, 272 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <16 x float> @llvm.ldexp.v16f32.i32(<16 x float> %x, i32 %y) ret <16 x float> %a } declare <16 x float> @llvm.ldexp.v16f32.i32(<16 x float>, i32) -define <1 x double> @ldexp_v1f64(<1 x double> %x, i32 %y) { +define <1 x double> @ldexp_v1f64(<1 x double> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v1f64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call ldexp ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.s.f v8, fa0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v1f64: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: call ldexp ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vfmv.s.f v8, fa0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra ; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <1 x double> @llvm.ldexp.v1f64.i32(<1 x double> %x, i32 %y) ret <1 x double> %a } declare <1 x double> @llvm.ldexp.v1f64.i32(<1 x double>, i32) -define <2 x double> @ldexp_v2f64(<2 x double> %x, i32 %y) { +define <2 x double> @ldexp_v2f64(<2 x double> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v2f64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub sp, sp, a1 -; 
RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill @@ -1031,30 +910,20 @@ define <2 x double> @ldexp_v2f64(<2 x double> %x, i32 %y) { ; RV32-NEXT: vfslide1down.vf v8, v8, fs0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v2f64: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; RV64-NEXT: addi a1, sp, 32 ; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: sext.w s0, a0 @@ -1072,36 +941,26 @@ define <2 x double> @ldexp_v2f64(<2 x double> %x, i32 %y) { ; RV64-NEXT: vfslide1down.vf v8, v8, fs0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <2 x double> @llvm.ldexp.v2f64.i32(<2 x double> %x, i32 %y) ret <2 x double> %a } declare <2 x double> @llvm.ldexp.v2f64.i32(<2 x double>, i32) -define <4 x double> @ldexp_v4f64(<4 x double> %x, i32 %y) { +define <4 x double> @ldexp_v4f64(<4 x double> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v4f64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 1 @@ -1157,31 +1016,21 @@ define <4 x double> @ldexp_v4f64(<4 x double> %x, i32 %y) { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v4f64: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; 
RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add a1, sp, a1 @@ -1238,34 +1087,24 @@ define <4 x double> @ldexp_v4f64(<4 x double> %x, i32 %y) { ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 2 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <4 x double> @llvm.ldexp.v4f64.i32(<4 x double> %x, i32 %y) ret <4 x double> %a } declare <4 x double> @llvm.ldexp.v4f64.i32(<4 x double>, i32) -define <8 x double> @ldexp_v8f64(<8 x double> %x, i32 %y) { +define <8 x double> @ldexp_v8f64(<8 x double> %x, i32 %y) nounwind { ; RV32-LABEL: ldexp_v8f64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -272 -; RV32-NEXT: .cfi_def_cfa_offset 272 ; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: addi s0, sp, 272 -; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub sp, sp, a1 @@ -1327,40 +1166,30 @@ define <8 x double> @ldexp_v8f64(<8 x double> %x, i32 %y) { ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi sp, s0, -272 -; RV32-NEXT: .cfi_def_cfa sp, 272 ; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore s2 ; RV32-NEXT: addi sp, sp, 272 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: ldexp_v8f64: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -272 -; RV64-NEXT: .cfi_def_cfa_offset 272 ; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s2, -24 ; RV64-NEXT: addi s0, sp, 272 -; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub sp, sp, a1 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: addi a1, sp, 240 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s2, a0 -; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: addi a1, sp, 64 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: fld fa0, 120(sp) +; RV64-NEXT: sext.w s2, a0 ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call ldexp ; RV64-NEXT: fsd fa0, 184(sp) @@ -1411,15 +1240,10 @@ define <8 x double> @ldexp_v8f64(<8 x double> %x, i32 %y) { ; RV64-NEXT: vsetivli zero, 8, e64, 
m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi sp, s0, -272 -; RV64-NEXT: .cfi_def_cfa sp, 272 ; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore s2 ; RV64-NEXT: addi sp, sp, 272 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <8 x double> @llvm.ldexp.v8f64.i32(<8 x double> %x, i32 %y) ret <8 x double> %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll index d99feb5fdd921..c6b8b602718b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll @@ -4,60 +4,47 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV64 -define <1 x float> @powi_v1f32(<1 x float> %x, i32 %y) { +define <1 x float> @powi_v1f32(<1 x float> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v1f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call __powisf2 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.s.f v8, fa0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v1f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: call __powisf2 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vfmv.s.f v8, fa0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra ; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <1 x float> @llvm.powi.v1f32.i32(<1 x float> %x, i32 %y) ret <1 x float> %a } declare <1 x float> @llvm.powi.v1f32.i32(<1 x float>, i32) -define <2 x float> @powi_v2f32(<2 x float> %x, i32 %y) { +define <2 x float> @powi_v2f32(<2 x float> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v2f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill @@ -74,30 +61,20 @@ define <2 x float> @powi_v2f32(<2 x float> %x, i32 %y) { ; RV32-NEXT: vfslide1down.vf v8, v8, fs0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 
-; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v2f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; RV64-NEXT: addi a1, sp, 32 ; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: sext.w s0, a0 @@ -115,36 +92,26 @@ define <2 x float> @powi_v2f32(<2 x float> %x, i32 %y) { ; RV64-NEXT: vfslide1down.vf v8, v8, fs0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %x, i32 %y) ret <2 x float> %a } declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) -define <3 x float> @powi_v3f32(<3 x float> %x, i32 %y) { +define <3 x float> @powi_v3f32(<3 x float> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v3f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: add a1, sp, a1 @@ -181,31 +148,21 @@ define <3 x float> @powi_v3f32(<3 x float> %x, i32 %y) { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v3f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 @@ -243,36 +200,26 @@ define <3 x float> @powi_v3f32(<3 x float> %x, i32 %y) { ; RV64-NEXT: csrr a0, 
vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <3 x float> @llvm.powi.v3f32.i32(<3 x float> %x, i32 %y) ret <3 x float> %a } declare <3 x float> @llvm.powi.v3f32.i32(<3 x float>, i32) -define <4 x float> @powi_v4f32(<4 x float> %x, i32 %y) { +define <4 x float> @powi_v4f32(<4 x float> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v4f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: add a1, sp, a1 @@ -321,31 +268,21 @@ define <4 x float> @powi_v4f32(<4 x float> %x, i32 %y) { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v4f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 @@ -395,36 +332,26 @@ define <4 x float> @powi_v4f32(<4 x float> %x, i32 %y) { ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %x, i32 %y) ret <4 x float> %a } declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) -define <8 x float> @powi_v8f32(<8 x float> %x, i32 %y) { +define <8 x float> @powi_v8f32(<8 x float> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v8f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; 
RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 1 @@ -538,31 +465,21 @@ define <8 x float> @powi_v8f32(<8 x float> %x, i32 %y) { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v8f32: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add a1, sp, a1 @@ -677,34 +594,24 @@ define <8 x float> @powi_v8f32(<8 x float> %x, i32 %y) { ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 2 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> %x, i32 %y) ret <8 x float> %a } declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32) -define <16 x float> @powi_v16f32(<16 x float> %x, i32 %y) { +define <16 x float> @powi_v16f32(<16 x float> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v16f32: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -272 -; RV32-NEXT: .cfi_def_cfa_offset 272 ; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: addi s0, sp, 272 -; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub sp, sp, a1 @@ -814,40 +721,30 @@ define <16 x float> @powi_v16f32(<16 x float> %x, i32 %y) { ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: addi sp, s0, -272 -; RV32-NEXT: .cfi_def_cfa sp, 272 ; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore s2 ; RV32-NEXT: addi sp, sp, 272 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v16f32: ; RV64: # %bb.0: 
; RV64-NEXT: addi sp, sp, -272 -; RV64-NEXT: .cfi_def_cfa_offset 272 ; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s2, -24 ; RV64-NEXT: addi s0, sp, 272 -; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub sp, sp, a1 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: addi a1, sp, 240 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s2, a0 -; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: addi a1, sp, 64 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: vse32.v v8, (a1) ; RV64-NEXT: flw fa0, 124(sp) +; RV64-NEXT: sext.w s2, a0 ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __powisf2 ; RV64-NEXT: fsw fa0, 188(sp) @@ -946,75 +843,57 @@ define <16 x float> @powi_v16f32(<16 x float> %x, i32 %y) { ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: addi sp, s0, -272 -; RV64-NEXT: .cfi_def_cfa sp, 272 ; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore s2 ; RV64-NEXT: addi sp, sp, 272 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> %x, i32 %y) ret <16 x float> %a } declare <16 x float> @llvm.powi.v16f32.i32(<16 x float>, i32) -define <1 x double> @powi_v1f64(<1 x double> %x, i32 %y) { +define <1 x double> @powi_v1f64(<1 x double> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v1f64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call __powidf2 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.s.f v8, fa0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v1f64: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: call __powidf2 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vfmv.s.f v8, fa0 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra ; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <1 x double> @llvm.powi.v1f64.i32(<1 x double> %x, i32 %y) ret <1 x double> %a } declare <1 x double> @llvm.powi.v1f64.i32(<1 x double>, i32) -define <2 x double> @powi_v2f64(<2 x double> %x, i32 %y) { +define <2 x double> @powi_v2f64(<2 x double> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v2f64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; 
RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill @@ -1031,30 +910,20 @@ define <2 x double> @powi_v2f64(<2 x double> %x, i32 %y) { ; RV32-NEXT: vfslide1down.vf v8, v8, fs0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v2f64: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; RV64-NEXT: addi a1, sp, 32 ; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: sext.w s0, a0 @@ -1072,36 +941,26 @@ define <2 x double> @powi_v2f64(<2 x double> %x, i32 %y) { ; RV64-NEXT: vfslide1down.vf v8, v8, fs0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %x, i32 %y) ret <2 x double> %a } declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) -define <4 x double> @powi_v4f64(<4 x double> %x, i32 %y) { +define <4 x double> @powi_v4f64(<4 x double> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v4f64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset fs0, -16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 1 @@ -1157,31 +1016,21 @@ define <4 x double> @powi_v4f64(<4 x double> %x, i32 %y) { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v4f64: ; RV64: # %bb.0: ; RV64-NEXT: addi 
sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset fs0, -24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add a1, sp, a1 @@ -1238,34 +1087,24 @@ define <4 x double> @powi_v4f64(<4 x double> %x, i32 %y) { ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 2 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 64 ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore fs0 ; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> %x, i32 %y) ret <4 x double> %a } declare <4 x double> @llvm.powi.v4f64.i32(<4 x double>, i32) -define <8 x double> @powi_v8f64(<8 x double> %x, i32 %y) { +define <8 x double> @powi_v8f64(<8 x double> %x, i32 %y) nounwind { ; RV32-LABEL: powi_v8f64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -272 -; RV32-NEXT: .cfi_def_cfa_offset 272 ; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: addi s0, sp, 272 -; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub sp, sp, a1 @@ -1327,40 +1166,30 @@ define <8 x double> @powi_v8f64(<8 x double> %x, i32 %y) { ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi sp, s0, -272 -; RV32-NEXT: .cfi_def_cfa sp, 272 ; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore ra -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: .cfi_restore s2 ; RV32-NEXT: addi sp, sp, 272 -; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: powi_v8f64: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -272 -; RV64-NEXT: .cfi_def_cfa_offset 272 ; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: .cfi_offset s2, -24 ; RV64-NEXT: addi s0, sp, 272 -; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub sp, sp, a1 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: addi a1, sp, 240 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s2, a0 -; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: addi a1, sp, 64 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: fld fa0, 120(sp) +; RV64-NEXT: sext.w s2, a0 ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __powidf2 ; RV64-NEXT: fsd fa0, 184(sp) @@ -1411,15 +1240,10 @@ define <8 x double> @powi_v8f64(<8 x double> %x, i32 %y) { ; RV64-NEXT: vsetivli zero, 
8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi sp, s0, -272 -; RV64-NEXT: .cfi_def_cfa sp, 272 ; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore ra -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: .cfi_restore s2 ; RV64-NEXT: addi sp, sp, 272 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %a = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> %x, i32 %y) ret <8 x double> %a From 2576758f21210ec5e3084f93cb060b93cade7719 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Wed, 4 Dec 2024 16:01:32 +0800 Subject: [PATCH 5/7] code formatter --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 07fc3d23b9c38..704f00b73ad00 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2586,7 +2586,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { if (N->getValueType(0).isVector()) - return DAG.UnrollVectorOp(N); + return DAG.UnrollVectorOp(N); SmallVector NewOps(N->ops()); NewOps[1 + OpOffset] = SExtPromotedInteger(N->getOperand(1 + OpOffset)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); From 5abe500b2ae3bda0967f6924bb74302ad1a915e6 Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Fri, 6 Dec 2024 14:37:51 +0800 Subject: [PATCH 6/7] Intrinsic `llvm.ldexp.*` should not be affected. --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 2 +- llvm/test/CodeGen/LoongArch/lasx/fldexp.ll | 142 -- llvm/test/CodeGen/LoongArch/lsx/fldexp.ll | 88 -- .../CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll | 1251 ----------------- 4 files changed, 1 insertion(+), 1482 deletions(-) delete mode 100644 llvm/test/CodeGen/LoongArch/lasx/fldexp.ll delete mode 100644 llvm/test/CodeGen/LoongArch/lsx/fldexp.ll delete mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 704f00b73ad00..d7b8587dc26f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2585,7 +2585,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { : RTLIB::getLDEXP(N->getValueType(0)); if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { - if (N->getValueType(0).isVector()) + if (IsPowI && N->getValueType(0).isVector()) return DAG.UnrollVectorOp(N); SmallVector NewOps(N->ops()); NewOps[1 + OpOffset] = SExtPromotedInteger(N->getOperand(1 + OpOffset)); diff --git a/llvm/test/CodeGen/LoongArch/lasx/fldexp.ll b/llvm/test/CodeGen/LoongArch/lasx/fldexp.ll deleted file mode 100644 index e63812dda11f6..0000000000000 --- a/llvm/test/CodeGen/LoongArch/lasx/fldexp.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s - -declare <8 x float> @llvm.ldexp.v8f32.i32(<8 x float>, i32) - -define <8 x float> @ldexp_v8f32(<8 x float> %va, i32 %b) nounwind { -; CHECK-LABEL: ldexp_v8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -80 -; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -; CHECK-NEXT: 
st.d $fp, $sp, 64 # 8-byte Folded Spill -; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill -; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -; CHECK-NEXT: movgr2fr.w $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7 -; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 80 -; CHECK-NEXT: ret -entry: - %res = call <8 x float> @llvm.ldexp.v8f32.i32(<8 x float> %va, i32 %b) - ret <8 x float> %res -} - -declare <4 x double> @llvm.ldexp.v4f64.i32(<4 x double>, i32) - -define <4 x double> @ldexp_v4f64(<4 x double> %va, i32 %b) nounwind { -; CHECK-LABEL: ldexp_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, 
-80 -; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill -; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill -; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 -; CHECK-NEXT: movgr2fr.d $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexp) -; CHECK-NEXT: movfr2gr.d $a0, $fa0 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 -; CHECK-NEXT: movgr2fr.d $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexp) -; CHECK-NEXT: movfr2gr.d $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 -; CHECK-NEXT: movgr2fr.d $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexp) -; CHECK-NEXT: movfr2gr.d $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2 -; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 -; CHECK-NEXT: movgr2fr.d $fa0, $a0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexp) -; CHECK-NEXT: movfr2gr.d $a0, $fa0 -; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3 -; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 80 -; CHECK-NEXT: ret -entry: - %res = call <4 x double> @llvm.ldexp.v4f64.i32(<4 x double> %va, i32 %b) - ret <4 x double> %res -} diff --git a/llvm/test/CodeGen/LoongArch/lsx/fldexp.ll b/llvm/test/CodeGen/LoongArch/lsx/fldexp.ll deleted file mode 100644 index 84e53c5b9bc0f..0000000000000 --- a/llvm/test/CodeGen/LoongArch/lsx/fldexp.ll +++ /dev/null @@ -1,88 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s - -declare <4 x float> @llvm.ldexp.v4f32.i32(<4 x float>, i32) - -define <4 x float> @ldexp_v4f32(<4 x float> %va, i32 %b) nounwind { -; CHECK-LABEL: ldexp_v4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -48 -; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; 
CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 -; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexpf) -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 -; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 48 -; CHECK-NEXT: ret -entry: - %res = call <4 x float> @llvm.ldexp.v4f32.i32(<4 x float> %va, i32 %b) - ret <4 x float> %res -} - -declare <2 x double> @llvm.ldexp.v2f64.i32(<2 x double>, i32) - -define <2 x double> @ldexp_v2f64(<2 x double> %va, i32 %b) nounwind { -; CHECK-LABEL: ldexp_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -48 -; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexp) -; CHECK-NEXT: movfr2gr.d $a0, $fa0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: bl %plt(ldexp) -; CHECK-NEXT: movfr2gr.d $a0, $fa0 -; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 -; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 48 -; CHECK-NEXT: ret -entry: - %res = call <2 x double> @llvm.ldexp.v2f64.i32(<2 x double> %va, i32 %b) - ret <2 x double> %res -} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll deleted file mode 100644 index 6949875172596..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fldexp.ll +++ /dev/null @@ -1,1251 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV64 - -define <1 x float> @ldexp_v1f32(<1 x float> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v1f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: call ldexpf -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vfmv.s.f v8, fa0 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v1f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: call 
ldexpf -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vfmv.s.f v8, fa0 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret - %a = call <1 x float> @llvm.ldexp.v1f32.i32(<1 x float> %x, i32 %y) - ret <1 x float> %a -} -declare <1 x float> @llvm.ldexp.v1f32.i32(<1 x float>, i32) - -define <2 x float> @ldexp_v2f32(<2 x float> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v2f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 1 -; RV32-NEXT: vfmv.f.s fa0, v9 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fmv.s fs0, fa0 -; RV32-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vfmv.v.f v8, fa0 -; RV32-NEXT: vfslide1down.vf v8, v8, fs0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v2f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: addi a1, sp, 32 -; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s0, a0 -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 1 -; RV64-NEXT: vfmv.f.s fa0, v9 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fmv.s fs0, fa0 -; RV64-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vfmv.v.f v8, fa0 -; RV64-NEXT: vfslide1down.vf v8, v8, fs0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: ret - %a = call <2 x float> @llvm.ldexp.v2f32.i32(<2 x float> %x, i32 %y) - ret <2 x float> %a -} -declare <2 x float> @llvm.ldexp.v2f32.i32(<2 x float>, i32) - -define <3 x float> @ldexp_v3f32(<3 x float> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v3f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 1 -; RV32-NEXT: vfmv.f.s fa0, v9 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fmv.s fs0, fa0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: flw fa0, 16(a0) # 8-byte Folded Reload -; 
RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vfmv.v.f v8, fa0 -; RV32-NEXT: vfslide1down.vf v8, v8, fs0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v3f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s0, a0 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 1 -; RV64-NEXT: vfmv.f.s fa0, v9 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fmv.s fs0, fa0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: flw fa0, 32(a0) # 8-byte Folded Reload -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vfmv.v.f v8, fa0 -; RV64-NEXT: vfslide1down.vf v8, v8, fs0 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: ret - %a = call <3 x float> @llvm.ldexp.v3f32.i32(<3 x float> %x, i32 %y) - ret <3 x float> %a -} -declare <3 x float> @llvm.ldexp.v3f32.i32(<3 x float>, i32) - -define <4 x float> @ldexp_v4f32(<4 x float> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v4f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v8, (a1) # 
Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 1 -; RV32-NEXT: vfmv.f.s fa0, v9 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fmv.s fs0, fa0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: flw fa0, 16(a0) # 8-byte Folded Reload -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vfmv.v.f v8, fa0 -; RV32-NEXT: vfslide1down.vf v8, v8, fs0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v4f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s0, a0 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 1 -; RV64-NEXT: vfmv.f.s fa0, v9 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fmv.s fs0, fa0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: flw fa0, 32(a0) # 8-byte Folded Reload -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vfmv.v.f v8, fa0 -; RV64-NEXT: vfslide1down.vf v8, v8, fs0 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 3 
-; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: ret - %a = call <4 x float> @llvm.ldexp.v4f32.i32(<4 x float> %x, i32 %y) - ret <4 x float> %a -} -declare <4 x float> @llvm.ldexp.v4f32.i32(<4 x float>, i32) - -define <8 x float> @ldexp_v8f32(<8 x float> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v8f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 1 -; RV32-NEXT: vfmv.f.s fa0, v10 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fmv.s fs0, fa0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vfmv.v.f v8, fa0 -; RV32-NEXT: vfslide1down.vf v8, v8, fs0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 4 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 8, 
e32, m2, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 5 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 6 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 7 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexpf -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v8f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s0, a0 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 1 -; RV64-NEXT: vfmv.f.s fa0, v10 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fmv.s fs0, fa0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vfmv.v.f v8, fa0 -; RV64-NEXT: vfslide1down.vf v8, v8, fs0 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; 
RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 3 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 4 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 5 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 6 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 7 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexpf -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: ret - %a = call <8 x float> @llvm.ldexp.v8f32.i32(<8 x float> %x, i32 %y) - ret <8 x float> %a -} -declare <8 x float> @llvm.ldexp.v8f32.i32(<8 x float>, i32) - -define <16 x float> @ldexp_v16f32(<16 x float> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v16f32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -272 -; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill 
-; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill -; RV32-NEXT: addi s0, sp, 272 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: mv s2, a0 -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: flw fa0, 124(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 188(sp) -; RV32-NEXT: flw fa0, 120(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 184(sp) -; RV32-NEXT: flw fa0, 116(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 180(sp) -; RV32-NEXT: flw fa0, 112(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 176(sp) -; RV32-NEXT: flw fa0, 108(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 172(sp) -; RV32-NEXT: flw fa0, 104(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 168(sp) -; RV32-NEXT: flw fa0, 100(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 164(sp) -; RV32-NEXT: flw fa0, 96(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 160(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 128(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 140(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 136(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 132(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 7 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 156(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 6 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 152(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 5 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf -; RV32-NEXT: fsw fa0, 148(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 4 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexpf 
-; RV32-NEXT: fsw fa0, 144(sp) -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: addi sp, s0, -272 -; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 272 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v16f32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -272 -; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill -; RV64-NEXT: addi s0, sp, 272 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: addi a1, sp, 240 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: addi a1, sp, 64 -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vse32.v v8, (a1) -; RV64-NEXT: flw fa0, 124(sp) -; RV64-NEXT: sext.w s2, a0 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 188(sp) -; RV64-NEXT: flw fa0, 120(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 184(sp) -; RV64-NEXT: flw fa0, 116(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 180(sp) -; RV64-NEXT: flw fa0, 112(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 176(sp) -; RV64-NEXT: flw fa0, 108(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 172(sp) -; RV64-NEXT: flw fa0, 104(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 168(sp) -; RV64-NEXT: flw fa0, 100(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 164(sp) -; RV64-NEXT: flw fa0, 96(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 160(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 128(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 3 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 140(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 136(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 132(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 7 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 156(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 6 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; 
RV64-NEXT: fsw fa0, 152(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 5 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 148(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 4 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexpf -; RV64-NEXT: fsw fa0, 144(sp) -; RV64-NEXT: addi a0, sp, 128 -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: addi sp, s0, -272 -; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 272 -; RV64-NEXT: ret - %a = call <16 x float> @llvm.ldexp.v16f32.i32(<16 x float> %x, i32 %y) - ret <16 x float> %a -} -declare <16 x float> @llvm.ldexp.v16f32.i32(<16 x float>, i32) - -define <1 x double> @ldexp_v1f64(<1 x double> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v1f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: call ldexp -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vfmv.s.f v8, fa0 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v1f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: call ldexp -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vfmv.s.f v8, fa0 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret - %a = call <1 x double> @llvm.ldexp.v1f64.i32(<1 x double> %x, i32 %y) - ret <1 x double> %a -} -declare <1 x double> @llvm.ldexp.v1f64.i32(<1 x double>, i32) - -define <2 x double> @ldexp_v2f64(<2 x double> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v2f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 1 -; RV32-NEXT: vfmv.f.s fa0, v9 -; RV32-NEXT: call ldexp -; RV32-NEXT: fmv.d fs0, fa0 -; RV32-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexp -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vfmv.v.f v8, fa0 -; RV32-NEXT: vfslide1down.vf v8, v8, fs0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v2f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; 
RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: addi a1, sp, 32 -; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s0, a0 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 1 -; RV64-NEXT: vfmv.f.s fa0, v9 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexp -; RV64-NEXT: fmv.d fs0, fa0 -; RV64-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexp -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vfmv.v.f v8, fa0 -; RV64-NEXT: vfslide1down.vf v8, v8, fs0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: ret - %a = call <2 x double> @llvm.ldexp.v2f64.i32(<2 x double> %x, i32 %y) - ret <2 x double> %a -} -declare <2 x double> @llvm.ldexp.v2f64.i32(<2 x double>, i32) - -define <4 x double> @ldexp_v4f64(<4 x double> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 1 -; RV32-NEXT: vfmv.f.s fa0, v10 -; RV32-NEXT: call ldexp -; RV32-NEXT: fmv.d fs0, fa0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexp -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vfmv.v.f v8, fa0 -; RV32-NEXT: vfslide1down.vf v8, v8, fs0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexp -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s0 -; RV32-NEXT: call ldexp -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vfslide1down.vf v8, v8, fa0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: addi sp, 
sp, 32 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: sext.w s0, a0 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 1 -; RV64-NEXT: vfmv.f.s fa0, v10 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexp -; RV64-NEXT: fmv.d fs0, fa0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexp -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vfmv.v.f v8, fa0 -; RV64-NEXT: vfslide1down.vf v8, v8, fs0 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexp -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vslidedown.vi v8, v8, 3 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s0 -; RV64-NEXT: call ldexp -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vfslide1down.vf v8, v8, fa0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 -; RV64-NEXT: ret - %a = call <4 x double> @llvm.ldexp.v4f64.i32(<4 x double> %x, i32 %y) - ret <4 x double> %a -} -declare <4 x double> @llvm.ldexp.v4f64.i32(<4 x double>, i32) - -define <8 x double> @ldexp_v8f64(<8 x double> %x, i32 %y) nounwind { -; RV32-LABEL: ldexp_v8f64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -272 -; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill -; RV32-NEXT: addi s0, sp, 272 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: mv s2, a0 -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vse64.v v8, (a0) -; RV32-NEXT: fld fa0, 120(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexp -; RV32-NEXT: fsd fa0, 184(sp) -; RV32-NEXT: fld fa0, 112(sp) -; RV32-NEXT: mv a0, s2 -; 
RV32-NEXT: call ldexp -; RV32-NEXT: fsd fa0, 176(sp) -; RV32-NEXT: fld fa0, 104(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexp -; RV32-NEXT: fsd fa0, 168(sp) -; RV32-NEXT: fld fa0, 96(sp) -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexp -; RV32-NEXT: fsd fa0, 160(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexp -; RV32-NEXT: fsd fa0, 128(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexp -; RV32-NEXT: fsd fa0, 136(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexp -; RV32-NEXT: fsd fa0, 152(sp) -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 2 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call ldexp -; RV32-NEXT: fsd fa0, 144(sp) -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi sp, s0, -272 -; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 272 -; RV32-NEXT: ret -; -; RV64-LABEL: ldexp_v8f64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -272 -; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill -; RV64-NEXT: addi s0, sp, 272 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: addi a1, sp, 240 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: addi a1, sp, 64 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: fld fa0, 120(sp) -; RV64-NEXT: sext.w s2, a0 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexp -; RV64-NEXT: fsd fa0, 184(sp) -; RV64-NEXT: fld fa0, 112(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexp -; RV64-NEXT: fsd fa0, 176(sp) -; RV64-NEXT: fld fa0, 104(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexp -; RV64-NEXT: fsd fa0, 168(sp) -; RV64-NEXT: fld fa0, 96(sp) -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexp -; RV64-NEXT: fsd fa0, 160(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexp -; RV64-NEXT: fsd fa0, 128(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexp -; RV64-NEXT: fsd fa0, 136(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 3 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; 
RV64-NEXT: call ldexp -; RV64-NEXT: fsd fa0, 152(sp) -; RV64-NEXT: addi a0, sp, 240 -; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call ldexp -; RV64-NEXT: fsd fa0, 144(sp) -; RV64-NEXT: addi a0, sp, 128 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi sp, s0, -272 -; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 272 -; RV64-NEXT: ret - %a = call <8 x double> @llvm.ldexp.v8f64.i32(<8 x double> %x, i32 %y) - ret <8 x double> %a -} -declare <8 x double> @llvm.ldexp.v8f64.i32(<8 x double>, i32) From 71a396e9ef7ccbf7521ec43e73f23b63be2ade1a Mon Sep 17 00:00:00 2001 From: yangzhaoxin Date: Tue, 10 Dec 2024 09:45:12 +0800 Subject: [PATCH 7/7] Add comment. --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d7b8587dc26f7..71aba323bf04b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2585,6 +2585,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { : RTLIB::getLDEXP(N->getValueType(0)); if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + // Scalarize vector FPOWI instead of promoting the type. This allows the + // scalar FPOWIs to be visited and converted to libcalls before promoting + // the type. + // FIXME: This should be done in LegalizeVectorOps/LegalizeDAG, but call + // lowering needs the unpromoted EVT. if (IsPowI && N->getValueType(0).isVector()) return DAG.UnrollVectorOp(N); SmallVector NewOps(N->ops());
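For context, a minimal sketch of the lowering this series aims at on a target where i32 is promoted (riscv64 here). The function name below and the commented description of the resulting libcalls are illustrative assumptions, not autogenerated output; the tests checked in by PATCH 1/7 remain the authoritative reference.

define <2 x double> @powi_v2f64_sketch(<2 x double> %x, i32 %y) nounwind {
  %r = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %x, i32 %y)
  ret <2 x double> %r
}
declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)

; With the change in PromoteIntOp_ExpOp, the vector FPOWI is unrolled into two
; scalar FPOWI nodes before the i32 exponent is promoted, so each lane is
; emitted as a __powidf2(double, int) libcall with a sign-extended exponent,
; matching the powi_v2f64 RV64 output in PATCH 1/7.

The ldexp tests removed in PATCH 6/7 are deliberately unaffected: the unroll is guarded by IsPowI, so llvm.ldexp.* keeps its previous promotion path.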