From 055f4291952a15a66fdbd5b2c1545d4753088fed Mon Sep 17 00:00:00 2001
From: Wang Pengcheng
Date: Mon, 28 Oct 2024 12:14:23 +0800
Subject: [PATCH] [RISCV] Set a barrier between mask producer and user of V0

Here we add a scheduling mutation to pre-RA scheduling that adds an
artificial dependency edge between a mask producer and the nearest
previous instruction that uses the V0 register.

This prevents the live intervals of mask registers from overlapping
and, as a consequence, reduces some spills/moves.

The test changes show some improvements as well as some regressions
(more vtype toggles).

Partially fixes #113489.
---
 llvm/lib/Target/RISCV/CMakeLists.txt          |   1 +
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |  11 +
 llvm/lib/Target/RISCV/RISCVTargetMachine.h    |   4 +
 .../RISCV/RISCVVectorMaskDAGMutation.cpp      | 137 ++++++++++
 .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll    | 125 +++++----
 .../RISCV/rvv/fixed-vectors-fmaximum.ll       | 191 +++++++-------
 .../RISCV/rvv/fixed-vectors-fminimum-vp.ll    | 125 +++++----
 .../RISCV/rvv/fixed-vectors-fminimum.ll       | 191 +++++++-------
 ...fixed-vectors-interleaved-access-zve32x.ll |  57 ++---
 .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 240 +++++++++---------
 llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll    | 180 +++++++------
 .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 240 +++++++++---------
 llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll    | 180 +++++++------
 llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll   | 104 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmfeq.ll          |  72 +++---
 llvm/test/CodeGen/RISCV/rvv/vmfge.ll          |  72 +++---
 llvm/test/CodeGen/RISCV/rvv/vmfgt.ll          |  72 +++---
 llvm/test/CodeGen/RISCV/rvv/vmfle.ll          |  72 +++---
 llvm/test/CodeGen/RISCV/rvv/vmflt.ll          |  72 +++---
 llvm/test/CodeGen/RISCV/rvv/vmfne.ll          |  72 +++---
 llvm/test/CodeGen/RISCV/rvv/vmseq.ll          | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmsge.ll          | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll         | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmsgt.ll          | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll         | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmsle.ll          | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmsleu.ll         | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmslt.ll          | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmsltu.ll         | 106 ++++----
 llvm/test/CodeGen/RISCV/rvv/vmsne.ll          | 106 ++++----
 30 files changed, 1555 insertions(+), 1723 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index fd049d1a57860..b95ad9dd428cc 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_target(RISCVCodeGen
   RISCVTargetMachine.cpp
   RISCVTargetObjectFile.cpp
   RISCVTargetTransformInfo.cpp
+  RISCVVectorMaskDAGMutation.cpp
   RISCVVectorPeephole.cpp
   RISCVVLOptimizer.cpp
   RISCVZacasABIFix.cpp
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index fa507653264cc..c5847d8bfacb4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -114,6 +114,11 @@ static cl::opt<bool>
     cl::desc("Enable the RISC-V VL Optimizer pass"), cl::init(false),
     cl::Hidden);
 
+static cl::opt<bool> DisableVectorMaskMutation(
+    "riscv-disable-vector-mask-mutation",
+    cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
+    cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -367,6 +372,12 @@ class RISCVPassConfig : public TargetPassConfig {
       DAG->addMutation(createStoreClusterDAGMutation(
           DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
     }
+
+    const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+    if (!DisableVectorMaskMutation && ST.hasVInstructions()) {
+      DAG = DAG ? DAG : createGenericSchedLive(C);
+      DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));
+    }
     return DAG;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index d73447ef273e4..5506196c3c7e8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -61,6 +61,10 @@ class RISCVTargetMachine : public CodeGenTargetMachineImpl {
                                 SMRange &SourceRange) const override;
   void registerPassBuilderCallbacks(PassBuilder &PB) override;
 };
+
+std::unique_ptr<ScheduleDAGMutation>
+createRISCVVectorMaskDAGMutation(const TargetRegisterInfo *TRI);
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
new file mode 100644
index 0000000000000..c48a97b12e43f
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -0,0 +1,137 @@
+//===- RISCVVectorMaskDAGMutation.cpp - RISC-V Vector Mask DAGMutation ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A schedule mutation that adds an artificial dependency between
+// mask-producing instructions and masked instructions, so that we can reduce
+// the live range overlaps of mask registers.
+//
+// The reason why we need to do this:
+// 1. When tracking register pressure, we don't track physical registers.
+// 2. We have a RegisterClass for mask registers (which is `VMV0`), but we
+//    don't use it in most RVV pseudos (it is only used in inline asm
+//    constraints and add/sub-with-carry instructions). Instead, we use the
+//    physical register V0 directly and insert a `$v0 = COPY ...` before each
+//    use. Also, there is a fundamental issue in the register allocator when
+//    handling a RegisterClass with only one physical register, so we can't
+//    simply replace V0 with VMV0.
+// 3. For mask producers, we are using the VR RegisterClass (any of V0-V31 can
+//    be allocated to it), so even if V0 is not available, there are still 31
+//    other registers to choose from.
+//
+// This means that the RegPressureTracker can't track the pressure of mask
+// registers correctly.
+//
+// This schedule mutation is a workaround to fix this issue.
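+//
+// As a rough illustration (the register numbers here are hypothetical, but
+// the pattern matches the test diffs in this patch), a schedule such as:
+//
+//   v0  = vmfeq.vv ...        ; first mask, written to V0 directly
+//   v10 = vmfeq.vv ...        ; second mask, live across the use of V0 below
+//   ... = vmerge.vvm ..., v0
+//   v0  = vmv1r.v v10         ; extra move to get the second mask into V0
+//   ... = vmerge.vvm ..., v0
+//
+// can become, once the artificial edge forces the second producer to be
+// scheduled after the previous use of V0:
+//
+//   v0  = vmfeq.vv ...
+//   ... = vmerge.vvm ..., v0
+//   v0  = vmfeq.vv ...        ; second mask written to V0 directly, no move
+//   ... = vmerge.vvm ..., v0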
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVRegisterInfo.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
+
+#define DEBUG_TYPE "machine-scheduler"
+
+namespace llvm {
+
+static inline bool isVectorMaskProducer(const MachineInstr *MI) {
+  switch (RISCV::getRVVMCOpcode(MI->getOpcode())) {
+  // Vector Mask Instructions
+  case RISCV::VMAND_MM:
+  case RISCV::VMNAND_MM:
+  case RISCV::VMANDN_MM:
+  case RISCV::VMXOR_MM:
+  case RISCV::VMOR_MM:
+  case RISCV::VMNOR_MM:
+  case RISCV::VMORN_MM:
+  case RISCV::VMXNOR_MM:
+  case RISCV::VMSBF_M:
+  case RISCV::VMSIF_M:
+  case RISCV::VMSOF_M:
+  // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+  case RISCV::VMADC_VV:
+  case RISCV::VMADC_VX:
+  case RISCV::VMADC_VI:
+  case RISCV::VMADC_VVM:
+  case RISCV::VMADC_VXM:
+  case RISCV::VMADC_VIM:
+  case RISCV::VMSBC_VV:
+  case RISCV::VMSBC_VX:
+  case RISCV::VMSBC_VVM:
+  case RISCV::VMSBC_VXM:
+  // Vector Integer Compare Instructions
+  case RISCV::VMSEQ_VV:
+  case RISCV::VMSEQ_VX:
+  case RISCV::VMSEQ_VI:
+  case RISCV::VMSNE_VV:
+  case RISCV::VMSNE_VX:
+  case RISCV::VMSNE_VI:
+  case RISCV::VMSLT_VV:
+  case RISCV::VMSLT_VX:
+  case RISCV::VMSLTU_VV:
+  case RISCV::VMSLTU_VX:
+  case RISCV::VMSLE_VV:
+  case RISCV::VMSLE_VX:
+  case RISCV::VMSLE_VI:
+  case RISCV::VMSLEU_VV:
+  case RISCV::VMSLEU_VX:
+  case RISCV::VMSLEU_VI:
+  case RISCV::VMSGTU_VX:
+  case RISCV::VMSGTU_VI:
+  case RISCV::VMSGT_VX:
+  case RISCV::VMSGT_VI:
+  // Vector Floating-Point Compare Instructions
+  case RISCV::VMFEQ_VV:
+  case RISCV::VMFEQ_VF:
+  case RISCV::VMFNE_VV:
+  case RISCV::VMFNE_VF:
+  case RISCV::VMFLT_VV:
+  case RISCV::VMFLT_VF:
+  case RISCV::VMFLE_VV:
+  case RISCV::VMFLE_VF:
+  case RISCV::VMFGT_VF:
+  case RISCV::VMFGE_VF:
+    return true;
+  }
+  return false;
+}
+
+class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
+private:
+  const TargetRegisterInfo *TRI;
+
+public:
+  RISCVVectorMaskDAGMutation(const TargetRegisterInfo *TRI) : TRI(TRI) {}
+
+  void apply(ScheduleDAGInstrs *DAG) override {
+    SUnit *NearestUseV0SU = nullptr;
+    for (SUnit &SU : DAG->SUnits) {
+      const MachineInstr *MI = SU.getInstr();
+      if (MI->findRegisterUseOperand(RISCV::V0, TRI))
+        NearestUseV0SU = &SU;
+
+      if (NearestUseV0SU && NearestUseV0SU != &SU && isVectorMaskProducer(MI) &&
+          // For LMUL=8 cases, adding this edge makes spills more likely,
+          // so we skip them.
+          // FIXME: We should use the RegPressureTracker for fine-grained
+          // control.
+ RISCVII::getLMul(MI->getDesc().TSFlags) != RISCVII::LMUL_8) + DAG->addEdge(&SU, SDep(NearestUseV0SU, SDep::Artificial)); + } + } +}; + +std::unique_ptr +createRISCVVectorMaskDAGMutation(const TargetRegisterInfo *TRI) { + return std::make_unique(TRI); +} + +} // namespace llvm diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index 9a3838d57a0b0..f43934afc370d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -52,24 +52,24 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -122,24 +122,24 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -194,25 +194,25 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; 
ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -268,25 +268,25 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -318,11 +318,10 @@ define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -352,11 +351,10 @@ define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; 
CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -388,11 +386,10 @@ define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v @@ -424,11 +421,10 @@ define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v @@ -458,11 +454,10 @@ define <2 x double> @vfmax_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -494,11 +489,10 @@ define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -530,11 +524,10 @@ define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) 
ret <8 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll index 900e02876cbe1..e17ad303eddb8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -15,25 +15,25 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v2f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -48,25 +48,25 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v4f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -81,25 +81,25 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: 
vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v8f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -114,25 +114,25 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v16f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -147,11 +147,10 @@ define <2 x float> @vfmax_v2f32_vv(<2 x float> %a, <2 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) ret <2 x float> %v @@ -164,11 +163,10 @@ define <4 x float> @vfmax_v4f32_vv(<4 x float> %a, <4 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, 
v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b) ret <4 x float> %v @@ -181,11 +179,10 @@ define <8 x float> @vfmax_v8f32_vv(<8 x float> %a, <8 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.maximum.v8f32(<8 x float> %a, <8 x float> %b) ret <8 x float> %v @@ -198,11 +195,10 @@ define <16 x float> @vfmax_v16f32_vv(<16 x float> %a, <16 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %a, <16 x float> %b) ret <16 x float> %v @@ -215,11 +211,10 @@ define <2 x double> @vfmax_v2f64_vv(<2 x double> %a, <2 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) ret <2 x double> %v @@ -232,11 +227,10 @@ define <4 x double> @vfmax_v4f64_vv(<4 x double> %a, <4 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %a, <4 x double> %b) ret <4 x double> %v @@ -249,11 +243,10 @@ define <8 x double> @vfmax_v8f64_vv(<8 x double> %a, <8 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.maximum.v8f64(<8 x double> %a, <8 x double> %b) ret <8 x double> %v @@ -302,32 +295,30 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv_nnana: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vfadd.vv v10, v8, v8 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9 -; ZVFH-NEXT: vmfeq.vv v8, v10, v10 -; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0 
-; ZVFH-NEXT: vmv1r.v v0, v8 -; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFH-NEXT: vfmax.vv v8, v11, v8 +; ZVFH-NEXT: vfadd.vv v8, v8, v8 +; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0 +; ZVFH-NEXT: vfmax.vv v8, v10, v8 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11 +; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v11, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 +; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -341,13 +332,12 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv_nnanb: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vfadd.vv v10, v9, v9 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v9, v10, v10 -; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v9 -; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfadd.vv v9, v9, v9 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnanb: @@ -360,12 +350,11 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0 +; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 4a7f888fbced4..7067cc21ab56d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -52,24 +52,24 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 
+; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -122,24 +122,24 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -194,25 +194,25 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: 
vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -268,25 +268,25 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -318,11 +318,10 @@ define <2 x float> @vfmin_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -352,11 +351,10 @@ define <4 x float> @vfmin_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -388,11 +386,10 @@ define <8 x float> @vfmin_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> 
%v @@ -424,11 +421,10 @@ define <16 x float> @vfmin_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v @@ -458,11 +454,10 @@ define <2 x double> @vfmin_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -494,11 +489,10 @@ define <4 x double> @vfmin_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -530,11 +524,10 @@ define <8 x double> @vfmin_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll index db970c89d935c..1362055c4dabf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -15,25 +15,25 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v2f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -48,25 +48,25 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v4f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -81,25 +81,25 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v8f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, 
v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -114,25 +114,25 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v16f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -147,11 +147,10 @@ define <2 x float> @vfmin_v2f32_vv(<2 x float> %a, <2 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) ret <2 x float> %v @@ -164,11 +163,10 @@ define <4 x float> @vfmin_v4f32_vv(<4 x float> %a, <4 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) ret <4 x float> %v @@ -181,11 +179,10 @@ define <8 x float> @vfmin_v8f32_vv(<8 x float> %a, <8 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.minimum.v8f32(<8 x float> %a, <8 x float> %b) ret <8 x float> %v @@ -198,11 +195,10 @@ define <16 x float> @vfmin_v16f32_vv(<16 x float> %a, <16 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma 
; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.minimum.v16f32(<16 x float> %a, <16 x float> %b) ret <16 x float> %v @@ -215,11 +211,10 @@ define <2 x double> @vfmin_v2f64_vv(<2 x double> %a, <2 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b) ret <2 x double> %v @@ -232,11 +227,10 @@ define <4 x double> @vfmin_v4f64_vv(<4 x double> %a, <4 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.minimum.v4f64(<4 x double> %a, <4 x double> %b) ret <4 x double> %v @@ -249,11 +243,10 @@ define <8 x double> @vfmin_v8f64_vv(<8 x double> %a, <8 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.minimum.v8f64(<8 x double> %a, <8 x double> %b) ret <8 x double> %v @@ -302,32 +295,30 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv_nnana: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vfadd.vv v10, v8, v8 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9 -; ZVFH-NEXT: vmfeq.vv v8, v10, v10 -; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v8 -; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFH-NEXT: vfmin.vv v8, v11, v8 +; ZVFH-NEXT: vfadd.vv v8, v8, v8 +; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0 +; ZVFH-NEXT: vfmin.vv v8, v10, v8 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnana: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11 +; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v9, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v11, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 +; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -341,13 +332,12 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv_nnanb: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vfadd.vv v10, v9, v9 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v9, v10, v10 -; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v9 -; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfadd.vv v9, v9, v9 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnanb: @@ -360,12 +350,11 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0 +; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll index 30e41f2f526e5..b65352aed2d52 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll @@ -16,58 +16,53 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-NEXT: ld a7, 72(a0) ; ZVE32X-NEXT: ld a0, 80(a0) ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmv.s.x v9, zero -; ZVE32X-NEXT: vmv.v.i v10, 0 +; ZVE32X-NEXT: vmv.s.x v8, zero +; ZVE32X-NEXT: vmv.v.i v9, 0 ; ZVE32X-NEXT: xor a3, a3, a4 ; ZVE32X-NEXT: xor a1, a1, a2 ; ZVE32X-NEXT: xor a2, a5, a6 ; ZVE32X-NEXT: xor a0, a7, a0 ; ZVE32X-NEXT: snez a3, a3 ; ZVE32X-NEXT: snez a1, a1 -; ZVE32X-NEXT: snez a2, a2 -; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: vmv.s.x v8, a3 +; ZVE32X-NEXT: vmv.s.x v10, a3 ; ZVE32X-NEXT: vmv.s.x v11, a1 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v8, v8, 1 -; ZVE32X-NEXT: vand.vi v11, v11, 1 -; ZVE32X-NEXT: vmsne.vi v0, v8, 0 -; ZVE32X-NEXT: vmsne.vi v8, v11, 0 -; ZVE32X-NEXT: vmerge.vim v11, v9, 1, v0 -; ZVE32X-NEXT: vmv1r.v v0, v8 +; ZVE32X-NEXT: vand.vi v10, v10, 1 +; ZVE32X-NEXT: vmsne.vi v0, v10, 0 +; ZVE32X-NEXT: vand.vi v10, v11, 1 +; ZVE32X-NEXT: vmerge.vim v11, v8, 1, v0 +; ZVE32X-NEXT: vmsne.vi v0, v10, 0 +; ZVE32X-NEXT: snez a1, a2 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmerge.vim v8, v10, 1, v0 +; ZVE32X-NEXT: 
vmerge.vim v10, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 2, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v8, v11, 1 -; ZVE32X-NEXT: vmv.s.x v11, a2 -; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v11, v11, 1 +; ZVE32X-NEXT: vslideup.vi v10, v11, 1 +; ZVE32X-NEXT: vmv.s.x v11, a1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v8, 0 +; ZVE32X-NEXT: vmsne.vi v0, v10, 0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v8, v11, 0 +; ZVE32X-NEXT: vand.vi v10, v11, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmerge.vim v11, v10, 1, v0 -; ZVE32X-NEXT: vmv1r.v v0, v8 +; ZVE32X-NEXT: vmerge.vim v11, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 +; ZVE32X-NEXT: vmsne.vi v0, v10, 0 +; ZVE32X-NEXT: snez a0, a0 +; ZVE32X-NEXT: vmerge.vim v10, v8, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 3, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v11, v8, 2 -; ZVE32X-NEXT: vmv.s.x v8, a0 -; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v8, v8, 1 +; ZVE32X-NEXT: vslideup.vi v11, v10, 2 +; ZVE32X-NEXT: vmv.s.x v10, a0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v11, 0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v8, v8, 0 +; ZVE32X-NEXT: vand.vi v10, v10, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmerge.vim v10, v10, 1, v0 -; ZVE32X-NEXT: vmv1r.v v0, v8 +; ZVE32X-NEXT: vmerge.vim v9, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 -; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vslideup.vi v10, v8, 3 ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 +; ZVE32X-NEXT: vmerge.vim v8, v8, 1, v0 +; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vslideup.vi v9, v8, 3 +; ZVE32X-NEXT: vmsne.vi v0, v9, 0 ; ZVE32X-NEXT: ret ; ; ZVE64X-LABEL: load_large_vector: diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index 734dd5e33c4fc..a1cdbd4be2579 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -22,15 +22,16 @@ define @vfmax_nxv1bf16_vv( %a, @vfmax_nxv2bf16_vv( %a, @vfmax_nxv4bf16_vv( %a, @vfmax_nxv8bf16_vv( %a, @vfmax_nxv1f16_vv( %a, @vfmax_nxv2f16_vv( %a, @vfmax_nxv4f16_vv( %a, @vfmax_nxv8f16_vv( %a, @vfmax_nxv16f16_vv( %a, @vfmax_nxv1f32_vv( %a, @llvm.maximum.nxv1f32( %a, %b) ret %v @@ -591,11 +593,10 @@ define @vfmax_nxv2f32_vv( %a, @llvm.maximum.nxv2f32( %a, %b) ret %v @@ -608,11 +609,10 @@ define @vfmax_nxv4f32_vv( %a, @llvm.maximum.nxv4f32( %a, %b) ret %v @@ -625,11 +625,10 @@ define @vfmax_nxv8f32_vv( %a, @llvm.maximum.nxv8f32( %a, %b) ret %v @@ -659,11 +658,10 @@ define @vfmax_nxv1f64_vv( %a, @llvm.maximum.nxv1f64( %a, %b) ret %v @@ -676,11 +674,10 @@ define @vfmax_nxv2f64_vv( %a, @llvm.maximum.nxv2f64( %a, %b) ret %v @@ -693,11 +690,10 @@ define @vfmax_nxv4f64_vv( %a, @llvm.maximum.nxv4f64( %a, %b) ret %v @@ -755,19 +751,18 @@ define @vfmax_nxv1f16_vv_nnana( %a, @vfmax_nxv1f16_vv_nnanb( %a, @vfmax_vv_nxv1bf16_unmasked( % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v11, v11 -; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: 
vmerge.vvm v8, v11, v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: vfmax.vv v9, v8, v9 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 @@ -91,13 +92,14 @@ define @vfmax_vv_nxv2bf16_unmasked( % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v11, v11 -; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: vfmax.vv v9, v8, v9 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 @@ -139,14 +141,15 @@ define @vfmax_vv_nxv4bf16_unmasked( % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v12, v12 -; CHECK-NEXT: vmerge.vvm v14, v10, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0 -; CHECK-NEXT: vfmax.vv v10, v8, v14 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK-NEXT: vfmax.vv v10, v10, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret @@ -187,14 +190,15 @@ define @vfmax_vv_nxv8bf16_unmasked( % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vmfeq.vv v8, v16, v16 -; CHECK-NEXT: vmerge.vvm v20, v12, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0 -; CHECK-NEXT: vfmax.vv v12, v8, v20 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK-NEXT: vfmax.vv v12, v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret @@ -604,24 +608,24 @@ define @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli 
a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -674,24 +678,24 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -746,25 +750,25 @@ define @vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -820,25 +824,25 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv 
v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -909,11 +913,10 @@ define @vfmax_vv_nxv16f16_unmasked( %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v16, v12, v12 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v20 +; ZVFH-NEXT: vfmax.vv v8, v8, v16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16_unmasked: @@ -1296,11 +1299,10 @@ define @vfmax_vv_nxv1f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1330,11 +1332,10 @@ define @vfmax_vv_nxv2f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1366,11 +1367,10 @@ define @vfmax_vv_nxv4f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1402,11 +1402,10 @@ define @vfmax_vv_nxv8f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, 
ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1436,11 +1435,10 @@ define @vfmax_vv_nxv1f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1472,11 +1470,10 @@ define @vfmax_vv_nxv2f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1508,11 +1505,10 @@ define @vfmax_vv_nxv4f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index 21251ee2f3c63..d41da7b6a2af9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -22,15 +22,16 @@ define @vfmin_nxv1bf16_vv( %a, @vfmin_nxv2bf16_vv( %a, @vfmin_nxv4bf16_vv( %a, @vfmin_nxv8bf16_vv( %a, @vfmin_nxv1f16_vv( %a, @vfmin_nxv2f16_vv( %a, @vfmin_nxv4f16_vv( %a, @vfmin_nxv8f16_vv( %a, @vfmin_nxv16f16_vv( %a, @vfmin_nxv1f32_vv( %a, @llvm.minimum.nxv1f32( %a, %b) ret %v @@ -591,11 +593,10 @@ define @vfmin_nxv2f32_vv( %a, @llvm.minimum.nxv2f32( %a, %b) ret %v @@ -608,11 +609,10 @@ define @vfmin_nxv4f32_vv( %a, @llvm.minimum.nxv4f32( %a, %b) ret %v @@ -625,11 +625,10 @@ define @vfmin_nxv8f32_vv( %a, @llvm.minimum.nxv8f32( %a, %b) ret %v @@ -659,11 +658,10 @@ define @vfmin_nxv1f64_vv( %a, @llvm.minimum.nxv1f64( %a, %b) ret %v @@ -676,11 +674,10 @@ define @vfmin_nxv2f64_vv( %a, @llvm.minimum.nxv2f64( %a, %b) ret %v @@ -693,11 +690,10 @@ define @vfmin_nxv4f64_vv( %a, @llvm.minimum.nxv4f64( %a, %b) ret %v @@ -755,19 +751,18 @@ define @vfmin_nxv1f16_vv_nnana( %a, @vfmin_nxv1f16_vv_nnanb( %a, @vfmin_vv_nxv1bf16_unmasked( % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; 
CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v11, v11 -; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: vfmin.vv v9, v8, v9 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 @@ -91,13 +92,14 @@ define @vfmin_vv_nxv2bf16_unmasked( % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v11, v11 -; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: vfmin.vv v9, v8, v9 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 @@ -139,14 +141,15 @@ define @vfmin_vv_nxv4bf16_unmasked( % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v12, v12 -; CHECK-NEXT: vmerge.vvm v14, v10, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0 -; CHECK-NEXT: vfmin.vv v10, v8, v14 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK-NEXT: vfmin.vv v10, v10, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret @@ -187,14 +190,15 @@ define @vfmin_vv_nxv8bf16_unmasked( % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vmfeq.vv v8, v16, v16 -; CHECK-NEXT: vmerge.vvm v20, v12, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0 -; CHECK-NEXT: vfmin.vv v12, v8, v20 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK-NEXT: vfmin.vv v12, v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret @@ -604,24 +608,24 @@ define @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: 
vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -674,24 +678,24 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -746,25 +750,25 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; 
ZVFHMIN-NEXT: ret @@ -820,25 +824,25 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -909,11 +913,10 @@ define @vfmin_vv_nxv16f16_unmasked( %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v16, v12, v12 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v20 +; ZVFH-NEXT: vfmin.vv v8, v8, v16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16_unmasked: @@ -1296,11 +1299,10 @@ define @vfmin_vv_nxv1f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1330,11 +1332,10 @@ define @vfmin_vv_nxv2f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1366,11 +1367,10 @@ define @vfmin_vv_nxv4f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv4f32( 
%va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1402,11 +1402,10 @@ define @vfmin_vv_nxv8f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1436,11 +1435,10 @@ define @vfmin_vv_nxv1f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1472,11 +1470,10 @@ define @vfmin_vv_nxv2f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -1508,11 +1505,10 @@ define @vfmin_vv_nxv4f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll index 371055704c090..03b090def5119 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -12,15 +12,14 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 ; CHECK-NEXT: ret %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y) ret <2 x i64> %tmp @@ -32,14 +31,13 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; 
CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp @@ -51,14 +49,13 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 ; CHECK-NEXT: ret %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) ret <8 x i16> %tmp @@ -70,14 +67,13 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 ; CHECK-NEXT: ret %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y) ret <16 x i8> %tmp @@ -94,15 +90,14 @@ define @vec_nxv2i64( %x, ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v10, v14, v10 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv2i64( %x, %y) ret %tmp @@ -114,14 +109,13 @@ define @vec_nxv4i32( %x, ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; 
CHECK-NEXT: vsra.vv v10, v14, v10 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv4i32( %x, %y) ret %tmp @@ -133,14 +127,13 @@ define @vec_nxv8i16( %x, ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v10, v14, v10 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv8i16( %x, %y) ret %tmp @@ -152,14 +145,13 @@ define @vec_nxv16i8( %x, ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v10, v14, v10 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv16i8( %x, %y) ret %tmp diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll index 9ca78c872befd..3ebfc68ddee4b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfeq.mask.nxv1f16( define @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfeq.mask.nxv2f16( define @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfeq.mask.nxv4f16( define @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare 
<vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16(
 define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v10
+; CHECK-NEXT:    vmfeq.vv v14, v10, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfeq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16(
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16(
 define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v20, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v20, v8, v12
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v12
+; CHECK-NEXT:    vmfeq.vv v20, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v20
-; CHECK-NEXT:    vmfeq.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16(
@@ -294,10 +289,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f32(
 define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v8, v8, v9
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfeq.vv v0, v8, v9
 ; CHECK-NEXT:    vmfeq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
@@ -346,10 +340,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f32(
 define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmfeq.vv v8, v8, v9
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfeq.vv v0, v8, v9
 ; CHECK-NEXT:    vmfeq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v11
 ; CHECK-NEXT:    ret
@@ -398,12 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32(
 define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vv v14, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v10
+; CHECK-NEXT:    vmfeq.vv v14, v10, v12, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v14
-; CHECK-NEXT:    vmfeq.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32(
@@ -450,12 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32(
 define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v20, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vv v20, v8, v12
-; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vmfeq.vv v0, v8, v12
+; CHECK-NEXT:    vmfeq.vv v20, v12, v16, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v20
-; CHECK-NEXT:    vmfeq.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
 entry:
   %mask = call <vscale x 8 x i1> 
@llvm.riscv.vmfeq.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfeq.mask.nxv1f64( define @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfeq.mask.nxv2f64( define @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfeq.mask.nxv4f64( define @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll index 7cf18a7015812..e041e5874a8dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f16( define @intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfge.mask.nxv2f16( define @intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfge.mask.nxv4f16( define @intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, 
ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfge.mask.nxv8f16( define @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfge.mask.nxv16f16( define @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f32( define @intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfge.mask.nxv2f32( define @intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfge.mask.nxv4f32( define @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfge.mask.nxv8f32( define @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v20, 
v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f64( define @intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfge.mask.nxv2f64( define @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfge.mask.nxv4f64( define @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll index b78f2da4ae254..0faaf4ebf255d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f16( define @intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfgt.mask.nxv2f16( define @intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfgt.mask.nxv4f16( define @intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, 
a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfgt.mask.nxv8f16( define @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfgt.mask.nxv16f16( define @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f32( define @intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfgt.mask.nxv2f32( define @intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfgt.mask.nxv4f32( define @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfgt.mask.nxv8f32( define @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli 
zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f64( define @intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfgt.mask.nxv2f64( define @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfgt.mask.nxv4f64( define @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll index 940e4d043f63f..ef5de6bc3481f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f16( define @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfle.mask.nxv2f16( define @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfle.mask.nxv4f16( define 
 define @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare @llvm.riscv.vmfle.mask.nxv8f16(
 define @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfle.nxv8f16(
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmfle.mask.nxv16f16(
 define @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfle.nxv16f16(
@@ -294,10 +289,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f32(
 define @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmfle.mask.nxv2f32(
 define @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare @llvm.riscv.vmfle.mask.nxv4f32(
 define @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfle.nxv4f32(
@@ -450,12 +442,11 @@ declare @llvm.riscv.vmfle.mask.nxv8f32(
 define @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfle.nxv8f32(
@@ -502,10 +493,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f64(
 define @intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmfle.mask.nxv2f64(
 define @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfle.nxv2f64(
@@ -606,12 +595,11 @@ declare @llvm.riscv.vmfle.mask.nxv4f64(
 define @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfle.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
index 10ddfb8f014ed..0b7740d5e0045 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f16(
 define @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmflt.mask.nxv2f16(
 define @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmflt.mask.nxv4f16(
 define @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare @llvm.riscv.vmflt.mask.nxv8f16(
 define @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmflt.nxv8f16(
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmflt.mask.nxv16f16(
 define @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmflt.nxv16f16(
@@ -294,10 +289,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f32(
 define @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmflt.mask.nxv2f32(
 define @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare @llvm.riscv.vmflt.mask.nxv4f32(
 define @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmflt.nxv4f32(
@@ -450,12 +442,11 @@ declare @llvm.riscv.vmflt.mask.nxv8f32(
 define @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmflt.nxv8f32(
@@ -502,10 +493,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f64(
 define @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmflt.mask.nxv2f64(
 define @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmflt.nxv2f64(
@@ -606,12 +595,11 @@ declare @llvm.riscv.vmflt.mask.nxv4f64(
 define @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmflt.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
index 4d8a95de1d3de..65a04e504a973 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f16(
 define @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmfne.mask.nxv2f16(
 define @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmfne.mask.nxv4f16(
 define @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare @llvm.riscv.vmfne.mask.nxv8f16(
 define @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfne.nxv8f16(
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmfne.mask.nxv16f16(
 define @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfne.nxv16f16(
@@ -294,10 +289,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f32(
 define @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmfne.mask.nxv2f32(
 define @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare @llvm.riscv.vmfne.mask.nxv4f32(
 define @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfne.nxv4f32(
@@ -450,12 +442,11 @@ declare @llvm.riscv.vmfne.mask.nxv8f32(
 define @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfne.nxv8f32(
@@ -502,10 +493,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f64(
 define @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmfne.mask.nxv2f64(
 define @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfne.nxv2f64(
@@ -606,12 +595,11 @@ declare @llvm.riscv.vmfne.mask.nxv4f64(
 define @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmfne.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
index c5769e0d1e519..1fd2383c40d18 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i8(
 define @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i8(
 define @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmseq.mask.nxv4i8(
 define @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmseq.mask.nxv8i8(
 define @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmseq.mask.nxv16i8(
 define @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v10
+; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmseq.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmseq.mask.nxv32i8(
 define @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v12
+; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmseq.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i16(
 define @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i16(
 define @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmseq.mask.nxv4i16(
 define @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmseq.mask.nxv8i16(
 define @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v10
+; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmseq.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmseq.mask.nxv16i16(
 define @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v12
+; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmseq.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i32(
 define @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i32(
 define @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare @llvm.riscv.vmseq.mask.nxv4i32(
 define @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v10
+; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmseq.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmseq.mask.nxv8i32(
 define @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v12
+; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmseq.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i64(
 define @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmseq.mask.nxv2i64(
 define @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v10
+; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmseq.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmseq.mask.nxv4i64(
 define @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v12
+; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmseq.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
index e6d775dee5b22..2dc133d169f0a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i8(
 define @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i8(
 define @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsge.mask.nxv4i8(
 define @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmsge.mask.nxv8i8(
 define @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmsge.mask.nxv16i8(
 define @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v10, v8
+; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsge.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmsge.mask.nxv32i8(
 define @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v12, v8
+; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsge.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i16(
 define @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i16(
 define @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmsge.mask.nxv4i16(
 define @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmsge.mask.nxv8i16(
 define @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v10, v8
+; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsge.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmsge.mask.nxv16i16(
 define @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v12, v8
+; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsge.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i32(
 define @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i32(
 define @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare @llvm.riscv.vmsge.mask.nxv4i32(
 define @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v10, v8
+; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsge.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmsge.mask.nxv8i32(
 define @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v12, v8
+; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsge.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i64(
 define @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmsge.mask.nxv2i64(
 define @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v10, v8
+; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsge.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmsge.mask.nxv4i64(
 define @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v12, v8
+; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsge.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
index 68aa912ae42ea..69a3835cd4d67 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i8(
 define @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i8(
 define @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i8(
 define @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i8(
 define @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i8(
 define @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v10, v8
+; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgeu.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv32i8(
 define @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v12, v8
+; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgeu.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i16(
 define @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i16(
 define @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i16(
 define @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i16(
 define @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v10, v8
+; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgeu.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i16(
 define @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v12, v8
+; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgeu.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i32(
 define @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i32(
 define @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i32(
 define @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v10, v8
+; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgeu.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i32(
 define @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v12, v8
+; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgeu.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i64(
 define @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i64(
 define @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v10, v8
+; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgeu.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i64(
 define @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v12, v8
+; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgeu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
index 98a03a2c56280..d7dee2e1bc580 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i8(
 define @intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgt.mask.nxv2i8(
 define @intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgt.mask.nxv4i8(
 define @intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmsgt.mask.nxv8i8(
 define @intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgt.mask.nxv16i8(
 define @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v10, v8
+; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgt.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgt.mask.nxv32i8(
 define @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v12, v8
+; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgt.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i16(
 define @intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmsgt.mask.nxv2i16(
 define @intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgt.mask.nxv4i16(
 define @intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgt.mask.nxv8i16(
 define @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v10, v8
+; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgt.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgt.mask.nxv16i16(
 define @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v12, v8
+; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgt.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i32(
 define @intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmsgt.mask.nxv2i32(
 define @intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
 define @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v10, v8
+; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgt.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgt.mask.nxv8i32(
 define @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v12, v8
+; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgt.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i64(
 define @intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgt.mask.nxv2i64(
 define @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v10, v8
+; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgt.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgt.mask.nxv4i64(
 define @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v12, v8
+; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgt.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
index 69b22573c289e..fe9d522f6b401 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i8(
 define @intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i8(
 define @intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i8(
 define @intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i8(
 define @intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i8(
 define @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v10, v8
+; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgtu.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv32i8(
 define @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v12, v8
+; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgtu.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i16(
 define @intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i16(
 define @intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i16(
 define @intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i16(
 define @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v10, v8
+; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgtu.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i16(
 define @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v12, v8
+; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgtu.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i32(
 define @intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i32(
 define @intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i32(
 define @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v10, v8
+; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgtu.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i32(
 define @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v12, v8
+; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgtu.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i64(
 define @intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i64(
 define @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v10, v8
+; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgtu.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i64(
 define @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v12, v8
+; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsgtu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
index c8794e1b63900..bc98b31957b25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i8(
 define @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i8(
 define @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsle.mask.nxv4i8(
 define @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmsle.mask.nxv8i8(
 define @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmsle.mask.nxv16i8(
 define @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsle.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmsle.mask.nxv32i8(
 define @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsle.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i16(
 define @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i16(
 define @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmsle.mask.nxv4i16(
 define @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmsle.mask.nxv8i16(
 define @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsle.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmsle.mask.nxv16i16(
 define @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsle.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i32(
 define @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i32(
 define @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare @llvm.riscv.vmsle.mask.nxv4i32(
 define @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsle.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmsle.mask.nxv8i32(
 define @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsle.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i64(
 define @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmsle.mask.nxv2i64(
 define @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsle.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmsle.mask.nxv4i64(
 define @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsle.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
index 86dc48d51cc2b..731989cfe15d9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i8(
 define @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i8(
 define @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsleu.mask.nxv4i8(
 define @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmsleu.mask.nxv8i8(
 define @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmsleu.mask.nxv16i8(
 define @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v10
+; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsleu.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmsleu.mask.nxv32i8(
 define @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v12
+; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsleu.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i16(
 define @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i16(
 define @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmsleu.mask.nxv4i16(
 define @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmsleu.mask.nxv8i16(
 define @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v10
+; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsleu.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmsleu.mask.nxv16i16(
 define @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v12
+; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsleu.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i32(
 define @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i32(
 define @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare @llvm.riscv.vmsleu.mask.nxv4i32(
 define @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v10
+; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsleu.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmsleu.mask.nxv8i32(
 define @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v12
+; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsleu.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i64(
 define @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmsleu.mask.nxv2i64(
 define @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v10
+; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsleu.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmsleu.mask.nxv4i64(
 define @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v12
+; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsleu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
index 8d57f2adc5386..407f85b4f5996 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i8(
 define @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i8(
 define @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmslt.mask.nxv4i8(
 define @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmslt.mask.nxv8i8(
 define @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmslt.mask.nxv16i8(
 define @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v10
+; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmslt.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmslt.mask.nxv32i8(
 define @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v12
+; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmslt.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i16(
 define @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i16(
 define @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmslt.mask.nxv4i16(
 define @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmslt.mask.nxv8i16(
 define @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v10
+; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmslt.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmslt.mask.nxv16i16(
 define @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v12
+; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmslt.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i32(
 define @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i32(
 define @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare @llvm.riscv.vmslt.mask.nxv4i32(
 define @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v10
+; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmslt.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmslt.mask.nxv8i32(
 define @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v12
+; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmslt.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i64(
 define @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmslt.mask.nxv2i64(
 define @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v10
+; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmslt.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmslt.mask.nxv4i64(
 define @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v12
+; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmslt.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
index 627b594308713..e051b332018fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i8(
 define @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i8(
 define @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsltu.mask.nxv4i8(
 define @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmsltu.mask.nxv8i8(
 define @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmsltu.mask.nxv16i8(
 define @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v10
+; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsltu.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmsltu.mask.nxv32i8(
 define @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v12
+; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsltu.nxv32i8(
@@ -346,10 +340,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i16(
 define @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i16(
 define @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare @llvm.riscv.vmsltu.mask.nxv4i16(
 define @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare @llvm.riscv.vmsltu.mask.nxv8i16(
 define @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v10
+; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsltu.nxv8i16(
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmsltu.mask.nxv16i16(
 define @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v12
+; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsltu.nxv16i16(
@@ -606,10 +595,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i32(
 define @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i32(
 define @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare @llvm.riscv.vmsltu.mask.nxv4i32(
 define @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v10
+; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsltu.nxv4i32(
@@ -762,12 +748,11 @@ declare @llvm.riscv.vmsltu.mask.nxv8i32(
 define @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v12
+; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsltu.nxv8i32(
@@ -814,10 +799,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i64(
 define @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare @llvm.riscv.vmsltu.mask.nxv2i64(
 define @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v10
+; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsltu.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmsltu.mask.nxv4i64(
 define @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v12
+; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsltu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
index 47d1048f46cab..1e21b847ed20d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i8(
 define @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i8(
 define @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsne.mask.nxv4i8(
 define @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare @llvm.riscv.vmsne.mask.nxv8i8(
 define @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v11
 ; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare @llvm.riscv.vmsne.mask.nxv16i8(
 define @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v10
+; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: ret
 entry:
   %mask = call @llvm.riscv.vmsne.nxv16i8(
@@ -294,12 +289,11 @@ declare @llvm.riscv.vmsne.mask.nxv32i8(
 define @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v12
+; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v20
v20 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i16( define @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i16( define @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsne.mask.nxv4i16( define @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsne.mask.nxv8i16( define @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsne.mask.nxv16i16( define @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i32( define @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; 
CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i32( define @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsne.mask.nxv4i32( define @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsne.mask.nxv8i32( define @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i64( define @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsne.mask.nxv2i64( define @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsne.mask.nxv4i64( define @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i64(