diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e8d1ac1d3a916..87c839b458e6b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -11549,30 +11549,32 @@ bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V,
                              unsigned OperandNo) {
   // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity().
   // TODO: Target-specific opcodes could be added.
-  if (auto *Const = isConstOrConstSplat(V)) {
+  if (auto *ConstV = isConstOrConstSplat(V, /*AllowUndefs*/ false,
+                                         /*AllowTruncation*/ true)) {
+    APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits());
     switch (Opcode) {
     case ISD::ADD:
     case ISD::OR:
     case ISD::XOR:
     case ISD::UMAX:
-      return Const->isZero();
+      return Const.isZero();
     case ISD::MUL:
-      return Const->isOne();
+      return Const.isOne();
     case ISD::AND:
     case ISD::UMIN:
-      return Const->isAllOnes();
+      return Const.isAllOnes();
     case ISD::SMAX:
-      return Const->isMinSignedValue();
+      return Const.isMinSignedValue();
     case ISD::SMIN:
-      return Const->isMaxSignedValue();
+      return Const.isMaxSignedValue();
     case ISD::SUB:
     case ISD::SHL:
     case ISD::SRA:
     case ISD::SRL:
-      return OperandNo == 1 && Const->isZero();
+      return OperandNo == 1 && Const.isZero();
     case ISD::UDIV:
     case ISD::SDIV:
-      return OperandNo == 1 && Const->isOne();
+      return OperandNo == 1 && Const.isOne();
     }
   } else if (auto *ConstFP = isConstOrConstSplatFP(V)) {
     switch (Opcode) {
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
index bafa92e06834a..65d0768c60885 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
@@ -18,14 +18,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
 ; RV32-NEXT:    vmsne.vi v0, v8, 0
 ; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV32-NEXT:    vand.vv v8, v11, v8
+; RV32-NEXT:    vmerge.vvm v8, v8, v11, v0
 ; RV32-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV32-NEXT:    vmv.x.s a1, v8
 ; RV32-NEXT:    sub a0, a0, a1
-; RV32-NEXT:    lui a1, 16
-; RV32-NEXT:    addi a1, a1, -1
-; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 16
+; RV32-NEXT:    srli a0, a0, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ctz_nxv4i32:
@@ -41,14 +39,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
 ; RV64-NEXT:    vmsne.vi v0, v8, 0
 ; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV64-NEXT:    vand.vv v8, v11, v8
+; RV64-NEXT:    vmerge.vvm v8, v8, v11, v0
 ; RV64-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64-NEXT:    vmv.x.s a1, v8
-; RV64-NEXT:    sub a0, a0, a1
-; RV64-NEXT:    lui a1, 16
-; RV64-NEXT:    addiw a1, a1, -1
-; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    subw a0, a0, a1
+; RV64-NEXT:    slli a0, a0, 48
+; RV64-NEXT:    srli a0, a0, 48
 ; RV64-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32> %a, i1 0)
   ret i32 %res
@@ -158,8 +154,7 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    vmadd.vx v16, a1, v8
 ; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV64-NEXT:    vand.vv v8, v16, v8
+; RV64-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; RV64-NEXT:    vredmaxu.vs v8, v8, v8
 ; RV64-NEXT:    vmv.x.s a1, v8
 ; RV64-NEXT:    subw a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll
new file mode 100644
index 0000000000000..3a8d08f306a51
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+; The following binop x, (zext i1) tests will be vector-legalized into a vselect
+; of two splat_vectors, but on RV64 the splat value will be implicitly
+; truncated:
+;
+;   t15: nxv2i32 = splat_vector Constant:i64<1>
+;   t13: nxv2i32 = splat_vector Constant:i64<0>
+;   t16: nxv2i32 = vselect t2, t15, t13
+;   t7: nxv2i32 = add t4, t16
+;
+; Make sure that foldSelectWithIdentityConstant in DAGCombiner.cpp handles the
+; truncating splat, so we pull the vselect back and fold it into a mask.
+
+define <vscale x 2 x i32> @i1_zext_add(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_add:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_add_commuted(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_add_commuted:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %zext, %b
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_sub(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_sub:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %sub = sub <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %sub
+}
+
+define <vscale x 2 x i32> @i1_zext_or(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_or:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vor.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %or = or <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %or
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index e56dca0732bb4..a14ce71726153 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -149,49 +149,49 @@ define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y
 }
 
 define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
-; RV32-LABEL: vwop_vscale_sext_i1i32_multiple_users:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
-; RV32-NEXT:    vlm.v v8, (a0)
-; RV32-NEXT:    vlm.v v9, (a1)
-; RV32-NEXT:    vlm.v v10, (a2)
-; RV32-NEXT:    vmv.v.i v11, 0
-; RV32-NEXT:    vmv.v.v v0, v8
-; RV32-NEXT:    vmerge.vim v12, v11, -1, v0
-; RV32-NEXT:    vmv.v.v v0, v9
-; RV32-NEXT:    vmerge.vim v9, v11, -1, v0
-; RV32-NEXT:    vmv.v.v v0, v10
-; RV32-NEXT:    vmerge.vim v10, v11, -1, v0
-; RV32-NEXT:    vmul.vv v9, v12, v9
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vsub.vv v11, v12, v10
-; RV32-NEXT:    vmv.v.v v0, v8
-; RV32-NEXT:    vsub.vx v10, v10, a0, v0.t
-; RV32-NEXT:    vor.vv v8, v9, v10
-; RV32-NEXT:    vor.vv v8, v8, v11
-; RV32-NEXT:    ret
+; NO_FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
+; NO_FOLDING:       # %bb.0:
+; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
+; NO_FOLDING-NEXT:    vlm.v v8, (a0)
+; NO_FOLDING-NEXT:    vlm.v v9, (a1)
+; NO_FOLDING-NEXT:    vlm.v v10, (a2)
+; NO_FOLDING-NEXT:    vmv.v.i v11, 0
+; NO_FOLDING-NEXT:    vmv.v.v v0, v8
+; NO_FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
+; NO_FOLDING-NEXT:    vmv.v.v v0, v9
+; NO_FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
+; NO_FOLDING-NEXT:    vmv.v.v v0, v10
+; NO_FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
+; NO_FOLDING-NEXT:    vmul.vv v9, v12, v9
+; NO_FOLDING-NEXT:    li a0, 1
+; NO_FOLDING-NEXT:    vsub.vv v11, v12, v10
+; NO_FOLDING-NEXT:    vmv.v.v v0, v8
+; NO_FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
+; NO_FOLDING-NEXT:    vor.vv v8, v9, v10
+; NO_FOLDING-NEXT:    vor.vv v8, v8, v11
+; NO_FOLDING-NEXT:    ret
 ;
-; RV64-LABEL: vwop_vscale_sext_i1i32_multiple_users:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
-; RV64-NEXT:    vlm.v v8, (a0)
-; RV64-NEXT:    vlm.v v9, (a1)
-; RV64-NEXT:    vlm.v v10, (a2)
-; RV64-NEXT:    vmv.v.i v11, 0
-; RV64-NEXT:    vmv.v.v v0, v8
-; RV64-NEXT:    vmerge.vim v12, v11, -1, v0
-; RV64-NEXT:    vmv.v.v v0, v9
-; RV64-NEXT:    vmerge.vim v9, v11, -1, v0
-; RV64-NEXT:    vmv.v.v v0, v10
-; RV64-NEXT:    vmerge.vim v10, v11, -1, v0
-; RV64-NEXT:    vmul.vv v9, v12, v9
-; RV64-NEXT:    vmv.v.v v0, v8
-; RV64-NEXT:    vmerge.vim v8, v11, 1, v0
-; RV64-NEXT:    vsub.vv v8, v10, v8
-; RV64-NEXT:    vsub.vv v10, v12, v10
-; RV64-NEXT:    vor.vv v8, v9, v8
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    ret
+; FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
+; FOLDING:       # %bb.0:
+; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
+; FOLDING-NEXT:    vlm.v v8, (a0)
+; FOLDING-NEXT:    vlm.v v9, (a1)
+; FOLDING-NEXT:    vlm.v v10, (a2)
+; FOLDING-NEXT:    vmv.v.i v11, 0
+; FOLDING-NEXT:    vmv.v.v v0, v8
+; FOLDING-NEXT:    vmerge.vim v12, v11, -1, v0
+; FOLDING-NEXT:    vmv.v.v v0, v9
+; FOLDING-NEXT:    vmerge.vim v9, v11, -1, v0
+; FOLDING-NEXT:    vmv.v.v v0, v10
+; FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
+; FOLDING-NEXT:    vmul.vv v9, v12, v9
+; FOLDING-NEXT:    li a0, 1
+; FOLDING-NEXT:    vsub.vv v11, v12, v10
+; FOLDING-NEXT:    vmv.v.v v0, v8
+; FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
+; FOLDING-NEXT:    vor.vv v8, v9, v10
+; FOLDING-NEXT:    vor.vv v8, v8, v11
+; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i1>, ptr %x
   %b = load <vscale x 2 x i1>, ptr %y
   %b2 = load <vscale x 2 x i1>, ptr %z
@@ -209,7 +209,7 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
 define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
 ; NO_FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
 ; NO_FOLDING:       # %bb.0:
-; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
 ; NO_FOLDING-NEXT:    vlm.v v8, (a0)
 ; NO_FOLDING-NEXT:    vlm.v v9, (a1)
 ; NO_FOLDING-NEXT:    vlm.v v10, (a2)
@@ -221,17 +221,17 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
 ; NO_FOLDING-NEXT:    vmv1r.v v0, v10
 ; NO_FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
 ; NO_FOLDING-NEXT:    vmul.vv v9, v12, v9
+; NO_FOLDING-NEXT:    li a0, 1
+; NO_FOLDING-NEXT:    vsub.vv v11, v12, v10
 ; NO_FOLDING-NEXT:    vmv1r.v v0, v8
-; NO_FOLDING-NEXT:    vmerge.vim v8, v11, 1, v0
-; NO_FOLDING-NEXT:    vsub.vv v8, v10, v8
-; NO_FOLDING-NEXT:    vsub.vv v10, v12, v10
-; NO_FOLDING-NEXT:    vor.vv v8, v9, v8
-; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
+; NO_FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
+; NO_FOLDING-NEXT:    vor.vv v8, v9, v10
+; NO_FOLDING-NEXT:    vor.vv v8, v8, v11
 ; NO_FOLDING-NEXT:    ret
 ;
 ; FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
 ; FOLDING-NEXT:    vlm.v v8, (a0)
 ; FOLDING-NEXT:    vlm.v v9, (a1)
 ; FOLDING-NEXT:    vlm.v v10, (a2)
@@ -243,12 +243,12 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
 ; FOLDING-NEXT:    vmv1r.v v0, v10
 ; FOLDING-NEXT:    vmerge.vim v10, v11, -1, v0
 ; FOLDING-NEXT:    vmul.vv v9, v12, v9
+; FOLDING-NEXT:    li a0, 1
+; FOLDING-NEXT:    vsub.vv v11, v12, v10
 ; FOLDING-NEXT:    vmv1r.v v0, v8
-; FOLDING-NEXT:    vmerge.vim v8, v11, 1, v0
-; FOLDING-NEXT:    vsub.vv v8, v10, v8
-; FOLDING-NEXT:    vsub.vv v10, v12, v10
-; FOLDING-NEXT:    vor.vv v8, v9, v8
-; FOLDING-NEXT:    vor.vv v8, v8, v10
+; FOLDING-NEXT:    vsub.vx v10, v10, a0, v0.t
+; FOLDING-NEXT:    vor.vv v8, v9, v10
+; FOLDING-NEXT:    vor.vv v8, v8, v11
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i1>, ptr %x
   %b = load <vscale x 2 x i1>, ptr %y
@@ -444,41 +444,39 @@ define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y
 }
 
 define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
-; RV32-LABEL: vwop_vscale_zext_i1i32_multiple_users:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
-; RV32-NEXT:    vlm.v v0, (a0)
-; RV32-NEXT:    vlm.v v8, (a2)
-; RV32-NEXT:    vlm.v v9, (a1)
-; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    vmerge.vim v11, v10, 1, v0
-; RV32-NEXT:    vmv.v.v v0, v8
-; RV32-NEXT:    vmerge.vim v8, v10, 1, v0
-; RV32-NEXT:    vadd.vv v10, v11, v8
-; RV32-NEXT:    vsub.vv v8, v11, v8
-; RV32-NEXT:    vmv.v.v v0, v9
-; RV32-NEXT:    vor.vv v10, v10, v11, v0.t
-; RV32-NEXT:    vor.vv v8, v10, v8
-; RV32-NEXT:    ret
+; NO_FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
+; NO_FOLDING:       # %bb.0:
+; NO_FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
+; NO_FOLDING-NEXT:    vlm.v v0, (a0)
+; NO_FOLDING-NEXT:    vlm.v v8, (a2)
+; NO_FOLDING-NEXT:    vlm.v v9, (a1)
+; NO_FOLDING-NEXT:    vmv.v.i v10, 0
+; NO_FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
+; NO_FOLDING-NEXT:    vmv.v.v v0, v8
+; NO_FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
+; NO_FOLDING-NEXT:    vadd.vv v10, v11, v8
+; NO_FOLDING-NEXT:    vsub.vv v8, v11, v8
+; NO_FOLDING-NEXT:    vmv.v.v v0, v9
+; NO_FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
+; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
+; NO_FOLDING-NEXT:    ret
 ;
-; RV64-LABEL: vwop_vscale_zext_i1i32_multiple_users:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
-; RV64-NEXT:    vlm.v v0, (a0)
-; RV64-NEXT:    vlm.v v8, (a1)
-; RV64-NEXT:    vlm.v v9, (a2)
-; RV64-NEXT:    vmv.v.i v10, 0
-; RV64-NEXT:    vmerge.vim v11, v10, 1, v0
-; RV64-NEXT:    vmv.v.v v0, v8
-; RV64-NEXT:    vmerge.vim v8, v10, 1, v0
-; RV64-NEXT:    vmv.v.v v0, v9
-; RV64-NEXT:    vmerge.vim v9, v10, 1, v0
-; RV64-NEXT:    vmul.vv v8, v11, v8
-; RV64-NEXT:    vadd.vv v10, v11, v9
-; RV64-NEXT:    vsub.vv v9, v11, v9
-; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    ret
+; FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
+; FOLDING:       # %bb.0:
+; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, mu
+; FOLDING-NEXT:    vlm.v v0, (a0)
+; FOLDING-NEXT:    vlm.v v8, (a2)
+; FOLDING-NEXT:    vlm.v v9, (a1)
+; FOLDING-NEXT:    vmv.v.i v10, 0
+; FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
+; FOLDING-NEXT:    vmv.v.v v0, v8
+; FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
+; FOLDING-NEXT:    vadd.vv v10, v11, v8
+; FOLDING-NEXT:    vsub.vv v8, v11, v8
+; FOLDING-NEXT:    vmv.v.v v0, v9
+; FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i1>, ptr %x
   %b = load <vscale x 2 x i1>, ptr %y
   %b2 = load <vscale x 2 x i1>, ptr %z
@@ -496,40 +494,36 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y,
 define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
 ; NO_FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
 ; NO_FOLDING:       # %bb.0:
-; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; NO_FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
 ; NO_FOLDING-NEXT:    vlm.v v0, (a0)
-; NO_FOLDING-NEXT:    vlm.v v8, (a1)
-; NO_FOLDING-NEXT:    vlm.v v9, (a2)
+; NO_FOLDING-NEXT:    vlm.v v8, (a2)
+; NO_FOLDING-NEXT:    vlm.v v9, (a1)
 ; NO_FOLDING-NEXT:    vmv.v.i v10, 0
 ; NO_FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
 ; NO_FOLDING-NEXT:    vmv1r.v v0, v8
 ; NO_FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
+; NO_FOLDING-NEXT:    vadd.vv v10, v11, v8
+; NO_FOLDING-NEXT:    vsub.vv v8, v11, v8
 ; NO_FOLDING-NEXT:    vmv1r.v v0, v9
-; NO_FOLDING-NEXT:    vmerge.vim v9, v10, 1, v0
-; NO_FOLDING-NEXT:    vmul.vv v8, v11, v8
-; NO_FOLDING-NEXT:    vadd.vv v10, v11, v9
-; NO_FOLDING-NEXT:    vsub.vv v9, v11, v9
-; NO_FOLDING-NEXT:    vor.vv v8, v8, v10
-; NO_FOLDING-NEXT:    vor.vv v8, v8, v9
+; NO_FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
+; NO_FOLDING-NEXT:    vor.vv v8, v10, v8
 ; NO_FOLDING-NEXT:    ret
 ;
 ; FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
 ; FOLDING-NEXT:    vlm.v v0, (a0)
-; FOLDING-NEXT:    vlm.v v8, (a1)
-; FOLDING-NEXT:    vlm.v v9, (a2)
+; FOLDING-NEXT:    vlm.v v8, (a2)
+; FOLDING-NEXT:    vlm.v v9, (a1)
 ; FOLDING-NEXT:    vmv.v.i v10, 0
 ; FOLDING-NEXT:    vmerge.vim v11, v10, 1, v0
 ; FOLDING-NEXT:    vmv1r.v v0, v8
 ; FOLDING-NEXT:    vmerge.vim v8, v10, 1, v0
+; FOLDING-NEXT:    vadd.vv v10, v11, v8
+; FOLDING-NEXT:    vsub.vv v8, v11, v8
 ; FOLDING-NEXT:    vmv1r.v v0, v9
-; FOLDING-NEXT:    vmerge.vim v9, v10, 1, v0
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vor.vv v10, v10, v11, v0.t
+; FOLDING-NEXT:    vor.vv v8, v10, v8
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i1>, ptr %x
   %b = load <vscale x 2 x i1>, ptr %y
@@ -594,3 +588,6 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y,
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}