diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b402e82376276..2655e8428309d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -376,6 +376,15 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
   assert(OldVT == VecVT.getVectorElementType() &&
          "BUILD_VECTOR operand type doesn't match vector element type!");
 
+  if (VecVT.isInteger() && TLI.isOperationLegal(ISD::SPLAT_VECTOR, VecVT) &&
+      TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR_PARTS, VecVT)) {
+    if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+      SDValue Lo, Hi;
+      GetExpandedOp(V, Lo, Hi);
+      return DAG.getNode(ISD::SPLAT_VECTOR_PARTS, dl, VecVT, Lo, Hi);
+    }
+  }
+
   // Build a vector of twice the length out of the expanded elements.
   // For example <3 x i64> -> <6 x i32>.
   SmallVector NewElts;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index 901be442c0012..d52cbb54c4b2d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -14,9 +14,11 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV32-NEXT: vfmv.f.s fa0, v8
 ; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -669,9 +671,11 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vfmv.f.s fa0, v8
 ; RV32-NEXT: call llrint
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index ad075e4b4e198..2f20caa6eb189 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -397,43 +397,22 @@ define void @masked_load_v32i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 declare <32 x i32> @llvm.masked.load.v32i32(ptr, i32, <32 x i1>, <32 x i32>)
 
 define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v32i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a3, a1, 128
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle64.v v0, (a1)
-; RV32-NEXT: vle64.v v24, (a3)
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vmseq.vv v8, v0, v16
-; RV32-NEXT: vmseq.vv v0, v24, v16
-; RV32-NEXT: addi a1, a0, 128
-; RV32-NEXT: vle64.v v16, (a1), v0.t
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vle64.v v8, (a0), v0.t
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: addi a0, a2, 128
-; RV32-NEXT: vse64.v v16, (a0)
-; RV32-NEXT: ret
-;
-; 
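[Illustrative aside, not part of the patch: the new path in ExpandOp_BUILD_VECTOR above applies when a BUILD_VECTOR whose element type must be expanded (e.g. i64 on RV32) is a splat of a single value. Instead of emitting the interleaved <2N x i32> BUILD_VECTOR built from the expanded elements, the Lo/Hi halves of the splat value feed one SPLAT_VECTOR_PARTS node, which on RV32 is what appears in the updated checks as the sw/sw plus zero-strided vlse64.v sequence. A minimal sketch of IR that produces such a splat BUILD_VECTOR after type legalization (function name and vector width are hypothetical, not taken from the tests):

define <2 x i64> @splat_i64_halves(i64 %x) {
  ; Standard splat idiom: broadcast one i64 lane; on a 32-bit target the
  ; i64 operand is expanded into two i32 halves during type legalization.
  %head = insertelement <2 x i64> poison, i64 %x, i64 0
  %splat = shufflevector <2 x i64> %head, <2 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

Guarding on isOperationLegal(ISD::SPLAT_VECTOR) together with isOperationLegalOrCustom(ISD::SPLAT_VECTOR_PARTS) keeps the existing element-by-element expansion for targets that cannot consume the two-part splat directly.]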
RV64-LABEL: masked_load_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a3, a1, 128 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v16, (a1) -; RV64-NEXT: vle64.v v24, (a3) -; RV64-NEXT: vmseq.vi v8, v16, 0 -; RV64-NEXT: vmseq.vi v0, v24, 0 -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: vle64.v v16, (a1), v0.t -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vle64.v v8, (a0), v0.t -; RV64-NEXT: vse64.v v8, (a2) -; RV64-NEXT: addi a0, a2, 128 -; RV64-NEXT: vse64.v v16, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: masked_load_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, a1, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v24, (a3) +; CHECK-NEXT: vmseq.vi v8, v16, 0 +; CHECK-NEXT: vmseq.vi v0, v24, 0 +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v16, (a1), v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: vse64.v v8, (a2) +; CHECK-NEXT: addi a0, a2, 128 +; CHECK-NEXT: vse64.v v16, (a0) +; CHECK-NEXT: ret %m = load <32 x i64>, ptr %m_ptr %mask = icmp eq <32 x i64> %m, zeroinitializer %load = call <32 x i64> @llvm.masked.load.v32i64(ptr %a, i32 8, <32 x i1> %mask, <32 x i64> undef) @@ -547,3 +526,6 @@ define void @masked_load_v256i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ret void } declare <256 x i8> @llvm.masked.load.v256i8(ptr, i32, <256 x i1>, <256 x i8>) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index 86c28247e97ef..90690bbc8e208 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -397,87 +397,44 @@ define void @masked_store_v32i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { declare void @llvm.masked.store.v32i32.p0(<32 x i32>, ptr, i32, <32 x i1>) define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { -; RV32-LABEL: masked_store_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: sub sp, sp, a3 -; RV32-NEXT: addi a3, a2, 128 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v24, (a2) -; RV32-NEXT: vle64.v v8, (a3) -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v7, v24, v8 -; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle64.v v24, (a2) -; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmseq.vv v0, v16, v8 -; RV32-NEXT: addi a0, a1, 128 -; RV32-NEXT: vse64.v v24, (a0), v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vse64.v v8, (a1), v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: 
masked_store_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 -; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v8, (a2) -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vle64.v v16, (a2) -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmseq.vi v0, v8, 0 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmseq.vi v8, v16, 0 -; RV64-NEXT: vse64.v v24, (a1), v0.t -; RV64-NEXT: addi a0, a1, 128 -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a0), v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: masked_store_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 4 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: addi a2, a2, 128 +; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vi v8, v16, 0 +; CHECK-NEXT: vse64.v v24, (a1), v0.t +; CHECK-NEXT: addi a0, a1, 128 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %m = load <32 x i64>, ptr %m_ptr %mask = icmp eq <32 x i64> %m, zeroinitializer %val = load <32 x i64>, ptr %val_ptr @@ -683,3 +640,6 @@ define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ret void } declare void @llvm.masked.store.v256i8.p0(<256 x i8>, ptr, i32, <256 x i1>) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 5601bd5ee7a3a..805a3c640957b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1346,93 +1346,48 @@ define <16 x i64> @vadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vadd_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB108_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB108_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vadd_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB108_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB108_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vadd_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB108_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB108_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vadd_vi_v32i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB109_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB109_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: ret -; -; RV64-LABEL: vadd_vi_v32i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li 
a2, 16 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB109_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB109_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v16, v16, -1 -; RV64-NEXT: ret +; CHECK-LABEL: vadd_vi_v32i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB109_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB109_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v16, -1 +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } @@ -1440,49 +1395,26 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; FIXME: We don't match vadd.vi on RV32. define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vadd_vx_v32i64_evl12: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vadd_vx_v32i64_evl12: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vadd_vx_v32i64_evl12: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) ret <32 x i64> %v } define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vadd_vx_v32i64_evl27: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vadd_vx_v32i64_evl27: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vadd_vx_v32i64_evl27: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll index d414be76672ab..c413dd86f3712 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -1139,18 +1139,16 @@ define <11 x i64> @vand_vv_v11i64_unmasked(<11 x i64> %va, <11 x i64> %b, i32 ze define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vand_vx_v11i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v16, v0 -; RV32-NEXT: lui a3, 341 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: vmerge.vxm v24, v24, a0, v0 -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vand_vx_v11i64: @@ -1167,16 +1165,16 @@ define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zero define <11 x i64> @vand_vx_v11i64_unmasked(<11 x i64> %va, i64 %b, i32 zeroext %evl) { ; RV32-LABEL: vand_vx_v11i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a3, 32 -; RV32-NEXT: lui a4, 341 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: vmerge.vxm v16, v16, a0, v0 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vand_vx_v11i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 0b0d758ad8ded..6adc6ba9621a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -1018,51 +1018,27 @@ define <16 x i64> @vmax_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % declare <32 x i64> @llvm.vp.smax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vmax_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB74_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB74_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmax.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmax.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vmax_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; 
RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB74_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB74_2: -; RV64-NEXT: li a2, -1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmax.vx v8, v8, a2, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmax.vx v16, v16, a2, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vmax_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB74_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB74_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a2, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmax.vx v16, v16, a2, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index 98e630a0e59e5..baeb372c017e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -1017,51 +1017,27 @@ define <16 x i64> @vmaxu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext declare <32 x i64> @llvm.vp.umax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vmaxu_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB74_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB74_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vmaxu_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB74_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB74_2: -; RV64-NEXT: li a2, -1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmaxu.vx v8, v8, a2, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmaxu.vx v16, v16, a2, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vmaxu_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; 
CHECK-NEXT: bltu a0, a2, .LBB74_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB74_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a2, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v16, v16, a2, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index a6e3764b37550..d0c21ce05c025 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -1018,51 +1018,27 @@ define <16 x i64> @vmin_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % declare <32 x i64> @llvm.vp.smin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vmin_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB74_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB74_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmin.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmin.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vmin_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB74_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB74_2: -; RV64-NEXT: li a2, -1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmin.vx v8, v8, a2, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmin.vx v16, v16, a2, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vmin_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB74_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB74_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a2, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmin.vx v16, v16, a2, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index c59b65edd1ec1..a730ba4729d25 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -1017,51 +1017,27 @@ define <16 x i64> @vminu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext declare <32 x i64> @llvm.vp.umin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vminu_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB74_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB74_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vminu.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vminu.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vminu_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB74_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB74_2: -; RV64-NEXT: li a2, -1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vminu.vx v8, v8, a2, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vminu.vx v16, v16, a2, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vminu_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB74_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB74_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a2, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vminu.vx v16, v16, a2, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index df2c83028e5df..c5dd6ac344a37 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -1359,93 +1359,48 @@ define <16 x i64> @vsadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { declare <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vsadd_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB108_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB108_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vsadd_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB108_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB108_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vsadd_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB108_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB108_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vsadd_vi_v32i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB109_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB109_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsadd.vv v8, v8, v24 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsadd.vv v16, v16, v24 -; RV32-NEXT: ret -; -; RV64-LABEL: vsadd_vi_v32i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB109_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB109_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsadd.vi v8, v8, -1 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsadd.vi v16, v16, -1 -; RV64-NEXT: ret +; CHECK-LABEL: vsadd_vi_v32i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB109_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB109_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1 +; CHECK-NEXT: ret %v = 
call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } @@ -1453,59 +1408,31 @@ define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; FIXME: We don't match vsadd.vi on RV32. define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vsadd_vx_v32i64_evl12: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma -; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vsadd_vx_v32i64_evl12: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma -; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vsadd_vx_v32i64_evl12: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) ret <32 x i64> %v } define <32 x i64> @vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vsadd_vx_v32i64_evl27: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vsadd_vx_v32i64_evl27: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vsadd_vx_v32i64_evl27: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index f50dadf019910..17d9c437590a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -1355,93 +1355,48 @@ define <16 x i64> @vsaddu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { declare <32 
x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vsaddu_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB108_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB108_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vsaddu_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB108_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB108_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vsaddu_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB108_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB108_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vsaddu_vi_v32i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB109_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB109_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsaddu.vv v8, v8, v24 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsaddu.vv v16, v16, v24 -; RV32-NEXT: ret -; -; RV64-LABEL: vsaddu_vi_v32i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB109_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB109_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsaddu.vi v8, v8, -1 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsaddu.vi v16, 
v16, -1 -; RV64-NEXT: ret +; CHECK-LABEL: vsaddu_vi_v32i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB109_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB109_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1 +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } @@ -1449,59 +1404,31 @@ define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; FIXME: We don't match vsaddu.vi on RV32. define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vsaddu_vx_v32i64_evl12: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma -; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vsaddu_vx_v32i64_evl12: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma -; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vsaddu_vx_v32i64_evl12: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) ret <32 x i64> %v } define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vsaddu_vx_v32i64_evl27: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vsaddu_vx_v32i64_evl27: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vsaddu_vx_v32i64_evl27: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; 
CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index b82ca70477ba3..90e1b5ce55752 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -1399,95 +1399,50 @@ define <16 x i64> @vssub_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { declare <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vssub_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB108_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB108_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vssub.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vssub.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vssub_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB108_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB108_2: -; RV64-NEXT: li a2, -1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vssub.vx v8, v8, a2, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vssub.vx v16, v16, a2, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vssub_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB108_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB108_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a2, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a2, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vssub_vi_v32i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB109_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB109_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vssub.vv v8, v8, v24 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; 
RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vssub.vv v16, v16, v24 -; RV32-NEXT: ret -; -; RV64-LABEL: vssub_vi_v32i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB109_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB109_2: -; RV64-NEXT: li a2, -1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vssub.vx v8, v8, a2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vssub.vx v16, v16, a2 -; RV64-NEXT: ret +; CHECK-LABEL: vssub_vi_v32i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB109_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB109_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a2 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a2 +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } @@ -1495,61 +1450,33 @@ define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; FIXME: We don't match vssub.vi on RV32. define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vssub_vx_v32i64_evl12: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV32-NEXT: vssub.vv v8, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma -; RV32-NEXT: vssub.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vssub_vx_v32i64_evl12: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV64-NEXT: vssub.vx v8, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma -; RV64-NEXT: vssub.vx v16, v16, a0, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vssub_vx_v32i64_evl12: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) ret <32 x i64> %v } define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vssub_vx_v32i64_evl27: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vssub.vv v8, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV32-NEXT: vssub.vv v16, v16, v24, v0.t -; RV32-NEXT: ret 
-; -; RV64-LABEL: vssub_vx_v32i64_evl27: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vssub.vx v8, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV64-NEXT: vssub.vx v16, v16, a0, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vssub_vx_v32i64_evl27: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index 6d8ed563f02bd..59899ab8b9994 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -1394,95 +1394,50 @@ define <16 x i64> @vssubu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { declare <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vssubu_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB108_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB108_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vssubu_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB108_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB108_2: -; RV64-NEXT: li a2, -1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vssubu.vx v8, v8, a2, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vssubu.vx v16, v16, a2, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vssubu_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB108_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB108_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a2, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: 
vssubu.vx v16, v16, a2, v0.t +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vssubu_vi_v32i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB109_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB109_2: -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vssubu.vv v8, v8, v24 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vssubu.vv v16, v16, v24 -; RV32-NEXT: ret -; -; RV64-LABEL: vssubu_vi_v32i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB109_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB109_2: -; RV64-NEXT: li a2, -1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vssubu.vx v8, v8, a2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vssubu.vx v16, v16, a2 -; RV64-NEXT: ret +; CHECK-LABEL: vssubu_vi_v32i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB109_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB109_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a2 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vx v16, v16, a2 +; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } @@ -1490,61 +1445,33 @@ define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; FIXME: We don't match vssubu.vi on RV32. 
 define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
-; RV32-LABEL: vssubu_vx_v32i64_evl12:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v7, v0, 2
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma
-; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t
-; RV32-NEXT: vmv1r.v v0, v7
-; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma
-; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vssubu_vx_v32i64_evl12:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v24, v0, 2
-; RV64-NEXT: li a0, -1
-; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma
-; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma
-; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vssubu_vx_v32i64_evl12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: ret
 %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12)
 ret <32 x i64> %v
 }
 define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
-; RV32-LABEL: vssubu_vx_v32i64_evl27:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v7, v0, 2
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t
-; RV32-NEXT: vmv1r.v v0, v7
-; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma
-; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vssubu_vx_v32i64_evl27:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v24, v0, 2
-; RV64-NEXT: li a0, -1
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma
-; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vssubu_vx_v32i64_evl27:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: ret
 %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27)
 ret <32 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index dc27158cfb31f..162f7e34536a7 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -599,13 +599,13 @@ define void @test_srem_vec(ptr %X) nounwind {
 ;
 ; RV32MV-LABEL: test_srem_vec:
 ; RV32MV: # %bb.0:
-; RV32MV-NEXT: addi sp, sp, -48
-; RV32MV-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
-; RV32MV-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: addi sp, sp, -64
+; RV32MV-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s2, 48(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s3, 44(sp) # 4-byte Folded Spill
+; RV32MV-NEXT: sw s4, 40(sp) # 4-byte Folded Spill
 ; RV32MV-NEXT: csrr a1, vlenb
 ; RV32MV-NEXT: slli a1, a1, 1
 ; RV32MV-NEXT: sub sp, sp, a1
@@ -624,29 +624,33 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT: srai s3, a0, 31
 ; RV32MV-NEXT: srli a1, a1, 1
 ; RV32MV-NEXT: slli a1, a1, 31
-; RV32MV-NEXT: lw a0, 0(s0)
 ; RV32MV-NEXT: srai s4, a1, 31
+; RV32MV-NEXT: lw a0, 0(s0)
 ; RV32MV-NEXT: slli a1, a3, 31
 ; RV32MV-NEXT: srai a1, a1, 31
+; RV32MV-NEXT: li a2, 1
+; RV32MV-NEXT: sw a2, 20(sp)
+; RV32MV-NEXT: li a2, -1
+; RV32MV-NEXT: sw a2, 16(sp)
 ; RV32MV-NEXT: li a2, 6
 ; RV32MV-NEXT: li a3, 0
 ; RV32MV-NEXT: call __moddi3
 ; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32MV-NEXT: vmv.v.x v8, a0
 ; RV32MV-NEXT: vslide1down.vx v8, v8, a1
-; RV32MV-NEXT: addi a0, sp, 16
+; RV32MV-NEXT: addi a0, sp, 32
 ; RV32MV-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32MV-NEXT: li a2, 7
 ; RV32MV-NEXT: mv a0, s2
 ; RV32MV-NEXT: mv a1, s4
 ; RV32MV-NEXT: li a3, 0
 ; RV32MV-NEXT: call __moddi3
-; RV32MV-NEXT: addi a2, sp, 16
+; RV32MV-NEXT: addi a2, sp, 32
 ; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32MV-NEXT: vslide1down.vx v8, v8, a0
 ; RV32MV-NEXT: vslide1down.vx v8, v8, a1
-; RV32MV-NEXT: addi a0, sp, 16
+; RV32MV-NEXT: addi a0, sp, 32
 ; RV32MV-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32MV-NEXT: li a2, -5
 ; RV32MV-NEXT: li a3, -1
@@ -654,18 +658,17 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT: mv a1, s3
 ; RV32MV-NEXT: call __moddi3
 ; RV32MV-NEXT: addi a2, sp, 16
-; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32MV-NEXT: vlse64.v v8, (a2), zero
+; RV32MV-NEXT: addi a2, sp, 32
+; RV32MV-NEXT: vl2r.v v10, (a2) # Unknown-size Folded Reload
 ; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32MV-NEXT: vslide1down.vx v8, v8, a0
-; RV32MV-NEXT: vslide1down.vx v8, v8, a1
-; RV32MV-NEXT: vslidedown.vi v8, v8, 2
-; RV32MV-NEXT: li a0, 511
-; RV32MV-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32MV-NEXT: vmv.v.x v10, a0
-; RV32MV-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32MV-NEXT: vsext.vf4 v12, v10
-; RV32MV-NEXT: vand.vv v8, v8, v12
-; RV32MV-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32MV-NEXT: vslide1down.vx v10, v10, a0
+; RV32MV-NEXT: vslide1down.vx v10, v10, a1
+; RV32MV-NEXT: vslidedown.vi v10, v10, 2
+; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32MV-NEXT: vand.vv v8, v10, v8
+; RV32MV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32MV-NEXT: vmv.v.i v10, 1
 ; RV32MV-NEXT: vmv.v.i v11, 0
 ; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
@@ -712,13 +715,13 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT: csrr a0, vlenb
 ; RV32MV-NEXT: slli a0, a0, 1
 ; RV32MV-NEXT: add sp, sp, a0
-; RV32MV-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32MV-NEXT: addi sp, sp, 48
+; RV32MV-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s3, 44(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: lw s4, 40(sp) # 4-byte Folded Reload
+; RV32MV-NEXT: addi sp, sp, 64
 ; RV32MV-NEXT: ret
 ;
 ; RV64MV-LABEL: test_srem_vec: