From 74ea8fe4c16490c41d60521323c38e4f0b9c0c15 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 6 Mar 2024 15:09:35 +0800 Subject: [PATCH 1/3] [RISCV] Handle scalable ops with EEW > 2 dests in combineBinOp_VLToVWBinOp_VL We can remove the restriction that the narrow type needs to be exactly EEW / 2 for scalable ISD::{ADD,SUB,MUL} nodes. This allows us to perform the combine even if we can't fully fold the extend into the widening op. VP intrinsics already do this, since they are lowered to _VL nodes which don't have this restriction. The "exactly EEW / 2" narrow type restriction prevented us from emitting V{S,Z}EXT_VL nodes with i1 element types which crash when we try to select them, since no other legal type is double the size of i1. So to preserve this, this also restricts the combine to only run after the legalize vector ops phase, at which point all unselectable i1 vectors should be custom lowered away. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14 +- .../RISCV/rvv/vscale-vw-web-simplification.ll | 38 +- llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 462 +++++++++--------- llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll | 384 +++++++-------- llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll | 320 ++++++------ 5 files changed, 613 insertions(+), 605 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 08678a859ae2b..025137707aef3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13652,16 +13652,6 @@ struct NodeExtensionHelper { if (!VT.isVector()) break; - SDValue NarrowElt = OrigOperand.getOperand(0); - MVT NarrowVT = NarrowElt.getSimpleValueType(); - - unsigned ScalarBits = VT.getScalarSizeInBits(); - unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits(); - - // Ensure the extension's semantic is equivalent to rvv vzext or vsext. - if (ScalarBits != NarrowScalarBits * 2) - break; - SupportsZExt = Opc == ISD::ZERO_EXTEND; SupportsSExt = Opc == ISD::SIGN_EXTEND; @@ -14112,7 +14102,9 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; - if (DCI.isBeforeLegalize()) + // Don't perform this until types are legalized and any legal i1 types are + // custom lowered to avoid introducing unselectable V{S,Z}EXT_VLs. + if (DCI.isBeforeLegalizeOps()) return SDValue(); if (!NodeExtensionHelper::isSupportedRoot(N)) diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll index 972fa66917a56..e56dca0732bb4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll @@ -283,18 +283,19 @@ define @vwop_vscale_sext_i8i32_multiple_users(ptr %x, ptr %y, ; ; FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users: ; FOLDING: # %bb.0: -; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; FOLDING-NEXT: vle8.v v8, (a0) ; FOLDING-NEXT: vle8.v v9, (a1) ; FOLDING-NEXT: vle8.v v10, (a2) -; FOLDING-NEXT: vsext.vf4 v11, v8 -; FOLDING-NEXT: vsext.vf4 v8, v9 -; FOLDING-NEXT: vsext.vf4 v9, v10 -; FOLDING-NEXT: vmul.vv v8, v11, v8 -; FOLDING-NEXT: vadd.vv v10, v11, v9 -; FOLDING-NEXT: vsub.vv v9, v11, v9 -; FOLDING-NEXT: vor.vv v8, v8, v10 -; FOLDING-NEXT: vor.vv v8, v8, v9 +; FOLDING-NEXT: vsext.vf2 v11, v8 +; FOLDING-NEXT: vsext.vf2 v8, v9 +; FOLDING-NEXT: vsext.vf2 v9, v10 +; FOLDING-NEXT: vwmul.vv v10, v11, v8 +; FOLDING-NEXT: vwadd.vv v8, v11, v9 +; FOLDING-NEXT: vwsub.vv v12, v11, v9 +; FOLDING-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; FOLDING-NEXT: vor.vv v8, v10, v8 +; FOLDING-NEXT: vor.vv v8, v8, v12 ; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y @@ -563,18 +564,19 @@ define @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y, ; ; FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users: ; FOLDING: # %bb.0: -; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; FOLDING-NEXT: vle8.v v8, (a0) ; FOLDING-NEXT: vle8.v v9, (a1) ; FOLDING-NEXT: vle8.v v10, (a2) -; FOLDING-NEXT: vzext.vf4 v11, v8 -; FOLDING-NEXT: vzext.vf4 v8, v9 -; FOLDING-NEXT: vzext.vf4 v9, v10 -; FOLDING-NEXT: vmul.vv v8, v11, v8 -; FOLDING-NEXT: vadd.vv v10, v11, v9 -; FOLDING-NEXT: vsub.vv v9, v11, v9 -; FOLDING-NEXT: vor.vv v8, v8, v10 -; FOLDING-NEXT: vor.vv v8, v8, v9 +; FOLDING-NEXT: vzext.vf2 v11, v8 +; FOLDING-NEXT: vzext.vf2 v8, v9 +; FOLDING-NEXT: vzext.vf2 v9, v10 +; FOLDING-NEXT: vwmulu.vv v10, v11, v8 +; FOLDING-NEXT: vwaddu.vv v8, v11, v9 +; FOLDING-NEXT: vwsubu.vv v12, v11, v9 +; FOLDING-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; FOLDING-NEXT: vor.vv v8, v10, v8 +; FOLDING-NEXT: vor.vv v8, v8, v12 ; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll index a559fbf2bc8a7..4152e61c0541a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll @@ -421,10 +421,10 @@ define @vwaddu_wx_nxv8i64_nxv8i32( %va, i32 define @vwadd_vv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v8, v9 -; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -435,10 +435,10 @@ define @vwadd_vv_nxv1i64_nxv1i16( %va, @vwaddu_vv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwaddu_vv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v8, v9 -; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -451,10 +451,10 @@ define @vwadd_vx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v8, v9 -; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -469,10 +469,10 @@ define @vwaddu_vx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v8, v9 -; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -485,9 +485,9 @@ define @vwaddu_vx_nxv1i64_nxv1i16( %va, i16 define @vwadd_wv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v9 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v9 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret %vc = sext %vb to %vd = add %va, %vc @@ -497,9 +497,9 @@ define @vwadd_wv_nxv1i64_nxv1i16( %va, @vwaddu_wv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v9 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v9 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret %vc = zext %vb to %vd = add %va, %vc @@ -511,9 +511,9 @@ define @vwadd_wx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v9 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v9 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -527,9 +527,9 @@ define @vwaddu_wx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v9 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v9 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -541,10 +541,10 @@ define @vwaddu_wx_nxv1i64_nxv1i16( %va, i16 define @vwadd_vv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -555,10 +555,10 @@ define @vwadd_vv_nxv2i64_nxv2i16( %va, @vwaddu_vv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -571,10 +571,10 @@ define @vwadd_vx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -589,10 +589,10 @@ define @vwaddu_vx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -605,9 +605,9 @@ define @vwaddu_vx_nxv2i64_nxv2i16( %va, i16 define @vwadd_wv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v10 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v11, v10 +; CHECK-NEXT: vwadd.wv v8, v8, v11 ; CHECK-NEXT: ret %vc = sext %vb to %vd = add %va, %vc @@ -617,9 +617,9 @@ define @vwadd_wv_nxv2i64_nxv2i16( %va, @vwaddu_wv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v12, v10 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v11, v10 +; CHECK-NEXT: vwaddu.wv v8, v8, v11 ; CHECK-NEXT: ret %vc = zext %vb to %vd = add %va, %vc @@ -631,9 +631,9 @@ define @vwadd_wx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v10 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v11, v10 +; CHECK-NEXT: vwadd.wv v8, v8, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -647,9 +647,9 @@ define @vwaddu_wx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v12, v10 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v11, v10 +; CHECK-NEXT: vwaddu.wv v8, v8, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -661,10 +661,10 @@ define @vwaddu_wx_nxv2i64_nxv2i16( %va, i16 define @vwadd_vv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vsext.vf4 v16, v9 -; CHECK-NEXT: vadd.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v8 +; CHECK-NEXT: vsext.vf2 v14, v9 +; CHECK-NEXT: vwadd.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -675,10 +675,10 @@ define @vwadd_vv_nxv4i64_nxv4i16( %va, @vwaddu_vv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf4 v12, v8 -; CHECK-NEXT: vzext.vf4 v16, v9 -; CHECK-NEXT: vadd.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vzext.vf2 v14, v9 +; CHECK-NEXT: vwaddu.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -691,10 +691,10 @@ define @vwadd_vx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vsext.vf4 v16, v9 -; CHECK-NEXT: vadd.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v8 +; CHECK-NEXT: vsext.vf2 v14, v9 +; CHECK-NEXT: vwadd.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -709,10 +709,10 @@ define @vwaddu_vx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf4 v12, v8 -; CHECK-NEXT: vzext.vf4 v16, v9 -; CHECK-NEXT: vadd.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vzext.vf2 v14, v9 +; CHECK-NEXT: vwaddu.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -725,9 +725,9 @@ define @vwaddu_vx_nxv4i64_nxv4i16( %va, i16 define @vwadd_wv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v12 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v14, v12 +; CHECK-NEXT: vwadd.wv v8, v8, v14 ; CHECK-NEXT: ret %vc = sext %vb to %vd = add %va, %vc @@ -737,9 +737,9 @@ define @vwadd_wv_nxv4i64_nxv4i16( %va, @vwaddu_wv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v12 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v14, v12 +; CHECK-NEXT: vwaddu.wv v8, v8, v14 ; CHECK-NEXT: ret %vc = zext %vb to %vd = add %va, %vc @@ -751,9 +751,9 @@ define @vwadd_wx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v12 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v14, v12 +; CHECK-NEXT: vwadd.wv v8, v8, v14 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -767,9 +767,9 @@ define @vwaddu_wx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v12 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v14, v12 +; CHECK-NEXT: vwaddu.wv v8, v8, v14 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -781,10 +781,10 @@ define @vwaddu_wx_nxv4i64_nxv4i16( %va, i16 define @vwadd_vv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v8 -; CHECK-NEXT: vsext.vf4 v24, v10 -; CHECK-NEXT: vadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vsext.vf2 v20, v10 +; CHECK-NEXT: vwadd.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -795,10 +795,10 @@ define @vwadd_vv_nxv8i64_nxv8i16( %va, @vwaddu_vv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v8 -; CHECK-NEXT: vzext.vf4 v24, v10 -; CHECK-NEXT: vadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v16, v8 +; CHECK-NEXT: vzext.vf2 v20, v10 +; CHECK-NEXT: vwaddu.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -811,10 +811,10 @@ define @vwadd_vx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v8 -; CHECK-NEXT: vsext.vf4 v24, v10 -; CHECK-NEXT: vadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vsext.vf2 v20, v10 +; CHECK-NEXT: vwadd.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -829,10 +829,10 @@ define @vwaddu_vx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v8 -; CHECK-NEXT: vzext.vf4 v24, v10 -; CHECK-NEXT: vadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v16, v8 +; CHECK-NEXT: vzext.vf2 v20, v10 +; CHECK-NEXT: vwaddu.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -845,9 +845,9 @@ define @vwaddu_vx_nxv8i64_nxv8i16( %va, i16 define @vwadd_wv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v24, v16 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v20, v16 +; CHECK-NEXT: vwadd.wv v8, v8, v20 ; CHECK-NEXT: ret %vc = sext %vb to %vd = add %va, %vc @@ -857,9 +857,9 @@ define @vwadd_wv_nxv8i64_nxv8i16( %va, @vwaddu_wv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v24, v16 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v20, v16 +; CHECK-NEXT: vwaddu.wv v8, v8, v20 ; CHECK-NEXT: ret %vc = zext %vb to %vd = add %va, %vc @@ -871,9 +871,9 @@ define @vwadd_wx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v24, v16 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v20, v16 +; CHECK-NEXT: vwadd.wv v8, v8, v20 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -887,9 +887,9 @@ define @vwaddu_wx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v24, v16 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v20, v16 +; CHECK-NEXT: vwaddu.wv v8, v8, v20 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -901,10 +901,10 @@ define @vwaddu_wx_nxv8i64_nxv8i16( %va, i16 define @vwadd_vv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v8, v9 -; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -915,10 +915,10 @@ define @vwadd_vv_nxv1i64_nxv1i8( %va, @vwaddu_vv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwaddu_vv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v8, v9 -; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -931,10 +931,10 @@ define @vwadd_vx_nxv1i64_nxv1i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v8, v9 -; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -949,10 +949,10 @@ define @vwaddu_vx_nxv1i64_nxv1i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v8, v9 -; CHECK-NEXT: vadd.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -965,9 +965,9 @@ define @vwaddu_vx_nxv1i64_nxv1i8( %va, i8 %b define @vwadd_wv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v9 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v9 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret %vc = sext %vb to %vd = add %va, %vc @@ -977,9 +977,9 @@ define @vwadd_wv_nxv1i64_nxv1i8( %va, @vwaddu_wv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v9 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v9 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret %vc = zext %vb to %vd = add %va, %vc @@ -991,9 +991,9 @@ define @vwadd_wx_nxv1i64_nxv1i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v9 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v9 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1007,9 +1007,9 @@ define @vwaddu_wx_nxv1i64_nxv1i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v9 -; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v9 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1021,10 +1021,10 @@ define @vwaddu_wx_nxv1i64_nxv1i8( %va, i8 % define @vwadd_vv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v12, v9 -; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -1035,10 +1035,10 @@ define @vwadd_vv_nxv2i64_nxv2i8( %va, @vwaddu_vv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v12, v9 -; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -1051,10 +1051,10 @@ define @vwadd_vx_nxv2i64_nxv2i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v12, v9 -; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1069,10 +1069,10 @@ define @vwaddu_vx_nxv2i64_nxv2i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v12, v9 -; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1085,9 +1085,9 @@ define @vwaddu_vx_nxv2i64_nxv2i8( %va, i8 %b define @vwadd_wv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v10 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v11, v10 +; CHECK-NEXT: vwadd.wv v8, v8, v11 ; CHECK-NEXT: ret %vc = sext %vb to %vd = add %va, %vc @@ -1097,9 +1097,9 @@ define @vwadd_wv_nxv2i64_nxv2i8( %va, @vwaddu_wv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf8 v12, v10 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v11, v10 +; CHECK-NEXT: vwaddu.wv v8, v8, v11 ; CHECK-NEXT: ret %vc = zext %vb to %vd = add %va, %vc @@ -1111,9 +1111,9 @@ define @vwadd_wx_nxv2i64_nxv2i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v10 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v11, v10 +; CHECK-NEXT: vwadd.wv v8, v8, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1127,9 +1127,9 @@ define @vwaddu_wx_nxv2i64_nxv2i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf8 v12, v10 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v11, v10 +; CHECK-NEXT: vwaddu.wv v8, v8, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1141,10 +1141,10 @@ define @vwaddu_wx_nxv2i64_nxv2i8( %va, i8 % define @vwadd_vv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v8 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vadd.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v8 +; CHECK-NEXT: vsext.vf4 v14, v9 +; CHECK-NEXT: vwadd.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -1155,10 +1155,10 @@ define @vwadd_vv_nxv4i64_nxv4i8( %va, @vwaddu_vv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf8 v12, v8 -; CHECK-NEXT: vzext.vf8 v16, v9 -; CHECK-NEXT: vadd.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v14, v9 +; CHECK-NEXT: vwaddu.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -1171,10 +1171,10 @@ define @vwadd_vx_nxv4i64_nxv4i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v8 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vadd.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v8 +; CHECK-NEXT: vsext.vf4 v14, v9 +; CHECK-NEXT: vwadd.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1189,10 +1189,10 @@ define @vwaddu_vx_nxv4i64_nxv4i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf8 v12, v8 -; CHECK-NEXT: vzext.vf8 v16, v9 -; CHECK-NEXT: vadd.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v14, v9 +; CHECK-NEXT: vwaddu.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1205,9 +1205,9 @@ define @vwaddu_vx_nxv4i64_nxv4i8( %va, i8 %b define @vwadd_wv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v12 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v14, v12 +; CHECK-NEXT: vwadd.wv v8, v8, v14 ; CHECK-NEXT: ret %vc = sext %vb to %vd = add %va, %vc @@ -1217,9 +1217,9 @@ define @vwadd_wv_nxv4i64_nxv4i8( %va, @vwaddu_wv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf8 v16, v12 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v14, v12 +; CHECK-NEXT: vwaddu.wv v8, v8, v14 ; CHECK-NEXT: ret %vc = zext %vb to %vd = add %va, %vc @@ -1231,9 +1231,9 @@ define @vwadd_wx_nxv4i64_nxv4i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v12 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v14, v12 +; CHECK-NEXT: vwadd.wv v8, v8, v14 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1247,9 +1247,9 @@ define @vwaddu_wx_nxv4i64_nxv4i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf8 v16, v12 -; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v14, v12 +; CHECK-NEXT: vwaddu.wv v8, v8, v14 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1261,10 +1261,10 @@ define @vwaddu_wx_nxv4i64_nxv4i8( %va, i8 % define @vwadd_vv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsext.vf8 v24, v9 -; CHECK-NEXT: vadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vsext.vf4 v20, v9 +; CHECK-NEXT: vwadd.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -1275,10 +1275,10 @@ define @vwadd_vv_nxv8i64_nxv8i8( %va, @vwaddu_vv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf8 v16, v8 -; CHECK-NEXT: vzext.vf8 v24, v9 -; CHECK-NEXT: vadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v16, v8 +; CHECK-NEXT: vzext.vf4 v20, v9 +; CHECK-NEXT: vwaddu.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -1291,10 +1291,10 @@ define @vwadd_vx_nxv8i64_nxv8i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsext.vf8 v24, v9 -; CHECK-NEXT: vadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vsext.vf4 v20, v9 +; CHECK-NEXT: vwadd.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1309,10 +1309,10 @@ define @vwaddu_vx_nxv8i64_nxv8i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf8 v16, v8 -; CHECK-NEXT: vzext.vf8 v24, v9 -; CHECK-NEXT: vadd.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v16, v8 +; CHECK-NEXT: vzext.vf4 v20, v9 +; CHECK-NEXT: vwaddu.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1325,9 +1325,9 @@ define @vwaddu_vx_nxv8i64_nxv8i8( %va, i8 %b define @vwadd_wv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v24, v16 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v20, v16 +; CHECK-NEXT: vwadd.wv v8, v8, v20 ; CHECK-NEXT: ret %vc = sext %vb to %vd = add %va, %vc @@ -1337,9 +1337,9 @@ define @vwadd_wv_nxv8i64_nxv8i8( %va, @vwaddu_wv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf8 v24, v16 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v20, v16 +; CHECK-NEXT: vwaddu.wv v8, v8, v20 ; CHECK-NEXT: ret %vc = zext %vb to %vd = add %va, %vc @@ -1351,9 +1351,9 @@ define @vwadd_wx_nxv8i64_nxv8i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v24, v16 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v20, v16 +; CHECK-NEXT: vwadd.wv v8, v8, v20 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1367,9 +1367,9 @@ define @vwaddu_wx_nxv8i64_nxv8i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf8 v24, v16 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v20, v16 +; CHECK-NEXT: vwaddu.wv v8, v8, v20 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1377,3 +1377,17 @@ define @vwaddu_wx_nxv8i64_nxv8i8( %va, i8 % %vc = add %va, %vb ret %vc } + +; Make sure that we don't introduce any V{S,Z}EXT_VL nodes with i1 types from +; combineBinOp_VLToVWBinOp_VL, since they can't be selected. +define @foo( %va, %vb, ptr %p) { + %vc = zext %va to + %vd = add %vc, %vb + +; Introduce an illegal type so that the DAG changes after legalizing +; types. Otherwise the legalize vector ops phase will be run immediately after +; the legalize types phase, and the zext will already be in non-i1 form by the +; time combineBinOp_VLToVWBinOp_VL is called. + store i9 42, ptr %p + ret %vd +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll index 3634162eefd64..539a4bdb27ad5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll @@ -341,10 +341,10 @@ define @vwmulsu_vx_nxv8i64_nxv8i32( %va, i3 define @vwmul_vv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwmul_vv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -355,10 +355,10 @@ define @vwmul_vv_nxv1i64_nxv1i16( %va, @vwmulu_vv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -369,10 +369,10 @@ define @vwmulu_vv_nxv1i64_nxv1i16( %va, @vwmulsu_vv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwmulsu_vv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwmulsu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = zext %vb to @@ -385,10 +385,10 @@ define @vwmul_vx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -403,10 +403,10 @@ define @vwmulu_vx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -421,10 +421,10 @@ define @vwmulsu_vx_nxv1i64_nxv1i16( %va, i1 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwmulsu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -437,10 +437,10 @@ define @vwmulsu_vx_nxv1i64_nxv1i16( %va, i1 define @vwmul_vv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -451,10 +451,10 @@ define @vwmul_vv_nxv2i64_nxv2i16( %va, @vwmulu_vv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -465,10 +465,10 @@ define @vwmulu_vv_nxv2i64_nxv2i16( %va, @vwmulsu_vv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwmulsu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = zext %vb to @@ -481,10 +481,10 @@ define @vwmul_vx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -499,10 +499,10 @@ define @vwmulu_vx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -517,10 +517,10 @@ define @vwmulsu_vx_nxv2i64_nxv2i16( %va, i1 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v11, v9 +; CHECK-NEXT: vwmulsu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -533,10 +533,10 @@ define @vwmulsu_vx_nxv2i64_nxv2i16( %va, i1 define @vwmul_vv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vsext.vf4 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v8 +; CHECK-NEXT: vsext.vf2 v14, v9 +; CHECK-NEXT: vwmul.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -547,10 +547,10 @@ define @vwmul_vv_nxv4i64_nxv4i16( %va, @vwmulu_vv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf4 v12, v8 -; CHECK-NEXT: vzext.vf4 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vzext.vf2 v14, v9 +; CHECK-NEXT: vwmulu.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -561,10 +561,10 @@ define @vwmulu_vv_nxv4i64_nxv4i16( %va, @vwmulsu_vv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vzext.vf4 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v8 +; CHECK-NEXT: vzext.vf2 v14, v9 +; CHECK-NEXT: vwmulsu.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = sext %va to %vd = zext %vb to @@ -577,10 +577,10 @@ define @vwmul_vx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vsext.vf4 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v8 +; CHECK-NEXT: vsext.vf2 v14, v9 +; CHECK-NEXT: vwmul.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -595,10 +595,10 @@ define @vwmulu_vx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf4 v12, v8 -; CHECK-NEXT: vzext.vf4 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v12, v8 +; CHECK-NEXT: vzext.vf2 v14, v9 +; CHECK-NEXT: vwmulu.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -613,10 +613,10 @@ define @vwmulsu_vx_nxv4i64_nxv4i16( %va, i1 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vzext.vf4 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v8 +; CHECK-NEXT: vzext.vf2 v14, v9 +; CHECK-NEXT: vwmulsu.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -629,10 +629,10 @@ define @vwmulsu_vx_nxv4i64_nxv4i16( %va, i1 define @vwmul_vv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v8 -; CHECK-NEXT: vsext.vf4 v24, v10 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vsext.vf2 v20, v10 +; CHECK-NEXT: vwmul.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -643,10 +643,10 @@ define @vwmul_vv_nxv8i64_nxv8i16( %va, @vwmulu_vv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v8 -; CHECK-NEXT: vzext.vf4 v24, v10 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v16, v8 +; CHECK-NEXT: vzext.vf2 v20, v10 +; CHECK-NEXT: vwmulu.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -657,10 +657,10 @@ define @vwmulu_vv_nxv8i64_nxv8i16( %va, @vwmulsu_vv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v8 -; CHECK-NEXT: vzext.vf4 v24, v10 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vzext.vf2 v20, v10 +; CHECK-NEXT: vwmulsu.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = sext %va to %vd = zext %vb to @@ -673,10 +673,10 @@ define @vwmul_vx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v8 -; CHECK-NEXT: vsext.vf4 v24, v10 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vsext.vf2 v20, v10 +; CHECK-NEXT: vwmul.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -691,10 +691,10 @@ define @vwmulu_vx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v8 -; CHECK-NEXT: vzext.vf4 v24, v10 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v16, v8 +; CHECK-NEXT: vzext.vf2 v20, v10 +; CHECK-NEXT: vwmulu.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -709,10 +709,10 @@ define @vwmulsu_vx_nxv8i64_nxv8i16( %va, i1 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v8 -; CHECK-NEXT: vzext.vf4 v24, v10 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vzext.vf2 v20, v10 +; CHECK-NEXT: vwmulsu.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i16 0 %splat = shufflevector %head, undef, zeroinitializer @@ -725,10 +725,10 @@ define @vwmulsu_vx_nxv8i64_nxv8i16( %va, i1 define @vwmul_vv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwmul_vv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -739,10 +739,10 @@ define @vwmul_vv_nxv1i64_nxv1i8( %va, @vwmulu_vv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -753,10 +753,10 @@ define @vwmulu_vv_nxv1i64_nxv1i8( %va, @vwmulsu_vv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwmulsu_vv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwmulsu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = zext %vb to @@ -769,10 +769,10 @@ define @vwmul_vx_nxv1i64_nxv1i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -787,10 +787,10 @@ define @vwmulu_vx_nxv1i64_nxv1i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -805,10 +805,10 @@ define @vwmulsu_vx_nxv1i64_nxv1i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v8, v9 -; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwmulsu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -821,10 +821,10 @@ define @vwmulsu_vx_nxv1i64_nxv1i8( %va, i8 % define @vwmul_vv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -835,10 +835,10 @@ define @vwmul_vv_nxv2i64_nxv2i8( %va, @vwmulu_vv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -849,10 +849,10 @@ define @vwmulu_vv_nxv2i64_nxv2i8( %va, @vwmulsu_vv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwmulsu.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = zext %vb to @@ -865,10 +865,10 @@ define @vwmul_vx_nxv2i64_nxv2i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -883,10 +883,10 @@ define @vwmulu_vx_nxv2i64_nxv2i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -901,10 +901,10 @@ define @vwmulsu_vx_nxv2i64_nxv2i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vzext.vf8 v12, v9 -; CHECK-NEXT: vmul.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vzext.vf4 v11, v9 +; CHECK-NEXT: vwmulsu.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -917,10 +917,10 @@ define @vwmulsu_vx_nxv2i64_nxv2i8( %va, i8 % define @vwmul_vv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v8 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v8 +; CHECK-NEXT: vsext.vf4 v14, v9 +; CHECK-NEXT: vwmul.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -931,10 +931,10 @@ define @vwmul_vv_nxv4i64_nxv4i8( %va, @vwmulu_vv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf8 v12, v8 -; CHECK-NEXT: vzext.vf8 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v14, v9 +; CHECK-NEXT: vwmulu.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -945,10 +945,10 @@ define @vwmulu_vv_nxv4i64_nxv4i8( %va, @vwmulsu_vv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v8 -; CHECK-NEXT: vzext.vf8 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v14, v9 +; CHECK-NEXT: vwmulsu.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = sext %va to %vd = zext %vb to @@ -961,10 +961,10 @@ define @vwmul_vx_nxv4i64_nxv4i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v8 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v8 +; CHECK-NEXT: vsext.vf4 v14, v9 +; CHECK-NEXT: vwmul.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -979,10 +979,10 @@ define @vwmulu_vx_nxv4i64_nxv4i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf8 v12, v8 -; CHECK-NEXT: vzext.vf8 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v14, v9 +; CHECK-NEXT: vwmulu.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -997,10 +997,10 @@ define @vwmulsu_vx_nxv4i64_nxv4i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v8 -; CHECK-NEXT: vzext.vf8 v16, v9 -; CHECK-NEXT: vmul.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v8 +; CHECK-NEXT: vzext.vf4 v14, v9 +; CHECK-NEXT: vwmulsu.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1013,10 +1013,10 @@ define @vwmulsu_vx_nxv4i64_nxv4i8( %va, i8 % define @vwmul_vv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsext.vf8 v24, v9 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vsext.vf4 v20, v9 +; CHECK-NEXT: vwmul.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -1027,10 +1027,10 @@ define @vwmul_vv_nxv8i64_nxv8i8( %va, @vwmulu_vv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf8 v16, v8 -; CHECK-NEXT: vzext.vf8 v24, v9 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v16, v8 +; CHECK-NEXT: vzext.vf4 v20, v9 +; CHECK-NEXT: vwmulu.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = zext %va to %vd = zext %vb to @@ -1041,10 +1041,10 @@ define @vwmulu_vv_nxv8i64_nxv8i8( %va, @vwmulsu_vv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vzext.vf8 v24, v9 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vzext.vf4 v20, v9 +; CHECK-NEXT: vwmulsu.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = sext %va to %vd = zext %vb to @@ -1057,10 +1057,10 @@ define @vwmul_vx_nxv8i64_nxv8i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsext.vf8 v24, v9 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vsext.vf4 v20, v9 +; CHECK-NEXT: vwmul.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1075,10 +1075,10 @@ define @vwmulu_vx_nxv8i64_nxv8i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf8 v16, v8 -; CHECK-NEXT: vzext.vf8 v24, v9 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v16, v8 +; CHECK-NEXT: vzext.vf4 v20, v9 +; CHECK-NEXT: vwmulu.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1093,10 +1093,10 @@ define @vwmulsu_vx_nxv8i64_nxv8i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vzext.vf8 v24, v9 -; CHECK-NEXT: vmul.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vzext.vf4 v20, v9 +; CHECK-NEXT: vwmulsu.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i8 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll index 123469ade0ed4..852814d648bfc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll @@ -421,10 +421,10 @@ define @vwsubu_wx_nxv8i64_nxv8i32( %va, i32 define @vwsub_vv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v8, v9 -; CHECK-NEXT: vsub.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -451,10 +451,10 @@ define @vwsub_vx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v8, v9 -; CHECK-NEXT: vsub.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -483,9 +483,9 @@ define @vwsubu_vx_nxv1i64_nxv1i16( %va, i16 define @vwsub_wv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v9 -; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v9 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret %vc = sext %vb to %vd = sub %va, %vc @@ -495,9 +495,9 @@ define @vwsub_wv_nxv1i64_nxv1i16( %va, @vwsubu_wv_nxv1i64_nxv1i16( %va, %vb) { ; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v9 -; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v9 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret %vc = zext %vb to %vd = sub %va, %vc @@ -509,9 +509,9 @@ define @vwsub_wx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v9 -; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v9 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -525,9 +525,9 @@ define @vwsubu_wx_nxv1i64_nxv1i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf4 v10, v9 -; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v9 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -539,10 +539,10 @@ define @vwsubu_wx_nxv1i64_nxv1i16( %va, i16 define @vwsub_vv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vsub.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -569,10 +569,10 @@ define @vwsub_vx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v10, v8 -; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vsub.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v11, v9 +; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -601,9 +601,9 @@ define @vwsubu_vx_nxv2i64_nxv2i16( %va, i16 define @vwsub_wv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v10 -; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v11, v10 +; CHECK-NEXT: vwsub.wv v8, v8, v11 ; CHECK-NEXT: ret %vc = sext %vb to %vd = sub %va, %vc @@ -613,9 +613,9 @@ define @vwsub_wv_nxv2i64_nxv2i16( %va, @vwsubu_wv_nxv2i64_nxv2i16( %va, %vb) { ; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v12, v10 -; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v11, v10 +; CHECK-NEXT: vwsubu.wv v8, v8, v11 ; CHECK-NEXT: ret %vc = zext %vb to %vd = sub %va, %vc @@ -627,9 +627,9 @@ define @vwsub_wx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v10 -; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v11, v10 +; CHECK-NEXT: vwsub.wv v8, v8, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -643,9 +643,9 @@ define @vwsubu_wx_nxv2i64_nxv2i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf4 v12, v10 -; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf2 v11, v10 +; CHECK-NEXT: vwsubu.wv v8, v8, v11 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -657,10 +657,10 @@ define @vwsubu_wx_nxv2i64_nxv2i16( %va, i16 define @vwsub_vv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vsext.vf4 v16, v9 -; CHECK-NEXT: vsub.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v8 +; CHECK-NEXT: vsext.vf2 v14, v9 +; CHECK-NEXT: vwsub.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -687,10 +687,10 @@ define @vwsub_vx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vsext.vf4 v16, v9 -; CHECK-NEXT: vsub.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v12, v8 +; CHECK-NEXT: vsext.vf2 v14, v9 +; CHECK-NEXT: vwsub.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -719,9 +719,9 @@ define @vwsubu_vx_nxv4i64_nxv4i16( %va, i16 define @vwsub_wv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v12 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v14, v12 +; CHECK-NEXT: vwsub.wv v8, v8, v14 ; CHECK-NEXT: ret %vc = sext %vb to %vd = sub %va, %vc @@ -731,9 +731,9 @@ define @vwsub_wv_nxv4i64_nxv4i16( %va, @vwsubu_wv_nxv4i64_nxv4i16( %va, %vb) { ; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v12 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v14, v12 +; CHECK-NEXT: vwsubu.wv v8, v8, v14 ; CHECK-NEXT: ret %vc = zext %vb to %vd = sub %va, %vc @@ -745,9 +745,9 @@ define @vwsub_wx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v12 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf2 v14, v12 +; CHECK-NEXT: vwsub.wv v8, v8, v14 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -761,9 +761,9 @@ define @vwsubu_wx_nxv4i64_nxv4i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf4 v16, v12 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v14, v12 +; CHECK-NEXT: vwsubu.wv v8, v8, v14 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -775,10 +775,10 @@ define @vwsubu_wx_nxv4i64_nxv4i16( %va, i16 define @vwsub_vv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v8 -; CHECK-NEXT: vsext.vf4 v24, v10 -; CHECK-NEXT: vsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vsext.vf2 v20, v10 +; CHECK-NEXT: vwsub.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -805,10 +805,10 @@ define @vwsub_vx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v16, v8 -; CHECK-NEXT: vsext.vf4 v24, v10 -; CHECK-NEXT: vsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vsext.vf2 v20, v10 +; CHECK-NEXT: vwsub.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -837,9 +837,9 @@ define @vwsubu_vx_nxv8i64_nxv8i16( %va, i16 define @vwsub_wv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v24, v16 -; CHECK-NEXT: vsub.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v20, v16 +; CHECK-NEXT: vwsub.wv v8, v8, v20 ; CHECK-NEXT: ret %vc = sext %vb to %vd = sub %va, %vc @@ -849,9 +849,9 @@ define @vwsub_wv_nxv8i64_nxv8i16( %va, @vwsubu_wv_nxv8i64_nxv8i16( %va, %vb) { ; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v24, v16 -; CHECK-NEXT: vsub.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v20, v16 +; CHECK-NEXT: vwsubu.wv v8, v8, v20 ; CHECK-NEXT: ret %vc = zext %vb to %vd = sub %va, %vc @@ -863,9 +863,9 @@ define @vwsub_wx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf4 v24, v16 -; CHECK-NEXT: vsub.vv v8, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf2 v20, v16 +; CHECK-NEXT: vwsub.wv v8, v8, v20 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -879,9 +879,9 @@ define @vwsubu_wx_nxv8i64_nxv8i16( %va, i16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf4 v24, v16 -; CHECK-NEXT: vsub.vv v8, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf2 v20, v16 +; CHECK-NEXT: vwsubu.wv v8, v8, v20 ; CHECK-NEXT: ret %head = insertelement poison, i16 %b, i16 0 %splat = shufflevector %head, poison, zeroinitializer @@ -893,10 +893,10 @@ define @vwsubu_wx_nxv8i64_nxv8i16( %va, i16 define @vwsub_vv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v8, v9 -; CHECK-NEXT: vsub.vv v8, v10, v8 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -923,10 +923,10 @@ define @vwsub_vx_nxv1i64_nxv1i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v8, v9 -; CHECK-NEXT: vsub.vv v8, v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -955,9 +955,9 @@ define @vwsubu_vx_nxv1i64_nxv1i8( %va, i8 %b define @vwsub_wv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v9 -; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v9 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret %vc = sext %vb to %vd = sub %va, %vc @@ -967,9 +967,9 @@ define @vwsub_wv_nxv1i64_nxv1i8( %va, @vwsubu_wv_nxv1i64_nxv1i8( %va, %vb) { ; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v9 -; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v9 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret %vc = zext %vb to %vd = sub %va, %vc @@ -981,9 +981,9 @@ define @vwsub_wx_nxv1i64_nxv1i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v9 -; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v9 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -997,9 +997,9 @@ define @vwsubu_wx_nxv1i64_nxv1i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vzext.vf8 v10, v9 -; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vzext.vf4 v10, v9 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1011,10 +1011,10 @@ define @vwsubu_wx_nxv1i64_nxv1i8( %va, i8 % define @vwsub_vv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v12, v9 -; CHECK-NEXT: vsub.vv v8, v10, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -1041,10 +1041,10 @@ define @vwsub_vx_nxv2i64_nxv2i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v10, v8 -; CHECK-NEXT: vsext.vf8 v12, v9 -; CHECK-NEXT: vsub.vv v8, v10, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v10, v8 +; CHECK-NEXT: vsext.vf4 v11, v9 +; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1073,9 +1073,9 @@ define @vwsubu_vx_nxv2i64_nxv2i8( %va, i8 %b define @vwsub_wv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v10 -; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v11, v10 +; CHECK-NEXT: vwsub.wv v8, v8, v11 ; CHECK-NEXT: ret %vc = sext %vb to %vd = sub %va, %vc @@ -1085,9 +1085,9 @@ define @vwsub_wv_nxv2i64_nxv2i8( %va, @vwsubu_wv_nxv2i64_nxv2i8( %va, %vb) { ; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf8 v12, v10 -; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v11, v10 +; CHECK-NEXT: vwsubu.wv v8, v8, v11 ; CHECK-NEXT: ret %vc = zext %vb to %vd = sub %va, %vc @@ -1099,9 +1099,9 @@ define @vwsub_wx_nxv2i64_nxv2i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v10 -; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v11, v10 +; CHECK-NEXT: vwsub.wv v8, v8, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1115,9 +1115,9 @@ define @vwsubu_wx_nxv2i64_nxv2i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf8 v12, v10 -; CHECK-NEXT: vsub.vv v8, v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v11, v10 +; CHECK-NEXT: vwsubu.wv v8, v8, v11 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1129,10 +1129,10 @@ define @vwsubu_wx_nxv2i64_nxv2i8( %va, i8 % define @vwsub_vv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v8 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsub.vv v8, v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v8 +; CHECK-NEXT: vsext.vf4 v14, v9 +; CHECK-NEXT: vwsub.vv v8, v12, v14 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -1159,10 +1159,10 @@ define @vwsub_vx_nxv4i64_nxv4i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v12, v8 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsub.vv v8, v12, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v12, v8 +; CHECK-NEXT: vsext.vf4 v14, v9 +; CHECK-NEXT: vwsub.vv v8, v12, v14 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1191,9 +1191,9 @@ define @vwsubu_vx_nxv4i64_nxv4i8( %va, i8 %b define @vwsub_wv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v12 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v14, v12 +; CHECK-NEXT: vwsub.wv v8, v8, v14 ; CHECK-NEXT: ret %vc = sext %vb to %vd = sub %va, %vc @@ -1203,9 +1203,9 @@ define @vwsub_wv_nxv4i64_nxv4i8( %va, @vwsubu_wv_nxv4i64_nxv4i8( %va, %vb) { ; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf8 v16, v12 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v14, v12 +; CHECK-NEXT: vwsubu.wv v8, v8, v14 ; CHECK-NEXT: ret %vc = zext %vb to %vd = sub %va, %vc @@ -1217,9 +1217,9 @@ define @vwsub_wx_nxv4i64_nxv4i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v12 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsext.vf4 v14, v12 +; CHECK-NEXT: vwsub.wv v8, v8, v14 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1233,9 +1233,9 @@ define @vwsubu_wx_nxv4i64_nxv4i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vzext.vf8 v16, v12 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf4 v14, v12 +; CHECK-NEXT: vwsubu.wv v8, v8, v14 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1247,10 +1247,10 @@ define @vwsubu_wx_nxv4i64_nxv4i8( %va, i8 % define @vwsub_vv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsext.vf8 v24, v9 -; CHECK-NEXT: vsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vsext.vf4 v20, v9 +; CHECK-NEXT: vwsub.vv v8, v16, v20 ; CHECK-NEXT: ret %vc = sext %va to %vd = sext %vb to @@ -1277,10 +1277,10 @@ define @vwsub_vx_nxv8i64_nxv8i8( %va, i8 %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsext.vf8 v24, v9 -; CHECK-NEXT: vsub.vv v8, v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v16, v8 +; CHECK-NEXT: vsext.vf4 v20, v9 +; CHECK-NEXT: vwsub.vv v8, v16, v20 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1309,9 +1309,9 @@ define @vwsubu_vx_nxv8i64_nxv8i8( %va, i8 %b define @vwsub_wv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v24, v16 -; CHECK-NEXT: vsub.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v20, v16 +; CHECK-NEXT: vwsub.wv v8, v8, v20 ; CHECK-NEXT: ret %vc = sext %vb to %vd = sub %va, %vc @@ -1321,9 +1321,9 @@ define @vwsub_wv_nxv8i64_nxv8i8( %va, @vwsubu_wv_nxv8i64_nxv8i8( %va, %vb) { ; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf8 v24, v16 -; CHECK-NEXT: vsub.vv v8, v8, v24 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v20, v16 +; CHECK-NEXT: vwsubu.wv v8, v8, v20 ; CHECK-NEXT: ret %vc = zext %vb to %vd = sub %va, %vc @@ -1335,9 +1335,9 @@ define @vwsub_wx_nxv8i64_nxv8i8( %va, i8 %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v24, v16 -; CHECK-NEXT: vsub.vv v8, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsext.vf4 v20, v16 +; CHECK-NEXT: vwsub.wv v8, v8, v20 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer @@ -1351,9 +1351,9 @@ define @vwsubu_wx_nxv8i64_nxv8i8( %va, i8 % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vzext.vf8 v24, v16 -; CHECK-NEXT: vsub.vv v8, v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vzext.vf4 v20, v16 +; CHECK-NEXT: vwsubu.wv v8, v8, v20 ; CHECK-NEXT: ret %head = insertelement poison, i8 %b, i8 0 %splat = shufflevector %head, poison, zeroinitializer From b6c1682420e72d25aa8cf9350639d79f05710e6c Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 18 Mar 2024 16:16:10 +0800 Subject: [PATCH 2/3] Check for i1 in the narrow op instead of relying on legalize vector ops, rename test case --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 25 +++++++++++++++------ llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 23 ++++++++++++++++--- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 025137707aef3..48c72bdc64613 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13644,10 +13644,13 @@ struct NodeExtensionHelper { SupportsFPExt = false; EnforceOneUse = true; CheckMask = true; + SDValue NarrowOp; unsigned Opc = OrigOperand.getOpcode(); switch (Opc) { case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: { + NarrowOp = OrigOperand.getOperand(0); + MVT VT = OrigOperand.getSimpleValueType(); if (!VT.isVector()) break; @@ -13660,16 +13663,19 @@ struct NodeExtensionHelper { break; } case RISCVISD::VZEXT_VL: + NarrowOp = OrigOperand.getOperand(0); SupportsZExt = true; Mask = OrigOperand.getOperand(1); VL = OrigOperand.getOperand(2); break; case RISCVISD::VSEXT_VL: + NarrowOp = OrigOperand.getOperand(0); SupportsSExt = true; Mask = OrigOperand.getOperand(1); VL = OrigOperand.getOperand(2); break; case RISCVISD::FP_EXTEND_VL: + NarrowOp = OrigOperand.getOperand(0); SupportsFPExt = true; Mask = OrigOperand.getOperand(1); VL = OrigOperand.getOperand(2); @@ -13688,13 +13694,13 @@ struct NodeExtensionHelper { break; // Get the scalar value. - SDValue Op = OrigOperand.getOperand(1); + NarrowOp = OrigOperand.getOperand(1); // See if we have enough sign bits or zero bits in the scalar to use a // widening opcode by splatting to smaller element size. MVT VT = Root->getSimpleValueType(0); unsigned EltBits = VT.getScalarSizeInBits(); - unsigned ScalarBits = Op.getValueSizeInBits(); + unsigned ScalarBits = NarrowOp.getValueSizeInBits(); // Make sure we're getting all element bits from the scalar register. // FIXME: Support implicit sign extension of vmv.v.x? if (ScalarBits < EltBits) @@ -13706,9 +13712,9 @@ struct NodeExtensionHelper { if (NarrowSize < 8) break; - if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) + if (DAG.ComputeMaxSignificantBits(NarrowOp) <= NarrowSize) SupportsSExt = true; - if (DAG.MaskedValueIsZero(Op, + if (DAG.MaskedValueIsZero(NarrowOp, APInt::getBitsSetFrom(ScalarBits, NarrowSize))) SupportsZExt = true; break; @@ -13716,6 +13722,13 @@ struct NodeExtensionHelper { default: break; } + + // i1 types are legal but we can't select V{S,Z}EXT_VLs with them. + if (NarrowOp && NarrowOp.getSimpleValueType().getScalarType() == MVT::i1) { + SupportsSExt = false; + SupportsZExt = false; + SupportsFPExt = false; + } } /// Check if \p Root supports any extension folding combines. @@ -14102,9 +14115,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; - // Don't perform this until types are legalized and any legal i1 types are - // custom lowered to avoid introducing unselectable V{S,Z}EXT_VLs. - if (DCI.isBeforeLegalizeOps()) + if (DCI.isBeforeLegalize()) return SDValue(); if (!NodeExtensionHelper::isSupportedRoot(N)) diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll index 4152e61c0541a..66a7eea18be50 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s define @vwadd_vv_nxv1i64_nxv1i32( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i32: @@ -1380,7 +1380,24 @@ define @vwaddu_wx_nxv8i64_nxv8i8( %va, i8 % ; Make sure that we don't introduce any V{S,Z}EXT_VL nodes with i1 types from ; combineBinOp_VLToVWBinOp_VL, since they can't be selected. -define @foo( %va, %vb, ptr %p) { +define @i1_zext( %va, %vb, ptr %p) { +; RV32-LABEL: i1_zext: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vadd.vv v8, v9, v8 +; RV32-NEXT: li a1, 42 +; RV32-NEXT: sh a1, 0(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: i1_zext: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-NEXT: vadd.vi v8, v8, 1, v0.t +; RV64-NEXT: li a1, 42 +; RV64-NEXT: sh a1, 0(a0) +; RV64-NEXT: ret %vc = zext %va to %vd = add %vc, %vb From 8ac08200f5580714d275198e2e355ab6d3e5a1c0 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 19 Mar 2024 14:57:28 +0800 Subject: [PATCH 3/3] Only check for i1 types in ISD::{SIGN,ZERO}_EXTEND --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 27 ++++++++------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 48c72bdc64613..8fb1564b61454 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13644,17 +13644,20 @@ struct NodeExtensionHelper { SupportsFPExt = false; EnforceOneUse = true; CheckMask = true; - SDValue NarrowOp; unsigned Opc = OrigOperand.getOpcode(); switch (Opc) { case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: { - NarrowOp = OrigOperand.getOperand(0); - MVT VT = OrigOperand.getSimpleValueType(); if (!VT.isVector()) break; + SDValue NarrowElt = OrigOperand.getOperand(0); + MVT NarrowVT = NarrowElt.getSimpleValueType(); + // i1 types are legal but we can't select V{S,Z}EXT_VLs with them. + if (NarrowVT.getVectorElementType() == MVT::i1) + break; + SupportsZExt = Opc == ISD::ZERO_EXTEND; SupportsSExt = Opc == ISD::SIGN_EXTEND; @@ -13663,19 +13666,16 @@ struct NodeExtensionHelper { break; } case RISCVISD::VZEXT_VL: - NarrowOp = OrigOperand.getOperand(0); SupportsZExt = true; Mask = OrigOperand.getOperand(1); VL = OrigOperand.getOperand(2); break; case RISCVISD::VSEXT_VL: - NarrowOp = OrigOperand.getOperand(0); SupportsSExt = true; Mask = OrigOperand.getOperand(1); VL = OrigOperand.getOperand(2); break; case RISCVISD::FP_EXTEND_VL: - NarrowOp = OrigOperand.getOperand(0); SupportsFPExt = true; Mask = OrigOperand.getOperand(1); VL = OrigOperand.getOperand(2); @@ -13694,13 +13694,13 @@ struct NodeExtensionHelper { break; // Get the scalar value. - NarrowOp = OrigOperand.getOperand(1); + SDValue Op = OrigOperand.getOperand(1); // See if we have enough sign bits or zero bits in the scalar to use a // widening opcode by splatting to smaller element size. MVT VT = Root->getSimpleValueType(0); unsigned EltBits = VT.getScalarSizeInBits(); - unsigned ScalarBits = NarrowOp.getValueSizeInBits(); + unsigned ScalarBits = Op.getValueSizeInBits(); // Make sure we're getting all element bits from the scalar register. // FIXME: Support implicit sign extension of vmv.v.x? if (ScalarBits < EltBits) @@ -13712,9 +13712,9 @@ struct NodeExtensionHelper { if (NarrowSize < 8) break; - if (DAG.ComputeMaxSignificantBits(NarrowOp) <= NarrowSize) + if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) SupportsSExt = true; - if (DAG.MaskedValueIsZero(NarrowOp, + if (DAG.MaskedValueIsZero(Op, APInt::getBitsSetFrom(ScalarBits, NarrowSize))) SupportsZExt = true; break; @@ -13722,13 +13722,6 @@ struct NodeExtensionHelper { default: break; } - - // i1 types are legal but we can't select V{S,Z}EXT_VLs with them. - if (NarrowOp && NarrowOp.getSimpleValueType().getScalarType() == MVT::i1) { - SupportsSExt = false; - SupportsZExt = false; - SupportsFPExt = false; - } } /// Check if \p Root supports any extension folding combines.