diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 53fcc527e615d..fb95844643c40 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -52,12 +52,60 @@ class RISCVCodeGenPrepare : public FunctionPass,
   }
 
   bool visitInstruction(Instruction &I) { return false; }
+  bool visitBinaryOperator(BinaryOperator &BO);
   bool visitAnd(BinaryOperator &BO);
   bool visitIntrinsicInst(IntrinsicInst &I);
 };
 
 } // end anonymous namespace
 
+/// InstCombine will canonicalize selects of binary ops where the identity is
+/// zero to zexts:
+///
+/// select c, (add x, 1), x -> add x, (zext c)
+///
+/// On RISC-V though, a zext of an i1 vector will be lowered as a vmv.v.i and a
+/// vmerge.vim:
+///
+///   vmv.v.i v12, 0
+///   vmerge.vim v9, v12, 1, v0
+///   vadd.vv v8, v8, v9
+///
+/// Reverse this transform so that we pull the select outside of the binary op,
+/// which allows us to fold it into a masked op:
+///
+///   vadd.vi v8, v8, 1, v0.t
+bool RISCVCodeGenPrepare::visitBinaryOperator(BinaryOperator &BO) {
+  if (!BO.getType()->isVectorTy())
+    return false;
+
+  // TODO: We could allow sub if we did a non-commutative match
+  Constant *Identity = ConstantExpr::getIdentity(&BO, BO.getType());
+  if (!Identity || !Identity->isNullValue())
+    return false;
+
+  using namespace PatternMatch;
+
+  Value *Mask, *RHS;
+  if (!match(&BO, m_c_BinOp(m_OneUse(m_ZExt(m_Value(Mask))), m_Value(RHS))))
+    return false;
+
+  if (!Mask->getType()->isIntOrIntVectorTy(1))
+    return false;
+
+  IRBuilder<> Builder(&BO);
+  Value *Splat = ConstantInt::get(BO.getType(), 1);
+  Value *NewBO = Builder.CreateBinOp(BO.getOpcode(), RHS, Splat);
+  if (Instruction *I = dyn_cast<Instruction>(NewBO))
+    I->copyIRFlags(&BO);
+  Value *Select = Builder.CreateSelect(Mask, NewBO, RHS);
+
+  BO.replaceAllUsesWith(Select);
+  BO.eraseFromParent();
+
+  return true;
+}
+
 // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
 // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
 // the upper 32 bits with ones.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
index 954edf872aff8..43479ed184039 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
@@ -168,13 +168,14 @@ define <8 x i64> @vaaddu_vv_v8i64_floor(<8 x i64> %x, <8 x i64> %y) {
 define <8 x i1> @vaaddu_vv_v8i1_floor(<8 x i1> %x, <8 x i1> %y) {
 ; CHECK-LABEL: vaaddu_vv_v8i1_floor:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v10, 0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vim v8, v9, 1, v0
-; CHECK-NEXT:    csrwi vxrm, 2
-; CHECK-NEXT:    vaaddu.vv v8, v10, v8
+; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsrl.vi v8, v8, 1
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
@@ -421,13 +422,16 @@ define <8 x i64> @vaaddu_vv_v8i64_ceil(<8 x i64> %x, <8 x i64> %y) {
 define <8 x i1> @vaaddu_vv_v8i1_ceil(<8 x i1> %x, <8 x i1> %y) {
 ; CHECK-LABEL: vaaddu_vv_v8i1_ceil:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v10, 0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vim v8, v9, 1, v0
-; CHECK-NEXT:    csrwi vxrm, 0
-; CHECK-NEXT:    vaaddu.vv v8, v10, v8
+; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    csrwi vxrm, 2
+; CHECK-NEXT:    vaaddu.vx v8, v8, a0
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
index 4c5835afd49e6..b5a3401b66808 100644
--- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
@@ -42,3 +42,64 @@ vector.body:
 exit:
   ret float %acc
 }
+
+define <vscale x 2 x i32> @i1_zext_add(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_add:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_add_commuted(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_add_commuted:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %zext, %b
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_add_multi_use(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b, ptr %p) {
+; CHECK-LABEL: i1_zext_add_multi_use:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    vs1r.v v9, (a0)
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %b, %zext
+  store <vscale x 2 x i32> %zext, ptr %p
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_sub(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_sub:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %sub = sub <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %sub
+}
+
+define <vscale x 2 x i32> @i1_zext_or(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_or:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vor.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    ret
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %or = or <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %or
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
index 006fc269050b0..1b898aa5d5229 100644
--- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
@@ -44,3 +44,81 @@ vector.body:
 exit:
   ret float %acc
 }
+
+define <vscale x 2 x i32> @i1_zext_add(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x i32> @i1_zext_add(
+; CHECK-SAME: <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i32> [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <vscale x 2 x i1> [[A]] to <vscale x 2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <vscale x 2 x i32> [[B]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 2 x i1> [[A]], <vscale x 2 x i32> [[TMP1]], <vscale x 2 x i32> [[B]]
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[TMP2]]
+;
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_add_commuted(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x i32> @i1_zext_add_commuted(
+; CHECK-SAME: <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i32> [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <vscale x 2 x i1> [[A]] to <vscale x 2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <vscale x 2 x i32> [[B]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 2 x i1> [[A]], <vscale x 2 x i32> [[TMP1]], <vscale x 2 x i32> [[B]]
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[TMP2]]
+;
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %zext, %b
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_add_multi_use(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b, ptr %p) {
+; CHECK-LABEL: define <vscale x 2 x i32> @i1_zext_add_multi_use(
+; CHECK-SAME: <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i32> [[B:%.*]], ptr [[P:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <vscale x 2 x i1> [[A]] to <vscale x 2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <vscale x 2 x i32> [[B]], [[ZEXT]]
+; CHECK-NEXT:    store <vscale x 2 x i32> [[ZEXT]], ptr [[P]], align 8
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[TMP2]]
+;
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %b, %zext
+  store <vscale x 2 x i32> %zext, ptr %p
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_add_flags(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x i32> @i1_zext_add_flags(
+; CHECK-SAME: <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i32> [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <vscale x 2 x i1> [[A]] to <vscale x 2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <vscale x 2 x i32> [[B]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 2 x i1> [[A]], <vscale x 2 x i32> [[TMP1]], <vscale x 2 x i32> [[B]]
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[TMP2]]
+;
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %add = add nuw nsw <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_sub(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x i32> @i1_zext_sub(
+; CHECK-SAME: <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i32> [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <vscale x 2 x i1> [[A]] to <vscale x 2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <vscale x 2 x i32> [[B]], [[ZEXT]]
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[TMP2]]
+;
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %sub = sub <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %sub
+}
+
+define <vscale x 2 x i32> @i1_zext_or(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x i32> @i1_zext_or(
+; CHECK-SAME: <vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i32> [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <vscale x 2 x i1> [[A]] to <vscale x 2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <vscale x 2 x i32> [[B]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 2 x i1> [[A]], <vscale x 2 x i32> [[TMP1]], <vscale x 2 x i32> [[B]]
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[TMP2]]
+;
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %or = or <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %or
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index 66e6883dd1d3e..f97481abe4b7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1365,23 +1365,13 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %
 ; Make sure that we don't introduce any V{S,Z}EXT_VL nodes with i1 types from
 ; combineBinOp_VLToVWBinOp_VL, since they can't be selected.
 define <vscale x 1 x i64> @i1_zext(<vscale x 1 x i1> %va, <vscale x 1 x i64> %vb, ptr %p) {
-; RV32-LABEL: i1_zext:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    vadd.vv v8, v9, v8
-; RV32-NEXT:    li a1, 42
-; RV32-NEXT:    sh a1, 0(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: i1_zext:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vadd.vi v8, v8, 1, v0.t
-; RV64-NEXT:    li a1, 42
-; RV64-NEXT:    sh a1, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: i1_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    li a1, 42
+; CHECK-NEXT:    sh a1, 0(a0)
+; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i1> %va to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %vc, %vb
 
@@ -1466,3 +1456,6 @@ define <vscale x 2 x i32> @vwadd_wv_disjoint_or(<vscale x 2 x i32> %x.i32,
   %or = or disjoint <vscale x 2 x i32> %x.i32, %y.i32
   ret <vscale x 2 x i32> %or
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}