From 888726dab042c304ee90ec8fe1a1b217682f4893 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng
Date: Mon, 25 Mar 2024 21:37:42 +0800
Subject: [PATCH 1/7] [SDAG] Use shifts if ISD::MUL is illegal when lowering
 ISD::CTPOP/ISD::VP_CTPOP

When ISD::MUL (or ISD::VP_MUL) is not legal, custom, or promoted for the
type, expand the final multiply by the 0x01010101... splat constant into
an equivalent shift-and-add chain. This avoids libcalls such as
__mulsi3/__muldi3 on targets without a usable multiply instruction.

Fixes #86205
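For illustration, a minimal sketch (plain C++, not part of the patch) of
the equivalence the expansion relies on. Assume Len = 32 and that the
earlier bit-twiddling steps already left a partial count in each byte of
v; the helper names below are hypothetical:

    #include <cstdint>
    // Both forms sum the four bytes of v into the top byte, because
    // v * 0x01010101 == v + (v << 8) + (v << 16) + (v << 24) (mod 2^32).
    uint32_t sumBytesMul(uint32_t v) { return (v * 0x01010101u) >> 24; }
    uint32_t sumBytesShift(uint32_t v) {
      v += v << 8;    // Shift = 8
      v += v << 16;   // Shift = 16; the loop stops once Shift >= Len
      return v >> 24; // Len - 8
    }

The DAG expansion builds the same chain out of ISD::SHL/ISD::ADD nodes;
the per-byte partial counts are at most 32, so the byte sums cannot
overflow in either form.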
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  37 +-
 .../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll |   9 +-
 llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll    | 766 +++++++-----------
 .../CodeGen/RISCV/ctz_zero_return_test.ll     | 102 +--
 llvm/test/CodeGen/RISCV/rv32xtheadbb.ll       |  96 +--
 llvm/test/CodeGen/RISCV/rv32zbb.ll            | 447 +++++-----
 .../RISCV/rv64-legal-i32/rv64xtheadbb.ll      |  15 +-
 .../CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll   |  30 +-
 llvm/test/CodeGen/RISCV/rv64xtheadbb.ll       | 209 +++--
 llvm/test/CodeGen/RISCV/rv64zbb.ll            | 438 +++++-----
 llvm/test/CodeGen/RISCV/sextw-removal.ll      |  29 +-
 11 files changed, 889 insertions(+), 1289 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8be03b66e155f..b92f790604403 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8710,11 +8710,19 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   }
 
   // v = (v * 0x01010101...) >> (Len - 8)
-  SDValue Mask01 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
-  return DAG.getNode(ISD::SRL, dl, VT,
-                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
-                     DAG.getConstant(Len - 8, dl, ShVT));
+  SDValue V;
+  if (isOperationLegalOrCustomOrPromote(ISD::MUL, VT)) {
+    SDValue Mask01 =
+        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
+  } else {
+    V = Op;
+    for (unsigned Shift = 8; Shift < Len; Shift *= 2)
+      V = DAG.getNode(ISD::ADD, dl, VT, V,
+                      DAG.getNode(ISD::SHL, dl, VT, V,
+                                  DAG.getShiftAmountConstant(Shift, VT, dl)));
+  }
+  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
 }
 
 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
@@ -8767,10 +8775,21 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
     return Op;
 
   // v = (v * 0x01010101...) >> (Len - 8)
-  SDValue Mask01 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
-  return DAG.getNode(ISD::VP_LSHR, dl, VT,
-                     DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
+  SDValue V;
+  if (isOperationLegalOrCustomOrPromote(ISD::VP_MUL, VT)) {
+    SDValue Mask01 =
+        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+    V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
+  } else {
+    V = Op;
+    for (unsigned Shift = 8; Shift < Len; Shift *= 2)
+      V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
+                      DAG.getNode(ISD::VP_SHL, dl, VT, V,
+                                  DAG.getShiftAmountConstant(Shift, VT, dl),
+                                  Mask, VL),
+                      Mask, VL);
+  }
+  return DAG.getNode(ISD::VP_LSHR, dl, VT, V,
                      DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
 }
 
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 9fa3f5076bb22..3a6cf4c4e0ed2 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -285,10 +285,11 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 ; LA64-NEXT: lu12i.w $a1, 61680
 ; LA64-NEXT: ori $a1, $a1, 3855
 ; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 4112
-; LA64-NEXT: ori $a1, $a1, 257
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24
+; LA64-NEXT: slli.d $a1, $a0, 8
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 16
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a0, $a0, 29, 24
 ; LA64-NEXT: ret
 %1 = call i32 @llvm.ctpop.i32(i32 %a)
 ret i32 %1
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 455e6e54c9b39..a9b7e7e06d519 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1160,8 +1160,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: beqz a0, .LBB10_2
 ; RV32I-NEXT: # %bb.1: # %cond.false
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT: srli a1, a0, 1
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: srli a1, a0, 2
@@ -1189,61 +1187,57 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV32I-NEXT: lui a1, 61681
 ; RV32I-NEXT: addi a1, a1, -241
 ; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: lui a1, 4112
-; RV32I-NEXT: addi a1, a1, 257
-; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
 ; RV32I-NEXT: srli a0, a0, 24
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ; RV32I-NEXT: .LBB10_2:
 ; RV32I-NEXT: li a0, 32
 ; RV32I-NEXT: ret
 ;
-; RV64I-LABEL: test_ctlz_i32:
-; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: beqz a1, .LBB10_2
-; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: addi sp, sp, -16
-; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: srliw a1, a0, 1
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 2
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 4
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 8
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: srliw a1, a0, 16
-; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: not a0, a0
-; RV64I-NEXT: srli a1, a0, 1
-; RV64I-NEXT: lui a2, 349525
-; RV64I-NEXT: addiw a2, a2, 1365
-; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: lui a1, 209715
-; RV64I-NEXT: addiw a1, a1, 819
-; RV64I-NEXT: and a2, a0, a1
-; RV64I-NEXT: srli a0, a0, 2
-; RV64I-NEXT: and a0, a0, a1
-; RV64I-NEXT: 
add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: li a0, 32 -; RV64I-NEXT: ret +; RV64NOZBB-LABEL: test_ctlz_i32: +; RV64NOZBB: # %bb.0: +; RV64NOZBB-NEXT: sext.w a1, a0 +; RV64NOZBB-NEXT: beqz a1, .LBB10_2 +; RV64NOZBB-NEXT: # %bb.1: # %cond.false +; RV64NOZBB-NEXT: srliw a1, a0, 1 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: srliw a1, a0, 2 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: srliw a1, a0, 4 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: srliw a1, a0, 8 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: srliw a1, a0, 16 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: not a0, a0 +; RV64NOZBB-NEXT: srli a1, a0, 1 +; RV64NOZBB-NEXT: lui a2, 349525 +; RV64NOZBB-NEXT: addiw a2, a2, 1365 +; RV64NOZBB-NEXT: and a1, a1, a2 +; RV64NOZBB-NEXT: sub a0, a0, a1 +; RV64NOZBB-NEXT: lui a1, 209715 +; RV64NOZBB-NEXT: addiw a1, a1, 819 +; RV64NOZBB-NEXT: and a2, a0, a1 +; RV64NOZBB-NEXT: srli a0, a0, 2 +; RV64NOZBB-NEXT: and a0, a0, a1 +; RV64NOZBB-NEXT: add a0, a2, a0 +; RV64NOZBB-NEXT: srli a1, a0, 4 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: lui a1, 61681 +; RV64NOZBB-NEXT: addi a1, a1, -241 +; RV64NOZBB-NEXT: and a0, a0, a1 +; RV64NOZBB-NEXT: slli a1, a0, 8 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: slli a1, a0, 16 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: srliw a0, a0, 24 +; RV64NOZBB-NEXT: ret +; RV64NOZBB-NEXT: .LBB10_2: +; RV64NOZBB-NEXT: li a0, 32 +; RV64NOZBB-NEXT: ret ; ; RV32M-LABEL: test_ctlz_i32: ; RV32M: # %bb.0: @@ -1285,47 +1279,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32M-NEXT: li a0, 32 ; RV32M-NEXT: ret ; -; RV64M-LABEL: test_ctlz_i32: -; RV64M: # %bb.0: -; RV64M-NEXT: sext.w a1, a0 -; RV64M-NEXT: beqz a1, .LBB10_2 -; RV64M-NEXT: # %bb.1: # %cond.false -; RV64M-NEXT: srliw a1, a0, 1 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: srliw a1, a0, 2 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: srliw a1, a0, 4 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: srliw a1, a0, 8 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: srliw a1, a0, 16 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: not a0, a0 -; RV64M-NEXT: srli a1, a0, 1 -; RV64M-NEXT: lui a2, 349525 -; RV64M-NEXT: addiw a2, a2, 1365 -; RV64M-NEXT: and a1, a1, a2 -; RV64M-NEXT: sub a0, a0, a1 -; RV64M-NEXT: lui a1, 209715 -; RV64M-NEXT: addiw a1, a1, 819 -; RV64M-NEXT: and a2, a0, a1 -; RV64M-NEXT: srli a0, a0, 2 -; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: add a0, a2, a0 -; RV64M-NEXT: srli a1, a0, 4 -; RV64M-NEXT: add a0, a0, a1 -; RV64M-NEXT: lui a1, 61681 -; RV64M-NEXT: addi a1, a1, -241 -; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: lui a1, 4112 -; RV64M-NEXT: addi a1, a1, 257 -; RV64M-NEXT: mul a0, a0, a1 -; RV64M-NEXT: srliw a0, a0, 24 -; RV64M-NEXT: ret -; RV64M-NEXT: .LBB10_2: -; RV64M-NEXT: li a0, 32 -; RV64M-NEXT: ret -; ; RV32ZBB-LABEL: test_ctlz_i32: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: clz a0, a0 @@ -1354,19 +1307,16 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: test_ctlz_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; 
RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB11_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -1377,28 +1327,26 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -1409,43 +1357,27 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB11_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB11_3 -; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: srli a0, s1, 24 -; RV32I-NEXT: .LBB11_3: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: 
test_ctlz_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB11_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -1481,14 +1413,13 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB11_2: ; RV64I-NEXT: li a0, 64 @@ -1831,8 +1762,6 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind { define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV32I-LABEL: test_ctlz_i32_zero_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -1860,52 +1789,48 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV64I-LABEL: test_ctlz_i32_zero_undef: -; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: srliw a1, a0, 1 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64NOZBB-LABEL: test_ctlz_i32_zero_undef: +; RV64NOZBB: # %bb.0: +; RV64NOZBB-NEXT: srliw a1, a0, 1 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: srliw a1, a0, 2 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: srliw a1, a0, 4 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: srliw a1, a0, 8 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: srliw a1, a0, 16 +; RV64NOZBB-NEXT: or a0, a0, a1 +; RV64NOZBB-NEXT: not a0, a0 +; RV64NOZBB-NEXT: srli a1, a0, 1 +; RV64NOZBB-NEXT: lui a2, 349525 +; RV64NOZBB-NEXT: addiw a2, a2, 1365 +; 
RV64NOZBB-NEXT: and a1, a1, a2 +; RV64NOZBB-NEXT: sub a0, a0, a1 +; RV64NOZBB-NEXT: lui a1, 209715 +; RV64NOZBB-NEXT: addiw a1, a1, 819 +; RV64NOZBB-NEXT: and a2, a0, a1 +; RV64NOZBB-NEXT: srli a0, a0, 2 +; RV64NOZBB-NEXT: and a0, a0, a1 +; RV64NOZBB-NEXT: add a0, a2, a0 +; RV64NOZBB-NEXT: srli a1, a0, 4 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: lui a1, 61681 +; RV64NOZBB-NEXT: addi a1, a1, -241 +; RV64NOZBB-NEXT: and a0, a0, a1 +; RV64NOZBB-NEXT: slli a1, a0, 8 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: slli a1, a0, 16 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: srliw a0, a0, 24 +; RV64NOZBB-NEXT: ret ; ; RV32M-LABEL: test_ctlz_i32_zero_undef: ; RV32M: # %bb.0: @@ -1942,41 +1867,6 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV32M-NEXT: srli a0, a0, 24 ; RV32M-NEXT: ret ; -; RV64M-LABEL: test_ctlz_i32_zero_undef: -; RV64M: # %bb.0: -; RV64M-NEXT: srliw a1, a0, 1 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: srliw a1, a0, 2 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: srliw a1, a0, 4 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: srliw a1, a0, 8 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: srliw a1, a0, 16 -; RV64M-NEXT: or a0, a0, a1 -; RV64M-NEXT: not a0, a0 -; RV64M-NEXT: srli a1, a0, 1 -; RV64M-NEXT: lui a2, 349525 -; RV64M-NEXT: addiw a2, a2, 1365 -; RV64M-NEXT: and a1, a1, a2 -; RV64M-NEXT: sub a0, a0, a1 -; RV64M-NEXT: lui a1, 209715 -; RV64M-NEXT: addiw a1, a1, 819 -; RV64M-NEXT: and a2, a0, a1 -; RV64M-NEXT: srli a0, a0, 2 -; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: add a0, a2, a0 -; RV64M-NEXT: srli a1, a0, 4 -; RV64M-NEXT: add a0, a0, a1 -; RV64M-NEXT: lui a1, 61681 -; RV64M-NEXT: addi a1, a1, -241 -; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: lui a1, 4112 -; RV64M-NEXT: addi a1, a1, 257 -; RV64M-NEXT: mul a0, a0, a1 -; RV64M-NEXT: srliw a0, a0, 24 -; RV64M-NEXT: ret -; ; RV32ZBB-LABEL: test_ctlz_i32_zero_undef: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: clz a0, a0 @@ -2005,19 +1895,16 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind { ; RV32I-LABEL: test_ctlz_i64_zero_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB15_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -2028,28 +1915,26 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and 
a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB15_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -2060,41 +1945,25 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB15_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB15_3 -; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: srli a0, s1, 24 -; RV32I-NEXT: .LBB15_3: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctlz_i64_zero_undef: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -2130,14 +1999,13 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctlz_i64_zero_undef: @@ -2464,8 +2332,6 @@ define i16 @test_ctpop_i16(i16 %a) nounwind { define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV32I-LABEL: test_ctpop_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: addi a2, a2, 1365 @@ -2482,41 +2348,37 @@ define i32 
@test_ctpop_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; -; RV64I-LABEL: test_ctpop_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64NOZBB-LABEL: test_ctpop_i32: +; RV64NOZBB: # %bb.0: +; RV64NOZBB-NEXT: srli a1, a0, 1 +; RV64NOZBB-NEXT: lui a2, 349525 +; RV64NOZBB-NEXT: addiw a2, a2, 1365 +; RV64NOZBB-NEXT: and a1, a1, a2 +; RV64NOZBB-NEXT: sub a0, a0, a1 +; RV64NOZBB-NEXT: lui a1, 209715 +; RV64NOZBB-NEXT: addiw a1, a1, 819 +; RV64NOZBB-NEXT: and a2, a0, a1 +; RV64NOZBB-NEXT: srli a0, a0, 2 +; RV64NOZBB-NEXT: and a0, a0, a1 +; RV64NOZBB-NEXT: add a0, a2, a0 +; RV64NOZBB-NEXT: srli a1, a0, 4 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: lui a1, 61681 +; RV64NOZBB-NEXT: addi a1, a1, -241 +; RV64NOZBB-NEXT: and a0, a0, a1 +; RV64NOZBB-NEXT: slli a1, a0, 8 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: slli a1, a0, 16 +; RV64NOZBB-NEXT: add a0, a0, a1 +; RV64NOZBB-NEXT: srliw a0, a0, 24 +; RV64NOZBB-NEXT: ret ; ; RV32M-LABEL: test_ctpop_i32: ; RV32M: # %bb.0: @@ -2542,30 +2404,6 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV32M-NEXT: srli a0, a0, 24 ; RV32M-NEXT: ret ; -; RV64M-LABEL: test_ctpop_i32: -; RV64M: # %bb.0: -; RV64M-NEXT: srli a1, a0, 1 -; RV64M-NEXT: lui a2, 349525 -; RV64M-NEXT: addiw a2, a2, 1365 -; RV64M-NEXT: and a1, a1, a2 -; RV64M-NEXT: sub a0, a0, a1 -; RV64M-NEXT: lui a1, 209715 -; RV64M-NEXT: addiw a1, a1, 819 -; RV64M-NEXT: and a2, a0, a1 -; RV64M-NEXT: srli a0, a0, 2 -; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: add a0, a2, a0 -; RV64M-NEXT: srli a1, a0, 4 -; RV64M-NEXT: add a0, a0, a1 -; RV64M-NEXT: lui a1, 61681 -; RV64M-NEXT: addi a1, a1, -241 -; RV64M-NEXT: and a0, a0, a1 -; RV64M-NEXT: lui a1, 4112 -; RV64M-NEXT: addi a1, a1, 257 -; RV64M-NEXT: mul a0, a0, a1 -; RV64M-NEXT: srliw a0, a0, 24 -; RV64M-NEXT: ret -; ; RV32ZBB-LABEL: test_ctpop_i32: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: cpop a0, a0 @@ -2578,8 +2416,6 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; ; RV32XTHEADBB-LABEL: test_ctpop_i32: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: addi sp, sp, -16 -; RV32XTHEADBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32XTHEADBB-NEXT: srli a1, a0, 1 ; RV32XTHEADBB-NEXT: lui a2, 349525 ; RV32XTHEADBB-NEXT: addi a2, a2, 1365 @@ -2596,18 +2432,15 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV32XTHEADBB-NEXT: lui a1, 61681 ; RV32XTHEADBB-NEXT: addi a1, a1, -241 ; 
RV32XTHEADBB-NEXT: and a0, a0, a1 -; RV32XTHEADBB-NEXT: lui a1, 4112 -; RV32XTHEADBB-NEXT: addi a1, a1, 257 -; RV32XTHEADBB-NEXT: call __mulsi3 +; RV32XTHEADBB-NEXT: slli a1, a0, 8 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: slli a1, a0, 16 +; RV32XTHEADBB-NEXT: add a0, a0, a1 ; RV32XTHEADBB-NEXT: srli a0, a0, 24 -; RV32XTHEADBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: addi sp, sp, 16 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: test_ctpop_i32: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: addi sp, sp, -16 -; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64XTHEADBB-NEXT: srli a1, a0, 1 ; RV64XTHEADBB-NEXT: lui a2, 349525 ; RV64XTHEADBB-NEXT: addiw a2, a2, 1365 @@ -2622,14 +2455,13 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV64XTHEADBB-NEXT: srli a1, a0, 4 ; RV64XTHEADBB-NEXT: add a0, a0, a1 ; RV64XTHEADBB-NEXT: lui a1, 61681 -; RV64XTHEADBB-NEXT: addiw a1, a1, -241 +; RV64XTHEADBB-NEXT: addi a1, a1, -241 ; RV64XTHEADBB-NEXT: and a0, a0, a1 -; RV64XTHEADBB-NEXT: lui a1, 4112 -; RV64XTHEADBB-NEXT: addiw a1, a1, 257 -; RV64XTHEADBB-NEXT: call __muldi3 +; RV64XTHEADBB-NEXT: slli a1, a0, 8 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: slli a1, a0, 16 +; RV64XTHEADBB-NEXT: add a0, a0, a1 ; RV64XTHEADBB-NEXT: srliw a0, a0, 24 -; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64XTHEADBB-NEXT: addi sp, sp, 16 ; RV64XTHEADBB-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 @@ -2638,65 +2470,48 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV32I-LABEL: test_ctpop_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s2, a2, 1365 -; RV32I-NEXT: and a0, a0, s2 -; RV32I-NEXT: sub a1, a1, a0 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi s3, a0, 819 -; RV32I-NEXT: and a0, a1, s3 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a4, a1, a2 ; RV32I-NEXT: srli a1, a1, 2 -; RV32I-NEXT: and a1, a1, s3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s4, a1, -241 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s0, 1 -; RV32I-NEXT: and a0, a0, s2 -; RV32I-NEXT: sub s0, s0, a0 -; RV32I-NEXT: and a0, s0, s3 -; RV32I-NEXT: srli s0, s0, 2 -; RV32I-NEXT: and a1, s0, s3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: add a1, a4, a1 +; RV32I-NEXT: srli a4, a1, 4 +; RV32I-NEXT: add a1, a1, a4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: slli a5, a1, 8 +; RV32I-NEXT: add 
a1, a1, a5 +; RV32I-NEXT: slli a5, a1, 16 +; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: and a3, a5, a3 +; RV32I-NEXT: sub a0, a0, a3 +; RV32I-NEXT: and a3, a0, a2 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: add a0, a3, a0 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 16 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: add a0, a0, s5 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctpop_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -2719,14 +2534,13 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctpop_i64: @@ -2814,65 +2628,48 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; ; RV32XTHEADBB-LABEL: test_ctpop_i64: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: addi sp, sp, -32 -; RV32XTHEADBB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: mv s0, a0 -; RV32XTHEADBB-NEXT: srli a0, a1, 1 -; RV32XTHEADBB-NEXT: lui a2, 349525 -; RV32XTHEADBB-NEXT: addi s2, a2, 1365 -; RV32XTHEADBB-NEXT: and a0, a0, s2 -; RV32XTHEADBB-NEXT: sub a1, a1, a0 -; RV32XTHEADBB-NEXT: lui a0, 209715 -; RV32XTHEADBB-NEXT: addi s3, a0, 819 -; RV32XTHEADBB-NEXT: and a0, a1, s3 +; RV32XTHEADBB-NEXT: srli a2, a1, 1 +; RV32XTHEADBB-NEXT: lui a3, 349525 +; RV32XTHEADBB-NEXT: addi a3, a3, 1365 +; RV32XTHEADBB-NEXT: and a2, a2, a3 +; RV32XTHEADBB-NEXT: sub a1, a1, a2 +; RV32XTHEADBB-NEXT: lui a2, 209715 +; RV32XTHEADBB-NEXT: addi a2, a2, 819 +; RV32XTHEADBB-NEXT: and a4, a1, a2 ; RV32XTHEADBB-NEXT: srli a1, a1, 2 -; RV32XTHEADBB-NEXT: and a1, a1, s3 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: srli a1, a0, 4 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: lui a1, 61681 -; RV32XTHEADBB-NEXT: addi s4, a1, -241 -; RV32XTHEADBB-NEXT: and a0, a0, s4 -; RV32XTHEADBB-NEXT: lui a1, 4112 -; RV32XTHEADBB-NEXT: addi s1, a1, 257 -; RV32XTHEADBB-NEXT: mv a1, s1 -; 
RV32XTHEADBB-NEXT: call __mulsi3 -; RV32XTHEADBB-NEXT: srli s5, a0, 24 -; RV32XTHEADBB-NEXT: srli a0, s0, 1 -; RV32XTHEADBB-NEXT: and a0, a0, s2 -; RV32XTHEADBB-NEXT: sub s0, s0, a0 -; RV32XTHEADBB-NEXT: and a0, s0, s3 -; RV32XTHEADBB-NEXT: srli s0, s0, 2 -; RV32XTHEADBB-NEXT: and a1, s0, s3 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: srli a1, a0, 4 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: and a0, a0, s4 -; RV32XTHEADBB-NEXT: mv a1, s1 -; RV32XTHEADBB-NEXT: call __mulsi3 +; RV32XTHEADBB-NEXT: and a1, a1, a2 +; RV32XTHEADBB-NEXT: add a1, a4, a1 +; RV32XTHEADBB-NEXT: srli a4, a1, 4 +; RV32XTHEADBB-NEXT: add a1, a1, a4 +; RV32XTHEADBB-NEXT: lui a4, 61681 +; RV32XTHEADBB-NEXT: addi a4, a4, -241 +; RV32XTHEADBB-NEXT: and a1, a1, a4 +; RV32XTHEADBB-NEXT: slli a5, a1, 8 +; RV32XTHEADBB-NEXT: add a1, a1, a5 +; RV32XTHEADBB-NEXT: slli a5, a1, 16 +; RV32XTHEADBB-NEXT: add a1, a1, a5 +; RV32XTHEADBB-NEXT: srli a1, a1, 24 +; RV32XTHEADBB-NEXT: srli a5, a0, 1 +; RV32XTHEADBB-NEXT: and a3, a5, a3 +; RV32XTHEADBB-NEXT: sub a0, a0, a3 +; RV32XTHEADBB-NEXT: and a3, a0, a2 +; RV32XTHEADBB-NEXT: srli a0, a0, 2 +; RV32XTHEADBB-NEXT: and a0, a0, a2 +; RV32XTHEADBB-NEXT: add a0, a3, a0 +; RV32XTHEADBB-NEXT: srli a2, a0, 4 +; RV32XTHEADBB-NEXT: add a0, a0, a2 +; RV32XTHEADBB-NEXT: and a0, a0, a4 +; RV32XTHEADBB-NEXT: slli a2, a0, 8 +; RV32XTHEADBB-NEXT: add a0, a0, a2 +; RV32XTHEADBB-NEXT: slli a2, a0, 16 +; RV32XTHEADBB-NEXT: add a0, a0, a2 ; RV32XTHEADBB-NEXT: srli a0, a0, 24 -; RV32XTHEADBB-NEXT: add a0, a0, s5 +; RV32XTHEADBB-NEXT: add a0, a0, a1 ; RV32XTHEADBB-NEXT: li a1, 0 -; RV32XTHEADBB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: addi sp, sp, 32 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: test_ctpop_i64: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: addi sp, sp, -16 -; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64XTHEADBB-NEXT: srli a1, a0, 1 ; RV64XTHEADBB-NEXT: lui a2, 349525 ; RV64XTHEADBB-NEXT: addiw a2, a2, 1365 @@ -2895,14 +2692,13 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV64XTHEADBB-NEXT: slli a2, a1, 32 ; RV64XTHEADBB-NEXT: add a1, a1, a2 ; RV64XTHEADBB-NEXT: and a0, a0, a1 -; RV64XTHEADBB-NEXT: lui a1, 4112 -; RV64XTHEADBB-NEXT: addiw a1, a1, 257 -; RV64XTHEADBB-NEXT: slli a2, a1, 32 -; RV64XTHEADBB-NEXT: add a1, a1, a2 -; RV64XTHEADBB-NEXT: call __muldi3 +; RV64XTHEADBB-NEXT: slli a1, a0, 8 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: slli a1, a0, 16 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: slli a1, a0, 32 +; RV64XTHEADBB-NEXT: add a0, a0, a1 ; RV64XTHEADBB-NEXT: srli a0, a0, 56 -; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64XTHEADBB-NEXT: addi sp, sp, 16 ; RV64XTHEADBB-NEXT: ret %1 = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll index adf614435b31d..9ae30e646fdbf 100644 --- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll +++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll @@ -602,19 +602,16 @@ define signext i32 @ctlz(i64 %b) nounwind { ; ; RV32I-LABEL: ctlz: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: 
addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB7_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -625,28 +622,26 @@ define signext i32 @ctlz(i64 %b) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: andi a0, a0, 63 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -657,41 +652,25 @@ define signext i32 @ctlz(i64 %b) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB7_2 -; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi s1, a0, 32 -; RV32I-NEXT: j .LBB7_3 -; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: srli s1, s1, 24 -; RV32I-NEXT: .LBB7_3: # %entry -; RV32I-NEXT: andi a0, s1, 63 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw 
s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: andi a0, a0, 63 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -727,15 +706,14 @@ define signext i32 @ctlz(i64 %b) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 2 ; RV64I-NEXT: srli a0, a0, 58 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll index 3731b9719445e..b45ab135fa1c7 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll @@ -11,8 +11,6 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: beqz a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: # %cond.false -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -40,12 +38,11 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: li a0, 32 @@ -64,19 +61,16 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: ctlz_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -87,28 +81,26 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and 
a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -119,35 +111,21 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB1_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB1_3 -; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srli a0, s1, 24 -; RV32I-NEXT: .LBB1_3: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32XTHEADBB-LABEL: ctlz_i64: diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 36c107061795c..7e6c3f9c87d27 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -11,8 +11,6 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: beqz a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: # %cond.false -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -40,12 +38,11 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: li a0, 32 @@ -64,19 +61,16 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: 
ctlz_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -87,28 +81,26 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -119,35 +111,21 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB1_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB1_3 -; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srli a0, s1, 24 -; RV32I-NEXT: .LBB1_3: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 
4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctlz_i64: @@ -275,8 +253,6 @@ declare i32 @llvm.ctpop.i32(i32) define i32 @ctpop_i32(i32 %a) nounwind { ; RV32I-LABEL: ctpop_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: addi a2, a2, 1365 @@ -293,12 +269,11 @@ define i32 @ctpop_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_i32: @@ -390,58 +365,42 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind { ; RV32I-LABEL: ctpop_v2i32: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s3, a2, 1365 -; RV32I-NEXT: and a1, a1, s3 -; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s4, a1, 819 -; RV32I-NEXT: and a1, a0, s4 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a4, a0, a2 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s5, a1, -241 -; RV32I-NEXT: and a0, a0, s5 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s2, a0, 24 -; RV32I-NEXT: srli a0, s0, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub s0, s0, a0 -; RV32I-NEXT: and a0, s0, s4 -; RV32I-NEXT: srli s0, s0, 2 -; RV32I-NEXT: and a1, s0, s4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s5 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: add a0, a4, a0 +; RV32I-NEXT: srli a4, a0, 4 +; RV32I-NEXT: add a0, a0, a4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: slli a5, a0, 8 +; RV32I-NEXT: add a0, a0, a5 +; RV32I-NEXT: slli a5, a0, 
16 +; RV32I-NEXT: add a0, a0, a5 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: srli a5, a1, 1 +; RV32I-NEXT: and a3, a5, a3 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: and a3, a1, a2 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: srli a2, a1, 4 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: slli a2, a1, 8 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: slli a2, a1, 16 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: srli a1, a1, 24 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_v2i32: @@ -558,59 +517,44 @@ declare i64 @llvm.ctpop.i64(i64) define i64 @ctpop_i64(i64 %a) nounwind { ; RV32I-LABEL: ctpop_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s2, a2, 1365 -; RV32I-NEXT: and a0, a0, s2 -; RV32I-NEXT: sub a1, a1, a0 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi s3, a0, 819 -; RV32I-NEXT: and a0, a1, s3 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a4, a1, a2 ; RV32I-NEXT: srli a1, a1, 2 -; RV32I-NEXT: and a1, a1, s3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s4, a1, -241 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s0, 1 -; RV32I-NEXT: and a0, a0, s2 -; RV32I-NEXT: sub s0, s0, a0 -; RV32I-NEXT: and a0, s0, s3 -; RV32I-NEXT: srli s0, s0, 2 -; RV32I-NEXT: and a1, s0, s3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: add a1, a4, a1 +; RV32I-NEXT: srli a4, a1, 4 +; RV32I-NEXT: add a1, a1, a4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: slli a5, a1, 8 +; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: slli a5, a1, 16 +; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: and a3, a5, a3 +; RV32I-NEXT: sub a0, a0, a3 +; RV32I-NEXT: and a3, a0, a2 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: add a0, a3, a0 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 16 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: add a0, a0, s5 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded 
Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_i64: @@ -738,99 +682,82 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; RV32I-LABEL: ctpop_v2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -48 -; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: lw s2, 8(a1) -; RV32I-NEXT: lw s5, 12(a1) -; RV32I-NEXT: lw s6, 0(a1) -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s3, a2, 1365 -; RV32I-NEXT: and a1, a1, s3 -; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s4, a1, 819 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s7, a1, -241 -; RV32I-NEXT: and a0, a0, s7 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s8, a0, 24 -; RV32I-NEXT: srli a0, s6, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s6, a0 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s7 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: add s8, a0, s8 -; RV32I-NEXT: srli a0, s5, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s5, a0 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s7 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s2, a0 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s7 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: add a0, a0, s5 -; RV32I-NEXT: sw zero, 12(s0) -; RV32I-NEXT: sw zero, 4(s0) -; RV32I-NEXT: sw a0, 8(s0) -; RV32I-NEXT: sw s8, 0(s0) -; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a2, 
8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: srli a5, a3, 1 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: sub a3, a3, a5 +; RV32I-NEXT: lui a5, 209715 +; RV32I-NEXT: addi a5, a5, 819 +; RV32I-NEXT: and a7, a3, a5 +; RV32I-NEXT: srli a3, a3, 2 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: add a3, a7, a3 +; RV32I-NEXT: srli a7, a3, 4 +; RV32I-NEXT: add a3, a3, a7 +; RV32I-NEXT: lui a7, 61681 +; RV32I-NEXT: addi a7, a7, -241 +; RV32I-NEXT: and a3, a3, a7 +; RV32I-NEXT: slli t0, a3, 8 +; RV32I-NEXT: add a3, a3, t0 +; RV32I-NEXT: slli t0, a3, 16 +; RV32I-NEXT: add a3, a3, t0 +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: srli t0, a1, 1 +; RV32I-NEXT: and t0, t0, a6 +; RV32I-NEXT: sub a1, a1, t0 +; RV32I-NEXT: and t0, a1, a5 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: add a1, t0, a1 +; RV32I-NEXT: srli t0, a1, 4 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: slli t0, a1, 8 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: slli t0, a1, 16 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: srli a3, a4, 1 +; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: sub a4, a4, a3 +; RV32I-NEXT: and a3, a4, a5 +; RV32I-NEXT: srli a4, a4, 2 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: srli a4, a3, 4 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: and a3, a3, a7 +; RV32I-NEXT: slli a4, a3, 8 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: slli a4, a3, 16 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: srli a4, a2, 1 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: sub a2, a2, a4 +; RV32I-NEXT: and a4, a2, a5 +; RV32I-NEXT: srli a2, a2, 2 +; RV32I-NEXT: and a2, a2, a5 +; RV32I-NEXT: add a2, a4, a2 +; RV32I-NEXT: srli a4, a2, 4 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: slli a4, a2, 8 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: slli a4, a2, 16 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: srli a2, a2, 24 +; RV32I-NEXT: add a2, a2, a3 +; RV32I-NEXT: sw zero, 12(a0) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_v2i64: diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll index 73bfc6480b4d7..acd63f24bb8f7 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll @@ -317,8 +317,6 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -354,14 +352,13 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 64 diff 
--git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll index 7feef4dad4116..b0e447b71178b 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll @@ -307,8 +307,6 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -344,14 +342,13 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 64 @@ -623,8 +620,6 @@ declare i64 @llvm.ctpop.i64(i64) define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I-LABEL: ctpop_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -647,14 +642,13 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_i64: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll index 1f62ea9f56819..6cdab888ffcde 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll @@ -11,8 +11,6 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB0_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -38,14 +36,13 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: li a0, 32 @@ -66,8 +63,6 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB1_2 ; RV64I-NEXT: # %bb.1: # 
%cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -93,14 +88,13 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: j .LBB1_3 ; RV64I-NEXT: .LBB1_2: ; RV64I-NEXT: li a0, 32 @@ -125,50 +119,45 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { ; RV64I-LABEL: log2_ceil_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: addiw a0, a0, -1 -; RV64I-NEXT: li s0, 32 -; RV64I-NEXT: li a1, 32 -; RV64I-NEXT: beqz a0, .LBB2_2 +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: li a2, 32 +; RV64I-NEXT: beqz a1, .LBB2_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: srliw a1, a0, 1 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: srliw a2, a1, 1 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 2 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 16 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: srli a2, a1, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a1, a0, 24 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 24 ; RV64I-NEXT: .LBB2_2: # %cond.end -; RV64I-NEXT: sub a0, s0, a1 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret ; ; 
RV64XTHEADBB-LABEL: log2_ceil_i32: @@ -189,48 +178,42 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; RV64I-LABEL: findLastSet_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: srliw a0, a0, 1 -; RV64I-NEXT: or a0, s0, a0 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: srliw a2, a1, 2 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 16 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: srli a2, a1, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: xori a0, a0, 31 -; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 24 +; RV64I-NEXT: xori a1, a1, 31 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBB-LABEL: findLastSet_i32: @@ -256,10 +239,6 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: srliw a0, a0, 1 ; RV64I-NEXT: beqz a0, .LBB4_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: .cfi_def_cfa_offset 16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -285,14 +264,13 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, 
a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB4_2: ; RV64I-NEXT: li a0, 32 @@ -317,8 +295,6 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -354,14 +330,13 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 64 diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 2269d8d04c9cb..4d5ef5db86057 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -11,8 +11,6 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB0_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -38,14 +36,13 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: li a0, 32 @@ -64,8 +61,6 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB1_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -91,14 +86,13 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: j .LBB1_3 ; RV64I-NEXT: .LBB1_2: ; RV64I-NEXT: li a0, 32 @@ -121,50 +115,45 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { ; RV64I-LABEL: log2_ceil_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; 
RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: addiw a0, a0, -1 -; RV64I-NEXT: li s0, 32 -; RV64I-NEXT: li a1, 32 -; RV64I-NEXT: beqz a0, .LBB2_2 +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: li a2, 32 +; RV64I-NEXT: beqz a1, .LBB2_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: srliw a1, a0, 1 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: srliw a2, a1, 1 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 2 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 16 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: srli a2, a1, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a1, a0, 24 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 24 ; RV64I-NEXT: .LBB2_2: # %cond.end -; RV64I-NEXT: sub a0, s0, a1 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: log2_ceil_i32: @@ -183,48 +172,42 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; RV64I-LABEL: findLastSet_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: srliw a0, a0, 1 -; RV64I-NEXT: or a0, s0, a0 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: srliw a2, a1, 2 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw 
a2, a1, 16 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: srli a2, a1, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: xori a0, a0, 31 -; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 24 +; RV64I-NEXT: xori a1, a1, 31 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: findLastSet_i32: @@ -248,10 +231,6 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: srliw a0, a0, 1 ; RV64I-NEXT: beqz a0, .LBB4_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: .cfi_def_cfa_offset 16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -277,14 +256,13 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB4_2: ; RV64I-NEXT: li a0, 32 @@ -307,8 +285,6 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -344,14 +320,13 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: 
addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 64 @@ -544,8 +519,6 @@ declare i32 @llvm.ctpop.i32(i32) define signext i32 @ctpop_i32(i32 signext %a) nounwind { ; RV64I-LABEL: ctpop_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -560,14 +533,13 @@ define signext i32 @ctpop_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_i32: @@ -657,8 +629,6 @@ define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind { define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; RV64I-LABEL: ctpop_i32_load: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 @@ -674,14 +644,13 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_i32_load: @@ -699,58 +668,42 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind { ; RV64I-LABEL: ctpop_v2i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw s3, a2, 1365 -; RV64I-NEXT: and a1, a1, s3 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw s4, a1, 819 -; RV64I-NEXT: and a1, a0, s4 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a4, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, s4 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw s5, a1, -241 -; RV64I-NEXT: and a0, a0, s5 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw s1, a1, 257 -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw s2, a0, 24 -; RV64I-NEXT: srli a0, s0, 1 -; RV64I-NEXT: and a0, a0, s3 -; 
RV64I-NEXT: sub s0, s0, a0 -; RV64I-NEXT: and a0, s0, s4 -; RV64I-NEXT: srli s0, s0, 2 -; RV64I-NEXT: and a1, s0, s4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, s5 -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a1, a0, 24 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a4, a0 +; RV64I-NEXT: srli a4, a0, 4 +; RV64I-NEXT: add a0, a0, a4 +; RV64I-NEXT: lui a4, 61681 +; RV64I-NEXT: addi a4, a4, -241 +; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: slli a5, a0, 8 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: slli a5, a0, 16 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: srli a5, a1, 1 +; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: sub a1, a1, a3 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 24 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_v2i32: @@ -875,8 +828,6 @@ declare i64 @llvm.ctpop.i64(i64) define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I-LABEL: ctpop_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -899,14 +850,13 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_i64: @@ -998,66 +948,52 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; RV64I-LABEL: ctpop_v2i64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a3, a2, 32 -; RV64I-NEXT: add s3, a2, a3 -; RV64I-NEXT: and a1, a1, s3 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add s4, a1, a2 -; RV64I-NEXT: and a1, a0, s4 +; RV64I-NEXT: srli a2, a0, 1 +; 
RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: slli a4, a3, 32 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a4, a2, 32 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: and a4, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, s4 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a4, a0 +; RV64I-NEXT: srli a4, a0, 4 +; RV64I-NEXT: add a0, a0, a4 +; RV64I-NEXT: lui a4, 61681 +; RV64I-NEXT: addiw a4, a4, -241 +; RV64I-NEXT: slli a5, a4, 32 +; RV64I-NEXT: add a4, a4, a5 +; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: slli a5, a0, 8 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: slli a5, a0, 16 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: slli a5, a0, 32 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: srli a5, a1, 1 +; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: sub a1, a1, a3 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add s5, a1, a2 -; RV64I-NEXT: and a0, a0, s5 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw s1, a1, 257 -; RV64I-NEXT: slli a1, s1, 32 -; RV64I-NEXT: add s1, s1, a1 -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srli s2, a0, 56 -; RV64I-NEXT: srli a0, s0, 1 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: sub s0, s0, a0 -; RV64I-NEXT: and a0, s0, s4 -; RV64I-NEXT: srli s0, s0, 2 -; RV64I-NEXT: and a1, s0, s4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, s5 -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srli a1, a1, 56 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_v2i64: diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index f707cb31e3ece..2b1846199d2a0 100644 --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -169,12 +169,11 @@ define signext i32 @test4(ptr %p, i32 signext %b) nounwind { define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-LABEL: test5: ; RV64I: # %bb.0: # %bb -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte 
Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: sraw a0, a0, a1 ; RV64I-NEXT: lui a1, 349525 ; RV64I-NEXT: addiw s0, a1, 1365 @@ -182,8 +181,6 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-NEXT: addiw s1, a1, 819 ; RV64I-NEXT: lui a1, 61681 ; RV64I-NEXT: addi s2, a1, -241 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addi s3, a1, 257 ; RV64I-NEXT: .LBB4_1: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call bar @@ -198,16 +195,18 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-NEXT: srli a2, a0, 4 ; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: and a0, a0, s2 -; RV64I-NEXT: mul a0, a0, s3 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: bnez a1, .LBB4_1 ; RV64I-NEXT: # %bb.2: # %bb7 -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: test5: From 02baa6c3df759e4166b0ca8aa312133340684c84 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Tue, 26 Mar 2024 16:23:20 +0800 Subject: [PATCH 2/7] Add GISel support --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 15 ++++-- .../legalize-ctpop-no-implicit-float.mir | 4 +- .../Mips/GlobalISel/legalizer/ctpop.mir | 4 +- .../legalizer/legalize-ctlz-rv32.mir | 50 +++++++++---------- .../legalizer/legalize-ctlz-rv64.mir | 38 +++++++------- .../legalizer/legalize-ctpop-rv32.mir | 21 ++++---- .../legalizer/legalize-ctpop-rv64.mir | 15 +++--- .../legalizer/legalize-cttz-rv32.mir | 42 ++++++++-------- .../legalizer/legalize-cttz-rv64.mir | 30 ++++++----- 9 files changed, 110 insertions(+), 109 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c3a23ea0ad373..baf36792f8e42 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -6362,12 +6362,21 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks. auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01))); - auto ResTmp = B.buildMul(Ty, B8Count, MulMask); // Shift count result from 8 high bits to low bits. 
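      // A minimal sketch of the shift-add expansion introduced below, assuming
      // Size == 32 (so the loop produces Shift = 8, then 16); the uint32_t V is
      // a hypothetical stand-in for the B8Count value:
      //   uint32_t V = B8Count;  // each byte holds an 8-bit partial popcount
      //   V += V << 8;           // each byte adds in the byte below it
      //   V += V << 16;          // top byte now sums all four bytes
      //   V >>= 24;              // same result as (V * 0x01010101) >> 24
      // One shl/add pair per doubling keeps this at log2(Size / 8) steps,
      // avoiding a G_MUL that may lower to a libcall on targets without a
      // multiplier.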
auto C_SizeM8 = B.buildConstant(Ty, Size - 8); - B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); - + if (isSupported({TargetOpcode::G_MUL, {Ty, Ty}})) { + auto ResTmp = B.buildMul(Ty, B8Count, MulMask); + B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); + } else { + auto ResTmp = B8Count; + for (unsigned Shift = 8; Shift < Size; Shift *= 2) { + auto ShiftC = B.buildConstant(Ty, Shift); + auto Shl = B.buildShl(Ty, ResTmp, ShiftC); + ResTmp = B.buildAdd(Ty, ResTmp, Shl); + } + B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); + } MI.eraseFromParent(); return Legalized; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir index d2352be81503d..27f2f0bafa95a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir @@ -37,6 +37,7 @@ body: | ; CHECK-NEXT: %ctpop:_(s32) = G_LSHR [[MUL]], [[C7]](s64) ; CHECK-NEXT: $w0 = COPY %ctpop(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; ; CHECK-CSSC-LABEL: name: s32 ; CHECK-CSSC: liveins: $w0 ; CHECK-CSSC-NEXT: {{ $}} @@ -77,11 +78,12 @@ body: | ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C5]] ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]] ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]] ; CHECK-NEXT: %ctpop:_(s64) = G_LSHR [[MUL]], [[C7]](s64) ; CHECK-NEXT: $x0 = COPY %ctpop(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 + ; ; CHECK-CSSC-LABEL: name: s64 ; CHECK-CSSC: liveins: $x0 ; CHECK-CSSC-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir index 4c0b3c6177219..f518e9ec9e589 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir @@ -29,8 +29,8 @@ body: | ; MIPS32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]] ; MIPS32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; MIPS32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; MIPS32-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32) ; MIPS32-NEXT: $v0 = COPY [[LSHR3]](s32) ; MIPS32-NEXT: RetRA implicit $v0 @@ -70,8 +70,8 @@ body: | ; MIPS32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]] ; MIPS32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; MIPS32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; MIPS32-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32) ; MIPS32-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir index a890a411544e7..00ace42f11509 100644 --- 
a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir @@ -49,12 +49,10 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] - ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] - ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s32) - ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] + ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C14]](s32) + ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C15]], [[LSHR6]] ; RV32I-NEXT: $x10 = COPY [[SUB1]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -128,12 +126,13 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[ADD]] ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] - ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] + ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C17]](s32) + ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[SHL]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C18]] - ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C17]](s32) + ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C18]] + ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C16]](s32) ; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C19]], [[LSHR7]] ; RV32I-NEXT: $x10 = COPY [[SUB1]](s32) @@ -201,8 +200,8 @@ body: | ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C10]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]] ; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]] ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C12]](s32) ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[LSHR8]] @@ -267,8 +266,8 @@ body: | ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C11]] ; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]] ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]] ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C13]](s32) ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C14]], [[LSHR8]] @@ -306,8 +305,8 @@ body: | ; RV32I-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD4]], [[C26]] ; RV32I-NEXT: 
[[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]] ; RV32I-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]] ; RV32I-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C28]](s32) ; RV32I-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C29]], [[LSHR17]] @@ -388,12 +387,10 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] - ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] - ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s32) - ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] + ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C14]](s32) + ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C15]], [[LSHR6]] ; RV32I-NEXT: $x10 = COPY [[SUB1]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -467,12 +464,13 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[ADD]] ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] - ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] + ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C17]](s32) + ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[SHL]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C18]] - ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C17]](s32) + ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C18]] + ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C16]](s32) ; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C19]], [[LSHR7]] ; RV32I-NEXT: $x10 = COPY [[SUB1]](s32) @@ -540,8 +538,8 @@ body: | ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C10]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]] ; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]] ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C12]](s32) ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[LSHR8]] @@ -606,8 +604,8 @@ body: | ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C11]] ; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]] ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]] ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C13]](s32) ; RV32I-NEXT: 
[[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C14]], [[LSHR8]] @@ -645,8 +643,8 @@ body: | ; RV32I-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD4]], [[C26]] ; RV32I-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]] ; RV32I-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]] ; RV32I-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C28]](s32) ; RV32I-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C29]], [[LSHR17]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir index add8a565202df..24a069f98c56a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir @@ -51,12 +51,10 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] ; RV64I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] - ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] - ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s64) - ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] + ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C14]](s64) + ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C15]], [[LSHR6]] ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SUB1]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -135,10 +133,11 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[ADD]] ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] - ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] + ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C16]](s64) + ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[SHL]] ; RV64I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C17]] + ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C17]] ; RV64I-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C18]](s64) ; RV64I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -283,8 +282,8 @@ body: | ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C11]] ; RV64I-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]] ; RV64I-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]] ; RV64I-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C13]](s64) ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = 
G_SUB [[C14]], [[LSHR9]] @@ -351,12 +350,10 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] ; RV64I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] - ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] - ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s64) - ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] + ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C14]](s64) + ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C15]], [[LSHR6]] ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SUB1]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -435,10 +432,11 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[ADD]] ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] - ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] + ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C16]](s64) + ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[SHL]] ; RV64I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C17]] + ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C17]] ; RV64I-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C18]](s64) ; RV64I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -583,8 +581,8 @@ body: | ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C11]] ; RV64I-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]] ; RV64I-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]] ; RV64I-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C13]](s64) ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C14]], [[LSHR9]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir index d4eb5ebc2e294..09b6763fa6feb 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir @@ -34,10 +34,8 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] - ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32) + ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C8]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET 
implicit $x10 ; @@ -89,12 +87,13 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] - ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] + ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) + ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[SHL]] ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C10]] - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s32) + ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C10]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C8]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -143,8 +142,8 @@ body: | ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]] ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -190,8 +189,8 @@ body: | ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]] ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32) ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; RV32I-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C8]](s32) @@ -210,8 +209,8 @@ body: | ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C13]] ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C14]] ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C14]] ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C15]](s32) ; RV32I-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[LSHR3]] ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir index e2434ba9301c0..dbc0fe16eaa08 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir @@ -36,10 +36,8 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] - ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], 
[[C9]](s64) + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C8]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -96,10 +94,11 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] - ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s64) + ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[SHL]] ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C9]] + ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C9]] ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C10]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) @@ -205,8 +204,8 @@ body: | ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C5]] ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]] ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C7]](s64) ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir index 19555a702b73c..e1381729be7aa 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir @@ -38,10 +38,8 @@ body: | ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] - ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s32) + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -97,12 +95,13 @@ body: | ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C10]](s32) + ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[SHL]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C11]] - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], 
[[C10]](s32) + ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C11]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C9]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -155,8 +154,8 @@ body: | ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C6]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C8]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -208,8 +207,8 @@ body: | ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32) ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[C10]] @@ -234,8 +233,8 @@ body: | ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C17]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]] ; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]] ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C19]](s32) ; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD3]], [[LSHR7]] ; RV32I-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -303,10 +302,8 @@ body: | ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] - ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s32) + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -362,12 +359,13 @@ body: | ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C10]](s32) + ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[SHL]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C11]] - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C10]](s32) + ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C11]] + ; RV32I-NEXT: 
[[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C9]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -420,8 +418,8 @@ body: | ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C6]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C8]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -473,8 +471,8 @@ body: | ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32) ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[C10]] @@ -499,8 +497,8 @@ body: | ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C17]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]] ; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]] ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C19]](s32) ; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD3]], [[LSHR7]] ; RV32I-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir index e030e3ce2a803..518891429a954 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir @@ -40,10 +40,8 @@ body: | ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] - ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s64) + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -104,10 +102,11 @@ body: | ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C9]](s64) + ; RV64I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[SHL]] ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C10]] + ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C10]] ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C11]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) @@ -221,8 +220,8 @@ body: | ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ADD2]], [[C6]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]] ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64) ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -276,10 +275,8 @@ body: | ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] - ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s64) + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -340,10 +337,11 @@ body: | ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C9]](s64) + ; RV64I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[SHL]] ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C10]] + ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C10]] ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C11]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) @@ -457,8 +455,8 @@ body: | ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ADD2]], [[C6]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]] ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64) ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 From d041aeab23fb7aa7ce2a583480d63a3f353647c3 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 28 Mar 2024 20:34:53 +0800 Subject: [PATCH 3/7] Use getTypeToTransformTo --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 6 +- .../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 9 +- llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll | 300 ++++++++++++------ llvm/test/CodeGen/RISCV/sextw-removal.ll | 29 +- 4 files changed, 
223 insertions(+), 121 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b92f790604403..2cf2a388ac4da 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8711,7 +8711,8 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { // v = (v * 0x01010101...) >> (Len - 8) SDValue V; - if (isOperationLegalOrCustomOrPromote(ISD::MUL, VT)) { + if (isOperationLegalOrCustomOrPromote( + ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) { SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01); @@ -8776,7 +8777,8 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const { // v = (v * 0x01010101...) >> (Len - 8) SDValue V; - if (isOperationLegalOrCustomOrPromote(ISD::VP_MUL, VT)) { + if (isOperationLegalOrCustomOrPromote( + ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) { SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL); diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll index 3a6cf4c4e0ed2..9fa3f5076bb22 100644 --- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll @@ -285,11 +285,10 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; LA64-NEXT: lu12i.w $a1, 61680 ; LA64-NEXT: ori $a1, $a1, 3855 ; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: slli.d $a1, $a0, 8 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: slli.d $a1, $a0, 16 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: bstrpick.d $a0, $a0, 29, 24 +; LA64-NEXT: lu12i.w $a1, 4112 +; LA64-NEXT: ori $a1, $a1, 257 +; LA64-NEXT: mul.d $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24 ; LA64-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index a9b7e7e06d519..549d531e829ea 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -1197,47 +1197,47 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: ret ; -; RV64NOZBB-LABEL: test_ctlz_i32: -; RV64NOZBB: # %bb.0: -; RV64NOZBB-NEXT: sext.w a1, a0 -; RV64NOZBB-NEXT: beqz a1, .LBB10_2 -; RV64NOZBB-NEXT: # %bb.1: # %cond.false -; RV64NOZBB-NEXT: srliw a1, a0, 1 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: srliw a1, a0, 2 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: srliw a1, a0, 4 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: srliw a1, a0, 8 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: srliw a1, a0, 16 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: not a0, a0 -; RV64NOZBB-NEXT: srli a1, a0, 1 -; RV64NOZBB-NEXT: lui a2, 349525 -; RV64NOZBB-NEXT: addiw a2, a2, 1365 -; RV64NOZBB-NEXT: and a1, a1, a2 -; RV64NOZBB-NEXT: sub a0, a0, a1 -; RV64NOZBB-NEXT: lui a1, 209715 -; RV64NOZBB-NEXT: addiw a1, a1, 819 -; RV64NOZBB-NEXT: and a2, a0, a1 -; RV64NOZBB-NEXT: srli a0, a0, 2 -; RV64NOZBB-NEXT: and a0, a0, a1 -; RV64NOZBB-NEXT: add a0, a2, a0 -; RV64NOZBB-NEXT: srli a1, a0, 4 -; RV64NOZBB-NEXT: add a0, a0, a1 -; RV64NOZBB-NEXT: lui a1, 61681 -; RV64NOZBB-NEXT: addi a1, a1, -241 -; RV64NOZBB-NEXT: and a0, a0, a1 -; RV64NOZBB-NEXT: slli a1, a0, 8 -; RV64NOZBB-NEXT: add a0, a0, a1 -; RV64NOZBB-NEXT: slli a1, a0, 16 -; 
RV64NOZBB-NEXT: add a0, a0, a1 -; RV64NOZBB-NEXT: srliw a0, a0, 24 -; RV64NOZBB-NEXT: ret -; RV64NOZBB-NEXT: .LBB10_2: -; RV64NOZBB-NEXT: li a0, 32 -; RV64NOZBB-NEXT: ret +; RV64I-LABEL: test_ctlz_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: beqz a1, .LBB10_2 +; RV64I-NEXT: # %bb.1: # %cond.false +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB10_2: +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctlz_i32: ; RV32M: # %bb.0: @@ -1279,6 +1279,47 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32M-NEXT: li a0, 32 ; RV32M-NEXT: ret ; +; RV64M-LABEL: test_ctlz_i32: +; RV64M: # %bb.0: +; RV64M-NEXT: sext.w a1, a0 +; RV64M-NEXT: beqz a1, .LBB10_2 +; RV64M-NEXT: # %bb.1: # %cond.false +; RV64M-NEXT: srliw a1, a0, 1 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: srliw a1, a0, 2 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: srliw a1, a0, 4 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: srliw a1, a0, 8 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: srliw a1, a0, 16 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: not a0, a0 +; RV64M-NEXT: srli a1, a0, 1 +; RV64M-NEXT: lui a2, 349525 +; RV64M-NEXT: addiw a2, a2, 1365 +; RV64M-NEXT: and a1, a1, a2 +; RV64M-NEXT: sub a0, a0, a1 +; RV64M-NEXT: lui a1, 209715 +; RV64M-NEXT: addiw a1, a1, 819 +; RV64M-NEXT: and a2, a0, a1 +; RV64M-NEXT: srli a0, a0, 2 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: add a0, a2, a0 +; RV64M-NEXT: srli a1, a0, 4 +; RV64M-NEXT: add a0, a0, a1 +; RV64M-NEXT: lui a1, 61681 +; RV64M-NEXT: addi a1, a1, -241 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: lui a1, 4112 +; RV64M-NEXT: addi a1, a1, 257 +; RV64M-NEXT: mul a0, a0, a1 +; RV64M-NEXT: srliw a0, a0, 24 +; RV64M-NEXT: ret +; RV64M-NEXT: .LBB10_2: +; RV64M-NEXT: li a0, 32 +; RV64M-NEXT: ret +; ; RV32ZBB-LABEL: test_ctlz_i32: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: clz a0, a0 @@ -1796,41 +1837,41 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: ret ; -; RV64NOZBB-LABEL: test_ctlz_i32_zero_undef: -; RV64NOZBB: # %bb.0: -; RV64NOZBB-NEXT: srliw a1, a0, 1 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: srliw a1, a0, 2 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: srliw a1, a0, 4 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: srliw a1, a0, 8 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: srliw a1, a0, 16 -; RV64NOZBB-NEXT: or a0, a0, a1 -; RV64NOZBB-NEXT: not a0, a0 -; RV64NOZBB-NEXT: srli a1, a0, 1 -; RV64NOZBB-NEXT: lui a2, 349525 -; RV64NOZBB-NEXT: addiw a2, a2, 1365 -; RV64NOZBB-NEXT: and a1, a1, a2 -; RV64NOZBB-NEXT: sub a0, a0, a1 -; 
RV64NOZBB-NEXT: lui a1, 209715 -; RV64NOZBB-NEXT: addiw a1, a1, 819 -; RV64NOZBB-NEXT: and a2, a0, a1 -; RV64NOZBB-NEXT: srli a0, a0, 2 -; RV64NOZBB-NEXT: and a0, a0, a1 -; RV64NOZBB-NEXT: add a0, a2, a0 -; RV64NOZBB-NEXT: srli a1, a0, 4 -; RV64NOZBB-NEXT: add a0, a0, a1 -; RV64NOZBB-NEXT: lui a1, 61681 -; RV64NOZBB-NEXT: addi a1, a1, -241 -; RV64NOZBB-NEXT: and a0, a0, a1 -; RV64NOZBB-NEXT: slli a1, a0, 8 -; RV64NOZBB-NEXT: add a0, a0, a1 -; RV64NOZBB-NEXT: slli a1, a0, 16 -; RV64NOZBB-NEXT: add a0, a0, a1 -; RV64NOZBB-NEXT: srliw a0, a0, 24 -; RV64NOZBB-NEXT: ret +; RV64I-LABEL: test_ctlz_i32_zero_undef: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 4 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: not a0, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctlz_i32_zero_undef: ; RV32M: # %bb.0: @@ -1867,6 +1908,41 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV32M-NEXT: srli a0, a0, 24 ; RV32M-NEXT: ret ; +; RV64M-LABEL: test_ctlz_i32_zero_undef: +; RV64M: # %bb.0: +; RV64M-NEXT: srliw a1, a0, 1 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: srliw a1, a0, 2 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: srliw a1, a0, 4 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: srliw a1, a0, 8 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: srliw a1, a0, 16 +; RV64M-NEXT: or a0, a0, a1 +; RV64M-NEXT: not a0, a0 +; RV64M-NEXT: srli a1, a0, 1 +; RV64M-NEXT: lui a2, 349525 +; RV64M-NEXT: addiw a2, a2, 1365 +; RV64M-NEXT: and a1, a1, a2 +; RV64M-NEXT: sub a0, a0, a1 +; RV64M-NEXT: lui a1, 209715 +; RV64M-NEXT: addiw a1, a1, 819 +; RV64M-NEXT: and a2, a0, a1 +; RV64M-NEXT: srli a0, a0, 2 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: add a0, a2, a0 +; RV64M-NEXT: srli a1, a0, 4 +; RV64M-NEXT: add a0, a0, a1 +; RV64M-NEXT: lui a1, 61681 +; RV64M-NEXT: addi a1, a1, -241 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: lui a1, 4112 +; RV64M-NEXT: addi a1, a1, 257 +; RV64M-NEXT: mul a0, a0, a1 +; RV64M-NEXT: srliw a0, a0, 24 +; RV64M-NEXT: ret +; ; RV32ZBB-LABEL: test_ctlz_i32_zero_undef: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: clz a0, a0 @@ -2355,30 +2431,30 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: ret ; -; RV64NOZBB-LABEL: test_ctpop_i32: -; RV64NOZBB: # %bb.0: -; RV64NOZBB-NEXT: srli a1, a0, 1 -; RV64NOZBB-NEXT: lui a2, 349525 -; RV64NOZBB-NEXT: addiw a2, a2, 1365 -; RV64NOZBB-NEXT: and a1, a1, a2 -; RV64NOZBB-NEXT: sub a0, a0, a1 -; RV64NOZBB-NEXT: lui a1, 209715 -; RV64NOZBB-NEXT: addiw a1, a1, 819 -; RV64NOZBB-NEXT: and a2, a0, a1 -; RV64NOZBB-NEXT: srli a0, a0, 2 -; RV64NOZBB-NEXT: and a0, a0, a1 -; RV64NOZBB-NEXT: add a0, a2, a0 -; RV64NOZBB-NEXT: srli a1, a0, 4 -; RV64NOZBB-NEXT: add a0, a0, a1 -; 
RV64NOZBB-NEXT: lui a1, 61681 -; RV64NOZBB-NEXT: addi a1, a1, -241 -; RV64NOZBB-NEXT: and a0, a0, a1 -; RV64NOZBB-NEXT: slli a1, a0, 8 -; RV64NOZBB-NEXT: add a0, a0, a1 -; RV64NOZBB-NEXT: slli a1, a0, 16 -; RV64NOZBB-NEXT: add a0, a0, a1 -; RV64NOZBB-NEXT: srliw a0, a0, 24 -; RV64NOZBB-NEXT: ret +; RV64I-LABEL: test_ctpop_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctpop_i32: ; RV32M: # %bb.0: @@ -2404,6 +2480,30 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV32M-NEXT: srli a0, a0, 24 ; RV32M-NEXT: ret ; +; RV64M-LABEL: test_ctpop_i32: +; RV64M: # %bb.0: +; RV64M-NEXT: srli a1, a0, 1 +; RV64M-NEXT: lui a2, 349525 +; RV64M-NEXT: addiw a2, a2, 1365 +; RV64M-NEXT: and a1, a1, a2 +; RV64M-NEXT: sub a0, a0, a1 +; RV64M-NEXT: lui a1, 209715 +; RV64M-NEXT: addiw a1, a1, 819 +; RV64M-NEXT: and a2, a0, a1 +; RV64M-NEXT: srli a0, a0, 2 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: add a0, a2, a0 +; RV64M-NEXT: srli a1, a0, 4 +; RV64M-NEXT: add a0, a0, a1 +; RV64M-NEXT: lui a1, 61681 +; RV64M-NEXT: addi a1, a1, -241 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: lui a1, 4112 +; RV64M-NEXT: addi a1, a1, 257 +; RV64M-NEXT: mul a0, a0, a1 +; RV64M-NEXT: srliw a0, a0, 24 +; RV64M-NEXT: ret +; ; RV32ZBB-LABEL: test_ctpop_i32: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: cpop a0, a0 diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index 2b1846199d2a0..f707cb31e3ece 100644 --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -169,11 +169,12 @@ define signext i32 @test4(ptr %p, i32 signext %b) nounwind { define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-LABEL: test5: ; RV64I: # %bb.0: # %bb -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sraw a0, a0, a1 ; RV64I-NEXT: lui a1, 349525 ; RV64I-NEXT: addiw s0, a1, 1365 @@ -181,6 +182,8 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-NEXT: addiw s1, a1, 819 ; RV64I-NEXT: lui a1, 61681 ; RV64I-NEXT: addi s2, a1, -241 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addi s3, a1, 257 ; RV64I-NEXT: .LBB4_1: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call bar @@ -195,18 +198,16 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-NEXT: srli a2, a0, 4 ; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: and a0, a0, s2 -; RV64I-NEXT: slli a2, a0, 8 -; RV64I-NEXT: add 
a0, a0, a2 -; RV64I-NEXT: slli a2, a0, 16 -; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: mul a0, a0, s3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: bnez a1, .LBB4_1 ; RV64I-NEXT: # %bb.2: # %bb7 -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: test5: From e8823684e2c387c171dedae7145006011e612cb5 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Thu, 28 Mar 2024 21:10:09 +0800 Subject: [PATCH 4/7] Add custom IsMulSupported --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 7 +++- .../legalizer/legalize-ctlz-rv32.mir | 38 ++++++++++--------- .../legalizer/legalize-ctlz-rv64.mir | 34 +++++++++-------- .../legalizer/legalize-ctpop-rv32.mir | 15 ++++---- .../legalizer/legalize-ctpop-rv64.mir | 13 ++++--- .../legalizer/legalize-cttz-rv32.mir | 30 ++++++++------- .../legalizer/legalize-cttz-rv64.mir | 26 +++++++------ 7 files changed, 89 insertions(+), 74 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index baf36792f8e42..51592730c5f0e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -6365,7 +6365,12 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { // Shift count result from 8 high bits to low bits. auto C_SizeM8 = B.buildConstant(Ty, Size - 8); - if (isSupported({TargetOpcode::G_MUL, {Ty, Ty}})) { + + auto IsMulSupported = [this](const LLT Ty) { + auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty, Ty}}).Action; + return Action == Legal || Action == WidenScalar || Action == Custom; + }; + if (IsMulSupported(Ty)) { auto ResTmp = B.buildMul(Ty, B8Count, MulMask); B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8); } else { diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir index 00ace42f11509..354fc109a4638 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir @@ -49,10 +49,12 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] - ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C14]](s32) - ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C15]], [[LSHR6]] + ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] + ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s32) + ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] ; RV32I-NEXT: $x10 = COPY [[SUB1]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -126,13 +128,12 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = 
G_ADD [[LSHR6]], [[ADD]] ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] - ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C17]](s32) - ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[SHL]] + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C18]] - ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C16]](s32) + ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C18]] + ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C17]](s32) ; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C19]], [[LSHR7]] ; RV32I-NEXT: $x10 = COPY [[SUB1]](s32) @@ -387,10 +388,12 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] - ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C14]](s32) - ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C15]], [[LSHR6]] + ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] + ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s32) + ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] ; RV32I-NEXT: $x10 = COPY [[SUB1]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -464,13 +467,12 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[ADD]] ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] - ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C17]](s32) - ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[SHL]] + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C18]] - ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C16]](s32) + ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C18]] + ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C17]](s32) ; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C19]], [[LSHR7]] ; RV32I-NEXT: $x10 = COPY [[SUB1]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir index 24a069f98c56a..38a4b9c6dae38 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir @@ -51,10 +51,12 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] ; RV64I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; 
RV64I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] - ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C14]](s64) - ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C15]], [[LSHR6]] + ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] + ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s64) + ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SUB1]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -133,11 +135,10 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[ADD]] ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] - ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C16]](s64) - ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[SHL]] + ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] ; RV64I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C17]] + ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C17]] ; RV64I-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C18]](s64) ; RV64I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -350,10 +351,12 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] ; RV64I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] - ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[C14]](s64) - ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C15]], [[LSHR6]] + ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] + ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s64) + ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SUB1]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -432,11 +435,10 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[ADD]] ; RV64I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] - ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C16]](s64) - ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND9]], [[SHL]] + ; RV64I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] ; RV64I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C17]] + ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C17]] ; RV64I-NEXT: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: 
[[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C18]](s64) ; RV64I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir index 09b6763fa6feb..c64669cb7341e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir @@ -34,8 +34,10 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] - ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C8]](s32) + ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -87,13 +89,12 @@ body: | ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] - ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) - ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[SHL]] + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C10]] - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C8]](s32) + ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C10]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir index dbc0fe16eaa08..196b367e59271 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir @@ -36,8 +36,10 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] - ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C8]](s64) + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -94,11 +96,10 @@ body: | ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] - ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s64) - ; 
RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[SHL]] + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C9]] + ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C9]] ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C10]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir index e1381729be7aa..372becaf08d94 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir @@ -38,8 +38,10 @@ body: | ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s32) + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -95,13 +97,12 @@ body: | ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C10]](s32) - ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[SHL]] + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C11]] - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C9]](s32) + ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C11]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C10]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -302,8 +303,10 @@ body: | ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s32) + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; @@ -359,13 +362,12 @@ body: | ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: 
[[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C10]](s32) - ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[SHL]] + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C11]] - ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C9]](s32) + ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C11]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C10]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 ; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir index 518891429a954..e51a2143efd02 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir @@ -40,8 +40,10 @@ body: | ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s64) + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] + ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -102,11 +104,10 @@ body: | ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C9]](s64) - ; RV64I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[SHL]] + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C10]] + ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C10]] ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C11]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) @@ -275,8 +276,10 @@ body: | ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s64) + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] + ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) ; RV64I-NEXT: $x10 = COPY [[ANYEXT]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -337,11 +340,10 @@ body: | ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD1]] ; 
RV64I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] - ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C9]](s64) - ; RV64I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[SHL]] + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C10]] + ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C10]] ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C11]](s64) ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR3]](s32) From 66e4b9fb2bfd71dbfb8dc207c29f2390153a842a Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Fri, 29 Mar 2024 10:58:54 +0800 Subject: [PATCH 5/7] Use one type index --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 51592730c5f0e..9cc69747a7626 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -6367,7 +6367,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { auto C_SizeM8 = B.buildConstant(Ty, Size - 8); auto IsMulSupported = [this](const LLT Ty) { - auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty, Ty}}).Action; + auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action; return Action == Legal || Action == WidenScalar || Action == Custom; }; if (IsMulSupported(Ty)) { From 1bd62e4c71cf48898f39a7e4729b5ff5e72b2625 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Fri, 29 Mar 2024 14:18:47 +0800 Subject: [PATCH 6/7] Use an immediate variable of shift count --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2cf2a388ac4da..7a5588effcf08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -8718,10 +8719,11 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01); } else { V = Op; - for (unsigned Shift = 8; Shift < Len; Shift *= 2) + for (unsigned Shift = 8; Shift < Len; Shift *= 2) { + SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl); V = DAG.getNode(ISD::ADD, dl, VT, V, - DAG.getNode(ISD::SHL, dl, VT, V, - DAG.getShiftAmountConstant(Shift, VT, dl))); + DAG.getNode(ISD::SHL, dl, VT, V, ShiftC)); + } } return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT)); } @@ -8784,12 +8786,12 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const { V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL); } else { V = Op; - for (unsigned Shift = 8; Shift < Len; Shift *= 2) + for (unsigned Shift = 8; Shift < Len; Shift *= 2) { + SDValue ShiftC = 
DAG.getShiftAmountConstant(Shift, VT, dl); V = DAG.getNode(ISD::VP_ADD, dl, VT, V, - DAG.getNode(ISD::VP_SHL, dl, VT, V, - DAG.getShiftAmountConstant(Shift, VT, dl), - Mask, VL), + DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL), Mask, VL); + } } return DAG.getNode(ISD::VP_LSHR, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT), Mask, VL); From 5087f7a8cf028858bd623618385152cae00f8076 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Fri, 29 Mar 2024 14:22:11 +0800 Subject: [PATCH 7/7] Clean includes --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 7a5588effcf08..962f0d98e3be9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h"
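
Sanity check for the shift+add fallback (illustrative sketch, not part of any patch in this series). After the masking steps of the CTPOP expansion, every byte of v holds a partial count in [0, 8], and the series replaces the final multiply-by-0x01010101... step with a doubling shift+add chain whenever the multiply would otherwise become a libcall. The standalone program below verifies, for the 32-bit case, that the chain and the multiply agree. The loop bounds and shift amounts mirror the expansion in expandCTPOP; the function names and test values are made up for the demonstration.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Final CTPOP step via multiply: the 0x01010101 splat sums all four
// bytes of V into the top byte, which the right shift then extracts.
static uint32_t sumBytesByMul(uint32_t V) {
  return (V * 0x01010101u) >> 24;
}

// The libcall-free replacement: each iteration doubles the number of
// bytes folded into every byte position, so after log2(Len / 8) steps
// the top byte holds the sum of all bytes, exactly as the multiply.
static uint32_t sumBytesByShifts(uint32_t V) {
  const unsigned Len = 32; // bit width, as in the expansion
  for (unsigned Shift = 8; Shift < Len; Shift *= 2)
    V = V + (V << Shift);
  return V >> (Len - 8);
}

int main() {
  // Representative inputs: every byte is a partial popcount in [0, 8],
  // which is the invariant the earlier masking steps establish.
  const uint32_t Tests[] = {0x00000000u, 0x01010101u, 0x08080808u,
                            0x00040200u, 0x07010503u};
  for (uint32_t V : Tests)
    assert(sumBytesByMul(V) == sumBytesByShifts(V));
  std::puts("shift+add chain matches the 0x01010101 multiply");
  return 0;
}

The same identity holds at 64 bits with Len = 64 and a final shift of 56, which is the case the 64-bit G_MUL-by-72340172838076673 hunks in the MIR tests above exercise.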