diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0e078f9dd88b4..a6b9cc81edde6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15740,8 +15740,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND) {
     // if the source is smaller than the dest, we still need an extend.
-    if (N0.getOperand(0).getValueType().bitsLT(VT))
-      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
+    if (N0.getOperand(0).getValueType().bitsLT(VT)) {
+      SDNodeFlags Flags;
+      if (N0.getOpcode() == ISD::ZERO_EXTEND)
+        Flags.setNonNeg(N0->getFlags().hasNonNeg());
+      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
+    }
     // if the source is larger than the dest, than we just need the truncate.
     if (N0.getOperand(0).getValueType().bitsGT(VT))
       return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b0e3f534e2aaa..5d8db8be9731f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6474,8 +6474,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
         OpOpcode == ISD::ANY_EXTEND) {
       // If the source is smaller than the dest, we still need an extend.
       if (N1.getOperand(0).getValueType().getScalarType().bitsLT(
-              VT.getScalarType()))
-        return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+              VT.getScalarType())) {
+        SDNodeFlags Flags;
+        if (OpOpcode == ISD::ZERO_EXTEND)
+          Flags.setNonNeg(N1->getFlags().hasNonNeg());
+        return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
+      }
       if (N1.getOperand(0).getValueType().bitsGT(VT))
         return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
       return N1.getOperand(0);
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 249dabba0cc28..32a037918a5a7 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -484,3 +484,298 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
   %res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b)
   ret i128 %res
 }
+
+define i64 @lshr64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: lshr64_shamt32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a4, a2, -32
+; RV32I-NEXT:    srl a3, a1, a2
+; RV32I-NEXT:    bltz a4, .LBB11_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a0, a3
+; RV32I-NEXT:    j .LBB11_3
+; RV32I-NEXT:  .LBB11_2:
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:  .LBB11_3:
+; RV32I-NEXT:    srai a1, a4, 31
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: lshr64_shamt32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    ret
+  %zext = zext nneg i32 %b to i64
+  %1 = lshr i64 %a, %zext
+  ret i64 %1
+}
+
+define i64 @ashr64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: ashr64_shamt32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    addi a4, a2, -32
+; RV32I-NEXT:    sra a1, a1, a2
+; RV32I-NEXT:    bltz a4, .LBB12_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srai a3, a3, 31
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB12_2:
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    slli a3, a3, 1
+; RV32I-NEXT:    sll a2, a3, a2
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ashr64_shamt32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sra a0, a0, a1
+; RV64I-NEXT:    ret
+  %zext = zext nneg i32 %b to i64
+  %1 = ashr i64 %a, %zext
+  ret i64 %1
+}
+
+define i64 @shl64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: shl64_shamt32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a4, a2, -32
+; RV32I-NEXT:    sll a3, a0, a2
+; RV32I-NEXT:    bltz a4, .LBB13_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    j .LBB13_3
+; RV32I-NEXT:  .LBB13_2:
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    srl a0, a0, a2
+; RV32I-NEXT:    or a1, a1, a0
+; RV32I-NEXT:  .LBB13_3:
+; RV32I-NEXT:    srai a0, a4, 31
+; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: shl64_shamt32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    sll a0, a0, a1
+; RV64I-NEXT:    ret
+  %zext = zext nneg i32 %b to i64
+  %1 = shl i64 %a, %zext
+  ret i64 %1
+}
+
+define i128 @lshr128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: lshr128_shamt32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    lw a3, 0(a1)
+; RV32I-NEXT:    lw a4, 4(a1)
+; RV32I-NEXT:    lw a5, 8(a1)
+; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    sw zero, 16(sp)
+; RV32I-NEXT:    sw zero, 20(sp)
+; RV32I-NEXT:    sw zero, 24(sp)
+; RV32I-NEXT:    sw zero, 28(sp)
+; RV32I-NEXT:    srli a6, a2, 3
+; RV32I-NEXT:    mv a7, sp
+; RV32I-NEXT:    andi t0, a2, 31
+; RV32I-NEXT:    andi a6, a6, 12
+; RV32I-NEXT:    xori t0, t0, 31
+; RV32I-NEXT:    add a6, a7, a6
+; RV32I-NEXT:    sw a3, 0(sp)
+; RV32I-NEXT:    sw a4, 4(sp)
+; RV32I-NEXT:    sw a5, 8(sp)
+; RV32I-NEXT:    sw a1, 12(sp)
+; RV32I-NEXT:    lw a1, 0(a6)
+; RV32I-NEXT:    lw a3, 4(a6)
+; RV32I-NEXT:    lw a4, 8(a6)
+; RV32I-NEXT:    lw a5, 12(a6)
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    slli a6, a3, 1
+; RV32I-NEXT:    srl a3, a3, a2
+; RV32I-NEXT:    slli a7, a4, 1
+; RV32I-NEXT:    srl a4, a4, a2
+; RV32I-NEXT:    srl a2, a5, a2
+; RV32I-NEXT:    slli a5, a5, 1
+; RV32I-NEXT:    sll a6, a6, t0
+; RV32I-NEXT:    sll a7, a7, t0
+; RV32I-NEXT:    sll a5, a5, t0
+; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a3, a3, a7
+; RV32I-NEXT:    or a4, a4, a5
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    sw a4, 8(a0)
+; RV32I-NEXT:    sw a2, 12(a0)
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: lshr128_shamt32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a4, a2, -64
+; RV64I-NEXT:    srl a3, a1, a2
+; RV64I-NEXT:    bltz a4, .LBB14_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a0, a3
+; RV64I-NEXT:    j .LBB14_3
+; RV64I-NEXT:  .LBB14_2:
+; RV64I-NEXT:    srl a0, a0, a2
+; RV64I-NEXT:    not a2, a2
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    sll a1, a1, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:  .LBB14_3:
+; RV64I-NEXT:    srai a1, a4, 63
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    ret
+  %zext = zext nneg i32 %b to i128
+  %1 = lshr i128 %a, %zext
+  ret i128 %1
+}
+
+define i128 @ashr128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: ashr128_shamt32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    lw a3, 0(a1)
+; RV32I-NEXT:    lw a4, 4(a1)
+; RV32I-NEXT:    lw a5, 8(a1)
+; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    srli a6, a2, 3
+; RV32I-NEXT:    mv a7, sp
+; RV32I-NEXT:    andi t0, a2, 31
+; RV32I-NEXT:    andi a6, a6, 12
+; RV32I-NEXT:    xori t0, t0, 31
+; RV32I-NEXT:    add a6, a7, a6
+; RV32I-NEXT:    sw a3, 0(sp)
+; RV32I-NEXT:    sw a4, 4(sp)
+; RV32I-NEXT:    sw a5, 8(sp)
+; RV32I-NEXT:    sw a1, 12(sp)
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    sw a1, 16(sp)
+; RV32I-NEXT:    sw a1, 20(sp)
+; RV32I-NEXT:    sw a1, 24(sp)
+; RV32I-NEXT:    sw a1, 28(sp)
+; RV32I-NEXT:    lw a1, 0(a6)
+; RV32I-NEXT:    lw a3, 4(a6)
+; RV32I-NEXT:    lw a4, 8(a6)
+; RV32I-NEXT:    lw a5, 12(a6)
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    slli a6, a3, 1
+; RV32I-NEXT:    srl a3, a3, a2
+; RV32I-NEXT:    slli a7, a4, 1
+; RV32I-NEXT:    srl a4, a4, a2
+; RV32I-NEXT:    sra a2, a5, a2
+; RV32I-NEXT:    slli a5, a5, 1
+; RV32I-NEXT:    sll a6, a6, t0
+; RV32I-NEXT:    sll a7, a7, t0
+; RV32I-NEXT:    sll a5, a5, t0
+; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a3, a3, a7
+; RV32I-NEXT:    or a4, a4, a5
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    sw a4, 8(a0)
+; RV32I-NEXT:    sw a2, 12(a0)
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: ashr128_shamt32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    mv a3, a1
+; RV64I-NEXT:    addi a4, a2, -64
+; RV64I-NEXT:    sra a1, a1, a2
+; RV64I-NEXT:    bltz a4, .LBB15_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    srai a3, a3, 63
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    mv a1, a3
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB15_2:
+; RV64I-NEXT:    srl a0, a0, a2
+; RV64I-NEXT:    not a2, a2
+; RV64I-NEXT:    slli a3, a3, 1
+; RV64I-NEXT:    sll a2, a3, a2
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    ret
+  %zext = zext nneg i32 %b to i128
+  %1 = ashr i128 %a, %zext
+  ret i128 %1
+}
+
+define i128 @shl128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: shl128_shamt32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    lw a3, 0(a1)
+; RV32I-NEXT:    lw a4, 4(a1)
+; RV32I-NEXT:    lw a5, 8(a1)
+; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    sw zero, 0(sp)
+; RV32I-NEXT:    sw zero, 4(sp)
+; RV32I-NEXT:    sw zero, 8(sp)
+; RV32I-NEXT:    sw zero, 12(sp)
+; RV32I-NEXT:    srli a6, a2, 3
+; RV32I-NEXT:    addi a7, sp, 16
+; RV32I-NEXT:    andi t0, a2, 31
+; RV32I-NEXT:    andi a6, a6, 12
+; RV32I-NEXT:    sub a6, a7, a6
+; RV32I-NEXT:    sw a3, 16(sp)
+; RV32I-NEXT:    sw a4, 20(sp)
+; RV32I-NEXT:    sw a5, 24(sp)
+; RV32I-NEXT:    sw a1, 28(sp)
+; RV32I-NEXT:    lw a1, 0(a6)
+; RV32I-NEXT:    lw a3, 4(a6)
+; RV32I-NEXT:    lw a4, 8(a6)
+; RV32I-NEXT:    lw a5, 12(a6)
+; RV32I-NEXT:    xori a6, t0, 31
+; RV32I-NEXT:    sll a7, a3, a2
+; RV32I-NEXT:    srli t0, a1, 1
+; RV32I-NEXT:    sll a5, a5, a2
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    sll a2, a4, a2
+; RV32I-NEXT:    srli a3, a3, 1
+; RV32I-NEXT:    srli a4, a4, 1
+; RV32I-NEXT:    srl t0, t0, a6
+; RV32I-NEXT:    srl a3, a3, a6
+; RV32I-NEXT:    srl a4, a4, a6
+; RV32I-NEXT:    or a6, a7, t0
+; RV32I-NEXT:    or a2, a2, a3
+; RV32I-NEXT:    or a4, a5, a4
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    sw a6, 4(a0)
+; RV32I-NEXT:    sw a2, 8(a0)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: shl128_shamt32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a4, a2, -64
+; RV64I-NEXT:    sll a3, a0, a2
+; RV64I-NEXT:    bltz a4, .LBB16_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a3
+; RV64I-NEXT:    j .LBB16_3
+; RV64I-NEXT:  .LBB16_2:
+; RV64I-NEXT:    sll a1, a1, a2
+; RV64I-NEXT:    not a2, a2
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    srl a0, a0, a2
+; RV64I-NEXT:    or a1, a1, a0
+; RV64I-NEXT:  .LBB16_3:
+; RV64I-NEXT:    srai a0, a4, 63
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    ret
+  %zext = zext nneg i32 %b to i128
+  %1 = shl i128 %a, %zext
+  ret i128 %1
+}
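
Note (not part of the diff): a rough sketch of the pattern these changes preserve; the value names are made up for illustration and the DAG notation in the comments is approximate.

    ; A shift amount that is known to be non-negative:
    %zext = zext nneg i32 %b to i128
    %res  = lshr i128 %a, %zext
    ; While the i128 shift is being legalized/combined, the wide amount is
    ; truncated and a narrower extend is rebuilt, roughly
    ; (trunc (zero_extend nneg X)) -> (zero_extend X'). Previously both
    ; DAGCombiner::visitTRUNCATE and SelectionDAG::getNode dropped the nneg
    ; flag when rebuilding that zero_extend; with this patch the flag is
    ; copied over, so later combines can still assume the shift amount is
    ; non-negative, as the new RV64I check lines in shifts.ll show.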