diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 6ec98e2789884..7a461e86820aa 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -72,14 +72,14 @@ HANDLE_LIBCALL(UREM_I64, "__umoddi3")
 HANDLE_LIBCALL(UREM_I128, "__umodti3")
 HANDLE_LIBCALL(SDIVREM_I8, nullptr)
 HANDLE_LIBCALL(SDIVREM_I16, nullptr)
-HANDLE_LIBCALL(SDIVREM_I32, nullptr)
-HANDLE_LIBCALL(SDIVREM_I64, nullptr)
-HANDLE_LIBCALL(SDIVREM_I128, nullptr)
+HANDLE_LIBCALL(SDIVREM_I32, "__divmodsi4")
+HANDLE_LIBCALL(SDIVREM_I64, "__divmoddi4")
+HANDLE_LIBCALL(SDIVREM_I128, "__divmodti4")
 HANDLE_LIBCALL(UDIVREM_I8, nullptr)
 HANDLE_LIBCALL(UDIVREM_I16, nullptr)
-HANDLE_LIBCALL(UDIVREM_I32, nullptr)
-HANDLE_LIBCALL(UDIVREM_I64, nullptr)
-HANDLE_LIBCALL(UDIVREM_I128, nullptr)
+HANDLE_LIBCALL(UDIVREM_I32, "__udivmodsi4")
+HANDLE_LIBCALL(UDIVREM_I64, "__udivmoddi4")
+HANDLE_LIBCALL(UDIVREM_I128, "__udivmodti4")
 HANDLE_LIBCALL(NEG_I32, "__negsi2")
 HANDLE_LIBCALL(NEG_I64, "__negdi2")
 HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index 24e740fd143d1..a933f7a45eb9c 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -213,19 +213,12 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind {
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: addi sp, sp, -16
 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
 ; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __divsi3@plt
-; RV32I-NEXT: add a0, s1, a0
+; RV32I-NEXT: addi a2, sp, 8
+; RV32I-NEXT: call __divmodsi4@plt
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: add a0, a1, a0
 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
@@ -246,23 +239,16 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind {
 ;
 ; RV64I-LABEL: combine_srem_sdiv:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sext.w s0, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sext.w a0, a0
 ; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __divdi3@plt
-; RV64I-NEXT: addw a0, s1, a0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: mv a2, sp
+; RV64I-NEXT: call __divmoddi4@plt
+; RV64I-NEXT: ld a1, 0(sp)
+; RV64I-NEXT: addw a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
 ; RV64IM-LABEL: combine_srem_sdiv:
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index b5f1efa4b160b..08c167a80255c 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -382,64 +382,48 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
 ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
 ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lh s1, 0(a1)
-; RV32I-NEXT: lh s2, 4(a1)
-; RV32I-NEXT: lh s3, 8(a1)
-; RV32I-NEXT: lh s4, 12(a1)
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s6, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s7, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __modsi3@plt
-; RV32I-NEXT: mv s8, a0
+; RV32I-NEXT: lh s0, 0(a1)
+; RV32I-NEXT: lh s1, 4(a1)
+; RV32I-NEXT: lh s2, 8(a1)
+; RV32I-NEXT: lh a3, 12(a1)
+; RV32I-NEXT: mv s3, a0
 ; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __divsi3@plt
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: call __divmodsi4@plt
 ; RV32I-NEXT: mv s4, a0
 ; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __divsi3@plt
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 16
 ; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __divsi3@plt
+; RV32I-NEXT: call __divmodsi4@plt
 ; RV32I-NEXT: mv s2, a0
 ; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 20
 ; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __divsi3@plt
-; RV32I-NEXT: add a0, s8, a0
-; RV32I-NEXT: add s2, s7, s2
-; RV32I-NEXT: add s3, s6, s3
-; RV32I-NEXT: add s4, s5, s4
-; RV32I-NEXT: sh s4, 6(s0)
-; RV32I-NEXT: sh s3, 4(s0)
-; RV32I-NEXT: sh s2, 2(s0)
-; RV32I-NEXT: sh a0, 0(s0)
+; RV32I-NEXT: call __divmodsi4@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 8
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __divmodsi4@plt
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: lw a2, 20(sp)
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a2, a2, s1
+; RV32I-NEXT: add a3, a3, s2
+; RV32I-NEXT: add a4, a4, s4
+; RV32I-NEXT: sh a4, 6(s3)
+; RV32I-NEXT: sh a3, 4(s3)
+; RV32I-NEXT: sh a2, 2(s3)
+; RV32I-NEXT: sh a0, 0(s3)
 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: addi sp, sp, 48
 ; RV32I-NEXT: ret
 ;
@@ -499,64 +483,48 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
 ; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lh s1, 0(a1)
-; RV64I-NEXT: lh s2, 8(a1)
-; RV64I-NEXT: lh s3, 16(a1)
-; RV64I-NEXT: lh s4, 24(a1)
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s3
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s7, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __moddi3@plt
-; RV64I-NEXT: mv s8, a0
+; RV64I-NEXT: lh s0, 0(a1)
+; RV64I-NEXT: lh s1, 8(a1)
+; RV64I-NEXT: lh s2, 16(a1)
+; RV64I-NEXT: lh a3, 24(a1)
+; RV64I-NEXT: mv s3, a0
 ; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __divdi3@plt
+; RV64I-NEXT: addi a2, sp, 8
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: call __divmoddi4@plt
 ; RV64I-NEXT: mv s4, a0
 ; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s3
-; RV64I-NEXT: call __divdi3@plt
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: addi a2, sp, 16
 ; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __divdi3@plt
+; RV64I-NEXT: call __divmoddi4@plt
 ; RV64I-NEXT: mv s2, a0
 ; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: addi a2, sp, 24
 ; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __divdi3@plt
-; RV64I-NEXT: add a0, s8, a0
-; RV64I-NEXT: add s2, s7, s2
-; RV64I-NEXT: add s3, s6, s3
-; RV64I-NEXT: add s4, s5, s4
-; RV64I-NEXT: sh s4, 6(s0)
-; RV64I-NEXT: sh s3, 4(s0)
-; RV64I-NEXT: sh s2, 2(s0)
-; RV64I-NEXT: sh a0, 0(s0)
+; RV64I-NEXT: call __divmoddi4@plt
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: mv a2, sp
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __divmoddi4@plt
+; RV64I-NEXT: ld a1, 0(sp)
+; RV64I-NEXT: ld a2, 24(sp)
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: ld a4, 8(sp)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: add a2, a2, s1
+; RV64I-NEXT: add a3, a3, s2
+; RV64I-NEXT: add a4, a4, s4
+; RV64I-NEXT: sh a4, 6(s3)
+; RV64I-NEXT: sh a3, 4(s3)
+; RV64I-NEXT: sh a2, 2(s3)
+; RV64I-NEXT: sh a0, 0(s3)
 ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: addi sp, sp, 80
 ; RV64I-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll
index 3d181c3a30d09..b7f2027449286 100644
--- a/llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -113,19 +113,12 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: addi sp, sp, -16
 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
 ; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __udivsi3@plt
-; RV32I-NEXT: add a0, s1, a0
+; RV32I-NEXT: addi a2, sp, 8
+; RV32I-NEXT: call __udivmodsi4@plt
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: add a0, a1, a0
 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: addi sp, sp, 16
 ; RV32I-NEXT: ret
 ;
@@ -146,24 +139,17 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
 ;
 ; RV64I-LABEL: combine_urem_udiv:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli s0, a0, 32
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: srli a0, a0, 32
 ; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __udivdi3@plt
-; RV64I-NEXT: add a0, s1, a0
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: mv a2, sp
+; RV64I-NEXT: call __udivmoddi4@plt
+; RV64I-NEXT: ld a1, 0(sp)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
 ; RV64I-NEXT: ret
 ;
 ; RV64IM-LABEL: combine_urem_udiv:
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index a38ae17f19df3..dac99ae964140 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -323,64 +323,48 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
 ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
 ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
 ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lhu s1, 0(a1)
-; RV32I-NEXT: lhu s2, 4(a1)
-; RV32I-NEXT: lhu s3, 8(a1)
-; RV32I-NEXT: lhu s4, 12(a1)
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s5, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s6, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s7, a0
-; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __umodsi3@plt
-; RV32I-NEXT: mv s8, a0
+; RV32I-NEXT: lhu s0, 0(a1)
+; RV32I-NEXT: lhu s1, 4(a1)
+; RV32I-NEXT: lhu s2, 8(a1)
+; RV32I-NEXT: lhu a3, 12(a1)
+; RV32I-NEXT: mv s3, a0
 ; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: call __udivsi3@plt
+; RV32I-NEXT: addi a2, sp, 12
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: call __udivmodsi4@plt
 ; RV32I-NEXT: mv s4, a0
 ; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s3
-; RV32I-NEXT: call __udivsi3@plt
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 16
 ; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: call __udivsi3@plt
+; RV32I-NEXT: call __udivmodsi4@plt
 ; RV32I-NEXT: mv s2, a0
 ; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 20
 ; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __udivsi3@plt
-; RV32I-NEXT: add a0, s8, a0
-; RV32I-NEXT: add s2, s7, s2
-; RV32I-NEXT: add s3, s6, s3
-; RV32I-NEXT: add s4, s5, s4
-; RV32I-NEXT: sh s4, 6(s0)
-; RV32I-NEXT: sh s3, 4(s0)
-; RV32I-NEXT: sh s2, 2(s0)
-; RV32I-NEXT: sh a0, 0(s0)
+; RV32I-NEXT: call __udivmodsi4@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: li a1, 95
+; RV32I-NEXT: addi a2, sp, 8
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call __udivmodsi4@plt
+; RV32I-NEXT: lw a1, 8(sp)
+; RV32I-NEXT: lw a2, 20(sp)
+; RV32I-NEXT: lw a3, 16(sp)
+; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a2, a2, s1
+; RV32I-NEXT: add a3, a3, s2
+; RV32I-NEXT: add a4, a4, s4
+; RV32I-NEXT: sh a4, 6(s3)
+; RV32I-NEXT: sh a3, 4(s3)
+; RV32I-NEXT: sh a2, 2(s3)
+; RV32I-NEXT: sh a0, 0(s3)
 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: addi sp, sp, 48
 ; RV32I-NEXT: ret
 ;
@@ -424,64 +408,48 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
 ; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
 ; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lhu s1, 0(a1)
-; RV64I-NEXT: lhu s2, 8(a1)
-; RV64I-NEXT: lhu s3, 16(a1)
-; RV64I-NEXT: lhu s4, 24(a1)
-; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s5, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s3
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s6, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s7, a0
-; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __umoddi3@plt
-; RV64I-NEXT: mv s8, a0
+; RV64I-NEXT: lhu s0, 0(a1)
+; RV64I-NEXT: lhu s1, 8(a1)
+; RV64I-NEXT: lhu s2, 16(a1)
+; RV64I-NEXT: lhu a3, 24(a1)
+; RV64I-NEXT: mv s3, a0
 ; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s4
-; RV64I-NEXT: call __udivdi3@plt
+; RV64I-NEXT: addi a2, sp, 8
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: call __udivmoddi4@plt
 ; RV64I-NEXT: mv s4, a0
 ; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s3
-; RV64I-NEXT: call __udivdi3@plt
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: addi a2, sp, 16
 ; RV64I-NEXT: mv a0, s2
-; RV64I-NEXT: call __udivdi3@plt
+; RV64I-NEXT: call __udivmoddi4@plt
 ; RV64I-NEXT: mv s2, a0
 ; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: addi a2, sp, 24
 ; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: call __udivdi3@plt
-; RV64I-NEXT: add a0, s8, a0
-; RV64I-NEXT: add s2, s7, s2
-; RV64I-NEXT: add s3, s6, s3
-; RV64I-NEXT: add s4, s5, s4
-; RV64I-NEXT: sh s4, 6(s0)
-; RV64I-NEXT: sh s3, 4(s0)
-; RV64I-NEXT: sh s2, 2(s0)
-; RV64I-NEXT: sh a0, 0(s0)
+; RV64I-NEXT: call __udivmoddi4@plt
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: li a1, 95
+; RV64I-NEXT: mv a2, sp
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: call __udivmoddi4@plt
+; RV64I-NEXT: ld a1, 0(sp)
+; RV64I-NEXT: ld a2, 24(sp)
+; RV64I-NEXT: ld a3, 16(sp)
+; RV64I-NEXT: ld a4, 8(sp)
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: add a2, a2, s1
+; RV64I-NEXT: add a3, a3, s2
+; RV64I-NEXT: add a4, a4, s4
+; RV64I-NEXT: sh a4, 6(s3)
+; RV64I-NEXT: sh a3, 4(s3)
+; RV64I-NEXT: sh a2, 2(s3)
+; RV64I-NEXT: sh a0, 0(s3)
 ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: addi sp, sp, 80
 ; RV64I-NEXT: ret
 ;
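
The updated check lines rely on the compiler-rt divmod convention: the quotient comes back in the usual return register (a0) and the remainder is stored through a pointer passed as the third argument (a2, pointing at a scratch stack slot, which the tests then reload). A minimal C sketch of that convention for orientation only; the prototypes mirror compiler-rt's builtins (divmodsi4.c and friends), and combine_srem_sdiv_c is a hypothetical rendering of what the combine_srem_sdiv test computes, not code from this patch:

    #include <stdint.h>

    /* compiler-rt convention: return the quotient, store the remainder. */
    int32_t  __divmodsi4(int32_t a, int32_t b, int32_t *rem);
    int64_t  __divmoddi4(int64_t a, int64_t b, int64_t *rem);
    uint32_t __udivmodsi4(uint32_t a, uint32_t b, uint32_t *rem);
    uint64_t __udivmoddi4(uint64_t a, uint64_t b, uint64_t *rem);

    /* One call now produces both results that previously took a
       __modsi3 call plus a __divsi3 call (x % 95 + x / 95). */
    static int32_t combine_srem_sdiv_c(int32_t x) {
      int32_t rem; /* the stack slot addressed via a2 above */
      int32_t quot = __divmodsi4(x, 95, &rem);
      return rem + quot; /* matches "add a0, a1, a0" in the checks */
    }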