-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LoongArch] Use div.w/mod.w to eliminate unnecessary sign-extend for sdiv/srem i32. #117298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-loongarch
Author: None (tangaac)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/117298.diff
4 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5c567ed4a6f724..a202d9e9b5b50c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -141,7 +141,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
- setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
+ Custom);
setOperationAction(ISD::LROUND, MVT::i32, Custom);
}
@@ -2629,8 +2630,12 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Unexpected opcode");
+ case ISD::SDIV:
+ return LoongArchISD::DIV_W;
case ISD::UDIV:
return LoongArchISD::DIV_WU;
+ case ISD::SREM:
+ return LoongArchISD::MOD_W;
case ISD::UREM:
return LoongArchISD::MOD_WU;
case ISD::SHL:
@@ -2827,7 +2832,9 @@ void LoongArchTargetLowering::ReplaceNodeResults(
"Unexpected custom legalisation");
Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
break;
+ case ISD::SDIV:
case ISD::UDIV:
+ case ISD::SREM:
case ISD::UREM:
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
@@ -4667,7 +4674,9 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BITREV_W)
NODE_NAME_CASE(ROTR_W)
NODE_NAME_CASE(ROTL_W)
+ NODE_NAME_CASE(DIV_W)
NODE_NAME_CASE(DIV_WU)
+ NODE_NAME_CASE(MOD_W)
NODE_NAME_CASE(MOD_WU)
NODE_NAME_CASE(CLZ_W)
NODE_NAME_CASE(CTZ_W)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 1aa686695b49b8..5a47dfb257175f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -44,6 +44,8 @@ enum NodeType : unsigned {
ROTR_W,
// unsigned 32-bit integer division
+ DIV_W,
+ MOD_W,
DIV_WU,
MOD_WU,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index cd1500229f4aa9..b9d4f00717a565 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -85,7 +85,9 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
+def loongarch_div_w : SDNode<"LoongArchISD::DIV_W", SDT_LoongArchIntBinOpW>;
def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>;
+def loongarch_mod_w : SDNode<"LoongArchISD::MOD_W", SDT_LoongArchIntBinOpW>;
def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>;
def loongarch_crc_w_b_w
: SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
@@ -1156,10 +1158,12 @@ def : PatGprGpr<sub, SUB_D>;
def : PatGprGpr<sdiv, DIV_D>;
def : PatGprGpr_32<sdiv, DIV_W>;
def : PatGprGpr<udiv, DIV_DU>;
+def : PatGprGpr<loongarch_div_w, DIV_W>;
def : PatGprGpr<loongarch_div_wu, DIV_WU>;
def : PatGprGpr<srem, MOD_D>;
def : PatGprGpr_32<srem, MOD_W>;
def : PatGprGpr<urem, MOD_DU>;
+def : PatGprGpr<loongarch_mod_w, MOD_W>;
def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
def : PatGprGpr<shiftop<rotr>, ROTR_D>;
def : PatGprGpr<shiftopw<loongarch_rotr_w>, ROTR_W>;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index c5af79157eaadc..99824f6d7718e7 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -121,7 +121,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: div.d $a0, $a0, $a1
+; LA64-NEXT: div.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: sdiv_i32:
@@ -137,7 +137,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB3_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -156,7 +156,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-LABEL: sdiv_ui32_si32_si32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: div.d $a0, $a0, $a1
+; LA64-NEXT: div.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: sdiv_ui32_si32_si32:
@@ -170,7 +170,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-TRAP-LABEL: sdiv_ui32_si32_si32:
; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB4_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -693,7 +693,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: mod.d $a0, $a0, $a1
+; LA64-NEXT: mod.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: srem_i32:
@@ -709,7 +709,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB19_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -728,7 +728,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-LABEL: srem_ui32_si32_si32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: mod.d $a0, $a0, $a1
+; LA64-NEXT: mod.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: srem_ui32_si32_si32:
@@ -742,7 +742,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-TRAP-LABEL: srem_ui32_si32_si32:
; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB20_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -763,7 +763,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: mod.d $a0, $a0, $a1
+; LA64-NEXT: mod.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: srem_si32_ui32_ui32:
@@ -779,7 +779,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB21_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -798,7 +798,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-LABEL: srem_si32_si32_si32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: mod.d $a0, $a0, $a1
+; LA64-NEXT: mod.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: srem_si32_si32_si32:
@@ -812,7 +812,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-TRAP-LABEL: srem_si32_si32_si32:
; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB22_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. This is helpful for eliminating unnecessary sign extensions. There are no test cases for this because the OptW pass already performs similar optimizations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR needs a rebase now that #116764 has been merged.
39b432c
to
50bc710
Compare
No description provided.