[GISel][RISCV] Use isSExtCheaperThanZExt when widening G_ICMP. #120032
Conversation
Sign extending i32->i64 is more efficient than zero extending for RV64.
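The patch defers the choice between G_SEXT and G_ZEXT to the existing TargetLowering hook. For reference, the RISC-V override of isSExtCheaperThanZExt looks roughly like the sketch below (paraphrased from memory, not quoted from this patch, so the exact in-tree form may differ): it answers yes only for i32 -> i64 on RV64, where sext.w (or the implicit sign extension of W-suffixed instructions) is free, while zero extension needs slli+srli unless Zba's zext.w is available.

```c++
// Rough sketch of the RISC-V override queried by the legalizer change below;
// the exact in-tree implementation may differ.
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  // Only the i32 -> i64 widening on RV64 is cheaper to sign extend.
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
```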
@llvm/pr-subscribers-llvm-globalisel

Author: Craig Topper (topperc)

Changes: Sign extending i32->i64 is more efficient than zero extending for RV64.

Patch is 40.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120032.diff

16 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 2fc8ef6a52a528..6dc4c2d54196e4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3077,10 +3077,17 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
- unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
- MI.getOperand(1).getPredicate()))
- ? TargetOpcode::G_SEXT
- : TargetOpcode::G_ZEXT;
+ LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ unsigned ExtOpcode =
+ (CmpInst::isSigned(Pred) ||
+ TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
+ getApproximateEVTForLLT(WideTy, Ctx)))
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
widenScalarSrc(MI, WideTy, 2, ExtOpcode);
widenScalarSrc(MI, WideTy, 3, ExtOpcode);
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
index 66eb4372aefadb..cb2037f5fb0271 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll
@@ -215,8 +215,7 @@ define i32 @fneg_d(double %a, double %b) nounwind {
; RV64I-NEXT: slli a1, a1, 63
; RV64I-NEXT: xor a1, a0, a1
; RV64I-NEXT: call __eqdf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
index 81d3381449bc87..aeed219d99555d 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
@@ -117,25 +117,14 @@ define i32 @fcvt_wu_d(double %a) nounwind {
}
define i32 @fcvt_wu_d_multiple_use(double %x, ptr %y) nounwind {
-; RV32IFD-LABEL: fcvt_wu_d_multiple_use:
-; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; RV32IFD-NEXT: bnez a0, .LBB4_2
-; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: li a0, 1
-; RV32IFD-NEXT: .LBB4_2:
-; RV32IFD-NEXT: ret
-;
-; RV64IFD-LABEL: fcvt_wu_d_multiple_use:
-; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
-; RV64IFD-NEXT: slli a1, a0, 32
-; RV64IFD-NEXT: srli a1, a1, 32
-; RV64IFD-NEXT: bnez a1, .LBB4_2
-; RV64IFD-NEXT: # %bb.1:
-; RV64IFD-NEXT: li a0, 1
-; RV64IFD-NEXT: .LBB4_2:
-; RV64IFD-NEXT: ret
+; CHECKIFD-LABEL: fcvt_wu_d_multiple_use:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fcvt.wu.d a0, fa0, rtz
+; CHECKIFD-NEXT: bnez a0, .LBB4_2
+; CHECKIFD-NEXT: # %bb.1:
+; CHECKIFD-NEXT: li a0, 1
+; CHECKIFD-NEXT: .LBB4_2:
+; CHECKIFD-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_d_multiple_use:
; RV32I: # %bb.0:
@@ -155,8 +144,7 @@ define i32 @fcvt_wu_d_multiple_use(double %x, ptr %y) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __fixunsdfsi
-; RV64I-NEXT: slli a1, a0, 32
-; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: bnez a1, .LBB4_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a0, 1
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
index 7ef1af22370a1a..dfa76a2e1531be 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
@@ -28,7 +28,7 @@ define i32 @fcmp_false(double %a, double %b) nounwind {
ret i32 %2
}
-; FIXME: slli+srli on RV64 are unnecessary
+; FIXME: slli+srli on RV64 are unnecessary
define i32 @fcmp_oeq(double %a, double %b) nounwind {
; CHECKIFD-LABEL: fcmp_oeq:
; CHECKIFD: # %bb.0:
@@ -50,8 +50,7 @@ define i32 @fcmp_oeq(double %a, double %b) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __eqdf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -194,7 +193,7 @@ define i32 @fcmp_ole(double %a, double %b) nounwind {
ret i32 %2
}
-; FIXME: slli+srli on RV64 are unnecessary
+; FIXME: slli+srli on RV64 are unnecessary
define i32 @fcmp_one(double %a, double %b) nounwind {
; CHECKIFD-LABEL: fcmp_one:
; CHECKIFD: # %bb.0:
@@ -244,14 +243,12 @@ define i32 @fcmp_one(double %a, double %b) nounwind {
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: call __eqdf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez s2, a0
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __unorddf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: and a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -265,7 +262,7 @@ define i32 @fcmp_one(double %a, double %b) nounwind {
ret i32 %2
}
-; FIXME: slli+srli on RV64 are unnecessary
+; FIXME: slli+srli on RV64 are unnecessary
define i32 @fcmp_ord(double %a, double %b) nounwind {
; CHECKIFD-LABEL: fcmp_ord:
; CHECKIFD: # %bb.0:
@@ -289,8 +286,7 @@ define i32 @fcmp_ord(double %a, double %b) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __unorddf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -300,7 +296,7 @@ define i32 @fcmp_ord(double %a, double %b) nounwind {
ret i32 %2
}
-; FIXME: slli+srli on RV64 are unnecessary
+; FIXME: slli+srli on RV64 are unnecessary
define i32 @fcmp_ueq(double %a, double %b) nounwind {
; CHECKIFD-LABEL: fcmp_ueq:
; CHECKIFD: # %bb.0:
@@ -351,14 +347,12 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: call __eqdf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz s2, a0
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __unorddf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -508,7 +502,7 @@ define i32 @fcmp_ule(double %a, double %b) nounwind {
ret i32 %2
}
-; FIXME: slli+srli on RV64 are unnecessary
+; FIXME: slli+srli on RV64 are unnecessary
define i32 @fcmp_une(double %a, double %b) nounwind {
; CHECKIFD-LABEL: fcmp_une:
; CHECKIFD: # %bb.0:
@@ -531,8 +525,7 @@ define i32 @fcmp_une(double %a, double %b) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __nedf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -542,7 +535,7 @@ define i32 @fcmp_une(double %a, double %b) nounwind {
ret i32 %2
}
-; FIXME: slli+srli on RV64 are unnecessary
+; FIXME: slli+srli on RV64 are unnecessary
define i32 @fcmp_uno(double %a, double %b) nounwind {
; CHECKIFD-LABEL: fcmp_uno:
; CHECKIFD: # %bb.0:
@@ -567,8 +560,7 @@ define i32 @fcmp_uno(double %a, double %b) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __unorddf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll
index 3a60856665742a..fdeda0c273f6d0 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll
@@ -210,8 +210,7 @@ define i32 @fneg_s(float %a, float %b) nounwind {
; RV64I-NEXT: lui a1, 524288
; RV64I-NEXT: xor a1, a0, a1
; RV64I-NEXT: call __eqsf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
index 51df36f5eee05d..1820ecf3b5056c 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
@@ -65,25 +65,14 @@ define i32 @fcvt_wu_s(float %a) nounwind {
; Test where the fptoui has multiple uses, one of which causes a sext to be
; inserted on RV64.
define i32 @fcvt_wu_s_multiple_use(float %x, ptr %y) nounwind {
-; RV32IF-LABEL: fcvt_wu_s_multiple_use:
-; RV32IF: # %bb.0:
-; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz
-; RV32IF-NEXT: bnez a0, .LBB2_2
-; RV32IF-NEXT: # %bb.1:
-; RV32IF-NEXT: li a0, 1
-; RV32IF-NEXT: .LBB2_2:
-; RV32IF-NEXT: ret
-;
-; RV64IF-LABEL: fcvt_wu_s_multiple_use:
-; RV64IF: # %bb.0:
-; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
-; RV64IF-NEXT: slli a1, a0, 32
-; RV64IF-NEXT: srli a1, a1, 32
-; RV64IF-NEXT: bnez a1, .LBB2_2
-; RV64IF-NEXT: # %bb.1:
-; RV64IF-NEXT: li a0, 1
-; RV64IF-NEXT: .LBB2_2:
-; RV64IF-NEXT: ret
+; CHECKIF-LABEL: fcvt_wu_s_multiple_use:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: fcvt.wu.s a0, fa0, rtz
+; CHECKIF-NEXT: bnez a0, .LBB2_2
+; CHECKIF-NEXT: # %bb.1:
+; CHECKIF-NEXT: li a0, 1
+; CHECKIF-NEXT: .LBB2_2:
+; CHECKIF-NEXT: ret
;
; RV32I-LABEL: fcvt_wu_s_multiple_use:
; RV32I: # %bb.0:
@@ -103,8 +92,7 @@ define i32 @fcvt_wu_s_multiple_use(float %x, ptr %y) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __fixunssfsi
-; RV64I-NEXT: slli a1, a0, 32
-; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: bnez a1, .LBB2_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a0, 1
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-fcmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-fcmp.ll
index bdd779d4761099..475b67bda9ae93 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/float-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-fcmp.ll
@@ -50,8 +50,7 @@ define i32 @fcmp_oeq(float %a, float %b) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __eqsf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -236,14 +235,12 @@ define i32 @fcmp_one(float %a, float %b) nounwind {
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: call __eqsf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez s2, a0
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __unordsf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: and a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -281,8 +278,7 @@ define i32 @fcmp_ord(float %a, float %b) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __unordsf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -335,14 +331,12 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: call __eqsf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: seqz s2, a0
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __unordsf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -516,8 +510,7 @@ define i32 @fcmp_une(float %a, float %b) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __nesf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -552,8 +545,7 @@ define i32 @fcmp_uno(float %a, float %b) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: call __unordsf2
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
index 35c7fdfb33fe44..05730a710b4d8e 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
@@ -997,32 +997,27 @@ define i1 @fpclass(float %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 522240
; RV64I-NEXT: slli a2, a0, 33
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: li a3, 1
; RV64I-NEXT: lui a4, 2048
+; RV64I-NEXT: lui a5, 520192
; RV64I-NEXT: srli a2, a2, 33
-; RV64I-NEXT: seqz a5, a2
-; RV64I-NEXT: xor a6, a2, a1
-; RV64I-NEXT: seqz a6, a6
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: lui a6, 520192
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: addiw a6, a4, -1
; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sub a3, a2, a3
+; RV64I-NEXT: subw a3, a2, a3
+; RV64I-NEXT: sltu a3, a3, a6
+; RV64I-NEXT: xor a6, a2, a1
; RV64I-NEXT: sltu a1, a1, a2
-; RV64I-NEXT: sub a2, a2, a4
-; RV64I-NEXT: addiw a4, a4, -1
+; RV64I-NEXT: subw a4, a2, a4
+; RV64I-NEXT: seqz a2, a2
; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: slli a3, a3, 32
-; RV64I-NEXT: slli a2, a2, 32
-; RV64I-NEXT: srli a3, a3, 32
-; RV64I-NEXT: srli a2, a2, 32
-; RV64I-NEXT: sltu a3, a3, a4
-; RV64I-NEXT: or a1, a5, a1
-; RV64I-NEXT: sltu a2, a2, a6
+; RV64I-NEXT: seqz a6, a6
+; RV64I-NEXT: sltu a4, a4, a5
; RV64I-NEXT: and a3, a3, a0
+; RV64I-NEXT: or a2, a2, a6
+; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: or a1, a1, a3
-; RV64I-NEXT: and a0, a2, a0
+; RV64I-NEXT: and a0, a4, a0
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
%cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639)
@@ -1200,8 +1195,7 @@ define i1 @isposinf_fpclass(float %x) {
; RV64I-LABEL: isposinf_fpclass:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 522240
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ret
@@ -1233,10 +1227,8 @@ define i1 @isneginf_fpclass(float %x) {
;
; RV64I-LABEL: isneginf_fpclass:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: li a1, 511
-; RV64I-NEXT: srli a0, a0, 32
-; RV64I-NEXT: slli a1, a1, 23
+; RV64I-NEXT: lui a1, 1046528
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ret
@@ -1302,8 +1294,7 @@ define i1 @isposfinite_fpclass(float %x) {
; RV64I-LABEL: isposfinite_fpclass:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 522240
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sltu a0, a0, a1
; RV64I-NEXT: ret
%1 = call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite"
@@ -1340,9 +1331,8 @@ define i1 @isnegfinite_fpclass(float %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 522240
; RV64I-NEXT: slli a2, a0, 33
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: srli a2, a2, 33
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: xor a0, a0, a2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: sltu a1, a2, a1
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/fp128.ll b/llvm/test/CodeGen/RISCV/GlobalISel/fp128.ll
index eb48c90e14f803..978a6b0dc024c1 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/fp128.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/fp128.ll
@@ -124,8 +124,7 @@ define i1 @fcmp(fp128 %x, fp128 %y) nounwind {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: call __eqtf2
-; CHECK-NEXT: slli a0, a0, 32
-; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: sext.w a0, a0
; CHECK-NEXT: seqz a0, a0
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-addo-subo-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-addo-subo-rv64.mir
index b815c37401716e..f2ec70933261eb 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-addo-subo-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-addo-subo-rv64.mir
@@ -336,10 +336,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
- ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]]
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 32
+ ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
; CHECK-NEXT: $x10 = COPY [[ADD]](s64)
; CHECK-NEXT: $x11 = COPY [[ICMP]](s64)
; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
@@ -455,10 +454,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY]], [[COPY1]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
- ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[AND]](s64), [[AND1]]
+ ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32
+ ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(ult), [[SEXT_INREG]](s64), [[SEXT_INREG1]]
; CHECK-NEXT: $x10 = COPY [[SUB]](s64)
; CHECK-NEXT: $x11 = COPY [[ICMP]](s64)
; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-icmp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalIS...
[truncated]
It would be nicer if this was bundled into the requested legalize action
Curious: why is it correct to do a sign-extend for an unsigned comparison promotion?
eq / ne / true / false aren't really signed or unsigned; you just need to treat the high bits consistently in both operands.
Right, but those aren't the only conditions potentially being handled here? There's also the …
This isn't limited to eq / ne / true / false. It works for ugt/uge/ult/ule too. Sign extending an unsigned value doesn't change 0x00000000-0x7fffffff, and it makes 0x80000000-0xffffffff into even larger numbers without affecting their relative order.
Makes sense.
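To make the order-preservation argument above concrete, here is a small standalone check (written for this discussion, not part of the patch or the test suite) that brute-forces the ult case over a few boundary values: the unsigned comparison of the sign-extended 64-bit values agrees with the original 32-bit unsigned comparison.

```c++
#include <cassert>
#include <cstdint>

// Sign-extend a 32-bit value to 64 bits (what G_SEXT / sext.w would produce).
static uint64_t signExtend32To64(uint32_t V) {
  return (V & 0x80000000u) ? (0xffffffff00000000ull | V) : V;
}

int main() {
  // Boundary values around 0, INT32_MAX and UINT32_MAX.
  const uint32_t Samples[] = {0u,          1u,          0x7ffffffeu,
                              0x7fffffffu, 0x80000000u, 0x80000001u,
                              0xfffffffeu, 0xffffffffu};
  for (uint32_t A : Samples) {
    for (uint32_t B : Samples) {
      // Unsigned compare of the sign-extended values must match the original
      // 32-bit unsigned compare; the ult predicate is shown here.
      bool Narrow = A < B;
      bool Wide = signExtend32To64(A) < signExtend32To64(B);
      assert(Narrow == Wide);
    }
  }
  return 0;
}
```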
Similar to what we do for unsigned comparisons after llvm#120032.