-
Notifications
You must be signed in to change notification settings - Fork 13.5k
DAG: Lower single infinity is.fpclass tests to fcmp #100380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAG: Lower single infinity is.fpclass tests to fcmp #100380
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: Matt Arsenault (arsenm) Changes: InstCombine also should have taken care of this, but this should be helpful when the fcmp based lowering strategy tries to combine multiple tests. Full diff: https://github.com/llvm/llvm-project/pull/100380.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index f5207d8b9d124..bb09b0d1140fc 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -202,6 +202,8 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFP) {
case fcSubnormal | fcZero | fcNan:
return InvertedTest;
case fcInf | fcNan:
+ case fcPosInf | fcNan:
+ case fcNegInf | fcNan:
// If we're trying to use fcmp, we can take advantage of the nan check
// behavior of the compare (but this is more instructions in the integer
// expansion).
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ba7c89a33f604..0036c182ab9db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8599,6 +8599,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
+ // See if we can fold an | fcNan into an unordered compare.
+ FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
+
+ // Can't fold the ordered check if we're only testing for snan or qnan
+ // individually.
+ if ((FPTestMask & fcNan) != fcNan)
+ OrderedFPTestMask = FPTestMask;
+
+ const bool IsOrdered = FPTestMask == OrderedFPTestMask;
+
if (std::optional<bool> IsCmp0 =
isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
IsCmp0 && (isCondCodeLegalOrCustom(
@@ -8618,18 +8628,29 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
return DAG.getSetCC(DL, ResultVT, Op, Op,
IsInvertedFP ? ISD::SETO : ISD::SETUO);
- bool IsOrderedInf = FPTestMask == fcInf;
- if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
- isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
- : UnorderedCmpOpcode,
- OperandVT.getScalarType().getSimpleVT()) &&
- isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+ if (OrderedFPTestMask == fcInf &&
+ isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
+ : UnorderedCmpOpcode,
+ OperandVT.getScalarType().getSimpleVT())) {
// isinf(x) --> fabs(x) == inf
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
SDValue Inf =
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
- IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
+ IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
+ }
+
+ if (OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) {
+ // isposinf(x) --> x == inf
+ // isneginf(x) --> x == -inf
+ // isposinf(x) || nan --> x u== inf
+ // isneginf(x) || nan --> x u== -inf
+
+ SDValue Inf = DAG.getConstantFP(
+ APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
+ OperandVT);
+ return DAG.getSetCC(DL, ResultVT, Op, Inf,
+ IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
}
}
diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
index 52d294ca01720..56d3ba7cd7b2b 100644
--- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
@@ -265,23 +265,24 @@ entry:
define i1 @is_posinf_f80(x86_fp80 %x) nounwind {
; X86-LABEL: is_posinf_f80:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl $32767, %eax # imm = 0x7FFF
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: sete %al
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: is_posinf_f80:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; X64-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: xorq $32767, %rax # imm = 0x7FFF
-; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: sete %al
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; X64-NEXT: fxch %st(1)
+; X64-NEXT: fucompi %st(1), %st
+; X64-NEXT: fstp %st(0)
+; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 512) ; 0x200 = "+inf"
@@ -291,23 +292,22 @@ entry:
define i1 @is_neginf_f80(x86_fp80 %x) nounwind {
; X86-LABEL: is_neginf_f80:
; X86: # %bb.0: # %entry
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl $65535, %eax # imm = 0xFFFF
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: sete %al
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: is_neginf_f80:
; X64: # %bb.0: # %entry
-; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT: xorq $65535, %rax # imm = 0xFFFF
-; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; X64-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete %al
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; X64-NEXT: fucompi %st(1), %st
+; X64-NEXT: fstp %st(0)
+; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 4) ; "-inf"
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 86cee3d2c8e69..ba7d13aca03d1 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -186,15 +186,20 @@ entry:
define i1 @is_plus_inf_f(float %x) {
; X86-LABEL: is_plus_inf_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
-; X86-NEXT: sete %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: is_plus_inf_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: sete %al
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
@@ -204,15 +209,20 @@ entry:
define i1 @is_minus_inf_f(float %x) {
; X86-LABEL: is_minus_inf_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
-; X86-NEXT: sete %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: is_minus_inf_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X64-NEXT: sete %al
+; X64-NEXT: movss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: ucomiss %xmm0, %xmm1
+; X64-NEXT: setae %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
@@ -222,15 +232,20 @@ entry:
define i1 @not_is_minus_inf_f(float %x) {
; X86-LABEL: not_is_minus_inf_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
-; X86-NEXT: setne %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: not_is_minus_inf_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X64-NEXT: setne %al
+; X64-NEXT: movss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: ucomiss %xmm0, %xmm1
+; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
@@ -2116,24 +2131,19 @@ entry:
define i1 @is_plus_inf_or_nan_f(float %x) {
; X86-LABEL: is_plus_inf_or_nan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: sete %cl
-; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X86-NEXT: setge %al
-; X86-NEXT: orb %cl, %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: is_plus_inf_or_nan_f:
; X64: # %bb.0:
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: sete %cl
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X64-NEXT: setge %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: sete %al
; X64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 515) ; 0x200|0x3 = "+inf|nan"
ret i1 %class
@@ -2142,24 +2152,19 @@ define i1 @is_plus_inf_or_nan_f(float %x) {
define i1 @is_minus_inf_or_nan_f(float %x) {
; X86-LABEL: is_minus_inf_or_nan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X86-NEXT: sete %cl
-; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X86-NEXT: setge %al
-; X86-NEXT: orb %cl, %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: is_minus_inf_or_nan_f:
; X64: # %bb.0:
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X64-NEXT: sete %cl
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X64-NEXT: setge %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: sete %al
; X64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 7) ; "-inf|nan"
ret i1 %class
@@ -2168,24 +2173,19 @@ define i1 @is_minus_inf_or_nan_f(float %x) {
define i1 @not_is_plus_inf_or_nan_f(float %x) {
; X86-LABEL: not_is_plus_inf_or_nan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X86-NEXT: sete %cl
-; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
-; X86-NEXT: orb %cl, %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: not_is_plus_inf_or_nan_f:
; X64: # %bb.0:
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X64-NEXT: sete %cl
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setne %al
; X64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 508) ; ~(0x200|0x3) = "~(+inf|nan)"
ret i1 %class
@@ -2194,24 +2194,19 @@ define i1 @not_is_plus_inf_or_nan_f(float %x) {
define i1 @not_is_minus_inf_or_nan_f(float %x) {
; X86-LABEL: not_is_minus_inf_or_nan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: sete %cl
-; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
-; X86-NEXT: orb %cl, %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: not_is_minus_inf_or_nan_f:
; X64: # %bb.0:
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: sete %cl
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setne %al
; X64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1016) ; "~(-inf|nan)"
ret i1 %class
|
37ecc7b
to
1b48c68
Compare
Just as with #100378, the changes in the tests demonstrate that the produced code becomes worse. In what cases does this patch make an improvement? Can it be limited to such cases? |
1e5da52
to
b62d261
Compare
1b48c68
to
fc46244
Compare
b62d261
to
1246ce4
Compare
fc46244
to
6226f31
Compare
1246ce4
to
f0ea19f
Compare
6226f31
to
eaf47a2
Compare
f0ea19f
to
238f697
Compare
eaf47a2
to
7d48a38
Compare
238f697
to
72509a7
Compare
7d48a38
to
f4df5b3
Compare
InstCombine also should have taken care of this, but this should be helpful when the fcmp based lowering strategy tries to combine multiple tests.
f4df5b3
to
226d977
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM (although it'd have been nice if we could check the comparison value as well instead of it being lost behind a constant pool load - not your problem though).
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/5031 Here is the relevant piece of the build log for reference.
|
InstCombine also should have taken care of this, but this
should be helpful when the fcmp based lowering strategy tries
to combine multiple tests.