-
Notifications
You must be signed in to change notification settings - Fork 13.5k
DAG: Lower is.fpclass fcSubnormal|fcZero to fabs(x) < smallest_normal #100390
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAG: Lower is.fpclass fcSubnormal|fcZero to fabs(x) < smallest_normal #100390
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: Matt Arsenault (arsenm) ChangesProduces better code on x86_64 only in the unordered case. Not Full diff: https://github.com/llvm/llvm-project/pull/100390.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4227f7bec9ec8..0dd17238cbcd2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8677,6 +8677,27 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
}
}
+
+ if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
+ // TODO: Could handle ordered case, but it produces worse code for
+ // x86. Maybe handle ordered if fabs is free?
+
+ ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
+ ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
+
+ if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
+ OperandVT.getScalarType().getSimpleVT())) {
+ // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
+
+ // TODO: Maybe only makes sense if fabs is free. Integer test of
+ // exponent bits seems better for x86.
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
+ SDValue SmallestNormal = DAG.getConstantFP(
+ APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
+ return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
+ IsOrdered ? OrderedOp : UnorderedOp);
+ }
+ }
}
// Some checks may be represented as inversion of simpler check, for example
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index ba7d13aca03d1..1dce80f6bd9db 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -2597,24 +2597,22 @@ define i1 @issubnormal_or_nan_f(float %x) {
define i1 @issubnormal_or_zero_or_nan_f(float %x) {
; X86-LABEL: issubnormal_or_zero_or_nan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: sete %cl
-; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X86-NEXT: setge %al
-; X86-NEXT: orb %cl, %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fabs
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: issubnormal_or_zero_or_nan_f:
; X64: # %bb.0:
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: sete %cl
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X64-NEXT: setge %al
-; X64-NEXT: orb %cl, %al
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setb %al
; X64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan"
ret i1 %class
@@ -2768,24 +2766,22 @@ define i1 @not_issubnormal_or_nan_f(float %x) {
define i1 @not_issubnormal_or_zero_or_nan_f(float %x) {
; X86-LABEL: not_issubnormal_or_zero_or_nan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setne %cl
-; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X86-NEXT: setl %al
-; X86-NEXT: andb %cl, %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fabs
+; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: not_issubnormal_or_zero_or_nan_f:
; X64: # %bb.0:
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setne %cl
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X64-NEXT: setl %al
-; X64-NEXT: andb %cl, %al
+; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: setae %al
; X64-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 780) ; ~(0xf0|0x3) = ~"subnormal|zero|nan"
ret i1 %class
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense to me.
For the ordered case I think this would only be profitable if fabs is free and you don't have integer "test"-style instructions.
Produces better code on x86_64 only in the unordered case. Not sure what the exact condition should be to avoid the regression. Free fabs might do it, or maybe requires legality checks for the alternative integer expansion.
574701f
to
4e2e236
Compare
Produces better code on x86_64 only in the unordered case. Not
sure what the exact condition should be to avoid the regression. Free
fabs might do it, or maybe requires legality checks for the alternative
integer expansion.