Skip to content

Commit 574701f

Browse files
committed
DAG: Lower is.fpclass fcSubnormal|fcZero to fabs(x) < smallest_normal
This produces better code on x86_64, but only in the unordered case, so the ordered case is left unhandled for now. It is not clear what the exact condition should be to avoid the regression: requiring fabs to be free might be enough, or it may require legality checks for the alternative integer expansion.
1 parent 257cb80 commit 574701f

File tree

2 files changed

+45
-28
lines changed

2 files changed

+45
-28
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8677,6 +8677,27 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86778677
return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
86788678
}
86798679
}
8680+
8681+
if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8682+
// TODO: Could handle ordered case, but it produces worse code for
8683+
// x86. Maybe handle ordered if fabs is free?
8684+
8685+
ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8686+
ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
8687+
8688+
if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
8689+
OperandVT.getScalarType().getSimpleVT())) {
8690+
// (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
8691+
8692+
// TODO: Maybe only makes sense if fabs is free. Integer test of
8693+
// exponent bits seems better for x86.
8694+
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8695+
SDValue SmallestNormal = DAG.getConstantFP(
8696+
APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8697+
return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
8698+
IsOrdered ? OrderedOp : UnorderedOp);
8699+
}
8700+
}
86808701
}
86818702

86828703
// Some checks may be represented as inversion of simpler check, for example

llvm/test/CodeGen/X86/is_fpclass.ll

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2597,24 +2597,22 @@ define i1 @issubnormal_or_nan_f(float %x) {
25972597
define i1 @issubnormal_or_zero_or_nan_f(float %x) {
25982598
; X86-LABEL: issubnormal_or_zero_or_nan_f:
25992599
; X86: # %bb.0:
2600-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2601-
; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000
2602-
; X86-NEXT: sete %cl
2603-
; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2604-
; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
2605-
; X86-NEXT: setge %al
2606-
; X86-NEXT: orb %cl, %al
2600+
; X86-NEXT: flds {{[0-9]+}}(%esp)
2601+
; X86-NEXT: fabs
2602+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
2603+
; X86-NEXT: fxch %st(1)
2604+
; X86-NEXT: fucompp
2605+
; X86-NEXT: fnstsw %ax
2606+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
2607+
; X86-NEXT: sahf
2608+
; X86-NEXT: setb %al
26072609
; X86-NEXT: retl
26082610
;
26092611
; X64-LABEL: issubnormal_or_zero_or_nan_f:
26102612
; X64: # %bb.0:
2611-
; X64-NEXT: movd %xmm0, %eax
2612-
; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
2613-
; X64-NEXT: sete %cl
2614-
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2615-
; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
2616-
; X64-NEXT: setge %al
2617-
; X64-NEXT: orb %cl, %al
2613+
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2614+
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2615+
; X64-NEXT: setb %al
26182616
; X64-NEXT: retq
26192617
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan"
26202618
ret i1 %class
@@ -2768,24 +2766,22 @@ define i1 @not_issubnormal_or_nan_f(float %x) {
27682766
define i1 @not_issubnormal_or_zero_or_nan_f(float %x) {
27692767
; X86-LABEL: not_issubnormal_or_zero_or_nan_f:
27702768
; X86: # %bb.0:
2771-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2772-
; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000
2773-
; X86-NEXT: setne %cl
2774-
; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2775-
; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
2776-
; X86-NEXT: setl %al
2777-
; X86-NEXT: andb %cl, %al
2769+
; X86-NEXT: flds {{[0-9]+}}(%esp)
2770+
; X86-NEXT: fabs
2771+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
2772+
; X86-NEXT: fxch %st(1)
2773+
; X86-NEXT: fucompp
2774+
; X86-NEXT: fnstsw %ax
2775+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
2776+
; X86-NEXT: sahf
2777+
; X86-NEXT: setae %al
27782778
; X86-NEXT: retl
27792779
;
27802780
; X64-LABEL: not_issubnormal_or_zero_or_nan_f:
27812781
; X64: # %bb.0:
2782-
; X64-NEXT: movd %xmm0, %eax
2783-
; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
2784-
; X64-NEXT: setne %cl
2785-
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2786-
; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
2787-
; X64-NEXT: setl %al
2788-
; X64-NEXT: andb %cl, %al
2782+
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2783+
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2784+
; X64-NEXT: setae %al
27892785
; X64-NEXT: retq
27902786
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 780) ; ~(0xf0|0x3) = ~"subnormal|zero|nan"
27912787
ret i1 %class

0 commit comments

Comments
 (0)