Skip to content

Commit 361d4cf

Browse files
authored
DAG: Lower is.fpclass fcSubnormal|fcZero to fabs(x) < smallest_normal (#100390)
This produces better code on x86_64 only in the unordered case. It is not yet clear what the exact condition should be to avoid the regression in the ordered case: checking whether fabs is free might be enough, or it may require legality checks for the alternative integer expansion.
1 parent 134b448 commit 361d4cf

File tree

2 files changed

+60
-30
lines changed

2 files changed

+60
-30
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

+36-2
Original file line numberDiff line numberDiff line change
@@ -8556,11 +8556,12 @@ static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
85568556
}
85578557

85588558
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8559-
FPClassTest Test, SDNodeFlags Flags,
8560-
const SDLoc &DL,
8559+
const FPClassTest OrigTestMask,
8560+
SDNodeFlags Flags, const SDLoc &DL,
85618561
SelectionDAG &DAG) const {
85628562
EVT OperandVT = Op.getValueType();
85638563
assert(OperandVT.isFloatingPoint());
8564+
FPClassTest Test = OrigTestMask;
85648565

85658566
// Degenerated cases.
85668567
if (Test == fcNone)
@@ -8594,9 +8595,21 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
85948595
// exceptions are ignored.
85958596
if (Flags.hasNoFPExcept() &&
85968597
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8598+
FPClassTest FPTestMask = Test;
8599+
85978600
ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
85988601
ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
85998602

8603+
// See if we can fold an | fcNan into an unordered compare.
8604+
FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8605+
8606+
// Can't fold the ordered check if we're only testing for snan or qnan
8607+
// individually.
8608+
if ((FPTestMask & fcNan) != fcNan)
8609+
OrderedFPTestMask = FPTestMask;
8610+
8611+
const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8612+
86008613
if (std::optional<bool> IsCmp0 =
86018614
isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
86028615
IsCmp0 && (isCondCodeLegalOrCustom(
@@ -8628,6 +8641,27 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86288641
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
86298642
IsInverted ? ISD::SETUNE : ISD::SETOEQ);
86308643
}
8644+
8645+
if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8646+
// TODO: Could handle ordered case, but it produces worse code for
8647+
// x86. Maybe handle ordered if fabs is free?
8648+
8649+
ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
8650+
ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
8651+
8652+
if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
8653+
OperandVT.getScalarType().getSimpleVT())) {
8654+
// (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
8655+
8656+
// TODO: Maybe only makes sense if fabs is free. Integer test of
8657+
// exponent bits seems better for x86.
8658+
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8659+
SDValue SmallestNormal = DAG.getConstantFP(
8660+
APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8661+
return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
8662+
IsOrdered ? OrderedOp : UnorderedOp);
8663+
}
8664+
}
86318665
}
86328666

86338667
// In the general case use integer operations.

llvm/test/CodeGen/X86/is_fpclass.ll

+24-28
Original file line numberDiff line numberDiff line change
@@ -2602,24 +2602,22 @@ define i1 @issubnormal_or_nan_f(float %x) {
26022602
define i1 @issubnormal_or_zero_or_nan_f(float %x) {
26032603
; X86-LABEL: issubnormal_or_zero_or_nan_f:
26042604
; X86: # %bb.0:
2605-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2606-
; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000
2607-
; X86-NEXT: sete %cl
2608-
; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2609-
; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
2610-
; X86-NEXT: setge %al
2611-
; X86-NEXT: orb %cl, %al
2605+
; X86-NEXT: flds {{[0-9]+}}(%esp)
2606+
; X86-NEXT: fabs
2607+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
2608+
; X86-NEXT: fxch %st(1)
2609+
; X86-NEXT: fucompp
2610+
; X86-NEXT: fnstsw %ax
2611+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
2612+
; X86-NEXT: sahf
2613+
; X86-NEXT: setb %al
26122614
; X86-NEXT: retl
26132615
;
26142616
; X64-LABEL: issubnormal_or_zero_or_nan_f:
26152617
; X64: # %bb.0:
2616-
; X64-NEXT: movd %xmm0, %eax
2617-
; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
2618-
; X64-NEXT: sete %cl
2619-
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2620-
; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
2621-
; X64-NEXT: setge %al
2622-
; X64-NEXT: orb %cl, %al
2618+
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2619+
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2620+
; X64-NEXT: setb %al
26232621
; X64-NEXT: retq
26242622
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 243) ; 0xf0|0x3 = "subnormal|zero|nan"
26252623
ret i1 %class
@@ -2773,24 +2771,22 @@ define i1 @not_issubnormal_or_nan_f(float %x) {
27732771
define i1 @not_issubnormal_or_zero_or_nan_f(float %x) {
27742772
; X86-LABEL: not_issubnormal_or_zero_or_nan_f:
27752773
; X86: # %bb.0:
2776-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2777-
; X86-NEXT: testl $2139095040, %eax # imm = 0x7F800000
2778-
; X86-NEXT: setne %cl
2779-
; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2780-
; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
2781-
; X86-NEXT: setl %al
2782-
; X86-NEXT: andb %cl, %al
2774+
; X86-NEXT: flds {{[0-9]+}}(%esp)
2775+
; X86-NEXT: fabs
2776+
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
2777+
; X86-NEXT: fxch %st(1)
2778+
; X86-NEXT: fucompp
2779+
; X86-NEXT: fnstsw %ax
2780+
; X86-NEXT: # kill: def $ah killed $ah killed $ax
2781+
; X86-NEXT: sahf
2782+
; X86-NEXT: setae %al
27832783
; X86-NEXT: retl
27842784
;
27852785
; X64-LABEL: not_issubnormal_or_zero_or_nan_f:
27862786
; X64: # %bb.0:
2787-
; X64-NEXT: movd %xmm0, %eax
2788-
; X64-NEXT: testl $2139095040, %eax # imm = 0x7F800000
2789-
; X64-NEXT: setne %cl
2790-
; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
2791-
; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
2792-
; X64-NEXT: setl %al
2793-
; X64-NEXT: andb %cl, %al
2787+
; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2788+
; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2789+
; X64-NEXT: setae %al
27942790
; X64-NEXT: retq
27952791
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 780) ; ~(0xf0|0x3) = ~"subnormal|zero|nan"
27962792
ret i1 %class

0 commit comments

Comments
 (0)