Skip to content

Commit fc3e6a8

Browse files
authored
DAG: Handle lowering unordered compare with inf (#100378)
Try to take advantage of the NaN-check behavior of fcmp. The generated code looks better on x86_64 but worse on x86_32.
1 parent 2f6e4ed commit fc3e6a8

File tree

5 files changed

+67
-45
lines changed

5 files changed

+67
-45
lines changed

llvm/include/llvm/CodeGen/CodeGenCommonISel.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,10 +218,14 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
218218
/// Evaluates if the specified FP class test is better performed as the inverse
219219
/// (i.e. fewer instructions should be required to lower it). An example is the
220220
/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
221+
///
221222
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
223+
/// \param UseFCmp The intention is to perform the comparison using
224+
/// floating-point compare instructions which check for nan.
225+
///
222226
/// \returns The inverted test, or fcNone, if inversion does not produce a
223227
/// simpler test.
224-
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
228+
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp);
225229

226230
/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
227231
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.

llvm/lib/CodeGen/CodeGenCommonISel.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
173173
return SplitPoint;
174174
}
175175

176-
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
176+
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
177177
FPClassTest InvertedTest = ~Test;
178+
178179
// Pick the direction with fewer tests
179180
// TODO: Handle more combinations of cases that can be handled together
180181
switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,11 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
200201
case fcSubnormal | fcZero:
201202
case fcSubnormal | fcZero | fcNan:
202203
return InvertedTest;
204+
case fcInf | fcNan:
205+
// If we're trying to use fcmp, we can take advantage of the nan check
206+
// behavior of the compare (but this is more instructions in the integer
207+
// expansion).
208+
return UseFCmp ? InvertedTest : fcNone;
203209
default:
204210
return fcNone;
205211
}

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8675,7 +8675,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86758675
// Degenerated cases.
86768676
if (Test == fcNone)
86778677
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8678-
if ((Test & fcAllFlags) == fcAllFlags)
8678+
if (Test == fcAllFlags)
86798679
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
86808680

86818681
// PPC double double is a pair of doubles, of which the higher part determines
@@ -8686,14 +8686,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86868686
OperandVT = MVT::f64;
86878687
}
86888688

8689-
// Some checks may be represented as inversion of simpler check, for example
8690-
// "inf|normal|subnormal|zero" => !"nan".
8691-
bool IsInverted = false;
8692-
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8693-
IsInverted = true;
8694-
Test = InvertedCheck;
8695-
}
8696-
86978689
// Floating-point type properties.
86988690
EVT ScalarFloatVT = OperandVT.getScalarType();
86998691
const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8705,9 +8697,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
87058697
if (Flags.hasNoFPExcept() &&
87068698
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
87078699
FPClassTest FPTestMask = Test;
8700+
bool IsInvertedFP = false;
8701+
8702+
if (FPClassTest InvertedFPCheck =
8703+
invertFPClassTestIfSimpler(FPTestMask, true)) {
8704+
FPTestMask = InvertedFPCheck;
8705+
IsInvertedFP = true;
8706+
}
87088707

8709-
ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8710-
ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8708+
ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8709+
ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
87118710

87128711
// See if we can fold an | fcNan into an unordered compare.
87138712
FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
@@ -8720,7 +8719,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
87208719
const bool IsOrdered = FPTestMask == OrderedFPTestMask;
87218720

87228721
if (std::optional<bool> IsCmp0 =
8723-
isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8722+
isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
87248723
IsCmp0 && (isCondCodeLegalOrCustom(
87258724
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
87268725
OperandVT.getScalarType().getSimpleVT()))) {
@@ -8732,31 +8731,35 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
87328731
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
87338732
}
87348733

8735-
if (Test == fcNan &&
8736-
isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
8737-
OperandVT.getScalarType().getSimpleVT())) {
8734+
if (FPTestMask == fcNan &&
8735+
isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8736+
OperandVT.getScalarType().getSimpleVT()))
87388737
return DAG.getSetCC(DL, ResultVT, Op, Op,
8739-
IsInverted ? ISD::SETO : ISD::SETUO);
8740-
}
8738+
IsInvertedFP ? ISD::SETO : ISD::SETUO);
87418739

8742-
if (Test == fcInf &&
8743-
isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8740+
bool IsOrderedInf = FPTestMask == fcInf;
8741+
if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8742+
isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8743+
: UnorderedCmpOpcode,
87448744
OperandVT.getScalarType().getSimpleVT()) &&
8745-
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
8745+
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType()) &&
8746+
(isOperationLegal(ISD::ConstantFP, OperandVT.getScalarType()) ||
8747+
(OperandVT.isVector() &&
8748+
isOperationLegalOrCustom(ISD::BUILD_VECTOR, OperandVT)))) {
87468749
// isinf(x) --> fabs(x) == inf
87478750
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
87488751
SDValue Inf =
87498752
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
87508753
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8751-
IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8754+
IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
87528755
}
87538756

87548757
if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
87558758
// TODO: Could handle ordered case, but it produces worse code for
87568759
// x86. Maybe handle ordered if fabs is free?
87578760

8758-
ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
8759-
ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
8761+
ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8762+
ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
87608763

87618764
if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
87628765
OperandVT.getScalarType().getSimpleVT())) {
@@ -8773,6 +8776,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
87738776
}
87748777
}
87758778

8779+
// Some checks may be represented as inversion of simpler check, for example
8780+
// "inf|normal|subnormal|zero" => !"nan".
8781+
bool IsInverted = false;
8782+
8783+
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
8784+
Test = InvertedCheck;
8785+
IsInverted = true;
8786+
}
8787+
87768788
// In the general case use integer operations.
87778789
unsigned BitSize = OperandVT.getScalarSizeInBits();
87788790
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);

llvm/test/CodeGen/AArch64/isinf.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ define i32 @replace_isinf_call_f16(half %x) {
2626
define i32 @replace_isinf_call_f32(float %x) {
2727
; CHECK-LABEL: replace_isinf_call_f32:
2828
; CHECK: // %bb.0:
29-
; CHECK-NEXT: fabs s0, s0
29+
; CHECK-NEXT: fmov w9, s0
3030
; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
31-
; CHECK-NEXT: fmov s1, w8
32-
; CHECK-NEXT: fcmp s0, s1
31+
; CHECK-NEXT: and w9, w9, #0x7fffffff
32+
; CHECK-NEXT: cmp w9, w8
3333
; CHECK-NEXT: cset w0, eq
3434
; CHECK-NEXT: ret
3535
%abs = tail call float @llvm.fabs.f32(float %x)
@@ -42,10 +42,10 @@ define i32 @replace_isinf_call_f32(float %x) {
4242
define i32 @replace_isinf_call_f64(double %x) {
4343
; CHECK-LABEL: replace_isinf_call_f64:
4444
; CHECK: // %bb.0:
45-
; CHECK-NEXT: fabs d0, d0
45+
; CHECK-NEXT: fmov x9, d0
4646
; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
47-
; CHECK-NEXT: fmov d1, x8
48-
; CHECK-NEXT: fcmp d0, d1
47+
; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
48+
; CHECK-NEXT: cmp x9, x8
4949
; CHECK-NEXT: cset w0, eq
5050
; CHECK-NEXT: ret
5151
%abs = tail call double @llvm.fabs.f64(double %x)

llvm/test/CodeGen/PowerPC/fp-classify.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
define zeroext i1 @abs_isinff(float %x) {
88
; P8-LABEL: abs_isinff:
99
; P8: # %bb.0: # %entry
10-
; P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha
11-
; P8-NEXT: xsabsdp 0, 1
12-
; P8-NEXT: li 4, 1
13-
; P8-NEXT: lfs 1, .LCPI0_0@toc@l(3)
14-
; P8-NEXT: li 3, 0
15-
; P8-NEXT: fcmpu 0, 0, 1
16-
; P8-NEXT: iseleq 3, 4, 3
10+
; P8-NEXT: xscvdpspn 0, 1
11+
; P8-NEXT: lis 4, 32640
12+
; P8-NEXT: mffprwz 3, 0
13+
; P8-NEXT: clrlwi 3, 3, 1
14+
; P8-NEXT: xor 3, 3, 4
15+
; P8-NEXT: cntlzw 3, 3
16+
; P8-NEXT: srwi 3, 3, 5
1717
; P8-NEXT: blr
1818
;
1919
; P9-LABEL: abs_isinff:
@@ -32,13 +32,13 @@ entry:
3232
define zeroext i1 @abs_isinf(double %x) {
3333
; P8-LABEL: abs_isinf:
3434
; P8: # %bb.0: # %entry
35-
; P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
36-
; P8-NEXT: xsabsdp 0, 1
37-
; P8-NEXT: li 4, 1
38-
; P8-NEXT: lfs 1, .LCPI1_0@toc@l(3)
39-
; P8-NEXT: li 3, 0
40-
; P8-NEXT: fcmpu 0, 0, 1
41-
; P8-NEXT: iseleq 3, 4, 3
35+
; P8-NEXT: mffprd 3, 1
36+
; P8-NEXT: li 4, 2047
37+
; P8-NEXT: rldic 4, 4, 52, 1
38+
; P8-NEXT: clrldi 3, 3, 1
39+
; P8-NEXT: xor 3, 3, 4
40+
; P8-NEXT: cntlzd 3, 3
41+
; P8-NEXT: rldicl 3, 3, 58, 63
4242
; P8-NEXT: blr
4343
;
4444
; P9-LABEL: abs_isinf:

0 commit comments

Comments
 (0)