Skip to content

Commit 1246ce4

Browse files
committed
DAG: Handle lowering unordered compare with inf
Try to take advantage of the NaN-check behavior of fcmp when lowering these class tests. The generated code looks better on x86_64 and worse on x86_32.
1 parent 361d4cf commit 1246ce4

File tree

10 files changed

+126
-99
lines changed

10 files changed

+126
-99
lines changed

llvm/include/llvm/CodeGen/CodeGenCommonISel.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,10 +218,15 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
218218
/// Evaluates if the specified FP class test is better performed as the inverse
219219
/// (i.e. fewer instructions should be required to lower it). An example is the
220220
/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
221+
///
221222
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
223+
///
224+
/// \param UseFCmp The intention is to perform the comparison using
225+
/// floating-point compare instructions which check for nan.
226+
///
222227
/// \returns The inverted test, or fcNone, if inversion does not produce a
223228
/// simpler test.
224-
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
229+
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp);
225230

226231
/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
227232
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.

llvm/lib/CodeGen/CodeGenCommonISel.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
173173
return SplitPoint;
174174
}
175175

176-
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
176+
FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
177177
FPClassTest InvertedTest = ~Test;
178+
178179
// Pick the direction with fewer tests
179180
// TODO: Handle more combinations of cases that can be handled together
180181
switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,14 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
200201
case fcSubnormal | fcZero:
201202
case fcSubnormal | fcZero | fcNan:
202203
return InvertedTest;
204+
case fcInf | fcNan:
205+
// If we're trying to use fcmp, we can take advantage of the nan check
206+
// behavior of the compare (but this is more instructions in the integer
207+
// expansion).
208+
return UseFCmp ? InvertedTest : fcNone;
209+
case fcFinite | fcNan:
210+
// Inversion of fcInf, which can be done in a combined check.
211+
return fcInf;
203212
default:
204213
return fcNone;
205214
}

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8566,7 +8566,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
85668566
// Degenerated cases.
85678567
if (Test == fcNone)
85688568
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8569-
if ((Test & fcAllFlags) == fcAllFlags)
8569+
if (Test == fcAllFlags)
85708570
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
85718571

85728572
// PPC double double is a pair of doubles, of which the higher part determines
@@ -8577,14 +8577,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
85778577
OperandVT = MVT::f64;
85788578
}
85798579

8580-
// Some checks may be represented as inversion of simpler check, for example
8581-
// "inf|normal|subnormal|zero" => !"nan".
8582-
bool IsInverted = false;
8583-
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8584-
IsInverted = true;
8585-
Test = InvertedCheck;
8586-
}
8587-
85888580
// Floating-point type properties.
85898581
EVT ScalarFloatVT = OperandVT.getScalarType();
85908582
const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8596,9 +8588,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
85968588
if (Flags.hasNoFPExcept() &&
85978589
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
85988590
FPClassTest FPTestMask = Test;
8591+
bool IsInvertedFP = false;
8592+
8593+
if (FPClassTest InvertedFPCheck =
8594+
invertFPClassTestIfSimpler(FPTestMask, true)) {
8595+
FPTestMask = InvertedFPCheck;
8596+
IsInvertedFP = true;
8597+
}
85998598

8600-
ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8601-
ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8599+
ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8600+
ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
86028601

86038602
// See if we can fold an | fcNan into an unordered compare.
86048603
FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
@@ -8611,7 +8610,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86118610
const bool IsOrdered = FPTestMask == OrderedFPTestMask;
86128611

86138612
if (std::optional<bool> IsCmp0 =
8614-
isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8613+
isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
86158614
IsCmp0 && (isCondCodeLegalOrCustom(
86168615
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
86178616
OperandVT.getScalarType().getSimpleVT()))) {
@@ -8623,31 +8622,32 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86238622
*IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
86248623
}
86258624

8626-
if (Test == fcNan &&
8627-
isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
8628-
OperandVT.getScalarType().getSimpleVT())) {
8625+
if (FPTestMask == fcNan &&
8626+
isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
8627+
OperandVT.getScalarType().getSimpleVT()))
86298628
return DAG.getSetCC(DL, ResultVT, Op, Op,
8630-
IsInverted ? ISD::SETO : ISD::SETUO);
8631-
}
8629+
IsInvertedFP ? ISD::SETO : ISD::SETUO);
86328630

8633-
if (Test == fcInf &&
8634-
isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8631+
bool IsOrderedInf = FPTestMask == fcInf;
8632+
if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8633+
isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8634+
: UnorderedCmpOpcode,
86358635
OperandVT.getScalarType().getSimpleVT()) &&
86368636
isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
86378637
// isinf(x) --> fabs(x) == inf
86388638
SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
86398639
SDValue Inf =
86408640
DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
86418641
return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8642-
IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8642+
IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
86438643
}
86448644

86458645
if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
86468646
// TODO: Could handle ordered case, but it produces worse code for
86478647
// x86. Maybe handle ordered if fabs is free?
86488648

8649-
ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
8650-
ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
8649+
ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8650+
ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
86518651

86528652
if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
86538653
OperandVT.getScalarType().getSimpleVT())) {
@@ -8664,6 +8664,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
86648664
}
86658665
}
86668666

8667+
// Some checks may be represented as inversion of simpler check, for example
8668+
// "inf|normal|subnormal|zero" => !"nan".
8669+
bool IsInverted = false;
8670+
8671+
if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
8672+
Test = InvertedCheck;
8673+
IsInverted = true;
8674+
}
8675+
86678676
// In the general case use integer operations.
86688677
unsigned BitSize = OperandVT.getScalarSizeInBits();
86698678
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);

llvm/test/CodeGen/AMDGPU/fp-classify.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou
624624
; SI-NEXT: s_mov_b32 s2, -1
625625
; SI-NEXT: s_waitcnt lgkmcnt(0)
626626
; SI-NEXT: s_and_b32 s4, s4, 0x7fff
627-
; SI-NEXT: s_cmpk_eq_i32 s4, 0x7c00
627+
; SI-NEXT: s_cmpk_lg_i32 s4, 0x7c00
628628
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
629629
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
630630
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0

llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
11301130
; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
11311131
; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
11321132
; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
1133-
; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
1133+
; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
11341134
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
11351135
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
11361136
;
@@ -1139,7 +1139,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
11391139
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11401140
; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
11411141
; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
1142-
; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
1142+
; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
11431143
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
11441144
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
11451145
;
@@ -1148,23 +1148,23 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
11481148
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11491149
; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
11501150
; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
1151-
; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
1151+
; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
11521152
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
11531153
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
11541154
;
11551155
; GFX10CHECK-LABEL: isinf_bf16:
11561156
; GFX10CHECK: ; %bb.0:
11571157
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11581158
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1159-
; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
1159+
; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
11601160
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
11611161
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
11621162
;
11631163
; GFX11CHECK-LABEL: isinf_bf16:
11641164
; GFX11CHECK: ; %bb.0:
11651165
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11661166
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1167-
; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
1167+
; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
11681168
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
11691169
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
11701170
%1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516) ; 0x204 = "inf"
@@ -2856,7 +2856,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
28562856
; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0
28572857
; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
28582858
; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
2859-
; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
2859+
; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
28602860
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
28612861
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
28622862
;
@@ -2865,7 +2865,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
28652865
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28662866
; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
28672867
; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
2868-
; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
2868+
; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
28692869
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
28702870
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
28712871
;
@@ -2874,23 +2874,23 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
28742874
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28752875
; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
28762876
; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
2877-
; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
2877+
; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0
28782878
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
28792879
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
28802880
;
28812881
; GFX10CHECK-LABEL: not_isfinite_or_nan_f:
28822882
; GFX10CHECK: ; %bb.0: ; %entry
28832883
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28842884
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2885-
; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
2885+
; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
28862886
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
28872887
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
28882888
;
28892889
; GFX11CHECK-LABEL: not_isfinite_or_nan_f:
28902890
; GFX11CHECK: ; %bb.0: ; %entry
28912891
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28922892
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
2893-
; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
2893+
; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
28942894
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
28952895
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
28962896
entry:

llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,7 +1368,7 @@ define i1 @isinf_f16(half %x) nounwind {
13681368
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
13691369
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
13701370
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1371-
; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
1371+
; GFX7SELDAG-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
13721372
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
13731373
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
13741374
;
@@ -3159,7 +3159,7 @@ define i1 @not_isfinite_or_nan_f(half %x) {
31593159
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
31603160
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
31613161
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
3162-
; GFX7SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
3162+
; GFX7SELDAG-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
31633163
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
31643164
; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31]
31653165
;

llvm/test/CodeGen/PowerPC/fp-classify.ll

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,10 @@ define zeroext i1 @abs_isinff(float %x) {
99
; P8: # %bb.0: # %entry
1010
; P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha
1111
; P8-NEXT: xsabsdp 0, 1
12-
; P8-NEXT: li 4, 1
1312
; P8-NEXT: lfs 1, .LCPI0_0@toc@l(3)
14-
; P8-NEXT: li 3, 0
13+
; P8-NEXT: li 3, 1
1514
; P8-NEXT: fcmpu 0, 0, 1
16-
; P8-NEXT: iseleq 3, 4, 3
15+
; P8-NEXT: iseleq 3, 0, 3
1716
; P8-NEXT: blr
1817
;
1918
; P9-LABEL: abs_isinff:
@@ -34,11 +33,10 @@ define zeroext i1 @abs_isinf(double %x) {
3433
; P8: # %bb.0: # %entry
3534
; P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha
3635
; P8-NEXT: xsabsdp 0, 1
37-
; P8-NEXT: li 4, 1
3836
; P8-NEXT: lfs 1, .LCPI1_0@toc@l(3)
39-
; P8-NEXT: li 3, 0
37+
; P8-NEXT: li 3, 1
4038
; P8-NEXT: fcmpu 0, 0, 1
41-
; P8-NEXT: iseleq 3, 4, 3
39+
; P8-NEXT: iseleq 3, 0, 3
4240
; P8-NEXT: blr
4341
;
4442
; P9-LABEL: abs_isinf:
@@ -67,8 +65,8 @@ define zeroext i1 @abs_isinfq(fp128 %x) {
6765
; P8-NEXT: clrldi 4, 4, 1
6866
; P8-NEXT: xor 4, 4, 5
6967
; P8-NEXT: or 3, 3, 4
70-
; P8-NEXT: cntlzd 3, 3
71-
; P8-NEXT: rldicl 3, 3, 58, 63
68+
; P8-NEXT: addic 4, 3, -1
69+
; P8-NEXT: subfe 3, 4, 3
7270
; P8-NEXT: blr
7371
;
7472
; P9-LABEL: abs_isinfq:
@@ -173,7 +171,8 @@ define <4 x i1> @abs_isinfv4f32(<4 x float> %x) {
173171
; P8-NEXT: xvabssp 0, 34
174172
; P8-NEXT: addi 3, 3, .LCPI6_0@toc@l
175173
; P8-NEXT: lxvd2x 1, 0, 3
176-
; P8-NEXT: xvcmpeqsp 34, 0, 1
174+
; P8-NEXT: xvcmpeqsp 0, 0, 1
175+
; P8-NEXT: xxlnor 34, 0, 0
177176
; P8-NEXT: blr
178177
;
179178
; P9-LABEL: abs_isinfv4f32:
@@ -182,7 +181,8 @@ define <4 x i1> @abs_isinfv4f32(<4 x float> %x) {
182181
; P9-NEXT: xvabssp 0, 34
183182
; P9-NEXT: addi 3, 3, .LCPI6_0@toc@l
184183
; P9-NEXT: lxv 1, 0(3)
185-
; P9-NEXT: xvcmpeqsp 34, 0, 1
184+
; P9-NEXT: xvcmpeqsp 0, 0, 1
185+
; P9-NEXT: xxlnor 34, 0, 0
186186
; P9-NEXT: blr
187187
entry:
188188
%0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -198,6 +198,7 @@ define <2 x i1> @abs_isinfv2f64(<2 x double> %x) {
198198
; P8-NEXT: addi 3, 3, .LCPI7_0@toc@l
199199
; P8-NEXT: lxvd2x 1, 0, 3
200200
; P8-NEXT: xvcmpeqdp 34, 0, 1
201+
; P8-NEXT: xxlnor 34, 34, 34
201202
; P8-NEXT: blr
202203
;
203204
; P9-LABEL: abs_isinfv2f64:
@@ -207,6 +208,7 @@ define <2 x i1> @abs_isinfv2f64(<2 x double> %x) {
207208
; P9-NEXT: addi 3, 3, .LCPI7_0@toc@l
208209
; P9-NEXT: lxv 1, 0(3)
209210
; P9-NEXT: xvcmpeqdp 34, 0, 1
211+
; P9-NEXT: xxlnor 34, 34, 34
210212
; P9-NEXT: blr
211213
entry:
212214
%0 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)

llvm/test/CodeGen/PowerPC/is_fpclass.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,12 @@ define i1 @isinf_float(float %x) nounwind {
117117
define i1 @isinf_ppc_fp128(ppc_fp128 %x) nounwind {
118118
; CHECK-LABEL: isinf_ppc_fp128:
119119
; CHECK: # %bb.0:
120-
; CHECK-NEXT: xststdcdp 0, 1, 48
121-
; CHECK-NEXT: li 3, 0
122-
; CHECK-NEXT: li 4, 1
123-
; CHECK-NEXT: iseleq 3, 4, 3
120+
; CHECK-NEXT: addis 3, 2, .LCPI9_0@toc@ha
121+
; CHECK-NEXT: xsabsdp 0, 1
122+
; CHECK-NEXT: lfs 1, .LCPI9_0@toc@l(3)
123+
; CHECK-NEXT: li 3, 1
124+
; CHECK-NEXT: fcmpu 0, 0, 1
125+
; CHECK-NEXT: iseleq 3, 0, 3
124126
; CHECK-NEXT: blr
125127
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 516) ; 0x204 = "inf"
126128
ret i1 %1

llvm/test/CodeGen/X86/is_fpclass-fp80.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ define i1 @is_inf_f80(x86_fp80 %x) nounwind {
244244
; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
245245
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
246246
; X86-NEXT: orl %ecx, %eax
247-
; X86-NEXT: sete %al
247+
; X86-NEXT: setne %al
248248
; X86-NEXT: retl
249249
;
250250
; X64-LABEL: is_inf_f80:
@@ -255,7 +255,7 @@ define i1 @is_inf_f80(x86_fp80 %x) nounwind {
255255
; X64-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
256256
; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
257257
; X64-NEXT: orq %rcx, %rax
258-
; X64-NEXT: sete %al
258+
; X64-NEXT: setne %al
259259
; X64-NEXT: retq
260260
entry:
261261
%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 516) ; 0x204 = "inf"

0 commit comments

Comments
 (0)