DAG: Handle lowering unordered compare with inf

arsenm · arsenm · commit 1246ce40eebc · 2024-07-26T18:47:30.000Z
Try to take advantage of the NaN-check behavior of fcmp when lowering
is.fpclass tests for inf|nan. x86_64 codegen improves; x86_32 codegen regresses.
diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -218,10 +218,15 @@ findSplitPointForStackProtector(MachineBasicBlock *BB,
 /// Evaluates if the specified FP class test is better performed as the inverse
 /// (i.e. fewer instructions should be required to lower it).  An example is the
 /// test "inf|normal|subnormal|zero", which is an inversion of "nan".
+///
 /// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
+///
+/// \param UseFCmp True if the caller intends to lower the test using
+/// floating-point compare instructions, which also check for nan.
+///
 /// \returns The inverted test, or fcNone, if inversion does not produce a
 /// simpler test.
-FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
+FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp);
 
 /// Assuming the instruction \p MI is going to be deleted, attempt to salvage
 /// debug users of \p MI by writing the effect of \p MI in a DIExpression.
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -173,8 +173,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
   return SplitPoint;
 }
 
-FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
+FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
   FPClassTest InvertedTest = ~Test;
+
   // Pick the direction with fewer tests
   // TODO: Handle more combinations of cases that can be handled together
   switch (static_cast<unsigned>(InvertedTest)) {
@@ -200,6 +201,14 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
   case fcSubnormal | fcZero:
   case fcSubnormal | fcZero | fcNan:
     return InvertedTest;
+  case fcInf | fcNan:
+    // If we're trying to use fcmp, we can take advantage of the nan check
+    // behavior of the compare (but this is more instructions in the integer
+    // expansion).
+    return UseFCmp ? InvertedTest : fcNone;
+  case fcFinite | fcNan:
+    // FIXME: Test is fcInf here, so this returns it uninverted; callers negate.
+    return fcInf;
   default:
     return fcNone;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8566,7 +8566,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
   // Degenerated cases.
   if (Test == fcNone)
     return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
-  if ((Test & fcAllFlags) == fcAllFlags)
+  if (Test == fcAllFlags)
     return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
 
   // PPC double double is a pair of doubles, of which the higher part determines
@@ -8577,14 +8577,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
     OperandVT = MVT::f64;
   }
 
-  // Some checks may be represented as inversion of simpler check, for example
-  // "inf|normal|subnormal|zero" => !"nan".
-  bool IsInverted = false;
-  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
-    IsInverted = true;
-    Test = InvertedCheck;
-  }
-
   // Floating-point type properties.
   EVT ScalarFloatVT = OperandVT.getScalarType();
   const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
@@ -8596,9 +8588,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
   if (Flags.hasNoFPExcept() &&
       isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
     FPClassTest FPTestMask = Test;
+    bool IsInvertedFP = false;
+
+    if (FPClassTest InvertedFPCheck =
+            invertFPClassTestIfSimpler(FPTestMask, true)) {
+      FPTestMask = InvertedFPCheck;
+      IsInvertedFP = true;
+    }
 
-    ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
-    ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
+    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
+    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
 
     // See if we can fold an | fcNan into an unordered compare.
     FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
@@ -8611,7 +8610,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
     const bool IsOrdered = FPTestMask == OrderedFPTestMask;
 
     if (std::optional<bool> IsCmp0 =
-            isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
+            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
         IsCmp0 && (isCondCodeLegalOrCustom(
                       *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                       OperandVT.getScalarType().getSimpleVT()))) {
@@ -8623,31 +8622,32 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                           *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
     }
 
-    if (Test == fcNan &&
-        isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
-                                OperandVT.getScalarType().getSimpleVT())) {
+    if (FPTestMask == fcNan &&
+        isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
+                                OperandVT.getScalarType().getSimpleVT()))
       return DAG.getSetCC(DL, ResultVT, Op, Op,
-                          IsInverted ? ISD::SETO : ISD::SETUO);
-    }
+                          IsInvertedFP ? ISD::SETO : ISD::SETUO);
 
-    if (Test == fcInf &&
-        isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+    bool IsOrderedInf = FPTestMask == fcInf;
+    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
+        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
+                                             : UnorderedCmpOpcode,
                                 OperandVT.getScalarType().getSimpleVT()) &&
         isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
       // isinf(x) --> fabs(x) == inf
       SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
       SDValue Inf =
           DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
       return DAG.getSetCC(DL, ResultVT, Abs, Inf,
-                          IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
     }
 
     if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
       // TODO: Could handle ordered case, but it produces worse code for
       // x86. Maybe handle ordered if fabs is free?
 
-      ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
-      ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
+      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
+      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
 
       if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
                                   OperandVT.getScalarType().getSimpleVT())) {
@@ -8664,6 +8664,15 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
     }
   }
 
+  // Some checks may be represented as inversion of simpler check, for example
+  // "inf|normal|subnormal|zero" => !"nan".
+  bool IsInverted = false;
+
+  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
+    Test = InvertedCheck;
+    IsInverted = true;
+  }
+
   // In the general case use integer operations.
   unsigned BitSize = OperandVT.getScalarSizeInBits();
   EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -624,7 +624,7 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_and_b32 s4, s4, 0x7fff
-; SI-NEXT:    s_cmpk_eq_i32 s4, 0x7c00
+; SI-NEXT:    s_cmpk_lg_i32 s4, 0x7c00
 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -1130,7 +1130,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1139,7 +1139,7 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
 ; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX8CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX8CHECK-NEXT:    v_cmp_ne_u16_e32 vcc, s4, v0
 ; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1148,23 +1148,23 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
 ; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX9CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX9CHECK-NEXT:    v_cmp_ne_u16_e32 vcc, s4, v0
 ; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10CHECK-LABEL: isinf_bf16:
 ; GFX10CHECK:       ; %bb.0:
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX10CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX10CHECK-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11CHECK-LABEL: isinf_bf16:
 ; GFX11CHECK:       ; %bb.0:
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11CHECK-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
 ; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516)  ; 0x204 = "inf"
@@ -2856,7 +2856,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
 ; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 ; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
 ; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
 ; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2865,7 +2865,7 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
 ; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX8CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX8CHECK-NEXT:    v_cmp_ne_u16_e32 vcc, s4, v0
 ; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -2874,23 +2874,23 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
 ; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f80
-; GFX9CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
+; GFX9CHECK-NEXT:    v_cmp_ne_u16_e32 vcc, s4, v0
 ; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10CHECK-LABEL: not_isfinite_or_nan_f:
 ; GFX10CHECK:       ; %bb.0: ; %entry
 ; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX10CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX10CHECK-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11CHECK-LABEL: not_isfinite_or_nan_f:
 ; GFX11CHECK:       ; %bb.0: ; %entry
 ; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11CHECK-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
 ; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1368,7 +1368,7 @@ define i1 @isinf_f16(half %x) nounwind {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -3159,7 +3159,7 @@ define i1 @not_isfinite_or_nan_f(half %x) {
 ; GFX7SELDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX7SELDAG-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX7SELDAG-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX7SELDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
+; GFX7SELDAG-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
 ; GFX7SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; GFX7SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -9,11 +9,10 @@ define zeroext i1 @abs_isinff(float %x) {
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; P8-NEXT:    xsabsdp 0, 1
-; P8-NEXT:    li 4, 1
 ; P8-NEXT:    lfs 1, .LCPI0_0@toc@l(3)
-; P8-NEXT:    li 3, 0
+; P8-NEXT:    li 3, 1
 ; P8-NEXT:    fcmpu 0, 0, 1
-; P8-NEXT:    iseleq 3, 4, 3
+; P8-NEXT:    iseleq 3, 0, 3
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: abs_isinff:
@@ -34,11 +33,10 @@ define zeroext i1 @abs_isinf(double %x) {
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
 ; P8-NEXT:    xsabsdp 0, 1
-; P8-NEXT:    li 4, 1
 ; P8-NEXT:    lfs 1, .LCPI1_0@toc@l(3)
-; P8-NEXT:    li 3, 0
+; P8-NEXT:    li 3, 1
 ; P8-NEXT:    fcmpu 0, 0, 1
-; P8-NEXT:    iseleq 3, 4, 3
+; P8-NEXT:    iseleq 3, 0, 3
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: abs_isinf:
@@ -67,8 +65,8 @@ define zeroext i1 @abs_isinfq(fp128 %x) {
 ; P8-NEXT:    clrldi 4, 4, 1
 ; P8-NEXT:    xor 4, 4, 5
 ; P8-NEXT:    or 3, 3, 4
-; P8-NEXT:    cntlzd 3, 3
-; P8-NEXT:    rldicl 3, 3, 58, 63
+; P8-NEXT:    addic 4, 3, -1
+; P8-NEXT:    subfe 3, 4, 3
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: abs_isinfq:
@@ -173,7 +171,8 @@ define <4 x i1> @abs_isinfv4f32(<4 x float> %x) {
 ; P8-NEXT:    xvabssp 0, 34
 ; P8-NEXT:    addi 3, 3, .LCPI6_0@toc@l
 ; P8-NEXT:    lxvd2x 1, 0, 3
-; P8-NEXT:    xvcmpeqsp 34, 0, 1
+; P8-NEXT:    xvcmpeqsp 0, 0, 1
+; P8-NEXT:    xxlnor 34, 0, 0
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: abs_isinfv4f32:
@@ -182,7 +181,8 @@ define <4 x i1> @abs_isinfv4f32(<4 x float> %x) {
 ; P9-NEXT:    xvabssp 0, 34
 ; P9-NEXT:    addi 3, 3, .LCPI6_0@toc@l
 ; P9-NEXT:    lxv 1, 0(3)
-; P9-NEXT:    xvcmpeqsp 34, 0, 1
+; P9-NEXT:    xvcmpeqsp 0, 0, 1
+; P9-NEXT:    xxlnor 34, 0, 0
 ; P9-NEXT:    blr
 entry:
   %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
@@ -198,6 +198,7 @@ define <2 x i1> @abs_isinfv2f64(<2 x double> %x) {
 ; P8-NEXT:    addi 3, 3, .LCPI7_0@toc@l
 ; P8-NEXT:    lxvd2x 1, 0, 3
 ; P8-NEXT:    xvcmpeqdp 34, 0, 1
+; P8-NEXT:    xxlnor 34, 34, 34
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: abs_isinfv2f64:
@@ -207,6 +208,7 @@ define <2 x i1> @abs_isinfv2f64(<2 x double> %x) {
 ; P9-NEXT:    addi 3, 3, .LCPI7_0@toc@l
 ; P9-NEXT:    lxv 1, 0(3)
 ; P9-NEXT:    xvcmpeqdp 34, 0, 1
+; P9-NEXT:    xxlnor 34, 34, 34
 ; P9-NEXT:    blr
 entry:
   %0 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
diff --git a/llvm/test/CodeGen/PowerPC/is_fpclass.ll b/llvm/test/CodeGen/PowerPC/is_fpclass.ll
@@ -117,10 +117,12 @@ define i1 @isinf_float(float %x) nounwind {
 define i1 @isinf_ppc_fp128(ppc_fp128 %x) nounwind {
 ; CHECK-LABEL: isinf_ppc_fp128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xststdcdp 0, 1, 48
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    li 4, 1
-; CHECK-NEXT:    iseleq 3, 4, 3
+; CHECK-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; CHECK-NEXT:    xsabsdp 0, 1
+; CHECK-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    fcmpu 0, 0, 1
+; CHECK-NEXT:    iseleq 3, 0, 3
 ; CHECK-NEXT:    blr
   %1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 516)  ; 0x204 = "inf"
   ret i1 %1
diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
@@ -244,7 +244,7 @@ define i1 @is_inf_f80(x86_fp80 %x) nounwind {
 ; X86-NEXT:    andl $32767, %eax # imm = 0x7FFF
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    sete %al
+; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: is_inf_f80:
@@ -255,7 +255,7 @@ define i1 @is_inf_f80(x86_fp80 %x) nounwind {
 ; X64-NEXT:    xorq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
 ; X64-NEXT:    orq %rcx, %rax
-; X64-NEXT:    sete %al
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 516)  ; 0x204 = "inf"
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll