diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5ce1013f30fd1..368593218bcc1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3467,12 +3467,51 @@ void SelectionDAGBuilder::visitICmp(const User &I) { setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } +SDValue SelectionDAGBuilder::lowerIsFpClass(Value *ClassVal, + FPClassTest ClassTest) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + SDLoc sdl = getCurSDLoc(); + + EVT DestVT = + TLI.getValueType(DL, CmpInst::makeCmpResultType(ClassVal->getType())); + EVT ArgVT = TLI.getValueType(DL, ClassVal->getType()); + MachineFunction &MF = DAG.getMachineFunction(); + const Function &F = MF.getFunction(); + SDValue Op = getValue(ClassVal); + SDNodeFlags Flags; + Flags.setNoFPExcept(!F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP)); + // If ISD::IS_FPCLASS should be expanded, do it right now, because the + // expansion can use illegal types. Making expansion early allows + // legalizing these types prior to selection. 
+ if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) + return TLI.expandIS_FPCLASS(DestVT, Op, ClassTest, Flags, sdl, DAG); + + SDValue Check = DAG.getTargetConstant(ClassTest, sdl, MVT::i32); + return DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags); +} + void SelectionDAGBuilder::visitFCmp(const User &I) { FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (const FCmpInst *FC = dyn_cast(&I)) + if (const FCmpInst *FC = dyn_cast(&I)) { predicate = FC->getPredicate(); - else if (const ConstantExpr *FC = dyn_cast(&I)) + + // Reverse the canonicalization if it is a FP class test + auto ShouldReverseTransform = [](FPClassTest ClassTest) { + return ClassTest == fcInf || ClassTest == (fcInf | fcNan); + }; + auto [ClassVal, ClassTest] = + fcmpToClassTest(predicate, *FC->getParent()->getParent(), + FC->getOperand(0), FC->getOperand(1)); + if (ClassVal && (ShouldReverseTransform(ClassTest) || + ShouldReverseTransform(~ClassTest))) { + setValue(&I, lowerIsFpClass(ClassVal, ClassTest)); + return; + } + } else if (const ConstantExpr *FC = dyn_cast(&I)) { predicate = FCmpInst::Predicate(FC->getPredicate()); + } + SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -6666,29 +6705,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res.getValue(0)); return; case Intrinsic::is_fpclass: { - const DataLayout DLayout = DAG.getDataLayout(); - EVT DestVT = TLI.getValueType(DLayout, I.getType()); - EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType()); - FPClassTest Test = static_cast( - cast(I.getArgOperand(1))->getZExtValue()); - MachineFunction &MF = DAG.getMachineFunction(); - const Function &F = MF.getFunction(); - SDValue Op = getValue(I.getArgOperand(0)); - SDNodeFlags Flags; - Flags.setNoFPExcept( - !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP)); - // If ISD::IS_FPCLASS should be expanded, do it right now, because the - // expansion can use illegal types. 
Making expansion early allows - // legalizing these types prior to selection. - if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) { - SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG); - setValue(&I, Result); - return; - } - - SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32); - SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags); - setValue(&I, V); + setValue(&I, + lowerIsFpClass( + I.getArgOperand(0), + static_cast( + cast(I.getArgOperand(1))->getZExtValue()))); return; } case Intrinsic::get_fpenv: { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 47657313cb6a3..dfc9369117c79 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -700,6 +700,7 @@ class SelectionDAGBuilder { MCSymbol *&BeginLabel); SDValue lowerEndEH(SDValue Chain, const InvokeInst *II, const BasicBlock *EHPadBB, MCSymbol *BeginLabel); + SDValue lowerIsFpClass(Value *ClassVal, FPClassTest ClassTest); }; /// This struct represents the registers (physical or virtual) diff --git a/llvm/test/CodeGen/AArch64/fpclass-test.ll b/llvm/test/CodeGen/AArch64/fpclass-test.ll new file mode 100644 index 0000000000000..e058c724d9e42 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fpclass-test.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s + +define i1 @test_is_inf_or_nan(double %arg) { +; CHECK-LABEL: test_is_inf_or_nan: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov x8, #9218868437227405311 // =0x7fefffffffffffff +; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp ueq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + 
+define i1 @test_is_not_inf_or_nan(double %arg) { +; CHECK-LABEL: test_is_not_inf_or_nan: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 +; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp one double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_inf(double %arg) { +; CHECK-LABEL: test_is_inf: +; CHECK: // %bb.0: +; CHECK-NEXT: fabs d0, d0 +; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp oeq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf(double %arg) { +; CHECK-LABEL: test_is_not_inf: +; CHECK: // %bb.0: +; CHECK-NEXT: fabs d0, d0 +; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp une double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define @test_vec_is_inf_or_nan( %arg) { +; CHECK-LABEL: test_vec_is_inf_or_nan: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, #0x7ff0000000000000 +; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff +; CHECK-NEXT: cmpge p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret + %abs = tail call @llvm.fabs.nxv2f64( %arg) + %ret = fcmp ueq %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_not_inf_or_nan( %arg) { +; CHECK-LABEL: test_vec_is_not_inf_or_nan: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, #0x7ff0000000000000 +; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff +; CHECK-NEXT: cmpgt p0.d, p0/z, z1.d, z0.d +; CHECK-NEXT: ret + %abs = tail call 
@llvm.fabs.nxv2f64( %arg) + %ret = fcmp one %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_inf( %arg) { +; CHECK-LABEL: test_vec_is_inf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: fabs z0.d, p0/m, z0.d +; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret + %abs = tail call @llvm.fabs.nxv2f64( %arg) + %ret = fcmp oeq %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_not_inf( %arg) { +; CHECK-LABEL: test_vec_is_not_inf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: fabs z0.d, p0/m, z0.d +; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ret + %abs = tail call @llvm.fabs.nxv2f64( %arg) + %ret = fcmp une %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define i1 @test_fp128_is_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_inf_or_nan: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #9223090561878065151 // =0x7ffeffffffffffff +; CHECK-NEXT: str q0, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr x9, [sp, #8] +; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_not_inf_or_nan: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #9223090561878065152 // =0x7fff000000000000 +; CHECK-NEXT: str q0, [sp, #-16]! 
+; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr x9, [sp, #8] +; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_inf(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_inf: +; CHECK: // %bb.0: +; CHECK-NEXT: str q0, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldp x9, x8, [sp], #16 +; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff +; CHECK-NEXT: eor x8, x8, #0x7fff000000000000 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_not_inf: +; CHECK: // %bb.0: +; CHECK-NEXT: str q0, [sp, #-16]! 
+; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldp x9, x8, [sp], #16 +; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff +; CHECK-NEXT: eor x8, x8, #0x7fff000000000000 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll index 458bd7eeba16c..834417b98743a 100644 --- a/llvm/test/CodeGen/AArch64/isinf.ll +++ b/llvm/test/CodeGen/AArch64/isinf.ll @@ -58,22 +58,14 @@ define i32 @replace_isinf_call_f64(double %x) { define i32 @replace_isinf_call_f128(fp128 %x) { ; CHECK-LABEL: replace_isinf_call_f128: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: ldrb w8, [sp, #15] -; CHECK-NEXT: and w8, w8, #0x7f -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: ldr q0, [sp] -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: bl __eqtf2 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #-16]! 
+; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldp x9, x8, [sp], #16 +; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff +; CHECK-NEXT: eor x8, x8, #0x7fff000000000000 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %abs = tail call fp128 @llvm.fabs.f128(fp128 %x) %cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll index 6fa7df913812a..ed9ce4d62383b 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll @@ -61,10 +61,10 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture % ; SI-NEXT: s_load_dword s0, s[0:1], 0xb ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000 +; SI-NEXT: v_mov_b32_e32 v0, 0x207 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cmp_nlg_f32_e64 s[0:1], |s0|, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -72,11 +72,11 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture % ; VI: ; %bb.0: ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000 +; VI-NEXT: v_mov_b32_e32 v0, 0x207 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_nlg_f32_e64 s[2:3], |s2|, v0 +; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -88,7 +88,7 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture % ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_mov_b32_e32 
v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_nlg_f32_e64 s2, 0x7f800000, |s2| +; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x207 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -143,25 +143,29 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o ; SI-LABEL: test_isfinite_pattern_0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb +; SI-NEXT: s_load_dword s2, s[0:1], 0xb ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x1f8 +; SI-NEXT: v_mov_b32_e32 v0, 0x1fb ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2 +; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isfinite_pattern_0: ; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x1f8 +; VI-NEXT: v_mov_b32_e32 v0, 0x1fb ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4 +; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0 +; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -173,8 +177,10 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 
0x1f8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cmp_o_f32_e64 s3, s2, s2 +; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1fb +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s2, s3, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_nop 0 @@ -349,13 +355,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000 +; SI-NEXT: v_mov_b32_e32 v0, 0x1fb ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s0 ; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2 -; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s3|, v0 -; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; SI-NEXT: v_cmp_class_f32_e32 vcc, s3, v0 +; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -363,11 +369,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur ; VI-LABEL: test_isfinite_not_pattern_2: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000 +; VI-NEXT: v_mov_b32_e32 v0, 0x1fb ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_cmp_o_f32_e64 s[4:5], s2, s2 -; VI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s3|, v0 -; VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3] +; VI-NEXT: v_cmp_class_f32_e32 vcc, s3, v0 +; VI-NEXT: s_and_b64 s[2:3], s[4:5], vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] ; VI-NEXT: v_mov_b32_e32 v1, s1 @@ -380,7 +386,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_cmp_o_f32_e64 s2, s2, s2 -; GFX11-NEXT: v_cmp_neq_f32_e64 s3, 0x7f800000, |s3| 
+; GFX11-NEXT: v_cmp_class_f32_e64 s3, s3, 0x1fb ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s2, s2, s3 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 @@ -405,11 +411,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur ; SI-NEXT: s_load_dword s2, s[0:1], 0xb ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000 +; SI-NEXT: v_mov_b32_e32 v0, 0x1fb ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_cmp_u_f32_e64 s[0:1], s2, s2 -; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s2|, v0 -; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -418,11 +424,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur ; VI: ; %bb.0: ; VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000 +; VI-NEXT: v_mov_b32_e32 v0, 0x1fb ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_cmp_u_f32_e64 s[2:3], s4, s4 -; VI-NEXT: v_cmp_neq_f32_e64 s[4:5], |s4|, v0 -; VI-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5] +; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0 +; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] ; VI-NEXT: v_mov_b32_e32 v1, s1 @@ -437,7 +443,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_cmp_u_f32_e64 s3, s2, s2 -; GFX11-NEXT: v_cmp_neq_f32_e64 s2, 0x7f800000, |s2| +; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1fb ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s2, s3, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 @@ -458,25 
+464,29 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o ; SI-LABEL: test_isfinite_pattern_4: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb +; SI-NEXT: s_load_dword s2, s[0:1], 0xb ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, 0x1f8 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2 +; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isfinite_pattern_4: ; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v0, 0x1f8 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4 +; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0 +; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -488,8 +498,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_o_f32_e64 s3, s2, s2 ; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1f8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s2, s3, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_nop 0 @@ -508,25 +520,29 @@ define 
amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1) ; SI-LABEL: test_isfinite_pattern_4_commute_and: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb +; SI-NEXT: s_load_dword s2, s[0:1], 0xb ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, 0x1f8 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2 +; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; SI-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isfinite_pattern_4_commute_and: ; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v0, 0x1f8 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4 +; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0 +; VI-NEXT: s_and_b64 s[2:3], vcc, s[2:3] ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -538,8 +554,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1) ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_o_f32_e64 s3, s2, s2 ; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1f8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s2, s2, s3 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_nop 0 @@ -618,16 +636,16 @@ define 
amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrsp define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %out, half %x) #0 { ; SI-LABEL: test_isinf_pattern_f16: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s1, 0x7f800000 +; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e64 v0, |s0| -; SI-NEXT: v_cmp_eq_f32_e32 vcc, s1, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_and_b32 s4, s4, 0x7fff +; SI-NEXT: s_cmpk_eq_i32 s4, 0x7c00 +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isinf_pattern_f16: @@ -667,27 +685,32 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocapture %out, half %x) #0 { ; SI-LABEL: test_isfinite_pattern_0_f16: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_movk_i32 s1, 0x1f8 +; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 -; SI-NEXT: v_cmp_class_f32_e64 s[0:1], v0, s1 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s4 +; SI-NEXT: s_and_b32 s4, s4, 0x7fff +; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; SI-NEXT: s_cmpk_lg_i32 s4, 0x7c00 +; 
SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isfinite_pattern_0_f16: ; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x1f8 +; VI-NEXT: v_mov_b32_e32 v0, 0x1fb ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_class_f16_e32 vcc, s2, v0 +; VI-NEXT: v_cmp_o_f16_e64 s[2:3], s4, s4 +; VI-NEXT: v_cmp_class_f16_e32 vcc, s4, v0 +; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -699,8 +722,10 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_cmp_class_f16_e64 s2, s2, 0x1f8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cmp_o_f16_e64 s3, s2, s2 +; GFX11-NEXT: v_cmp_class_f16_e64 s2, s2, 0x1fb +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s2, s3, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_nop 0 @@ -718,27 +743,32 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocapture %out, half %x) #0 { ; SI-LABEL: test_isfinite_pattern_4_f16: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_movk_i32 s1, 0x1f8 +; SI-NEXT: s_load_dword s4, s[0:1], 
0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 -; SI-NEXT: v_cmp_class_f32_e64 s[0:1], v0, s1 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s4 +; SI-NEXT: s_and_b32 s4, s4, 0x7fff +; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; SI-NEXT: s_cmpk_lt_i32 s4, 0x7c00 +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isfinite_pattern_4_f16: ; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v0, 0x1f8 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_class_f16_e32 vcc, s2, v0 +; VI-NEXT: v_cmp_o_f16_e64 s[2:3], s4, s4 +; VI-NEXT: v_cmp_class_f16_e32 vcc, s4, v0 +; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3] ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -750,8 +780,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocaptur ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_o_f16_e64 s3, s2, s2 ; GFX11-NEXT: v_cmp_class_f16_e64 s2, s2, 0x1f8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s2, s3, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll 
b/llvm/test/CodeGen/AMDGPU/fract-match.ll index 3a0b8259d0849..d65dff7bc4462 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -53,14 +53,14 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly ; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3 ; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4 ; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX6-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX6-NEXT: v_mov_b32_e32 v5, 0x204 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc ; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -68,15 +68,15 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly ; GFX7-LABEL: safe_math_fract_f32: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x204 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_fract_f32_e32 v4, v0 -; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 ; GFX7-NEXT: v_floor_f32_e32 v3, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc ; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -84,11 +84,11 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly ; GFX8-LABEL: safe_math_fract_f32: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX8-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX8-NEXT: v_mov_b32_e32 v5, 0x204 ; GFX8-NEXT: v_fract_f32_e32 v4, v0 -; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4 +; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 ; GFX8-NEXT: v_floor_f32_e32 v3, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc ; GFX8-NEXT: global_store_dword v[1:2], v3, off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -97,10 +97,10 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fract_f32_e32 v3, v0 -; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 ; GFX11-NEXT: v_floor_f32_e32 v4, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v0, v3, 0, s0 ; GFX11-NEXT: global_store_b32 v[1:2], v4, off ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -210,14 +210,14 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_floor_f32_e32 v3, v0 ; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX6-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX6-NEXT: v_mov_b32_e32 v5, 0x204 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4 -; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc ; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -227,14 +227,14 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w ; 
GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_floor_f32_e32 v3, v0 ; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x204 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4 -; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 -; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc ; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -244,10 +244,10 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_floor_f32_e32 v3, v0 ; GFX8-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX8-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX8-NEXT: v_mov_b32_e32 v5, 0x204 ; GFX8-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4 -; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4 -; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc ; GFX8-NEXT: global_store_dword v[1:2], v3, off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -256,12 +256,12 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_floor_f32_e32 v3, v0 -; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3 ; GFX11-NEXT: global_store_b32 v[1:2], v3, off ; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4 -; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0 ; GFX11-NEXT: 
s_setpc_b64 s[30:31] entry: %floor = tail call float @llvm.floor.f32(float %x) @@ -1705,16 +1705,16 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX6-NEXT: v_min_f32_e32 v7, 0x3f7fffff, v7 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc ; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX6-NEXT: s_movk_i32 s10, 0x204 +; GFX6-NEXT: v_mov_b32_e32 v8, 0x204 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc -; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v0, s10 +; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 ; GFX6-NEXT: s_mov_b32 s6, 0 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, s[8:9] -; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v1, s10 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, vcc +; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, s[8:9] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -1722,19 +1722,19 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX7-LABEL: safe_math_fract_v2f32: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0x204 ; GFX7-NEXT: v_fract_f32_e32 v6, v0 -; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_floor_f32_e32 v4, v0 ; GFX7-NEXT: v_fract_f32_e32 v7, v1 -; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s8 +; GFX7-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 ; GFX7-NEXT: v_floor_f32_e32 v5, v1 -; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc +; GFX7-NEXT: 
v_cndmask_b32_e64 v1, v7, 0, vcc ; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -1742,15 +1742,15 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX8-LABEL: safe_math_fract_v2f32: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX8-NEXT: v_mov_b32_e32 v8, 0x204 ; GFX8-NEXT: v_fract_f32_e32 v6, v0 -; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4 +; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 ; GFX8-NEXT: v_floor_f32_e32 v4, v0 ; GFX8-NEXT: v_fract_f32_e32 v7, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s4 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 ; GFX8-NEXT: v_floor_f32_e32 v5, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc ; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -1759,14 +1759,15 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fract_f32_e32 v6, v0 -; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 ; GFX11-NEXT: v_fract_f32_e32 v7, v1 ; GFX11-NEXT: v_floor_f32_e32 v4, v0 ; GFX11-NEXT: v_floor_f32_e32 v5, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo -; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0 +; GFX11-NEXT: v_cmp_class_f32_e64 s0, v1, 0x204 ; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off -; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, v7, 0, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail 
call <2 x float> @llvm.floor.v2f32(<2 x float> %x) @@ -1823,17 +1824,16 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon ; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5] ; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] ; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9] -; GFX6-NEXT: s_mov_b32 s8, 0 -; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000 +; GFX6-NEXT: v_mov_b32_e32 v8, 0x204 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc -; GFX6-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9] +; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -1841,17 +1841,16 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon ; GFX7-LABEL: safe_math_fract_f64: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b32 s4, 0 -; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX7-NEXT: v_mov_b32_e32 v6, 0x204 ; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1] -; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX7-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6 ; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1] ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 -; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc ; GFX7-NEXT: buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: 
s_setpc_b64 s[30:31] @@ -1859,13 +1858,12 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon ; GFX8-LABEL: safe_math_fract_f64: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, 0 -; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX8-NEXT: v_mov_b32_e32 v6, 0x204 ; GFX8-NEXT: v_fract_f64_e32 v[4:5], v[0:1] -; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX8-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6 ; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1] -; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc ; GFX8-NEXT: global_store_dwordx2 v[2:3], v[6:7], off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -1874,10 +1872,11 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1] -; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]| +; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204 ; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0, s0 ; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -1937,21 +1936,22 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly % ; GFX6: ; %bb.0: ; %entry ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX6-NEXT: s_movk_i32 s8, 0x7c00 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; 
GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX6-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_floor_f32_e32 v3, v0 -; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4 -; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 -; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc -; GFX6-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64 +; GFX6-NEXT: v_floor_f32_e32 v4, v3 +; GFX6-NEXT: v_sub_f32_e32 v5, v3, v4 +; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX6-NEXT: v_min_f32_e32 v5, 0x3f7fe000, v5 +; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: buffer_store_short v4, v[1:2], s[4:7], 0 addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -1959,32 +1959,33 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly % ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX7-NEXT: s_movk_i32 s8, 0x7c00 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 -; GFX7-NEXT: v_floor_f32_e32 v3, v0 -; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4 -; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 -; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc -; GFX7-NEXT: buffer_store_short v3, v[1:2], 
s[4:7], 0 addr64 +; GFX7-NEXT: v_floor_f32_e32 v4, v3 +; GFX7-NEXT: v_sub_f32_e32 v5, v3, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_min_f32_e32 v5, 0x3f7fe000, v5 +; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX7-NEXT: buffer_store_short v4, v[1:2], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: safe_math_fract_f16: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_movk_i32 s4, 0x7c00 +; GFX8-NEXT: v_mov_b32_e32 v5, 0x204 ; GFX8-NEXT: v_fract_f16_e32 v4, v0 -; GFX8-NEXT: v_cmp_neq_f16_e64 vcc, |v0|, s4 +; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v5 ; GFX8-NEXT: v_floor_f16_e32 v3, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc ; GFX8-NEXT: global_store_short v[1:2], v3, off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -1993,10 +1994,10 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly % ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fract_f16_e32 v3, v0 -; GFX11-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0| +; GFX11-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204 ; GFX11-NEXT: v_floor_f16_e32 v4, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v0, v3, 0, s0 ; GFX11-NEXT: global_store_b16 v[1:2], v4, off ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: @@ -2062,12 +2063,12 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX6-NEXT: s_movk_i32 s8, 
0x7c00 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; GFX6-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX6-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX6-NEXT: v_floor_f32_e32 v6, v4 ; GFX6-NEXT: v_cvt_f16_f32_e32 v7, v6 ; GFX6-NEXT: v_floor_f32_e32 v8, v5 @@ -2080,10 +2081,10 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX6-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc ; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc -; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, s8, v0 +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc -; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, s8, v1 +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 @@ -2098,12 +2099,12 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX7-NEXT: s_movk_i32 s8, 0x7c00 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX7-NEXT: v_floor_f32_e32 v6, v4 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v6 ; GFX7-NEXT: v_floor_f32_e32 v8, v5 @@ -2116,10 +2117,10 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc ; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc -; GFX7-NEXT: v_cmp_neq_f32_e32 vcc, s8, v0 +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 
s8, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc -; GFX7-NEXT: v_cmp_neq_f32_e32 vcc, s8, v1 +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v1 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 @@ -2133,16 +2134,16 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: s_movk_i32 s6, 0x204 +; GFX8-NEXT: v_mov_b32_e32 v7, 0x204 ; GFX8-NEXT: v_floor_f16_e32 v4, v3 ; GFX8-NEXT: v_floor_f16_e32 v5, v0 ; GFX8-NEXT: v_fract_f16_e32 v6, v3 -; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v3, s6 +; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v3, v7 ; GFX8-NEXT: v_pack_b32_f16 v4, v5, v4 ; GFX8-NEXT: v_fract_f16_e32 v5, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[4:5] -; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, vcc +; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, vcc ; GFX8-NEXT: v_pack_b32_f16 v0, v0, v3 ; GFX8-NEXT: global_store_dword v[1:2], v4, off ; GFX8-NEXT: s_waitcnt vmcnt(0) @@ -2237,19 +2238,19 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc ; GFX6-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc ; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] -; GFX6-NEXT: s_movk_i32 s10, 0x204 -; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s10 +; GFX6-NEXT: v_mov_b32_e32 v14, 0x204 ; GFX6-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v0, v12, 0, s[8:9] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, s[8:9] -; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[2:3], s10 +; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v14 ; GFX6-NEXT: s_mov_b32 s6, 0 +; GFX6-NEXT: 
v_cndmask_b32_e64 v0, v12, 0, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, vcc +; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v14 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_cndmask_b32_e64 v2, v10, 0, s[8:9] -; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, s[8:9] +; GFX6-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, vcc ; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -2257,39 +2258,39 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc ; GFX7-LABEL: safe_math_fract_v2f64: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_movk_i32 s4, 0x204 +; GFX7-NEXT: v_mov_b32_e32 v6, 0x204 ; GFX7-NEXT: v_fract_f64_e32 v[10:11], v[0:1] -; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s4 +; GFX7-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6 ; GFX7-NEXT: v_fract_f64_e32 v[12:13], v[2:3] -; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[2:3], s4 +; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v6 ; GFX7-NEXT: v_floor_f64_e32 v[8:9], v[2:3] ; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1] -; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: s_mov_b32 s4, s6 -; GFX7-NEXT: s_mov_b32 s5, s6 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[10:11] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[10:11] -; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64 +; GFX7-NEXT: s_mov_b32 s10, 0 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s8, s10 +; GFX7-NEXT: s_mov_b32 s9, s10 +; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[4:5] +; GFX7-NEXT: 
buffer_store_dwordx4 v[6:9], v[4:5], s[8:11], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: safe_math_fract_v2f64: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_movk_i32 s6, 0x204 +; GFX8-NEXT: v_mov_b32_e32 v6, 0x204 ; GFX8-NEXT: v_fract_f64_e32 v[10:11], v[0:1] -; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s6 +; GFX8-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6 ; GFX8-NEXT: v_fract_f64_e32 v[12:13], v[2:3] -; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], s6 +; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v6 ; GFX8-NEXT: v_floor_f64_e32 v[8:9], v[2:3] ; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[4:5] ; GFX8-NEXT: global_store_dwordx4 v[4:5], v[6:9], off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/RISCV/fpclass-test.ll b/llvm/test/CodeGen/RISCV/fpclass-test.ll new file mode 100644 index 0000000000000..b38e0d28941c9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/fpclass-test.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv64 -mattr=+d,+v < %s | FileCheck %s + +define i1 @test_is_inf_or_nan(double %arg) { +; CHECK-LABEL: test_is_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: fclass.d a0, fa0 +; CHECK-NEXT: andi a0, a0, 897 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp ueq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf_or_nan(double %arg) 
{ +; CHECK-LABEL: test_is_not_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: fclass.d a0, fa0 +; CHECK-NEXT: andi a0, a0, 126 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp one double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_inf(double %arg) { +; CHECK-LABEL: test_is_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: fclass.d a0, fa0 +; CHECK-NEXT: andi a0, a0, 129 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp oeq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf(double %arg) { +; CHECK-LABEL: test_is_not_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: fclass.d a0, fa0 +; CHECK-NEXT: andi a0, a0, 894 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp une double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define <vscale x 4 x i1> @test_vec_is_inf_or_nan(<vscale x 4 x double> %arg) { +; CHECK-LABEL: test_vec_is_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: li a0, 897 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret + %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg) + %ret = fcmp ueq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000) + ret <vscale x 4 x i1> %ret +} + +define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(<vscale x 4 x double> %arg) { +; CHECK-LABEL: test_vec_is_not_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: li a0, 126 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret + %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg) + %ret = fcmp one <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000) + ret <vscale x 4 x i1> %ret +} + +define <vscale x 4 x i1> @test_vec_is_inf(<vscale x 4 x double> %arg) { +; CHECK-LABEL: test_vec_is_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: li a0, 129 +; CHECK-NEXT: vand.vx v8, v8, 
a0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret + %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg) + %ret = fcmp oeq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000) + ret <vscale x 4 x i1> %ret +} + +define <vscale x 4 x i1> @test_vec_is_not_inf(<vscale x 4 x double> %arg) { +; CHECK-LABEL: test_vec_is_not_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: li a0, 894 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret + %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg) + %ret = fcmp une <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000) + ret <vscale x 4 x i1> %ret +} + +define i1 @test_fp128_is_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: lui a0, 32767 +; CHECK-NEXT: slli a0, a0, 36 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: slt a0, a0, a1 +; CHECK-NEXT: ret + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_not_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: lui a0, 32767 +; CHECK-NEXT: slli a0, a0, 36 +; CHECK-NEXT: slt a0, a1, a0 +; CHECK-NEXT: ret + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_inf(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: lui a2, 32767 +; CHECK-NEXT: slli a2, a2, 36 +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: ret + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf(fp128 %arg) { +; CHECK-LABEL: 
test_fp128_is_not_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: lui a2, 32767 +; CHECK-NEXT: slli a2, a2, 36 +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: ret + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} diff --git a/llvm/test/CodeGen/X86/compare-inf.ll b/llvm/test/CodeGen/X86/compare-inf.ll index 5beec4d76e22e..499ab98ab12cb 100644 --- a/llvm/test/CodeGen/X86/compare-inf.ll +++ b/llvm/test/CodeGen/X86/compare-inf.ll @@ -1,14 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s -; Convert oeq and une to ole/oge/ule/uge when comparing with infinity -; and negative infinity, because those are more efficient on x86. - declare void @f() nounwind -; CHECK-LABEL: oeq_inff: -; CHECK: ucomiss -; CHECK: jb define void @oeq_inff(float %x) nounwind { +; CHECK-LABEL: oeq_inff: +; CHECK: # %bb.0: +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: jb .LBB0_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB0_2: # %false +; CHECK-NEXT: retq %t0 = fcmp oeq float %x, 0x7FF0000000000000 br i1 %t0, label %true, label %false @@ -20,10 +25,17 @@ false: ret void } -; CHECK-LABEL: oeq_inf: -; CHECK: ucomisd -; CHECK: jb define void @oeq_inf(double %x) nounwind { +; CHECK-LABEL: oeq_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: jb .LBB1_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB1_2: # %false +; CHECK-NEXT: retq %t0 = fcmp oeq double %x, 0x7FF0000000000000 br i1 %t0, label %true, label %false @@ -35,10 +47,17 @@ false: ret void } -; 
CHECK-LABEL: une_inff: -; CHECK: ucomiss -; CHECK: jae define void @une_inff(float %x) nounwind { +; CHECK-LABEL: une_inff: +; CHECK: # %bb.0: +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: jae .LBB2_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB2_2: # %false +; CHECK-NEXT: retq %t0 = fcmp une float %x, 0x7FF0000000000000 br i1 %t0, label %true, label %false @@ -50,10 +69,17 @@ false: ret void } -; CHECK-LABEL: une_inf: -; CHECK: ucomisd -; CHECK: jae define void @une_inf(double %x) nounwind { +; CHECK-LABEL: une_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: jae .LBB3_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB3_2: # %false +; CHECK-NEXT: retq %t0 = fcmp une double %x, 0x7FF0000000000000 br i1 %t0, label %true, label %false @@ -65,10 +91,18 @@ false: ret void } -; CHECK-LABEL: oeq_neg_inff: -; CHECK: ucomiss -; CHECK: jb define void @oeq_neg_inff(float %x) nounwind { +; CHECK-LABEL: oeq_neg_inff: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jb .LBB4_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB4_2: # %false +; CHECK-NEXT: retq %t0 = fcmp oeq float %x, 0xFFF0000000000000 br i1 %t0, label %true, label %false @@ -80,10 +114,18 @@ false: ret void } -; CHECK-LABEL: oeq_neg_inf: -; CHECK: ucomisd -; CHECK: jb define void @oeq_neg_inf(double %x) nounwind { +; CHECK-LABEL: oeq_neg_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [-Inf,0.0E+0] +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; CHECK-NEXT: jb .LBB5_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: addq $8, %rsp +; 
CHECK-NEXT: .LBB5_2: # %false +; CHECK-NEXT: retq %t0 = fcmp oeq double %x, 0xFFF0000000000000 br i1 %t0, label %true, label %false @@ -95,10 +137,18 @@ false: ret void } -; CHECK-LABEL: une_neg_inff: -; CHECK: ucomiss -; CHECK: jae define void @une_neg_inff(float %x) nounwind { +; CHECK-LABEL: une_neg_inff: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jae .LBB6_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB6_2: # %false +; CHECK-NEXT: retq %t0 = fcmp une float %x, 0xFFF0000000000000 br i1 %t0, label %true, label %false @@ -110,10 +160,18 @@ false: ret void } -; CHECK-LABEL: une_neg_inf: -; CHECK: ucomisd -; CHECK: jae define void @une_neg_inf(double %x) nounwind { +; CHECK-LABEL: une_neg_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm1 = [-Inf,0.0E+0] +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; CHECK-NEXT: jae .LBB7_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB7_2: # %false +; CHECK-NEXT: retq %t0 = fcmp une double %x, 0xFFF0000000000000 br i1 %t0, label %true, label %false diff --git a/llvm/test/CodeGen/X86/fpclass-test.ll b/llvm/test/CodeGen/X86/fpclass-test.ll new file mode 100644 index 0000000000000..bf3e191efd24a --- /dev/null +++ b/llvm/test/CodeGen/X86/fpclass-test.ll @@ -0,0 +1,268 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s + +define i1 @test_is_inf_or_nan(double %arg) { +; CHECK-LABEL: test_is_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovq %xmm0, %rax +; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: andq %rax, %rcx +; CHECK-NEXT: movabsq $9218868437227405311, %rax # imm = 0x7FEFFFFFFFFFFFFF +; 
CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: setg %al +; CHECK-NEXT: retq + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp ueq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf_or_nan(double %arg) { +; CHECK-LABEL: test_is_not_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovq %xmm0, %rax +; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: andq %rax, %rcx +; CHECK-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 +; CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: setl %al +; CHECK-NEXT: retq + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp one double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_inf(double %arg) { +; CHECK-LABEL: test_is_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovq %xmm0, %rax +; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: andq %rax, %rcx +; CHECK-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 +; CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: sete %al +; CHECK-NEXT: retq + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp oeq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf(double %arg) { +; CHECK-LABEL: test_is_not_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovq %xmm0, %rax +; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: andq %rax, %rcx +; CHECK-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000 +; CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: setne %al +; CHECK-NEXT: retq + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp une double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define <4 x i1> @test_vec_is_inf_or_nan(<4 x double> %arg) { +; CHECK-LABEL: test_vec_is_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] +; CHECK-NEXT: 
vpand %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9218868437227405311,9218868437227405311,9218868437227405311,9218868437227405311] +; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 +; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg) + %ret = fcmp ueq <4 x double> %abs, splat (double 0x7FF0000000000000) + ret <4 x i1> %ret +} + +define <4 x i1> @test_vec_is_not_inf_or_nan(<4 x double> %arg) { +; CHECK-LABEL: test_vec_is_not_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] +; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9218868437227405312,9218868437227405312,9218868437227405312,9218868437227405312] +; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 +; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg) + %ret = fcmp one <4 x double> %abs, splat (double 0x7FF0000000000000) + ret <4 x i1> %ret +} + +define <4 x i1> @test_vec_is_inf(<4 x double> %arg) { +; CHECK-LABEL: test_vec_is_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] +; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9218868437227405312,9218868437227405312,9218868437227405312,9218868437227405312] +; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 +; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg) + %ret = fcmp oeq <4 x double> %abs, splat (double 0x7FF0000000000000) + ret <4 x i1> 
%ret +} + +define <4 x i1> @test_vec_is_not_inf(<4 x double> %arg) { +; CHECK-LABEL: test_vec_is_not_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807] +; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9218868437227405312,9218868437227405312,9218868437227405312,9218868437227405312] +; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 +; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg) + %ret = fcmp une <4 x double> %abs, splat (double 0x7FF0000000000000) + ret <4 x i1> %ret +} + +define i1 @test_fp128_is_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movabsq $9223090561878065151, %rcx # imm = 0x7FFEFFFFFFFFFFFF +; CHECK-NEXT: cmpq %rcx, %rax +; CHECK-NEXT: setg %al +; CHECK-NEXT: retq + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_not_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000 +; CHECK-NEXT: cmpq %rcx, %rax +; CHECK-NEXT: setl %al +; CHECK-NEXT: retq + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + 
+define i1 @test_fp128_is_inf(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf] +; CHECK-NEXT: callq __eqtf2@PLT +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: sete %al +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf(fp128 %arg) { +; CHECK-LABEL: test_fp128_is_not_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf] +; CHECK-NEXT: callq __netf2@PLT +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: setne %al +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_x86_fp80_is_inf_or_nan(x86_fp80 %arg) { +; CHECK-LABEL: test_x86_fp80_is_inf_or_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: btq $63, {{[0-9]+}}(%rsp) +; CHECK-NEXT: setae %cl +; CHECK-NEXT: andl $32767, %eax # imm = 0x7FFF +; CHECK-NEXT: leal -1(%rax), %edx +; CHECK-NEXT: movzwl %dx, %edx +; CHECK-NEXT: cmpl $32766, %edx # imm = 0x7FFE +; CHECK-NEXT: setae %dl +; CHECK-NEXT: orb %cl, %dl +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: setne %al +; CHECK-NEXT: andb %dl, %al +; CHECK-NEXT: retq + %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg) + %ret = fcmp ueq x86_fp80 %abs, 0xK7FFF8000000000000000 + ret i1 %ret +} + +define i1 @test_x86_fp80_is_not_inf_or_nan(x86_fp80 %arg) { +; CHECK-LABEL: test_x86_fp80_is_not_inf_or_nan: +; CHECK: # %bb.0: +; 
CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: shrq $63, %rcx +; CHECK-NEXT: andl $32767, %eax # imm = 0x7FFF +; CHECK-NEXT: leal -1(%rax), %edx +; CHECK-NEXT: movzwl %dx, %edx +; CHECK-NEXT: cmpl $32766, %edx # imm = 0x7FFE +; CHECK-NEXT: setb %dl +; CHECK-NEXT: andb %cl, %dl +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: sete %al +; CHECK-NEXT: orb %dl, %al +; CHECK-NEXT: retq + %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg) + %ret = fcmp one x86_fp80 %abs, 0xK7FFF8000000000000000 + ret i1 %ret +} + +define i1 @test_x86_fp80_is_inf(x86_fp80 %arg) { +; CHECK-LABEL: test_x86_fp80_is_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; CHECK-NEXT: xorq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: andl $32767, %eax # imm = 0x7FFF +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: sete %al +; CHECK-NEXT: retq + %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg) + %ret = fcmp oeq x86_fp80 %abs, 0xK7FFF8000000000000000 + ret i1 %ret +} + +define i1 @test_x86_fp80_is_not_inf(x86_fp80 %arg) { +; CHECK-LABEL: test_x86_fp80_is_not_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; CHECK-NEXT: xorq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: andl $32767, %eax # imm = 0x7FFF +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: setne %al +; CHECK-NEXT: retq + %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg) + %ret = fcmp une x86_fp80 %abs, 0xK7FFF8000000000000000 + ret i1 %ret +}