diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 59a0c64d3c9f2..47f7981a24088 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1943,6 +1943,51 @@ static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
   return false;
 }
 
+/// Rewrite an fcmp that is really an "is infinity" / "is infinity or NaN"
+/// test (or the negation of one) into a call to llvm.is.fpclass.
+///
+/// The rewrite is skipped when the target reports fabs as free for the
+/// compared type and the comparison's condition code is legal, since the
+/// original fabs + fcmp form is then kept as is.
+///
+/// \returns true if \p Cmp was replaced by an is.fpclass call.
+static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
+                                  const DataLayout &DL) {
+  FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
+  if (!FCmp)
+    return false;
+
+  // Don't fold if the target offers free fabs and the predicate is legal.
+  // Condition-code legality is queried with an MVT, so only ask for simple
+  // value types; EVT::getSimpleVT() asserts on extended types.
+  EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
+  if (VT.isSimple() && TLI.isFAbsFree(VT) &&
+      TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
+                          VT.getSimpleVT()))
+    return false;
+
+  // Reverse the canonicalization if it is a FP class test
+  auto ShouldReverseTransform = [](FPClassTest ClassTest) {
+    return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
+  };
+  auto [ClassVal, ClassTest] =
+      fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
+                      FCmp->getOperand(0), FCmp->getOperand(1));
+  if (!ClassVal)
+    return false;
+
+  // Accept the test itself or its complement (e.g. "is not infinity").
+  if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
+    return false;
+
+  IRBuilder<> Builder(Cmp);
+  Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
+  Cmp->replaceAllUsesWith(IsFPClass);
+  // Also cleans up operands (such as the fabs call) that became dead.
+  RecursivelyDeleteTriviallyDeadInstructions(Cmp);
+  return true;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -1959,6 +2004,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
     return true;
 
+  if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index 458bd7eeba16c..834417b98743a 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,22
+58,14 @@ define i32 @replace_isinf_call_f64(double %x) { define i32 @replace_isinf_call_f128(fp128 %x) { ; CHECK-LABEL: replace_isinf_call_f128: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: ldrb w8, [sp, #15] -; CHECK-NEXT: and w8, w8, #0x7f -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: ldr q0, [sp] -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: bl __eqtf2 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: str q0, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldp x9, x8, [sp], #16 +; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff +; CHECK-NEXT: eor x8, x8, #0x7fff000000000000 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret %abs = tail call fp128 @llvm.fabs.f128(fp128 %x) %cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll index 6fa7df913812a..18d2e52e8f900 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll @@ -618,16 +618,16 @@ define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrsp define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %out, half %x) #0 { ; SI-LABEL: test_isinf_pattern_f16: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s1, 0x7f800000 +; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: 
v_cvt_f32_f16_e64 v0, |s0| -; SI-NEXT: v_cmp_eq_f32_e32 vcc, s1, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_and_b32 s4, s4, 0x7fff +; SI-NEXT: s_cmpk_eq_i32 s4, 0x7c00 +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isinf_pattern_f16: @@ -667,16 +667,19 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocapture %out, half %x) #0 { ; SI-LABEL: test_isfinite_pattern_0_f16: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_movk_i32 s1, 0x1f8 +; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 -; SI-NEXT: v_cmp_class_f32_e64 s[0:1], v0, s1 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s4 +; SI-NEXT: s_and_b32 s4, s4, 0x7fff +; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; SI-NEXT: s_cmpk_lg_i32 s4, 0x7c00 +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isfinite_pattern_0_f16: @@ -718,16 +721,19 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocapture %out, half %x) #0 { ; SI-LABEL: test_isfinite_pattern_4_f16: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dword s0, s[0:1], 0xb -; SI-NEXT: 
s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_movk_i32 s1, 0x1f8 +; SI-NEXT: s_load_dword s4, s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 -; SI-NEXT: v_cmp_class_f32_e64 s[0:1], v0, s1 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s4 +; SI-NEXT: s_and_b32 s4, s4, 0x7fff +; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; SI-NEXT: s_cmpk_lt_i32 s4, 0x7c00 +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 +; SI-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_isfinite_pattern_4_f16: diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index 3a0b8259d0849..e361aa4db2aa9 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -1705,16 +1705,16 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX6-NEXT: v_min_f32_e32 v7, 0x3f7fffff, v7 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc ; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX6-NEXT: s_movk_i32 s10, 0x204 +; GFX6-NEXT: v_mov_b32_e32 v8, 0x204 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc -; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v0, s10 +; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 ; GFX6-NEXT: s_mov_b32 s6, 0 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, s[8:9] -; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v1, s10 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, vcc +; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, s[8:9] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 
addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -1722,19 +1722,19 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX7-LABEL: safe_math_fract_v2f32: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0x204 ; GFX7-NEXT: v_fract_f32_e32 v6, v0 -; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 +; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_floor_f32_e32 v4, v0 ; GFX7-NEXT: v_fract_f32_e32 v7, v1 -; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s8 +; GFX7-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 ; GFX7-NEXT: v_floor_f32_e32 v5, v1 -; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc ; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -1742,15 +1742,15 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX8-LABEL: safe_math_fract_v2f32: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX8-NEXT: v_mov_b32_e32 v8, 0x204 ; GFX8-NEXT: v_fract_f32_e32 v6, v0 -; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4 +; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 ; GFX8-NEXT: v_floor_f32_e32 v4, v0 ; GFX8-NEXT: v_fract_f32_e32 v7, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s4 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 ; GFX8-NEXT: v_floor_f32_e32 v5, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc ; GFX8-NEXT: 
global_store_dwordx2 v[2:3], v[4:5], off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -1759,14 +1759,15 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_fract_f32_e32 v6, v0 -; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 ; GFX11-NEXT: v_fract_f32_e32 v7, v1 ; GFX11-NEXT: v_floor_f32_e32 v4, v0 ; GFX11-NEXT: v_floor_f32_e32 v5, v1 -; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo -; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v1| +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0 +; GFX11-NEXT: v_cmp_class_f32_e64 s0, v1, 0x204 ; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off -; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, v7, 0, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x) @@ -1937,21 +1938,22 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly % ; GFX6: ; %bb.0: ; %entry ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX6-NEXT: s_movk_i32 s8, 0x7c00 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX6-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_floor_f32_e32 v3, v0 -; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4 -; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 -; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc -; GFX6-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 
addr64 +; GFX6-NEXT: v_floor_f32_e32 v4, v3 +; GFX6-NEXT: v_sub_f32_e32 v5, v3, v4 +; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX6-NEXT: v_min_f32_e32 v5, 0x3f7fe000, v5 +; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX6-NEXT: buffer_store_short v4, v[1:2], s[4:7], 0 addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -1959,21 +1961,22 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly % ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX7-NEXT: s_movk_i32 s8, 0x7c00 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 -; GFX7-NEXT: v_floor_f32_e32 v3, v0 -; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4 -; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 -; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc -; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64 +; GFX7-NEXT: v_floor_f32_e32 v4, v3 +; GFX7-NEXT: v_sub_f32_e32 v5, v3, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_min_f32_e32 v5, 0x3f7fe000, v5 +; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc +; GFX7-NEXT: buffer_store_short v4, v[1:2], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -2062,12 +2065,12 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr 
addrspace(1) nocaptu ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX6-NEXT: s_movk_i32 s8, 0x7c00 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; GFX6-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX6-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX6-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX6-NEXT: v_floor_f32_e32 v6, v4 ; GFX6-NEXT: v_cvt_f16_f32_e32 v7, v6 ; GFX6-NEXT: v_floor_f32_e32 v8, v5 @@ -2080,10 +2083,10 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX6-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc ; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc -; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, s8, v0 +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc -; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, s8, v1 +; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 @@ -2098,12 +2101,12 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 +; GFX7-NEXT: s_movk_i32 s8, 0x7c00 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1| +; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff, v1 ; GFX7-NEXT: v_floor_f32_e32 v6, v4 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v6 ; GFX7-NEXT: v_floor_f32_e32 v8, v5 @@ -2116,10 +2119,10 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) 
nocaptu ; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc ; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc -; GFX7-NEXT: v_cmp_neq_f32_e32 vcc, s8, v0 +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc -; GFX7-NEXT: v_cmp_neq_f32_e32 vcc, s8, v1 +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v1 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 @@ -2133,16 +2136,16 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: s_movk_i32 s6, 0x204 +; GFX8-NEXT: v_mov_b32_e32 v7, 0x204 ; GFX8-NEXT: v_floor_f16_e32 v4, v3 ; GFX8-NEXT: v_floor_f16_e32 v5, v0 ; GFX8-NEXT: v_fract_f16_e32 v6, v3 -; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v3, s6 +; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v3, v7 ; GFX8-NEXT: v_pack_b32_f16 v4, v5, v4 ; GFX8-NEXT: v_fract_f16_e32 v5, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[4:5] -; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, vcc +; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, vcc ; GFX8-NEXT: v_pack_b32_f16 v0, v0, v3 ; GFX8-NEXT: global_store_dword v[1:2], v4, off ; GFX8-NEXT: s_waitcnt vmcnt(0) @@ -2237,19 +2240,19 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc ; GFX6-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc ; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] -; GFX6-NEXT: s_movk_i32 s10, 0x204 -; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s10 +; GFX6-NEXT: v_mov_b32_e32 v14, 0x204 ; GFX6-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v0, 
v12, 0, s[8:9] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, s[8:9] -; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[2:3], s10 +; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v14 ; GFX6-NEXT: s_mov_b32 s6, 0 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, vcc +; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v14 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s4, s6 ; GFX6-NEXT: s_mov_b32 s5, s6 -; GFX6-NEXT: v_cndmask_b32_e64 v2, v10, 0, s[8:9] -; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, s[8:9] +; GFX6-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, vcc ; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -2257,39 +2260,39 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc ; GFX7-LABEL: safe_math_fract_v2f64: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_movk_i32 s4, 0x204 +; GFX7-NEXT: v_mov_b32_e32 v6, 0x204 ; GFX7-NEXT: v_fract_f64_e32 v[10:11], v[0:1] -; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s4 +; GFX7-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6 ; GFX7-NEXT: v_fract_f64_e32 v[12:13], v[2:3] -; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[2:3], s4 +; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v6 ; GFX7-NEXT: v_floor_f64_e32 v[8:9], v[2:3] ; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1] -; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: s_mov_b32 s4, s6 -; GFX7-NEXT: s_mov_b32 s5, s6 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[10:11] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[10:11] -; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64 +; GFX7-NEXT: s_mov_b32 s10, 0 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s8, s10 +; GFX7-NEXT: s_mov_b32 s9, 
s10 +; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[4:5] +; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[8:11], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: safe_math_fract_v2f64: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_movk_i32 s6, 0x204 +; GFX8-NEXT: v_mov_b32_e32 v6, 0x204 ; GFX8-NEXT: v_fract_f64_e32 v[10:11], v[0:1] -; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s6 +; GFX8-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6 ; GFX8-NEXT: v_fract_f64_e32 v[12:13], v[2:3] -; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], s6 +; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v6 ; GFX8-NEXT: v_floor_f64_e32 v[8:9], v[2:3] ; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[4:5] ; GFX8-NEXT: global_store_dwordx4 v[4:5], v[6:9], off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll index 7de35b880a5d9..f527b3c48040e 100644 --- a/llvm/test/CodeGen/PowerPC/fp-classify.ll +++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll @@ -57,30 +57,18 @@ entry: define zeroext i1 @abs_isinfq(fp128 %x) { ; P8-LABEL: abs_isinfq: ; P8: # %bb.0: # %entry -; P8-NEXT: mflr 0 -; P8-NEXT: stdu 1, -48(1) -; P8-NEXT: std 0, 64(1) -; P8-NEXT: .cfi_def_cfa_offset 48 -; P8-NEXT: .cfi_offset lr, 16 ; P8-NEXT: xxswapd 0, 34 -; P8-NEXT: addi 3, 1, 32 +; P8-NEXT: addi 3, 1, 
-16 +; P8-NEXT: li 5, 32767 ; P8-NEXT: stxvd2x 0, 0, 3 -; P8-NEXT: lbz 4, 47(1) -; P8-NEXT: clrlwi 4, 4, 25 -; P8-NEXT: stb 4, 47(1) -; P8-NEXT: lxvd2x 0, 0, 3 -; P8-NEXT: addis 3, 2, .LCPI2_0@toc@ha -; P8-NEXT: addi 3, 3, .LCPI2_0@toc@l -; P8-NEXT: xxswapd 34, 0 -; P8-NEXT: lxvd2x 0, 0, 3 -; P8-NEXT: xxswapd 35, 0 -; P8-NEXT: bl __eqkf2 -; P8-NEXT: nop -; P8-NEXT: cntlzw 3, 3 -; P8-NEXT: srwi 3, 3, 5 -; P8-NEXT: addi 1, 1, 48 -; P8-NEXT: ld 0, 16(1) -; P8-NEXT: mtlr 0 +; P8-NEXT: rldic 5, 5, 48, 1 +; P8-NEXT: ld 4, -8(1) +; P8-NEXT: ld 3, -16(1) +; P8-NEXT: clrldi 4, 4, 1 +; P8-NEXT: xor 4, 4, 5 +; P8-NEXT: or 3, 3, 4 +; P8-NEXT: cntlzd 3, 3 +; P8-NEXT: rldicl 3, 3, 58, 63 ; P8-NEXT: blr ; ; P9-LABEL: abs_isinfq: @@ -99,12 +87,13 @@ entry: define zeroext i1 @abs_isinfornanf(float %x) { ; P8-LABEL: abs_isinfornanf: ; P8: # %bb.0: # %entry -; P8-NEXT: addis 3, 2, .LCPI3_0@toc@ha -; P8-NEXT: xsabsdp 0, 1 -; P8-NEXT: lfs 1, .LCPI3_0@toc@l(3) -; P8-NEXT: li 3, 1 -; P8-NEXT: fcmpu 0, 0, 1 -; P8-NEXT: isellt 3, 0, 3 +; P8-NEXT: xscvdpspn 0, 1 +; P8-NEXT: lis 4, 32639 +; P8-NEXT: ori 4, 4, 65535 +; P8-NEXT: mffprwz 3, 0 +; P8-NEXT: clrlwi 3, 3, 1 +; P8-NEXT: sub 3, 4, 3 +; P8-NEXT: rldicl 3, 3, 1, 63 ; P8-NEXT: blr ; ; P9-LABEL: abs_isinfornanf: @@ -123,12 +112,15 @@ entry: define zeroext i1 @abs_isinfornan(double %x) { ; P8-LABEL: abs_isinfornan: ; P8: # %bb.0: # %entry -; P8-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; P8-NEXT: xsabsdp 0, 1 -; P8-NEXT: lfs 1, .LCPI4_0@toc@l(3) -; P8-NEXT: li 3, 1 -; P8-NEXT: fcmpu 0, 0, 1 -; P8-NEXT: isellt 3, 0, 3 +; P8-NEXT: mffprd 3, 1 +; P8-NEXT: li 4, -33 +; P8-NEXT: rldicl 4, 4, 47, 1 +; P8-NEXT: sradi 5, 4, 63 +; P8-NEXT: clrldi 3, 3, 1 +; P8-NEXT: rldicl 6, 3, 1, 63 +; P8-NEXT: subc 3, 4, 3 +; P8-NEXT: adde 3, 6, 5 +; P8-NEXT: xori 3, 3, 1 ; P8-NEXT: blr ; ; P9-LABEL: abs_isinfornan: @@ -147,53 +139,18 @@ entry: define zeroext i1 @abs_isinfornanq(fp128 %x) { ; P8-LABEL: abs_isinfornanq: ; P8: # %bb.0: # %entry -; P8-NEXT: mflr 0 -; 
P8-NEXT: stdu 1, -112(1) -; P8-NEXT: std 0, 128(1) -; P8-NEXT: .cfi_def_cfa_offset 112 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: .cfi_offset r30, -16 -; P8-NEXT: .cfi_offset v30, -48 -; P8-NEXT: .cfi_offset v31, -32 -; P8-NEXT: li 3, 64 ; P8-NEXT: xxswapd 0, 34 -; P8-NEXT: std 30, 96(1) # 8-byte Folded Spill -; P8-NEXT: stvx 30, 1, 3 # 16-byte Folded Spill -; P8-NEXT: li 3, 80 -; P8-NEXT: stvx 31, 1, 3 # 16-byte Folded Spill -; P8-NEXT: addi 3, 1, 48 +; P8-NEXT: addi 3, 1, -16 +; P8-NEXT: li 4, -3 ; P8-NEXT: stxvd2x 0, 0, 3 -; P8-NEXT: lbz 4, 63(1) -; P8-NEXT: clrlwi 4, 4, 25 -; P8-NEXT: stb 4, 63(1) -; P8-NEXT: lxvd2x 0, 0, 3 -; P8-NEXT: addis 3, 2, .LCPI5_0@toc@ha -; P8-NEXT: addi 3, 3, .LCPI5_0@toc@l -; P8-NEXT: xxswapd 63, 0 -; P8-NEXT: lxvd2x 0, 0, 3 -; P8-NEXT: vmr 2, 31 -; P8-NEXT: xxswapd 62, 0 -; P8-NEXT: vmr 3, 30 -; P8-NEXT: bl __eqkf2 -; P8-NEXT: nop -; P8-NEXT: cntlzw 3, 3 -; P8-NEXT: vmr 2, 31 -; P8-NEXT: vmr 3, 30 -; P8-NEXT: srwi 30, 3, 5 -; P8-NEXT: bl __unordkf2 -; P8-NEXT: nop -; P8-NEXT: cntlzw 3, 3 -; P8-NEXT: li 4, 80 -; P8-NEXT: lvx 31, 1, 4 # 16-byte Folded Reload -; P8-NEXT: li 4, 64 -; P8-NEXT: srwi 3, 3, 5 -; P8-NEXT: lvx 30, 1, 4 # 16-byte Folded Reload +; P8-NEXT: rldicl 4, 4, 47, 1 +; P8-NEXT: ld 3, -8(1) +; P8-NEXT: sradi 5, 4, 63 +; P8-NEXT: clrldi 3, 3, 1 +; P8-NEXT: rldicl 6, 3, 1, 63 +; P8-NEXT: subc 3, 4, 3 +; P8-NEXT: adde 3, 6, 5 ; P8-NEXT: xori 3, 3, 1 -; P8-NEXT: or 3, 3, 30 -; P8-NEXT: ld 30, 96(1) # 8-byte Folded Reload -; P8-NEXT: addi 1, 1, 112 -; P8-NEXT: ld 0, 16(1) -; P8-NEXT: mtlr 0 ; P8-NEXT: blr ; ; P9-LABEL: abs_isinfornanq: diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll new file mode 100644 index 0000000000000..63ab22e96ad2a --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: 
opt -codegenprepare -S -mtriple=aarch64 < %s | FileCheck %s + +define i1 @test_is_inf_or_nan(double %arg) { +; CHECK-LABEL: define i1 @test_is_inf_or_nan( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 519) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp ueq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf_or_nan(double %arg) { +; CHECK-LABEL: define i1 @test_is_not_inf_or_nan( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 504) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp one double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_inf(double %arg) { +; CHECK-LABEL: define i1 @test_is_inf( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 516) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp oeq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf(double %arg) { +; CHECK-LABEL: define i1 @test_is_not_inf( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 507) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp une double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define @test_vec_is_inf_or_nan( %arg) { +; CHECK-LABEL: define @test_vec_is_inf_or_nan( +; CHECK-SAME: [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.is.fpclass.nxv2f64( [[ARG]], i32 519) +; CHECK-NEXT: ret [[TMP1]] +; + %abs = tail call @llvm.fabs.nxv2f64( %arg) + %ret = fcmp ueq %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_not_inf_or_nan( %arg) { +; CHECK-LABEL: define @test_vec_is_not_inf_or_nan( +; CHECK-SAME: 
[[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.is.fpclass.nxv2f64( [[ARG]], i32 504) +; CHECK-NEXT: ret [[TMP1]] +; + %abs = tail call @llvm.fabs.nxv2f64( %arg) + %ret = fcmp one %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_inf( %arg) { +; CHECK-LABEL: define @test_vec_is_inf( +; CHECK-SAME: [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.is.fpclass.nxv2f64( [[ARG]], i32 516) +; CHECK-NEXT: ret [[TMP1]] +; + %abs = tail call @llvm.fabs.nxv2f64( %arg) + %ret = fcmp oeq %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_not_inf( %arg) { +; CHECK-LABEL: define @test_vec_is_not_inf( +; CHECK-SAME: [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.is.fpclass.nxv2f64( [[ARG]], i32 507) +; CHECK-NEXT: ret [[TMP1]] +; + %abs = tail call @llvm.fabs.nxv2f64( %arg) + %ret = fcmp une %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define i1 @test_fp128_is_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 519) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 504) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_inf(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_inf( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 516) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 
@llvm.fabs.f128(fp128 %arg) + %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_not_inf( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 507) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll new file mode 100644 index 0000000000000..7c00218bdcce3 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -codegenprepare -S -mtriple=riscv64 < %s | FileCheck %s + +define i1 @test_is_inf_or_nan(double %arg) { +; CHECK-LABEL: define i1 @test_is_inf_or_nan( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 519) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp ueq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf_or_nan(double %arg) { +; CHECK-LABEL: define i1 @test_is_not_inf_or_nan( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 504) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp one double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_inf(double %arg) { +; CHECK-LABEL: define i1 @test_is_inf( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 516) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = 
fcmp oeq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf(double %arg) { +; CHECK-LABEL: define i1 @test_is_not_inf( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 507) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp une double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define @test_vec_is_inf_or_nan( %arg) { +; CHECK-LABEL: define @test_vec_is_inf_or_nan( +; CHECK-SAME: [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.is.fpclass.nxv4f64( [[ARG]], i32 519) +; CHECK-NEXT: ret [[TMP1]] +; + %abs = tail call @llvm.fabs.nxv4f64( %arg) + %ret = fcmp ueq %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_not_inf_or_nan( %arg) { +; CHECK-LABEL: define @test_vec_is_not_inf_or_nan( +; CHECK-SAME: [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.is.fpclass.nxv4f64( [[ARG]], i32 504) +; CHECK-NEXT: ret [[TMP1]] +; + %abs = tail call @llvm.fabs.nxv4f64( %arg) + %ret = fcmp one %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_inf( %arg) { +; CHECK-LABEL: define @test_vec_is_inf( +; CHECK-SAME: [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.is.fpclass.nxv4f64( [[ARG]], i32 516) +; CHECK-NEXT: ret [[TMP1]] +; + %abs = tail call @llvm.fabs.nxv4f64( %arg) + %ret = fcmp oeq %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define @test_vec_is_not_inf( %arg) { +; CHECK-LABEL: define @test_vec_is_not_inf( +; CHECK-SAME: [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.is.fpclass.nxv4f64( [[ARG]], i32 507) +; CHECK-NEXT: ret [[TMP1]] +; + %abs = tail call @llvm.fabs.nxv4f64( %arg) + %ret = fcmp une %abs, splat (double 0x7FF0000000000000) + ret %ret +} + +define i1 @test_fp128_is_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 
@llvm.is.fpclass.f128(fp128 [[ARG]], i32 519) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 504) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_inf(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_inf( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 516) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_not_inf( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 507) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll new file mode 100644 index 0000000000000..525caeb3e79a1 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -codegenprepare -S -mtriple=x86_64-unknown-unknown < %s | FileCheck %s + +define i1 @test_is_inf_or_nan(double %arg) { +; CHECK-LABEL: define i1 @test_is_inf_or_nan( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: 
[[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 519) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp ueq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf_or_nan(double %arg) { +; CHECK-LABEL: define i1 @test_is_not_inf_or_nan( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 504) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp one double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_inf(double %arg) { +; CHECK-LABEL: define i1 @test_is_inf( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 516) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp oeq double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define i1 @test_is_not_inf(double %arg) { +; CHECK-LABEL: define i1 @test_is_not_inf( +; CHECK-SAME: double [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 507) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call double @llvm.fabs.f64(double %arg) + %ret = fcmp une double %abs, 0x7FF0000000000000 + ret i1 %ret +} + +define <4 x i1> @test_vec_is_inf_or_nan(<4 x double> %arg) { +; CHECK-LABEL: define <4 x i1> @test_vec_is_inf_or_nan( +; CHECK-SAME: <4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.is.fpclass.v4f64(<4 x double> [[ARG]], i32 519) +; CHECK-NEXT: ret <4 x i1> [[TMP1]] +; + %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg) + %ret = fcmp ueq <4 x double> %abs, splat (double 0x7FF0000000000000) + ret <4 x i1> %ret +} + +define <4 x i1> @test_vec_is_not_inf_or_nan(<4 x double> %arg) { +; CHECK-LABEL: define <4 x i1> @test_vec_is_not_inf_or_nan( +; CHECK-SAME: <4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call <4 
x i1> @llvm.is.fpclass.v4f64(<4 x double> [[ARG]], i32 504) +; CHECK-NEXT: ret <4 x i1> [[TMP1]] +; + %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg) + %ret = fcmp one <4 x double> %abs, splat (double 0x7FF0000000000000) + ret <4 x i1> %ret +} + +define <4 x i1> @test_vec_is_inf(<4 x double> %arg) { +; CHECK-LABEL: define <4 x i1> @test_vec_is_inf( +; CHECK-SAME: <4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.is.fpclass.v4f64(<4 x double> [[ARG]], i32 516) +; CHECK-NEXT: ret <4 x i1> [[TMP1]] +; + %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg) + %ret = fcmp oeq <4 x double> %abs, splat (double 0x7FF0000000000000) + ret <4 x i1> %ret +} + +define <4 x i1> @test_vec_is_not_inf(<4 x double> %arg) { +; CHECK-LABEL: define <4 x i1> @test_vec_is_not_inf( +; CHECK-SAME: <4 x double> [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.is.fpclass.v4f64(<4 x double> [[ARG]], i32 507) +; CHECK-NEXT: ret <4 x i1> [[TMP1]] +; + %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg) + %ret = fcmp une <4 x double> %abs, splat (double 0x7FF0000000000000) + ret <4 x i1> %ret +} + +define i1 @test_fp128_is_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 519) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 504) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 
@test_fp128_is_inf(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_inf( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 516) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_fp128_is_not_inf(fp128 %arg) { +; CHECK-LABEL: define i1 @test_fp128_is_not_inf( +; CHECK-SAME: fp128 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 507) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg) + %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000 + ret i1 %ret +} + +define i1 @test_x86_fp80_is_inf_or_nan(x86_fp80 %arg) { +; CHECK-LABEL: define i1 @test_x86_fp80_is_inf_or_nan( +; CHECK-SAME: x86_fp80 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[ARG]], i32 519) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg) + %ret = fcmp ueq x86_fp80 %abs, 0xK7FFF8000000000000000 + ret i1 %ret +} + +define i1 @test_x86_fp80_is_not_inf_or_nan(x86_fp80 %arg) { +; CHECK-LABEL: define i1 @test_x86_fp80_is_not_inf_or_nan( +; CHECK-SAME: x86_fp80 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[ARG]], i32 504) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg) + %ret = fcmp one x86_fp80 %abs, 0xK7FFF8000000000000000 + ret i1 %ret +} + +define i1 @test_x86_fp80_is_inf(x86_fp80 %arg) { +; CHECK-LABEL: define i1 @test_x86_fp80_is_inf( +; CHECK-SAME: x86_fp80 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[ARG]], i32 516) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg) + %ret = fcmp oeq x86_fp80 %abs, 0xK7FFF8000000000000000 + ret i1 %ret +} + +define i1 
@test_x86_fp80_is_not_inf(x86_fp80 %arg) { +; CHECK-LABEL: define i1 @test_x86_fp80_is_not_inf( +; CHECK-SAME: x86_fp80 [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[ARG]], i32 507) +; CHECK-NEXT: ret i1 [[TMP1]] +; + %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg) + %ret = fcmp une x86_fp80 %abs, 0xK7FFF8000000000000000 + ret i1 %ret +}