|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s |
| 3 | +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s |
| 4 | + |
| | +; The module names the amdgcn--amdpal triple below, while both RUN lines pass |
| | +; -mtriple=amdgcn-mesa-mesa3d on the llc command line. |
| | +; NOTE(review): llc documents -mtriple as overriding the triple in the input |
| | +; file, so mesa3d is presumably the triple actually exercised - confirm. |
| 5 | +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p32:32:32-p64:32:32-p65:32:32" |
| 6 | +target triple = "amdgcn--amdpal" |
| 7 | + |
| | +; Entry point using the amdgpu_cs calling convention. %0 is converted to an |
| | +; addrspace(1) pointer and scalar floats are loaded from byte offsets 0 to 48. |
| | +; |
| | +; Only two loads reach the branch condition %43: the float at offset 44 (%36) |
| | +; and the frozen float at offset 4 (%.fr.i0), each compared "olt 0.0". The |
| | +; remaining loads only feed freeze/fadd/fabs/fcmp/and chains whose final |
| | +; results are never used. |
| | +; |
| | +; The autogenerated assertions expect two 128-bit loads (offsets 32 and 0), the |
| | +; two compares, an s_and_b32 / s_and_saveexec_b32 pair, and s_endpgm directly |
| | +; after that. In the gfx1100 output the second load writes v[0:3], which |
| | +; overlaps the first load's v[2:5]; only v5 and v1 are read afterwards. |
| | +; NOTE(review): the file does not say which bug this reproducer guards |
| | +; against - consider adding a one-line reference to the original report. |
| 8 | +define amdgpu_cs void @_amdgpu_cs_main(i64 %0) { |
| 9 | +; GFX10-LABEL: _amdgpu_cs_main: |
| 10 | +; GFX10: ; %bb.0: ; %.entry |
| 11 | +; GFX10-NEXT: s_clause 0x1 |
| 12 | +; GFX10-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:32 |
| 13 | +; GFX10-NEXT: global_load_dwordx4 v[6:9], v[0:1], off |
| 14 | +; GFX10-NEXT: s_waitcnt vmcnt(1) |
| 15 | +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v5 |
| 16 | +; GFX10-NEXT: s_waitcnt vmcnt(0) |
| 17 | +; GFX10-NEXT: v_cmp_gt_f32_e64 s0, 0, v7 |
| 18 | +; GFX10-NEXT: s_and_b32 s0, vcc_lo, s0 |
| 19 | +; GFX10-NEXT: s_and_saveexec_b32 s1, s0 |
| 20 | +; GFX10-NEXT: s_endpgm |
| 21 | +; |
| 22 | +; GFX11-LABEL: _amdgpu_cs_main: |
| 23 | +; GFX11: ; %bb.0: ; %.entry |
| 24 | +; GFX11-NEXT: s_clause 0x1 |
| 25 | +; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:32 |
| 26 | +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off |
| 27 | +; GFX11-NEXT: s_waitcnt vmcnt(1) |
| 28 | +; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v5 |
| 29 | +; GFX11-NEXT: s_waitcnt vmcnt(0) |
| 30 | +; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0, v1 |
| 31 | +; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 |
| 32 | +; GFX11-NEXT: s_and_saveexec_b32 s1, s0 |
| 33 | +; GFX11-NEXT: s_endpgm |
| 34 | +.entry: |
| 35 | + %1 = inttoptr i64 %0 to ptr addrspace(1) |
| 36 | + %2 = load float, ptr addrspace(1) %1, align 4 |
| 37 | + %3 = call float @llvm.fabs.f32(float %2) |
| 38 | + %4 = fcmp olt float %3, 1.000000e+00 |
| 39 | + %5 = getelementptr i8, ptr addrspace(1) %1, i64 4 |
| 40 | + %6 = load float, ptr addrspace(1) %5, align 4 |
| | + ; Offset-4 value; its frozen copy feeds %38, one half of the branch condition. |
| 41 | + %.fr.i0 = freeze float %6 |
| 42 | + %7 = getelementptr i8, ptr addrspace(1) %1, i64 16 |
| 43 | + %8 = load float, ptr addrspace(1) %7, align 4 |
| 44 | + %.fr123.i0 = freeze float %8 |
| 45 | + %9 = fadd float %.fr123.i0, 0.000000e+00 |
| 46 | + %10 = call float @llvm.fabs.f32(float %9) |
| | + ; "and" with a constant false operand; %11 (like %16, %21 and %30 below) is |
| | + ; never used afterwards. |
| 47 | + %11 = and i1 false, %4 |
| 48 | + %12 = getelementptr i8, ptr addrspace(1) %1, i64 20 |
| 49 | + %13 = load float, ptr addrspace(1) %12, align 4 |
| 50 | + %14 = call float @llvm.fabs.f32(float %13) |
| 51 | + %15 = fcmp olt float %14, 1.000000e+00 |
| 52 | + %16 = and i1 %15, false |
| 53 | + %17 = getelementptr i8, ptr addrspace(1) %1, i64 24 |
| 54 | + %18 = load float, ptr addrspace(1) %17, align 4 |
| 55 | + %19 = call float @llvm.fabs.f32(float %18) |
| 56 | + %20 = fcmp olt float %19, 1.000000e+00 |
| 57 | + %21 = and i1 %20, false |
| 58 | + %22 = getelementptr i8, ptr addrspace(1) %1, i64 28 |
| 59 | + %23 = load float, ptr addrspace(1) %22, align 4 |
| 60 | + %.fr128.i0 = freeze float %23 |
| 61 | + %24 = fadd float %.fr128.i0, 0.000000e+00 |
| 62 | + %25 = call float @llvm.fabs.f32(float %24) |
| 63 | + %26 = getelementptr i8, ptr addrspace(1) %1, i64 36 |
| 64 | + %27 = load float, ptr addrspace(1) %26, align 4 |
| 65 | + %28 = call float @llvm.fabs.f32(float %27) |
| 66 | + %29 = fcmp olt float %28, 1.000000e+00 |
| 67 | + %30 = and i1 %29, false |
| 68 | + %31 = getelementptr i8, ptr addrspace(1) %1, i64 40 |
| 69 | + %32 = load float, ptr addrspace(1) %31, align 4 |
| 70 | + %.fr133.i0 = freeze float %32 |
| 71 | + %33 = fadd float %.fr133.i0, 0.000000e+00 |
| 72 | + %34 = call float @llvm.fabs.f32(float %33) |
| 73 | + %35 = getelementptr i8, ptr addrspace(1) %1, i64 44 |
| | + ; Offset-44 value; %37 is the other half of the branch condition. |
| 74 | + %36 = load float, ptr addrspace(1) %35, align 4 |
| 75 | + %37 = fcmp olt float %36, 0.000000e+00 |
| 76 | + %.i112 = getelementptr i8, ptr addrspace(1) %1, i64 8 |
| 77 | + %.ii1 = load float, ptr addrspace(1) %.i112, align 4 |
| 78 | + %.i213 = getelementptr i8, ptr addrspace(1) %1, i64 12 |
| 79 | + %.ii2 = load float, ptr addrspace(1) %.i213, align 4 |
| 80 | + %.fr.i1 = freeze float %.ii1 |
| 81 | + %.fr.i2 = freeze float %.ii2 |
| 82 | + %38 = fcmp olt float %.fr.i0, 0.000000e+00 |
| 83 | + %39 = fadd float %.fr.i1, 0.000000e+00 |
| 84 | + %40 = call float @llvm.fabs.f32(float %39) |
| 85 | + %41 = fadd float %.fr.i2, 0.000000e+00 |
| 86 | + %42 = call float @llvm.fabs.f32(float %41) |
| | + ; Branch condition: (float at offset 44 < 0.0) and (frozen float at offset 4 < 0.0). |
| 87 | + %43 = and i1 %37, %38 |
| 88 | + %.i124 = getelementptr i8, ptr addrspace(1) %1, i64 32 |
| 89 | + %.ii125 = load float, ptr addrspace(1) %.i124, align 4 |
| 90 | + %.fr128.i1 = freeze float %.ii125 |
| 91 | + %44 = fadd float %.fr128.i1, 0.000000e+00 |
| 92 | + %45 = call float @llvm.fabs.f32(float %44) |
| 93 | + %.i234 = getelementptr i8, ptr addrspace(1) %1, i64 48 |
| 94 | + %.ii235 = load float, ptr addrspace(1) %.i234, align 4 |
| 95 | + %.fr133.i2 = freeze float %.ii235 |
| 96 | + %46 = fadd float %.fr133.i2, 0.000000e+00 |
| 97 | + %47 = call float @llvm.fabs.f32(float %46) |
| 98 | + br i1 %43, label %48, label %53 |
| 99 | + |
| | +; Reached only when %43 is true. The <4 x i32> value %52, loaded through an |
| | +; addrspace(4) pointer built from (s.getpc & 1), is never used; the assertions |
| | +; above expect no instruction between s_and_saveexec_b32 and s_endpgm. |
| 100 | +48: ; preds = %.entry |
| 101 | + %49 = call i64 @llvm.amdgcn.s.getpc() |
| 102 | + %50 = and i64 %49, 1 |
| 103 | + %51 = inttoptr i64 %50 to ptr addrspace(4) |
| 104 | + %52 = load <4 x i32>, ptr addrspace(4) %51, align 16 |
| 105 | + br label %53 |
| 106 | + |
| 107 | +53: ; preds = %48, %.entry |
| 108 | + ret void |
| 109 | +} |
| 110 | + |
| | +; Intrinsics called by @_amdgpu_cs_main; both declarations share attribute group #0. |
| 111 | +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) |
| 112 | +declare float @llvm.fabs.f32(float) #0 |
| 113 | + |
| 114 | +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) |
| 115 | +declare noundef i64 @llvm.amdgcn.s.getpc() #0 |
| 116 | + |
| 117 | +; uselistorder directives |
| | +; Eleven indexes, one per call to @llvm.fabs.f32 in @_amdgpu_cs_main, listed |
| | +; from 10 down to 0. |
| | +; NOTE(review): presumably carried over from the reduced reproducer; confirm |
| | +; whether the test still needs a fixed use-list order. |
| 118 | +uselistorder ptr @llvm.fabs.f32, { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } |
| 119 | + |
| 120 | +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
0 commit comments