|
| 1 | +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
| 2 | +// RUN: -mcode-object-version=4 -DUSER -x hip -o %t_4.bc %s |
| 3 | + |
| 4 | +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
| 5 | +// RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s |
| 6 | + |
| 7 | +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
| 8 | +// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s |
| 9 | + |
| 10 | +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \ |
| 11 | +// RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s |
| 12 | + |
| 13 | +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ |
| 14 | +// RUN: %t_4.bc -mlink-builtin-bitcode %t_0.bc -o - |\ |
| 15 | +// RUN: FileCheck -check-prefix=LINKED4 %s |
| 16 | + |
| 17 | +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ |
| 18 | +// RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\ |
| 19 | +// RUN: FileCheck -check-prefix=LINKED5 %s |
| 20 | + |
| 21 | +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \ |
| 22 | +// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\ |
| 23 | +// RUN: FileCheck -check-prefix=LINKED6 %s |
| 24 | + |
| 25 | +#include "Inputs/cuda.h" |
| 26 | + |
| 27 | +// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400 |
| 28 | +// LINKED4-LABEL: bar |
| 29 | +// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 30 | +// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 |
| 31 | +// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 32 | +// LINKED4: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 |
| 33 | +// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 34 | +// LINKED4: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 |
| 35 | +// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] |
| 36 | +// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 37 | + |
| 38 | +// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 39 | +// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 |
| 40 | +// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 41 | +// LINKED4: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 |
| 42 | +// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 43 | +// LINKED4: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 |
| 44 | +// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] |
| 45 | +// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 46 | + |
| 47 | +// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 48 | +// LINKED4-NOT: icmp sge i32 %{{.*}}, 500 |
| 49 | +// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 50 | +// LINKED4: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 |
| 51 | +// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 52 | +// LINKED4: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 |
| 53 | +// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] |
| 54 | +// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 55 | +// LINKED4: "amdhsa_code_object_version", i32 400 |
| 56 | + |
| 57 | +// LINKED5: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 |
| 58 | +// LINKED5-LABEL: bar |
| 59 | +// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 60 | +// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 |
| 61 | +// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 62 | +// LINKED5: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 |
| 63 | +// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 64 | +// LINKED5: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 |
| 65 | +// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] |
| 66 | +// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 67 | + |
| 68 | +// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 69 | +// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 |
| 70 | +// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 71 | +// LINKED5: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 |
| 72 | +// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 73 | +// LINKED5: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 |
| 74 | +// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] |
| 75 | +// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 76 | + |
| 77 | +// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 78 | +// LINKED5-NOT: icmp sge i32 %{{.*}}, 500 |
| 79 | +// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 80 | +// LINKED5: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 |
| 81 | +// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 82 | +// LINKED5: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 |
| 83 | +// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] |
| 84 | +// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 85 | +// LINKED5: "amdhsa_code_object_version", i32 500 |
| 86 | + |
| 87 | +// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600 |
| 88 | +// LINKED6-LABEL: bar |
| 89 | +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 90 | +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 |
| 91 | +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 92 | +// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12 |
| 93 | +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 94 | +// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4 |
| 95 | +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]] |
| 96 | +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 97 | + |
| 98 | +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 99 | +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 |
| 100 | +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 101 | +// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14 |
| 102 | +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 103 | +// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6 |
| 104 | +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]] |
| 105 | +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 106 | + |
| 107 | +// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}} |
| 108 | +// LINKED6-NOT: icmp sge i32 %{{.*}}, 500 |
| 109 | +// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| 110 | +// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16 |
| 111 | +// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 112 | +// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8 |
| 113 | +// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]] |
| 114 | +// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef |
| 115 | +// LINKED6: "amdhsa_code_object_version", i32 600 |
| 116 | + |
| 117 | +#ifdef DEVICELIB |
| 118 | +__device__ void bar(int *x, int *y, int *z) |
| 119 | +{ |
| 120 | + *x = __builtin_amdgcn_workgroup_size_x(); |
| 121 | + *y = __builtin_amdgcn_workgroup_size_y(); |
| 122 | + *z = __builtin_amdgcn_workgroup_size_z(); |
| 123 | +} |
| 124 | +#endif |
| 125 | + |
| 126 | +#ifdef USER |
| 127 | +__device__ void bar(int *x, int *y, int *z); |
| 128 | +__device__ void foo() |
| 129 | +{ |
| 130 | + int *x, *y, *z; |
| 131 | + bar(x, y, z); |
| 132 | +} |
| 133 | +#endif |
0 commit comments