// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// The aim of the test is to check the device-side LLVM IR codegen
// for the omp target parallel construct.

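// The module attributes below mirror the AMDGPU device environment: allocas
// live in address space 5 per the data layout, the triple is
// amdgcn-amd-amdhsa, and omp.is_target_device/omp.is_gpu select device-side
// codegen. Judging by the flang-mangled name _QQmain, the input corresponds
// roughly to a Fortran main program that assigns 1 to an integer `d` inside
// an `!$omp target parallel` region with `map(from: d)` (an inference from
// the IR below, not something the test states).
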
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">} {
  llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QQmain"} {
    %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
    omp.target map_entries(%0 -> %arg2 : !llvm.ptr) {
    ^bb0(%arg2: !llvm.ptr):
      omp.parallel {
        %1 = llvm.mlir.constant(1 : i32) : i32
        llvm.store %1, %arg2 : i32, !llvm.ptr
        omp.terminator
      }
      omp.terminator
    }
    llvm.return
  }
}
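
// The checks below encode the expected device codegen shape: the target
// region becomes a weak_odr amdgpu_kernel whose body runs only when
// __kmpc_target_init returns -1, and the parallel region is outlined into a
// separate function launched via __kmpc_parallel_51, with the mapped pointer
// packed into a single-element argument struct allocated in addrspace(5).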

// CHECK: define weak_odr protected amdgpu_kernel void [[FUNC0:@.*]](
// CHECK-SAME: ptr [[TMP:%.*]], ptr [[TMP0:.*]]) {
// CHECK: [[TMP1:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
// CHECK: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
// CHECK: [[STRUCTARG:%.*]] = alloca { ptr }, align 8, addrspace(5)
// CHECK: [[STRUCTARG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRUCTARG]] to ptr
// CHECK: [[TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[TMP3]] to ptr
// CHECK: store ptr [[TMP0]], ptr [[TMP4]], align 8
// CHECK: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) [[KERNEL_ENV:@.*]] to ptr), ptr [[TMP]])
// CHECK: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
// CHECK: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
// CHECK: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
// CHECK: [[GEP_:%.*]] = getelementptr { ptr }, ptr addrspace(5) [[STRUCTARG]], i32 0, i32 0
// CHECK: store ptr [[TMP6]], ptr addrspace(5) [[GEP_]], align 8
// CHECK: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
// CHECK: store ptr [[STRUCTARG_ASCAST]], ptr [[TMP7]], align 8
// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr [[FUNC1:@.*]], ptr null, ptr [[TMP2]], i64 1)
// CHECK: call void @__kmpc_target_deinit()

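// [[FUNC1]] is the outlined parallel region handed to __kmpc_parallel_51
// above; only its signature is checked here.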
// CHECK: define internal void [[FUNC1]](
// CHECK-SAME: ptr noalias noundef [[TID_ADDR_ASCAST:%.*]], ptr noalias noundef [[ZERO_ADDR_ASCAST:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
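
// For context, a [[FUNC1]] body consistent with the MLIR above would unpack
// the struct argument and perform the store from the parallel region; a
// minimal sketch, with assumed value names (not part of the checked output):
//   %gep = getelementptr { ptr }, ptr %structArg, i32 0, i32 0
//   %d = load ptr, ptr %gep, align 8
//   store i32 1, ptr %d, align 4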