// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
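// Checks that when the data layout declares alloca address space 5 (the
// AMDGPU private address space), allocas are created as !llvm.ptr<5> and
// addrspacecast back to generic pointers before the stores and map clauses
// that use them.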
- module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @omp_target_region_() {
%0 = llvm.mlir.constant(20 : i32) : i32
%1 = llvm.mlir.constant(10 : i32) : i32
%2 = llvm.mlir.constant(1 : i64) : i64
- %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr<5>
%4 = llvm.mlir.constant(1 : i64) : i64
- %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr<5>
%6 = llvm.mlir.constant(1 : i64) : i64
- %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
- llvm.store %1, %3 : i32, !llvm.ptr
- llvm.store %0, %5 : i32, !llvm.ptr
- %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr<5>
+ %8 = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
+ %9 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
+ %10 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %1, %8 : i32, !llvm.ptr
+ llvm.store %0, %9 : i32, !llvm.ptr
+ %map1 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%10 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
- %8 = llvm.load %arg0 : !llvm.ptr -> i32
- %9 = llvm.load %arg1 : !llvm.ptr -> i32
- %10 = llvm.add %8, %9 : i32
- llvm.store %10, %arg2 : i32, !llvm.ptr
+ %11 = llvm.load %arg0 : !llvm.ptr -> i32
+ %12 = llvm.load %arg1 : !llvm.ptr -> i32
+ %13 = llvm.add %11, %12 : i32
+ llvm.store %13, %arg2 : i32, !llvm.ptr
omp.terminator
}
llvm.return
@@ -31,19 +34,22 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
// CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 256, i32 -1, i32 -1, i32 0, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] }
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{[^_]+}}_{{[^_]+}}_omp_target_region__l{{[0-9]+}}(ptr %[[DYN_PTR:.*]], ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
- // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8
- // CHECK: store ptr %[[ADDR_A]], ptr %[[TMP_A]], align 8
+ // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8, addrspace(5)
+ // CHECK: %[[ASCAST_A:.*]] = addrspacecast ptr addrspace(5) %[[TMP_A]] to ptr
+ // CHECK: store ptr %[[ADDR_A]], ptr %[[ASCAST_A]], align 8
// CHECK: %[[TMP_B:.*]] = alloca ptr, align 8
- // CHECK: store ptr %[[ADDR_B]], ptr %[[TMP_B]], align 8
+ // CHECK: %[[ASCAST_B:.*]] = addrspacecast ptr addrspace(5) %[[TMP_B]] to ptr
+ // CHECK: store ptr %[[ADDR_B]], ptr %[[ASCAST_B]], align 8
// CHECK: %[[TMP_C:.*]] = alloca ptr, align 8
- // CHECK: store ptr %[[ADDR_C]], ptr %[[TMP_C]], align 8
+ // CHECK: %[[ASCAST_C:.*]] = addrspacecast ptr addrspace(5) %[[TMP_C]] to ptr
+ // CHECK: store ptr %[[ADDR_C]], ptr %[[ASCAST_C]], align 8
// CHECK: %[[INIT:.*]] = call i32 @__kmpc_target_init(ptr @[[KERNEL_ENV]], ptr %[[DYN_PTR]])
// CHECK-NEXT: %[[CMP:.*]] = icmp eq i32 %[[INIT]], -1
// CHECK-NEXT: br i1 %[[CMP]], label %[[LABEL_ENTRY:.*]], label %[[LABEL_EXIT:.*]]
// CHECK: [[LABEL_ENTRY]]:
- // CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[TMP_A]], align 8
- // CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[TMP_B]], align 8
- // CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[TMP_C]], align 8
+ // CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[ASCAST_A]], align 8
+ // CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[ASCAST_B]], align 8
+ // CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[ASCAST_C]], align 8
// CHECK-NEXT: br label %[[LABEL_TARGET:.*]]
// CHECK: [[LABEL_TARGET]]:
// CHECK: %[[A:.*]] = load i32, ptr %[[PTR_A]], align 4