// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
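// Checks that when the data layout declares alloca address space 5 (the
// AMDGPU private address space), allocas are created as !llvm.ptr<5> and
// addrspacecast back to generic pointers before the stores and map clauses
// that use them.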
- module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
llvm.func @omp_target_region_() {
%0 = llvm.mlir.constant(20 : i32) : i32
%1 = llvm.mlir.constant(10 : i32) : i32
%2 = llvm.mlir.constant(1 : i64) : i64
- %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr<5>
%4 = llvm.mlir.constant(1 : i64) : i64
- %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr<5>
%6 = llvm.mlir.constant(1 : i64) : i64
- %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
- llvm.store %1, %3 : i32, !llvm.ptr
- llvm.store %0, %5 : i32, !llvm.ptr
- %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
- %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr<5>
+ %8 = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
+ %9 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
+ %10 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %1, %8 : i32, !llvm.ptr
+ llvm.store %0, %9 : i32, !llvm.ptr
+ %map1 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%10 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
- %8 = llvm.load %arg0 : !llvm.ptr -> i32
- %9 = llvm.load %arg1 : !llvm.ptr -> i32
- %10 = llvm.add %8, %9 : i32
- llvm.store %10, %arg2 : i32, !llvm.ptr
+ %11 = llvm.load %arg0 : !llvm.ptr -> i32
+ %12 = llvm.load %arg1 : !llvm.ptr -> i32
+ %13 = llvm.add %11, %12 : i32
+ llvm.store %13, %arg2 : i32, !llvm.ptr
omp.terminator
}
llvm.return
@@ -31,19 +34,22 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
// CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 256, i32 -1, i32 -1, i32 0, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] }
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{[^_]+}}_{{[^_]+}}_omp_target_region__l{{[0-9]+}}(ptr %[[DYN_PTR:.*]], ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
- // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8
- // CHECK: store ptr %[[ADDR_A]], ptr %[[TMP_A]], align 8
+ // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8, addrspace(5)
+ // CHECK: %[[ASCAST_A:.*]] = addrspacecast ptr addrspace(5) %[[TMP_A]] to ptr
+ // CHECK: store ptr %[[ADDR_A]], ptr %[[ASCAST_A]], align 8
// CHECK: %[[TMP_B:.*]] = alloca ptr, align 8
- // CHECK: store ptr %[[ADDR_B]], ptr %[[TMP_B]], align 8
+ // CHECK: %[[ASCAST_B:.*]] = addrspacecast ptr addrspace(5) %[[TMP_B]] to ptr
+ // CHECK: store ptr %[[ADDR_B]], ptr %[[ASCAST_B]], align 8
// CHECK: %[[TMP_C:.*]] = alloca ptr, align 8
- // CHECK: store ptr %[[ADDR_C]], ptr %[[TMP_C]], align 8
+ // CHECK: %[[ASCAST_C:.*]] = addrspacecast ptr addrspace(5) %[[TMP_C]] to ptr
+ // CHECK: store ptr %[[ADDR_C]], ptr %[[ASCAST_C]], align 8
// CHECK: %[[INIT:.*]] = call i32 @__kmpc_target_init(ptr @[[KERNEL_ENV]], ptr %[[DYN_PTR]])
// CHECK-NEXT: %[[CMP:.*]] = icmp eq i32 %[[INIT]], -1
// CHECK-NEXT: br i1 %[[CMP]], label %[[LABEL_ENTRY:.*]], label %[[LABEL_EXIT:.*]]
// CHECK: [[LABEL_ENTRY]]:
- // CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[TMP_A]], align 8
- // CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[TMP_B]], align 8
- // CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[TMP_C]], align 8
+ // CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[ASCAST_A]], align 8
+ // CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[ASCAST_B]], align 8
+ // CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[ASCAST_C]], align 8
// CHECK-NEXT: br label %[[LABEL_TARGET:.*]]
// CHECK: [[LABEL_TARGET]]:
// CHECK: %[[A:.*]] = load i32, ptr %[[PTR_A]], align 4