diff --git a/flang/lib/Optimizer/Transforms/CUFCommon.cpp b/flang/lib/Optimizer/Transforms/CUFCommon.cpp
index 5eca86529f9e1..162df8f9cab9c 100644
--- a/flang/lib/Optimizer/Transforms/CUFCommon.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFCommon.cpp
@@ -22,9 +22,6 @@ mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod,
   mlir::OpBuilder builder(ctx);
   auto gpuMod = builder.create<mlir::gpu::GPUModuleOp>(mod.getLoc(),
                                                        cudaDeviceModuleName);
-  llvm::SmallVector<mlir::Attribute> targets;
-  targets.push_back(mlir::NVVM::NVVMTargetAttr::get(ctx));
-  gpuMod.setTargetsAttr(builder.getArrayAttr(targets));
   mlir::Block::iterator insertPt(mod.getBodyRegion().front().end());
   symTab.insert(gpuMod, insertPt);
   return gpuMod;
diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir
index 49bb5bdf5e6bc..abf2d56695b17 100644
--- a/flang/test/Fir/CUDA/cuda-alloc-free.fir
+++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir
@@ -73,7 +73,7 @@ func.func @_QPtest_type() {
 // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64
 // CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
 
-gpu.module @cuda_device_mod [#nvvm.target] {
+gpu.module @cuda_device_mod {
   gpu.func @_QMalloc() kernel {
     %0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QMallocEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
     gpu.return
diff --git a/flang/test/Fir/CUDA/cuda-constructor-2.f90 b/flang/test/Fir/CUDA/cuda-constructor-2.f90
index 99386abc4fafd..901497e2cde55 100644
--- a/flang/test/Fir/CUDA/cuda-constructor-2.f90
+++ b/flang/test/Fir/CUDA/cuda-constructor-2.f90
@@ -10,11 +10,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry>>
 
   }
 
-  gpu.module @cuda_device_mod [#nvvm.target] {
+  gpu.module @cuda_device_mod {
   }
 }
 
-// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK: gpu.module @cuda_device_mod
 
 // CHECK: llvm.func internal @__cudaFortranConstructor() {
 // CHECK-DAG: %[[MODULE:.*]] = cuf.register_module @cuda_device_mod -> !llvm.ptr
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
index c83a938d5af21..8cac643b27c34 100644
--- a/flang/test/Fir/CUDA/cuda-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -5,9 +5,9 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
 
   fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
 
-  gpu.module @cuda_device_mod [#nvvm.target] {
+  gpu.module @cuda_device_mod {
   }
 }
 
-// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK: gpu.module @cuda_device_mod
 // CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
diff --git a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
index 18b56a491cd65..6707572efb5a8 100644
--- a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
@@ -25,7 +25,7 @@ // Test that global used in device function are flagged with the correct
 // CHECK: fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %[[CONV]], %{{.*}}) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
 // CHECK: fir.global linkonce @_QQcl[[SYMBOL]] {data_attr = #cuf.cuda<constant>} constant : !fir.char<1,32>
 
-// CHECK-LABEL: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK-LABEL: gpu.module @cuda_device_mod
 // CHECK: fir.global linkonce @_QQclX6995815537abaf90e86ce166af128f3a
 
 // -----
@@ -51,5 +51,5 @@ // Test that global used in device function are flagged with the correct
 // CHECK: fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %[[CONV]], %{{.*}}) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
 // CHECK: fir.global linkonce @_QQcl[[SYMBOL]] constant : !fir.char<1,32>
 
-// CHECK-LABEL: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK-LABEL: gpu.module @cuda_device_mod
 // CHECK-NOT: fir.global linkonce @_QQclX6995815537abaf90e86ce166af128f3a