diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h index b73bc390ea8c9..e9f61932230e9 100644 --- a/flang/include/flang/Runtime/CUDA/common.h +++ b/flang/include/flang/Runtime/CUDA/common.h @@ -30,7 +30,7 @@ static constexpr unsigned kDeviceToDevice = 2; const char *name = cudaGetErrorName(err); \ if (!name) \ name = ""; \ - Terminator terminator{__FILE__, __LINE__}; \ + Fortran::runtime::Terminator terminator{__FILE__, __LINE__}; \ terminator.Crash("'%s' failed with '%s'", #expr, name); \ }(expr) diff --git a/flang/include/flang/Runtime/CUDA/kernel.h b/flang/include/flang/Runtime/CUDA/kernel.h new file mode 100644 index 0000000000000..cf07d874a082c --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/kernel.h @@ -0,0 +1,27 @@ +//===-- include/flang/Runtime/CUDA/kernel.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_CUDA_KERNEL_H_ +#define FORTRAN_RUNTIME_CUDA_KERNEL_H_ + +#include "flang/Runtime/entry-names.h" +#include +#include + +extern "C" { + +// This function uses intptr_t instead of CUDA's unsigned int to match +// the type of MLIR's index type. This avoids the need for casts in the +// generated MLIR code. +void RTDEF(CUFLaunchKernel)(const void *kernelName, intptr_t gridX, + intptr_t gridY, intptr_t gridZ, intptr_t blockX, intptr_t blockY, + intptr_t blockZ, int32_t smem, void **params, void **extra); + +} // extern "C" + +#endif // FORTRAN_RUNTIME_CUDA_KERNEL_H_ diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index 86523b419f871..ce87f3efdc363 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME} allocator.cpp allocatable.cpp descriptor.cpp + kernel.cpp memory.cpp registration.cpp ) diff --git a/flang/runtime/CUDA/kernel.cpp b/flang/runtime/CUDA/kernel.cpp new file mode 100644 index 0000000000000..f81153a1af4bc --- /dev/null +++ b/flang/runtime/CUDA/kernel.cpp @@ -0,0 +1,33 @@ +//===-- runtime/CUDA/kernel.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/kernel.h" +#include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" + +#include "cuda_runtime.h" + +extern "C" { + +void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY, + intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ, + int32_t smem, void **params, void **extra) { + dim3 gridDim; + gridDim.x = gridX; + gridDim.y = gridY; + gridDim.z = gridZ; + dim3 blockDim; + blockDim.x = blockX; + blockDim.y = blockY; + blockDim.z = blockZ; + cudaStream_t stream = 0; + CUDA_REPORT_IF_ERROR( + cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, stream)); +} + +} // extern "C"