Skip to content

Commit f6e3dbc

Browse files
authored
Revert "[flang][cuda] Use cuda runtime API" (#104232)
Reverts #103488
1 parent a845dba commit f6e3dbc

File tree

4 files changed

+52
-22
lines changed

4 files changed

+52
-22
lines changed

flang/include/flang/Runtime/CUDA/allocator.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
#include "flang/Runtime/entry-names.h"
1414

1515
#define CUDA_REPORT_IF_ERROR(expr) \
16-
[](cudaError_t err) { \
17-
if (err == cudaSuccess) \
16+
[](CUresult result) { \
17+
if (!result) \
1818
return; \
19-
const char *name = cudaGetErrorName(err); \
19+
const char *name = nullptr; \
20+
cuGetErrorName(result, &name); \
2021
if (!name) \
2122
name = "<unknown>"; \
2223
Terminator terminator{__FILE__, __LINE__}; \

flang/runtime/CUDA/CMakeLists.txt

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,14 @@
77
#===------------------------------------------------------------------------===#
88

99
include_directories(${CUDAToolkit_INCLUDE_DIRS})
10+
find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
1011

1112
add_flang_library(CufRuntime
1213
allocator.cpp
1314
descriptor.cpp
1415
)
15-
16-
if (BUILD_SHARED_LIBS)
17-
set(CUF_LIBRARY ${CUDA_LIBRARIES})
18-
else()
19-
set(CUF_LIBRARY ${CUDA_cudart_static_LIBRARY})
20-
endif()
21-
2216
target_link_libraries(CufRuntime
2317
PRIVATE
2418
FortranRuntime
25-
${CUF_LIBRARY}
19+
${CUDA_RUNTIME_LIBRARY}
2620
)

flang/runtime/CUDA/allocator.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#include "flang/ISO_Fortran_binding_wrapper.h"
1616
#include "flang/Runtime/allocator-registry.h"
1717

18-
#include "cuda_runtime.h"
18+
#include "cuda.h"
1919

2020
namespace Fortran::runtime::cuda {
2121
extern "C" {
@@ -34,28 +34,32 @@ void RTDEF(CUFRegisterAllocator)() {
3434

3535
void *CUFAllocPinned(std::size_t sizeInBytes) {
3636
void *p;
37-
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
37+
CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
3838
return p;
3939
}
4040

41-
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
41+
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); }
4242

4343
void *CUFAllocDevice(std::size_t sizeInBytes) {
44-
void *p;
45-
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
46-
return p;
44+
CUdeviceptr p = 0;
45+
CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
46+
return reinterpret_cast<void *>(p);
4747
}
4848

49-
void CUFFreeDevice(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
49+
void CUFFreeDevice(void *p) {
50+
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
51+
}
5052

5153
void *CUFAllocManaged(std::size_t sizeInBytes) {
52-
void *p;
54+
CUdeviceptr p = 0;
5355
CUDA_REPORT_IF_ERROR(
54-
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
56+
cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
5557
return reinterpret_cast<void *>(p);
5658
}
5759

58-
void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
60+
void CUFFreeManaged(void *p) {
61+
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
62+
}
5963

6064
void *CUFAllocUnified(std::size_t sizeInBytes) {
6165
// Call alloc managed for the time being.

flang/unittests/Runtime/CUDA/AllocatorCUF.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "flang/Runtime/allocatable.h"
1515
#include "flang/Runtime/allocator-registry.h"
1616

17-
#include "cuda_runtime.h"
17+
#include "cuda.h"
1818

1919
using namespace Fortran::runtime;
2020
using namespace Fortran::runtime::cuda;
@@ -25,9 +25,38 @@ static OwningPtr<Descriptor> createAllocatable(
2525
CFI_attribute_allocatable);
2626
}
2727

28+
thread_local static int32_t defaultDevice = 0;
29+
30+
CUdevice getDefaultCuDevice() {
31+
CUdevice device;
32+
CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
33+
return device;
34+
}
35+
36+
class ScopedContext {
37+
public:
38+
ScopedContext() {
39+
// Static reference to CUDA primary context for device ordinal
40+
// defaultDevice.
41+
static CUcontext context = [] {
42+
CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
43+
CUcontext ctx;
44+
// Note: this does not affect the current context.
45+
CUDA_REPORT_IF_ERROR(
46+
cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
47+
return ctx;
48+
}();
49+
50+
CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
51+
}
52+
53+
~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
54+
};
55+
2856
TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
2957
using Fortran::common::TypeCategory;
3058
RTNAME(CUFRegisterAllocator)();
59+
ScopedContext ctx;
3160
// REAL(4), DEVICE, ALLOCATABLE :: a(:)
3261
auto a{createAllocatable(TypeCategory::Real, 4)};
3362
a->SetAllocIdx(kDeviceAllocatorPos);
@@ -45,6 +74,7 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
4574
TEST(AllocatableCUFTest, SimplePinnedAllocate) {
4675
using Fortran::common::TypeCategory;
4776
RTNAME(CUFRegisterAllocator)();
77+
ScopedContext ctx;
4878
// INTEGER(4), PINNED, ALLOCATABLE :: a(:)
4979
auto a{createAllocatable(TypeCategory::Integer, 4)};
5080
EXPECT_FALSE(a->HasAddendum());
@@ -63,6 +93,7 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) {
6393
TEST(AllocatableCUFTest, DescriptorAllocationTest) {
6494
using Fortran::common::TypeCategory;
6595
RTNAME(CUFRegisterAllocator)();
96+
ScopedContext ctx;
6697
// REAL(4), DEVICE, ALLOCATABLE :: a(:)
6798
auto a{createAllocatable(TypeCategory::Real, 4)};
6899
Descriptor *desc = nullptr;

0 commit comments

Comments
 (0)