Skip to content

Commit e767f43

Browse files
committed
common/cuda: Add public functions for cuMemAlloc and cuFree
Signed-off-by: William Zhang <[email protected]>
1 parent f6ca907 commit e767f43

File tree

4 files changed

+87
-0
lines changed

4 files changed

+87
-0
lines changed

opal/datatype/opal_datatype_cuda.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,59 @@ bool opal_cuda_check_one_buf(char *buf, opal_convertor_t *convertor )
111111
return ( ftable.gpu_is_gpu_buffer(buf, convertor));
112112
}
113113

114+
/*
115+
* This function allocates a buffer using either cuMemAlloc
116+
* or malloc, depending on if the convertor flag CONVERTOR_CUDA
117+
* is set.
118+
*
119+
* @param size Size of buffer to be allocated
120+
* @param convertor The convertor with flags describing if the buf
121+
* should be a Host or Cuda buffer.
122+
*
123+
* @returns void * A pointer to the newly allocated buffer.
124+
*/
125+
void *opal_cuda_malloc(size_t size, opal_convertor_t* convertor)
126+
{
127+
int res;
128+
void* buffer;
129+
if (!(convertor->flags & CONVERTOR_CUDA)) {
130+
return malloc(size);
131+
}
132+
res = ftable.gpu_malloc(buffer, size);
133+
if (res != 0 ) {
134+
opal_output(0, "CUDA: Error in cuMemAlloc: size=%d",
135+
(int)size);
136+
abort();
137+
} else {
138+
return buffer;
139+
}
140+
}
141+
142+
/*
143+
* This function frees a buffer using either cuMemFree() or free(),
144+
* depending on if the convertor flag CONVERTOR_CUDA is set.
145+
*
146+
* @param buffer Pointer to buffer to be freed
147+
* @param convertor The convertor with flags describing if the buf
148+
* should be a Host or Cuda buffer.
149+
*
150+
*/
151+
void opal_cuda_free(void *buffer, opal_convertor_t* convertor)
152+
{
153+
int res;
154+
if (!(convertor->flags & CONVERTOR_CUDA)) {
155+
free(buffer);
156+
return;
157+
}
158+
res = ftable.gpu_free(buffer);
159+
if (res != 0 ) {
160+
opal_output(0, "CUDA: Error in cuMemFree: ptr=%p",
161+
buffer);
162+
abort();
163+
}
164+
return;
165+
}
166+
114167
/*
115168
* With CUDA enabled, all contiguous copies will pass through this function.
116169
* Therefore, the first check is to see if the convertor is a GPU buffer.

opal/datatype/opal_datatype_cuda.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,16 @@ struct opal_common_cuda_function_table {
1818
int (*gpu_cu_memcpy_async)(void*, const void*, size_t, opal_convertor_t*);
1919
int (*gpu_cu_memcpy)(void*, const void*, size_t);
2020
int (*gpu_memmove)(void*, void*, size_t);
21+
int (*gpu_malloc)(void*, size_t);
22+
int (*gpu_free)(void*);
2123
};
2224
typedef struct opal_common_cuda_function_table opal_common_cuda_function_table_t;
2325

2426
void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf);
2527
bool opal_cuda_check_bufs(char *dest, char *src);
2628
bool opal_cuda_check_one_buf(char *buf, opal_convertor_t *convertor );
29+
void* opal_cuda_malloc(size_t size, opal_convertor_t* convertor);
30+
void opal_cuda_free(void * buffer, opal_convertor_t* convertor);
2731
void* opal_cuda_memcpy(void * dest, const void * src, size_t size, opal_convertor_t* convertor);
2832
void* opal_cuda_memcpy_sync(void * dest, const void * src, size_t size);
2933
void* opal_cuda_memmove(void * dest, void * src, size_t size);

opal/mca/common/cuda/common_cuda.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,8 @@ static int mca_common_cuda_stage_two_init(opal_common_cuda_function_table_t *fta
501501
ftable->gpu_cu_memcpy_async = &mca_common_cuda_cu_memcpy_async;
502502
ftable->gpu_cu_memcpy = &mca_common_cuda_cu_memcpy;
503503
ftable->gpu_memmove = &mca_common_cuda_memmove;
504+
ftable->gpu_malloc = &mca_common_cuda_malloc;
505+
ftable->gpu_free = &mca_common_cuda_free;
504506

505507
opal_output_verbose(30, mca_common_cuda_output,
506508
"CUDA: support functions initialized");
@@ -1922,6 +1924,31 @@ static int mca_common_cuda_cu_memcpy(void *dest, const void *src, size_t size)
19221924
return OPAL_SUCCESS;
19231925
}
19241926

1927+
int mca_common_cuda_malloc(void *dptr, size_t size)
1928+
{
1929+
int res;
1930+
res = cuFunc.cuMemAlloc((CUdeviceptr *)dptr, size);
1931+
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
1932+
opal_output(0, "CUDA: cuMemAlloc failed: res=%d",
1933+
res);
1934+
return res;
1935+
}
1936+
return 0;
1937+
}
1938+
1939+
int mca_common_cuda_free(void *dptr)
1940+
{
1941+
int res;
1942+
res = cuFunc.cuMemFree((CUdeviceptr)dptr);
1943+
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
1944+
opal_output(0, "CUDA: cuMemFree failed: res=%d",
1945+
res);
1946+
return res;
1947+
}
1948+
return 0;
1949+
}
1950+
1951+
19251952
static int mca_common_cuda_memmove(void *dest, void *src, size_t size)
19261953
{
19271954
CUdeviceptr tmp;

opal/mca/common/cuda/common_cuda.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ OPAL_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);
5252

5353
OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_rcache_common_cuda_reg_t *rget_reg);
5454

55+
OPAL_DECLSPEC int mca_common_cuda_malloc(void *buffer, size_t size);
56+
OPAL_DECLSPEC int mca_common_cuda_free(void *buffer);
57+
5558
OPAL_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
5659
struct mca_btl_base_descriptor_t *, int *done);
5760

0 commit comments

Comments
 (0)