common/cuda: Add public functions for cuMemAlloc and cuMemFree

wckzhang · wckzhang · commit e767f4362ace · 2021-02-26T15:08:25.000-08:00
Signed-off-by: William Zhang <wilzhang@amazon.com>
diff --git a/opal/datatype/opal_datatype_cuda.c b/opal/datatype/opal_datatype_cuda.c
@@ -111,6 +111,59 @@ bool  opal_cuda_check_one_buf(char *buf, opal_convertor_t *convertor )
     return ( ftable.gpu_is_gpu_buffer(buf, convertor));
 }
 
+/*
+ * This function allocates a buffer using either cuMemAlloc
+ * or malloc, depending on if the convertor flag CONVERTOR_CUDA
+ * is set.
+ *
+ * @param size       Size of buffer to be allocated
+ * @param convertor  The convertor with flags describing if the buf
+ *                   should be a Host or Cuda buffer.
+ *
+ * @returns void *   A pointer to the newly allocated buffer. The host
+ *                   path may return NULL; a CUDA failure aborts.
+ */
+void *opal_cuda_malloc(size_t size, opal_convertor_t* convertor)
+{
+    void *buffer = NULL;
+    if (!(convertor->flags & CONVERTOR_CUDA)) {
+        return malloc(size);
+    }
+    /* gpu_malloc stores the device pointer through its first argument,
+     * so it needs the address of buffer, not its (unset) value. */
+    if (0 != ftable.gpu_malloc(&buffer, size)) {
+        opal_output(0, "CUDA: Error in cuMemAlloc: size=%zu",
+                    size);
+        abort();
+    }
+    return buffer;
+}
+
+/*
+ * Release a buffer through cuMemFree() when the convertor carries the
+ * CONVERTOR_CUDA flag, and through free() otherwise.
+ *
+ * @param buffer     Pointer to the buffer to release
+ * @param convertor  The convertor whose flags tell whether the buffer
+ *                   is a Host or Cuda buffer.
+ *
+ */
+void opal_cuda_free(void *buffer, opal_convertor_t* convertor)
+{
+    if (convertor->flags & CONVERTOR_CUDA) {
+        /* Device buffer: go through the GPU function table. */
+        if (0 != ftable.gpu_free(buffer)) {
+            opal_output(0, "CUDA: Error in cuMemFree: ptr=%p",
+                        buffer);
+            /* A failed device free is treated as fatal. */
+            abort();
+        }
+        return;
+    }
+    /* Host buffer: plain libc free. */
+    free(buffer);
+}
+
 /*
  * With CUDA enabled, all contiguous copies will pass through this function.
  * Therefore, the first check is to see if the convertor is a GPU buffer.
diff --git a/opal/datatype/opal_datatype_cuda.h b/opal/datatype/opal_datatype_cuda.h
@@ -18,12 +18,16 @@ struct opal_common_cuda_function_table {
     int (*gpu_cu_memcpy_async)(void*, const void*, size_t, opal_convertor_t*);
     int (*gpu_cu_memcpy)(void*, const void*, size_t);
     int (*gpu_memmove)(void*, void*, size_t);
+    int (*gpu_malloc)(void*, size_t);
+    int (*gpu_free)(void*);
 };
 typedef struct opal_common_cuda_function_table opal_common_cuda_function_table_t;
 
 void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf);
 bool opal_cuda_check_bufs(char *dest, char *src);
 bool opal_cuda_check_one_buf(char *buf, opal_convertor_t *convertor );
+void* opal_cuda_malloc(size_t size, opal_convertor_t* convertor); /* cuMemAlloc or malloc, per CONVERTOR_CUDA */
+void opal_cuda_free(void * buffer, opal_convertor_t* convertor); /* cuMemFree or free, per CONVERTOR_CUDA */
 void* opal_cuda_memcpy(void * dest, const void * src, size_t size, opal_convertor_t* convertor);
 void* opal_cuda_memcpy_sync(void * dest, const void * src, size_t size);
 void* opal_cuda_memmove(void * dest, void * src, size_t size);
diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c
@@ -501,6 +501,8 @@ static int mca_common_cuda_stage_two_init(opal_common_cuda_function_table_t *fta
     ftable->gpu_cu_memcpy_async = &mca_common_cuda_cu_memcpy_async;
     ftable->gpu_cu_memcpy = &mca_common_cuda_cu_memcpy;
     ftable->gpu_memmove = &mca_common_cuda_memmove;
+    ftable->gpu_malloc = &mca_common_cuda_malloc;
+    ftable->gpu_free = &mca_common_cuda_free;
 
     opal_output_verbose(30, mca_common_cuda_output,
                         "CUDA: support functions initialized");
@@ -1922,6 +1924,31 @@ static int mca_common_cuda_cu_memcpy(void *dest, const void *src, size_t size)
     return OPAL_SUCCESS;
 }
 
+/* Allocate size bytes with cuMemAlloc. dptr is handed over as a CUdeviceptr*
+ * (storage for the result). Returns 0 on success, else the CUDA error code. */
+int mca_common_cuda_malloc(void *dptr, size_t size)
+{
+    int rc = cuFunc.cuMemAlloc((CUdeviceptr *)dptr, size);
+    if (OPAL_UNLIKELY(CUDA_SUCCESS != rc)) {
+        opal_output(0, "CUDA: cuMemAlloc failed: res=%d", rc);
+        return rc;
+    }
+    return 0;
+}
+
+/* Release memory with cuMemFree. dptr is handed over by value as a
+ * CUdeviceptr. Returns 0 on success, else the CUDA error code. */
+int mca_common_cuda_free(void *dptr)
+{
+    int rc = cuFunc.cuMemFree((CUdeviceptr)dptr);
+    if (OPAL_UNLIKELY(CUDA_SUCCESS != rc)) {
+        opal_output(0, "CUDA: cuMemFree failed: res=%d", rc);
+        return rc;
+    }
+    return 0;
+}
+
+
 static int mca_common_cuda_memmove(void *dest, void *src, size_t size)
 {
     CUdeviceptr tmp;
diff --git a/opal/mca/common/cuda/common_cuda.h b/opal/mca/common/cuda/common_cuda.h
@@ -52,6 +52,9 @@ OPAL_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);
 
 OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_rcache_common_cuda_reg_t *rget_reg);
 
+OPAL_DECLSPEC int mca_common_cuda_malloc(void *buffer, size_t size); /* buffer: address that receives the device pointer; 0 on success */
+OPAL_DECLSPEC int mca_common_cuda_free(void *buffer); /* buffer: device pointer to release; 0 on success */
+
 OPAL_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
                                          struct mca_btl_base_descriptor_t *, int *done);