Skip to content

Commit b220c5d

Browse files
wckzhangPalmer Stolly
and
Palmer Stolly
committed
mtl/ofi: Add memory Registration & Deregistration, pass descriptors to Libfabric
Adds functions for memory registration & deregistration. Modifies MTL/OFI to use those functions to register memory, pass the registered buffer's memory descriptor as a parameter to the Libfabric API, and deregister the memory after the request is finished. This patch, alongside an earlier commit, fixes an issue with using heterogeneous device send buffers for the MTLs. Signed-off-by: William Zhang <[email protected]> Co-authored-by: William Zhang <[email protected]> Co-authored-by: Palmer Stolly <[email protected]>
1 parent e767f43 commit b220c5d

File tree

3 files changed

+308
-19
lines changed

3 files changed

+308
-19
lines changed

ompi/mca/mtl/base/mtl_base_datatype.h

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,83 @@
2525
#include "ompi/datatype/ompi_datatype.h"
2626
#include "opal/datatype/opal_convertor.h"
2727
#include "opal/datatype/opal_datatype_internal.h"
28+
#if OPAL_CUDA_SUPPORT
29+
#include "opal/mca/common/cuda/common_cuda.h"
30+
#include "opal/datatype/opal_convertor.h"
31+
#include "opal/datatype/opal_datatype_cuda.h"
32+
#endif
2833

2934
#ifndef MTL_BASE_DATATYPE_H_INCLUDED
3035
#define MTL_BASE_DATATYPE_H_INCLUDED
3136

37+
#if OPAL_CUDA_SUPPORT
38+
static int
39+
ompi_mtl_cuda_datatype_pack(struct opal_convertor_t *convertor,
40+
void **buffer,
41+
size_t *buffer_len,
42+
bool *freeAfter)
43+
{
44+
45+
struct iovec iov;
46+
uint32_t iov_count = 1;
47+
int is_cuda = convertor->flags & CONVERTOR_CUDA;
48+
49+
#if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT)
50+
if (convertor->pDesc &&
51+
!(convertor->flags & CONVERTOR_COMPLETED) &&
52+
opal_datatype_is_contiguous_memory_layout(convertor->pDesc,
53+
convertor->count)) {
54+
*freeAfter = false;
55+
*buffer = convertor->pBaseBuf;
56+
*buffer_len = convertor->local_size;
57+
return OPAL_SUCCESS;
58+
}
59+
#endif
60+
61+
opal_convertor_get_packed_size(convertor, buffer_len);
62+
*freeAfter = false;
63+
if( 0 == *buffer_len ) {
64+
*buffer = NULL;
65+
return OMPI_SUCCESS;
66+
}
67+
iov.iov_len = *buffer_len;
68+
iov.iov_base = NULL;
69+
/* opal_convertor_need_buffers always returns true
70+
* if CONVERTOR_CUDA is set, so unset temporarily
71+
*/
72+
convertor->flags &= ~CONVERTOR_CUDA;
73+
74+
if (opal_convertor_need_buffers(convertor)) {
75+
if (is_cuda) {
76+
convertor->flags |= CONVERTOR_CUDA;
77+
}
78+
iov.iov_base = opal_cuda_malloc(*buffer_len, convertor);
79+
if (NULL == iov.iov_base) return OMPI_ERR_OUT_OF_RESOURCE;
80+
*freeAfter = true;
81+
} else if (is_cuda) {
82+
convertor->flags |= CONVERTOR_CUDA;
83+
}
84+
85+
opal_convertor_pack( convertor, &iov, &iov_count, buffer_len );
86+
87+
*buffer = iov.iov_base;
88+
89+
return OMPI_SUCCESS;
90+
}
91+
#endif
92+
3293
__opal_attribute_always_inline__ static inline int
3394
ompi_mtl_datatype_pack(struct opal_convertor_t *convertor,
3495
void **buffer,
3596
size_t *buffer_len,
3697
bool *freeAfter)
3798
{
99+
#if OPAL_CUDA_SUPPORT
100+
return ompi_mtl_cuda_datatype_pack(convertor,
101+
buffer,
102+
buffer_len,
103+
freeAfter);
104+
#endif
38105
struct iovec iov;
39106
uint32_t iov_count = 1;
40107

@@ -71,13 +138,56 @@ ompi_mtl_datatype_pack(struct opal_convertor_t *convertor,
71138
return OMPI_SUCCESS;
72139
}
73140

141+
#if OPAL_CUDA_SUPPORT
142+
static int
143+
ompi_mtl_cuda_datatype_recv_buf(struct opal_convertor_t *convertor,
144+
void ** buffer,
145+
size_t *buffer_len,
146+
bool *free_on_error)
147+
{
148+
int is_cuda = convertor->flags & CONVERTOR_CUDA;
149+
opal_convertor_get_packed_size(convertor, buffer_len);
150+
*free_on_error = false;
151+
if( 0 == *buffer_len ) {
152+
*buffer = NULL;
153+
*buffer_len = 0;
154+
return OMPI_SUCCESS;
155+
}
156+
/* opal_convertor_need_buffers always returns true
157+
* if CONVERTOR_CUDA is set, so unset temporarily
158+
*/
159+
convertor->flags &= ~CONVERTOR_CUDA;
160+
if (opal_convertor_need_buffers(convertor)) {
161+
if (is_cuda) {
162+
convertor->flags |= CONVERTOR_CUDA;
163+
}
164+
*buffer = opal_cuda_malloc(*buffer_len, convertor);
165+
*free_on_error = true;
166+
} else {
167+
if (is_cuda) {
168+
convertor->flags |= CONVERTOR_CUDA;
169+
}
170+
*buffer = convertor->pBaseBuf +
171+
convertor->use_desc->desc[convertor->use_desc->used].end_loop.first_elem_disp;
172+
}
173+
return OMPI_SUCCESS;
174+
175+
}
176+
#endif
74177

75178
__opal_attribute_always_inline__ static inline int
76179
ompi_mtl_datatype_recv_buf(struct opal_convertor_t *convertor,
77180
void ** buffer,
78181
size_t *buffer_len,
79182
bool *free_on_error)
80183
{
184+
#if OPAL_CUDA_SUPPORT
185+
return ompi_mtl_cuda_datatype_recv_buf(convertor,
186+
buffer,
187+
buffer_len,
188+
free_on_error);
189+
#endif
190+
81191
opal_convertor_get_packed_size(convertor, buffer_len);
82192
*free_on_error = false;
83193
if( 0 == *buffer_len ) {
@@ -95,12 +205,48 @@ ompi_mtl_datatype_recv_buf(struct opal_convertor_t *convertor,
95205
return OMPI_SUCCESS;
96206
}
97207

208+
#if OPAL_CUDA_SUPPORT
209+
static int
210+
ompi_mtl_cuda_datatype_unpack(struct opal_convertor_t *convertor,
211+
void *buffer,
212+
size_t buffer_len) {
213+
struct iovec iov;
214+
uint32_t iov_count = 1;
215+
int is_cuda = convertor->flags & CONVERTOR_CUDA;
216+
/* opal_convertor_need_buffers always returns true
217+
* if CONVERTOR_CUDA is set, so unset temporarily
218+
*/
219+
convertor->flags &= ~CONVERTOR_CUDA;
220+
221+
if (buffer_len > 0 && opal_convertor_need_buffers(convertor)) {
222+
iov.iov_len = buffer_len;
223+
iov.iov_base = buffer;
224+
225+
if (is_cuda) {
226+
convertor->flags |= CONVERTOR_CUDA;
227+
}
228+
opal_convertor_unpack(convertor, &iov, &iov_count, &buffer_len );
229+
230+
opal_cuda_free(buffer, convertor);
231+
} else if (is_cuda) {
232+
convertor->flags |= CONVERTOR_CUDA;
233+
}
234+
235+
return OMPI_SUCCESS;
236+
237+
}
238+
#endif
98239

99240
__opal_attribute_always_inline__ static inline int
100241
ompi_mtl_datatype_unpack(struct opal_convertor_t *convertor,
101242
void *buffer,
102243
size_t buffer_len)
103244
{
245+
#if OPAL_CUDA_SUPPORT
246+
return ompi_mtl_cuda_datatype_unpack(convertor,
247+
buffer,
248+
buffer_len);
249+
#endif
104250
struct iovec iov;
105251
uint32_t iov_count = 1;
106252

ompi/mca/mtl/ofi/help-mtl-ofi.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,12 @@ recoverable and your application is likely to abort.
7777
Error: %s (%d)
7878
[message too big]
7979
Message size %llu bigger than supported by selected transport. Max = %llu
80+
81+
[Buffer Memory Registration Failed]
82+
Open MPI failed to register your buffer.
83+
This error is fatal, your job will abort
84+
85+
Buffer Type: %s
86+
Buffer Address: %p
87+
Buffer Length: %d
88+
Error: %s (%zd)

0 commit comments

Comments
 (0)