Skip to content

Commit 22074d7

Browse files
committed
opal: rework mpool and rcache frameworks
This commit rewrites both the mpool and rcache frameworks. Summary of changes:

- Before this change a significant portion of the rcache functionality lived in mpool components. This meant that it was impossible to add a new memory pool to use with RDMA networks (ugni, openib, etc.) without duplicating the functionality of an existing mpool component. All the registration functionality has been removed from the mpool and placed in the rcache framework.
- All registration cache mpool components (udreg, grdma, gpusm, rgpusm) have been changed to rcache components. rcaches are allocated and released in the same way mpool components were.
- It is now valid to pass NULL as the resources argument when creating an rcache. At this time the gpusm and rgpusm components support this. All other rcache components require non-NULL resources.
- A new mpool component has been added: hugepage. This component supports huge page allocations on Linux.
- Memory pools are now allocated using "hints". Each mpool component is queried with the hints and returns a priority. The current hints supported are NULL (uses posix_memalign/malloc), page_size=x (huge page mpool), and mpool=x.
- The sm mpool has been moved to common/sm. This reflects that the sm mpool is specialized and not meant for any general allocations. This mpool may be moved back into the mpool framework if there is any objection.
- The opal_free_list_init arguments have been updated. The unused0 argument is now used to pass in the registration cache module. The mpool registration flags are now rcache registration flags.
- All components have been updated to make use of the new framework interfaces.

As this commit makes significant changes to both the mpool and rcache frameworks, both versions have been bumped to 3.0.0.

Signed-off-by: Nathan Hjelm <[email protected]>
1 parent a80e5e5 commit 22074d7

File tree

125 files changed

+4522
-5045
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

125 files changed

+4522
-5045
lines changed

ompi/mca/crcp/base/base.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
34
* University Research and Technology
@@ -9,6 +10,8 @@
910
* University of Stuttgart. All rights reserved.
1011
* Copyright (c) 2004-2005 The Regents of the University of California.
1112
* All rights reserved.
13+
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
14+
* reserved.
1215
* $COPYRIGHT$
1316
*
1417
* Additional copyrights may follow
@@ -145,7 +148,7 @@ BEGIN_C_DECLS
145148
ompi_crcp_base_btl_state_t*
146149
ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
147150
struct mca_btl_base_endpoint_t* endpoint,
148-
mca_mpool_base_registration_t* registration,
151+
mca_rcache_base_registration_t* registration,
149152
struct opal_convertor_t* convertor,
150153
size_t reserve,
151154
size_t* size,
@@ -154,7 +157,7 @@ BEGIN_C_DECLS
154157
ompi_crcp_base_btl_state_t*
155158
ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl,
156159
struct mca_btl_base_endpoint_t* endpoint,
157-
mca_mpool_base_registration_t* registration,
160+
mca_rcache_base_registration_t* registration,
158161
struct opal_convertor_t* convertor,
159162
size_t reserve,
160163
size_t* size,

ompi/mca/crcp/base/crcp_base_fns.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ ompi_crcp_base_none_btl_free( struct mca_btl_base_module_t* btl,
336336
ompi_crcp_base_btl_state_t*
337337
ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
338338
struct mca_btl_base_endpoint_t* endpoint,
339-
mca_mpool_base_registration_t* registration,
339+
mca_rcache_base_registration_t* registration,
340340
struct opal_convertor_t* convertor,
341341
size_t reserve,
342342
size_t* size,
@@ -349,7 +349,7 @@ ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
349349
ompi_crcp_base_btl_state_t*
350350
ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl,
351351
struct mca_btl_base_endpoint_t* endpoint,
352-
mca_mpool_base_registration_t* registration,
352+
mca_rcache_base_registration_t* registration,
353353
struct opal_convertor_t* convertor,
354354
size_t reserve,
355355
size_t* size,

ompi/mca/crcp/crcp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_free_fn_t)
235235
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_prepare_fn_t)
236236
( struct mca_btl_base_module_t* btl,
237237
struct mca_btl_base_endpoint_t* endpoint,
238-
mca_mpool_base_registration_t* registration,
238+
mca_rcache_base_registration_t* registration,
239239
struct opal_convertor_t* convertor,
240240
size_t reserve,
241241
size_t* size,

ompi/mca/pml/base/pml_base_bsend.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
34
* University Research and Technology
@@ -12,6 +13,8 @@
1213
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
1314
* Copyright (c) 2015 Research Organization for Information Science
1415
* and Technology (RIST). All rights reserved.
16+
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
17+
* reserved.
1518
* $COPYRIGHT$
1619
*
1720
* Additional copyrights may follow
@@ -55,10 +58,7 @@ extern char *ompi_pml_base_bsend_allocator_name;
5558
/*
5659
* Routine to return pages to sub-allocator as needed
5760
*/
58-
static void* mca_pml_bsend_alloc_segment(
59-
struct mca_mpool_base_module_t* module,
60-
size_t* size_inout,
61-
mca_mpool_base_registration_t** registration)
61+
static void* mca_pml_bsend_alloc_segment(void *ctx, size_t *size_inout)
6262
{
6363
void *addr;
6464
size_t size = *size_inout;
@@ -70,7 +70,6 @@ static void* mca_pml_bsend_alloc_segment(
7070
addr = mca_pml_bsend_addr;
7171
mca_pml_bsend_addr += size;
7272
*size_inout = size;
73-
if (NULL != registration) *registration = NULL;
7473
return addr;
7574
}
7675

@@ -232,7 +231,7 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request)
232231

233232
/* allocate a buffer to hold packed message */
234233
sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
235-
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL);
234+
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0);
236235
if(NULL == sendreq->req_addr) {
237236
/* release resources when request is freed */
238237
sendreq->req_base.req_pml_complete = true;
@@ -287,7 +286,7 @@ int mca_pml_base_bsend_request_alloc(ompi_request_t* request)
287286

288287
/* allocate a buffer to hold packed message */
289288
sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
290-
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL);
289+
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0);
291290
if(NULL == sendreq->req_addr) {
292291
/* release resources when request is freed */
293292
sendreq->req_base.req_pml_complete = true;
@@ -321,7 +320,7 @@ void* mca_pml_base_bsend_request_alloc_buf( size_t length )
321320

322321
/* allocate a buffer to hold packed message */
323322
buf = mca_pml_bsend_allocator->alc_alloc(
324-
mca_pml_bsend_allocator, length, 0, NULL);
323+
mca_pml_bsend_allocator, length, 0);
325324
if(NULL == buf) {
326325
/* release resources when request is freed */
327326
OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);

ompi/mca/pml/ob1/pml_ob1_component.c

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,9 @@ mca_pml_base_component_2_0_0_t mca_pml_ob1_component = {
7979
.pmlm_finalize = mca_pml_ob1_component_fini,
8080
};
8181

82-
void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool,
83-
size_t* size,
84-
mca_mpool_base_registration_t** registration);
82+
void *mca_pml_ob1_seg_alloc (void *ctx, size_t* size);
8583

86-
void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool,
87-
void* segment );
84+
void mca_pml_ob1_seg_free (void *ctx, void *segment);
8885

8986
static inline int mca_pml_ob1_param_register_int(
9087
const char* param_name,
@@ -354,13 +351,12 @@ int mca_pml_ob1_component_fini(void)
354351
return OMPI_SUCCESS;
355352
}
356353

357-
void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool,
358-
size_t* size,
359-
mca_mpool_base_registration_t** registration) {
354+
void *mca_pml_ob1_seg_alloc (void *ctx, size_t *size)
355+
{
360356
return malloc(*size);
361357
}
362358

363-
void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool,
364-
void* segment ) {
359+
void mca_pml_ob1_seg_free (void *ctx, void *segment)
360+
{
365361
free(segment);
366362
}

ompi/mca/pml/ob1/pml_ob1_recvfrag.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ do { \
8181
buffers[0].addr = (char*) \
8282
mca_pml_ob1.allocator->alc_alloc( mca_pml_ob1.allocator, \
8383
buffers[0].len, \
84-
0, NULL); \
84+
0); \
8585
_ptr = (unsigned char*)(buffers[0].addr); \
8686
macro_segments[0].seg_addr.pval = buffers[0].addr; \
8787
} \

ompi/mca/vprotocol/base/vprotocol_base_request.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ int mca_vprotocol_base_request_parasite(void)
4242
pml_fl_save.fl_max_to_alloc,
4343
pml_fl_save.fl_num_per_alloc,
4444
pml_fl_save.fl_mpool,
45-
pml_fl_save.fl_mpool_reg_flags,
46-
0,
45+
pml_fl_save.fl_rcache_reg_flags,
46+
pml_fl_save.fl_rcache,
4747
pml_fl_save.item_init,
4848
pml_fl_save.ctx);
4949
if(OMPI_SUCCESS != ret) return ret;
@@ -71,8 +71,8 @@ int mca_vprotocol_base_request_parasite(void)
7171
pml_fl_save.fl_max_to_alloc,
7272
pml_fl_save.fl_num_per_alloc,
7373
pml_fl_save.fl_mpool,
74-
pml_fl_save.fl_mpool_reg_flags,
75-
0,
74+
pml_fl_save.fl_rcache_reg_flags,
75+
pml_fl_save.fl_rcache,
7676
pml_fl_save.item_init,
7777
pml_fl_save.ctx);
7878
if(OMPI_SUCCESS != ret) return ret;

ompi/mpi/c/alloc_mem.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
34
* University Research and Technology
@@ -12,6 +13,8 @@
1213
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
1314
* Copyright (c) 2015 Research Organization for Information Science
1415
* and Technology (RIST). All rights reserved.
16+
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
17+
* reserved.
1518
* $COPYRIGHT$
1619
*
1720
* Additional copyrights may follow
@@ -43,6 +46,8 @@ static const char FUNC_NAME[] = "MPI_Alloc_mem";
4346

4447
int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
4548
{
49+
char info_value[MPI_MAX_INFO_VAL + 1];
50+
char *mpool_hints = NULL;
4651

4752
if (MPI_PARAM_CHECK) {
4853
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
@@ -69,7 +74,16 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
6974

7075
OPAL_CR_ENTER_LIBRARY();
7176

72-
*((void **) baseptr) = mca_mpool_base_alloc((size_t) size, (struct opal_info_t*)info);
77+
if (MPI_INFO_NULL != info) {
78+
int flag;
79+
(void) ompi_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag);
80+
if (flag) {
81+
mpool_hints = info_value;
82+
}
83+
}
84+
85+
*((void **) baseptr) = mca_mpool_base_alloc ((size_t) size, (struct opal_info_t*)info,
86+
mpool_hints);
7387
OPAL_CR_EXIT_LIBRARY();
7488
if (NULL == *((void **) baseptr)) {
7589
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM,

ompi/runtime/ompi_mpi_init.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -620,13 +620,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
620620

621621
/* Select which MPI components to use */
622622

623-
if (OMPI_SUCCESS !=
624-
(ret = mca_mpool_base_init(OPAL_ENABLE_PROGRESS_THREADS,
625-
ompi_mpi_thread_multiple))) {
626-
error = "mca_mpool_base_init() failed";
627-
goto error;
628-
}
629-
630623
if (OMPI_SUCCESS !=
631624
(ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS,
632625
ompi_mpi_thread_multiple))) {

opal/class/opal_free_list.c

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
#include "opal/align.h"
2929
#include "opal/util/output.h"
3030
#include "opal/mca/mpool/mpool.h"
31+
#include "opal/mca/mpool/base/base.h"
32+
#include "opal/mca/rcache/rcache.h"
3133

3234
typedef struct opal_free_list_item_t opal_free_list_memory_t;
3335

@@ -49,17 +51,22 @@ static void opal_free_list_construct(opal_free_list_t* fl)
4951
fl->fl_payload_buffer_alignment = 0;
5052
fl->fl_frag_class = OBJ_CLASS(opal_free_list_item_t);
5153
fl->fl_mpool = NULL;
54+
fl->fl_rcache = NULL;
5255
/* default flags */
53-
fl->fl_mpool_reg_flags = MCA_MPOOL_FLAGS_CACHE_BYPASS |
54-
MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM;
56+
fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS |
57+
MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM;
5558
fl->ctx = NULL;
5659
OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
5760
}
5861

5962
static void opal_free_list_allocation_release (opal_free_list_t *fl, opal_free_list_memory_t *fl_mem)
6063
{
64+
if (NULL != fl->fl_rcache) {
65+
fl->fl_rcache->rcache_deregister (fl->fl_rcache, fl_mem->registration);
66+
}
67+
6168
if (NULL != fl->fl_mpool) {
62-
fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr, fl_mem->registration);
69+
fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr);
6370
} else if (fl_mem->ptr) {
6471
free (fl_mem->ptr);
6572
}
@@ -108,8 +115,9 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
108115
opal_class_t *frag_class, size_t payload_buffer_size,
109116
size_t payload_buffer_alignment, int num_elements_to_alloc,
110117
int max_elements_to_alloc, int num_elements_per_alloc,
111-
mca_mpool_base_module_t* mpool, int mpool_reg_flags,
112-
void *unused0, opal_free_list_item_init_fn_t item_init, void *ctx)
118+
mca_mpool_base_module_t *mpool, int rcache_reg_flags,
119+
mca_rcache_base_module_t *rcache, opal_free_list_item_init_fn_t item_init,
120+
void *ctx)
113121
{
114122
/* alignment must be more than zero and power of two */
115123
if (frag_alignment <= 1 || (frag_alignment & (frag_alignment - 1))) {
@@ -137,11 +145,12 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
137145
flist->fl_max_to_alloc = max_elements_to_alloc;
138146
flist->fl_num_allocated = 0;
139147
flist->fl_num_per_alloc = num_elements_per_alloc;
140-
flist->fl_mpool = mpool;
148+
flist->fl_mpool = mpool ? mpool : mca_mpool_base_default_module;
149+
flist->fl_rcache = rcache;
141150
flist->fl_frag_alignment = frag_alignment;
142151
flist->fl_payload_buffer_alignment = payload_buffer_alignment;
143152
flist->item_init = item_init;
144-
flist->fl_mpool_reg_flags |= mpool_reg_flags;
153+
flist->fl_rcache_reg_flags |= rcache_reg_flags;
145154
flist->ctx = ctx;
146155

147156
if (num_elements_to_alloc) {
@@ -153,10 +162,10 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
153162

154163
int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
155164
{
156-
unsigned char *ptr, *mpool_alloc_ptr = NULL, *payload_ptr = NULL;
165+
unsigned char *ptr, *payload_ptr = NULL;
157166
opal_free_list_memory_t *alloc_ptr;
158167
size_t alloc_size, head_size, elem_size = 0;
159-
mca_mpool_base_registration_t *reg = NULL;
168+
mca_rcache_base_registration_t *reg = NULL;
160169
int rc = OPAL_SUCCESS;
161170

162171
if (flist->fl_max_to_alloc && (flist->fl_num_allocated + num_elements) >
@@ -187,30 +196,29 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
187196
assert (elem_size > 0);
188197

189198
/* allocate the rest from the mpool (or use memalign/malloc) */
190-
if(flist->fl_mpool != NULL) {
191-
payload_ptr = mpool_alloc_ptr =
192-
(unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool,
193-
num_elements * elem_size,
194-
flist->fl_payload_buffer_alignment,
195-
flist->fl_mpool_reg_flags, &reg);
196-
} else {
197-
#ifdef HAVE_POSIX_MEMALIGN
198-
posix_memalign ((void **) &mpool_alloc_ptr, flist->fl_payload_buffer_alignment,
199-
num_elements * elem_size);
200-
payload_ptr = mpool_alloc_ptr;
201-
#else
202-
mpool_alloc_ptr = (unsigned char *) malloc (num_elements * elem_size +
203-
flist->fl_payload_buffer_alignment);
204-
payload_ptr = (unsigned char *) OPAL_ALIGN((uintptr_t)mpool_alloc_ptr,
205-
flist->fl_payload_buffer_alignment,
206-
uintptr_t);
207-
#endif
208-
}
209-
210-
if(NULL == mpool_alloc_ptr) {
199+
payload_ptr =
200+
(unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool,
201+
num_elements * elem_size,
202+
flist->fl_payload_buffer_alignment, 0);
203+
if (NULL == payload_ptr) {
211204
free(alloc_ptr);
212205
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
213206
}
207+
208+
if (flist->fl_rcache) {
209+
rc = flist->fl_rcache->rcache_register (flist->fl_rcache, payload_ptr, num_elements * elem_size,
210+
flist->fl_rcache_reg_flags, MCA_RCACHE_ACCESS_ANY, &reg);
211+
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
212+
free (alloc_ptr);
213+
if (flist->fl_mpool) {
214+
flist->fl_mpool->mpool_free (flist->fl_mpool, payload_ptr);
215+
} else {
216+
free (payload_ptr);
217+
}
218+
219+
return rc;
220+
}
221+
}
214222
}
215223

216224
/* make the alloc_ptr a list item, save the chunk in the allocations list,
@@ -219,7 +227,7 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
219227
opal_list_append(&(flist->fl_allocations), (opal_list_item_t*)alloc_ptr);
220228

221229
alloc_ptr->registration = reg;
222-
alloc_ptr->ptr = mpool_alloc_ptr;
230+
alloc_ptr->ptr = payload_ptr;
223231

224232
ptr = (unsigned char*)alloc_ptr + sizeof(opal_free_list_memory_t);
225233
ptr = OPAL_ALIGN_PTR(ptr, flist->fl_frag_alignment, unsigned char*);

0 commit comments

Comments
 (0)