Skip to content

Commit 2da22cb

Browse files
authored
Merge pull request #9308 from wckzhang/api41x
v4.1.x: common/ofi: Utilize new libfabric API to import memhooks monitor
2 parents e13b194 + 52ffd38 commit 2da22cb

File tree

5 files changed

+171
-5
lines changed

5 files changed

+171
-5
lines changed

config/opal_check_ofi.m4

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ dnl
33
dnl Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved.
44
dnl Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights
55
dnl reserved.
6+
dnl Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights
7+
dnl reserved.
68
dnl $COPYRIGHT$
79
dnl
810
dnl Additional copyrights may follow
@@ -155,6 +157,18 @@ AC_DEFUN([_OPAL_CHECK_OFI],[
155157
[AC_MSG_WARN([OFI libfabric support requested (via --with-ofi or --with-libfabric), but not found.])
156158
AC_MSG_ERROR([Cannot continue.])])
157159
])
160+
opal_ofi_import_monitor=no
161+
AS_IF([test $opal_ofi_happy = "yes"],
162+
[OPAL_CHECK_OFI_VERSION_GE([1,14],
163+
[opal_ofi_import_monitor=yes],
164+
[opal_ofi_import_monitor=no])])
165+
166+
167+
if test "$opal_ofi_import_monitor" = "yes"; then
168+
AC_DEFINE_UNQUOTED([OPAL_OFI_IMPORT_MONITOR_SUPPORT],1,
169+
[Whether libfabric supports monitor import])
170+
fi
171+
158172
])dnl
159173

160174

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* Copyright (c) 2014-2021 Cisco Systems, Inc. All rights reserved
66
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
77
* reserved.
8-
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
8+
* Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved.
99
* Copyright (c) 2020 Triad National Security, LLC. All rights
1010
* reserved.
1111
* $COPYRIGHT$
@@ -282,8 +282,7 @@ ompi_mtl_ofi_component_open(void)
282282
"provider_exclude")) {
283283
return OMPI_ERR_NOT_AVAILABLE;
284284
}
285-
286-
return OMPI_SUCCESS;
285+
return opal_common_ofi_init();
287286
}
288287

289288
static int
@@ -298,6 +297,7 @@ static int
298297
ompi_mtl_ofi_component_close(void)
299298
{
300299
opal_common_ofi_mca_deregister();
300+
opal_common_ofi_fini();
301301
return OMPI_SUCCESS;
302302
}
303303

opal/mca/btl/ofi/btl_ofi_component.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* reserved.
1515
* Copyright (c) 2018 Intel, Inc, All rights reserved
1616
*
17-
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
17+
* Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved.
1818
* Copyright (c) 2020 Triad National Security, LLC. All rights
1919
* reserved.
2020
* $COPYRIGHT$
@@ -219,7 +219,7 @@ static int mca_btl_ofi_component_register(void)
219219
static int mca_btl_ofi_component_open(void)
220220
{
221221
mca_btl_ofi_component.module_count = 0;
222-
return OPAL_SUCCESS;
222+
return opal_common_ofi_init();
223223
}
224224

225225
/*
@@ -228,6 +228,7 @@ static int mca_btl_ofi_component_open(void)
228228
static int mca_btl_ofi_component_close(void)
229229
{
230230
opal_common_ofi_mca_deregister();
231+
opal_common_ofi_fini();
231232
/* If we don't sleep, sockets provider freaks out. Ummm this is a scary comment */
232233
sleep(1);
233234
return OPAL_SUCCESS;

opal/mca/common/ofi/common_ofi.c

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
* and Technology (RIST). All rights reserved.
99
* Copyright (c) 2020-2021 Cisco Systems, Inc. All rights reserved
1010
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
11+
* Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights
12+
* reserved.
1113
* $COPYRIGHT$
1214
*
1315
* Additional copyrights may follow
@@ -38,6 +40,120 @@ OPAL_DECLSPEC opal_common_ofi_module_t opal_common_ofi = {
3840
};
3941

4042
static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream,usnic";
43+
/* Set to true by opal_common_ofi_init() once it has completed its setup;
 * cleared again by opal_common_ofi_fini() when it tears that setup down. */
static bool opal_common_ofi_initialized = false;
44+
/* Counter incremented by opal_common_ofi_init() and decremented by
 * opal_common_ofi_fini(); teardown happens when it drops to zero. */
static int opal_common_ofi_init_ref_cnt = 0;
45+
46+
#if OPAL_OFI_IMPORT_MONITOR_SUPPORT
47+
48+
/*
 * "start" entry of the exported memory-monitor ops table.
 * Performs no work and always reports success (0).
 */
static int opal_common_ofi_monitor_start(struct fid_mem_monitor *monitor)
{
    (void) monitor; /* intentionally unused */

    return 0;
}
52+
/*
 * "stop" entry of the exported memory-monitor ops table.
 * Performs no work.
 */
static void opal_common_ofi_monitor_stop(struct fid_mem_monitor *monitor)
{
    (void) monitor; /* intentionally unused */
}
56+
/*
 * "subscribe" entry of the exported memory-monitor ops table.
 * The address range is ignored; always reports success (0).
 */
static int opal_common_ofi_monitor_subscribe(struct fid_mem_monitor *monitor,
                                             const void *addr, size_t len)
{
    (void) monitor;
    (void) addr;
    (void) len;

    return 0;
}
61+
/*
 * "unsubscribe" entry of the exported memory-monitor ops table.
 * The address range is ignored; performs no work.
 */
static void opal_common_ofi_monitor_unsubscribe(struct fid_mem_monitor *monitor,
                                                const void *addr, size_t len)
{
    (void) monitor;
    (void) addr;
    (void) len;
}
66+
/*
 * "valid" entry of the exported memory-monitor ops table.
 * The address range is ignored; every query is answered with true.
 */
static bool opal_common_ofi_monitor_valid(struct fid_mem_monitor *monitor,
                                          const void *addr, size_t len)
{
    (void) monitor;
    (void) addr;
    (void) len;

    return true;
}
71+
72+
static struct fid_mem_monitor *opal_common_ofi_monitor;
73+
static struct fid *opal_common_ofi_cache_fid;
74+
static struct fi_ops_mem_monitor opal_common_ofi_export_ops = {
75+
.size = sizeof(struct fi_ops_mem_monitor),
76+
.start = opal_common_ofi_monitor_start,
77+
.stop = opal_common_ofi_monitor_stop,
78+
.subscribe = opal_common_ofi_monitor_subscribe,
79+
.unsubscribe = opal_common_ofi_monitor_unsubscribe,
80+
.valid = opal_common_ofi_monitor_valid,
81+
};
82+
83+
OPAL_DECLSPEC void opal_common_ofi_mem_release_cb(void *buf, size_t length,
84+
void *cbdata, bool from_alloc)
85+
{
86+
opal_common_ofi_monitor->import_ops->notify(opal_common_ofi_monitor,
87+
buf, length);
88+
}
89+
#endif /* OPAL_OFI_IMPORT_MONITOR_SUPPORT */
90+
91+
OPAL_DECLSPEC int opal_common_ofi_init(void)
92+
{
93+
int ret;
94+
95+
opal_common_ofi_init_ref_cnt++;
96+
if (opal_common_ofi_initialized) {
97+
return OPAL_SUCCESS;
98+
}
99+
#if OPAL_OFI_IMPORT_MONITOR_SUPPORT
100+
101+
mca_base_framework_open(&opal_memory_base_framework, 0);
102+
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT)
103+
!= (((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT))
104+
& opal_mem_hooks_support_level())) {
105+
return OPAL_SUCCESS;
106+
}
107+
108+
ret = fi_open(FI_VERSION(1,13), "mr_cache", NULL, 0, 0, &opal_common_ofi_cache_fid, NULL);
109+
if (ret) {
110+
goto err;
111+
}
112+
113+
opal_common_ofi_monitor = calloc(1, sizeof(*opal_common_ofi_monitor));
114+
if (!opal_common_ofi_monitor) {
115+
goto err;
116+
}
117+
118+
opal_common_ofi_monitor->fid.fclass = FI_CLASS_MEM_MONITOR;
119+
opal_common_ofi_monitor->export_ops = &opal_common_ofi_export_ops;
120+
ret = fi_import_fid(opal_common_ofi_cache_fid, &opal_common_ofi_monitor->fid, 0);
121+
if (ret) {
122+
goto err;
123+
}
124+
opal_mem_hooks_register_release(opal_common_ofi_mem_release_cb, NULL);
125+
opal_common_ofi_initialized = true;
126+
127+
return OPAL_SUCCESS;
128+
err:
129+
if (opal_common_ofi_cache_fid) {
130+
fi_close(opal_common_ofi_cache_fid);
131+
}
132+
if (opal_common_ofi_monitor) {
133+
free(opal_common_ofi_monitor);
134+
}
135+
136+
return OPAL_ERROR;
137+
#else
138+
opal_common_ofi_initialized = true;
139+
return OPAL_SUCCESS;
140+
#endif
141+
}
142+
143+
OPAL_DECLSPEC int opal_common_ofi_fini(void)
144+
{
145+
if (opal_common_ofi_initialized && !--opal_common_ofi_init_ref_cnt) {
146+
#if OPAL_OFI_IMPORT_MONITOR_SUPPORT
147+
opal_mem_hooks_unregister_release(opal_common_ofi_mem_release_cb);
148+
fi_close(opal_common_ofi_cache_fid);
149+
fi_close(&opal_common_ofi_monitor->fid);
150+
free(opal_common_ofi_monitor);
151+
#endif
152+
opal_common_ofi_initialized = false;
153+
}
154+
155+
return OPAL_SUCCESS;
156+
}
41157

42158
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item)
43159
{

opal/mca/common/ofi/common_ofi.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
* reserved.
66
* Copyright (c) 2020 Triad National Security, LLC. All rights
77
* reserved.
8+
* Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights
9+
* reserved.
810
*
911
* $COPYRIGHT$
1012
*
@@ -20,7 +22,11 @@
2022
#include "opal/mca/base/mca_base_var.h"
2123
#include "opal/mca/base/mca_base_framework.h"
2224
#include "opal/util/proc.h"
25+
#include "opal/memoryhooks/memory.h"
2326
#include <rdma/fabric.h>
27+
#if OPAL_OFI_IMPORT_MONITOR_SUPPORT
28+
#include <rdma/fi_ext.h>
29+
#endif
2430

2531
BEGIN_C_DECLS
2632

@@ -33,6 +39,7 @@ typedef struct opal_common_ofi_module {
3339
} opal_common_ofi_module_t;
3440

3541
extern opal_common_ofi_module_t opal_common_ofi;
42+
extern mca_base_framework_t opal_memory_base_framework;
3643

3744
OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component);
3845
OPAL_DECLSPEC void opal_common_ofi_mca_register(void);
@@ -54,6 +61,34 @@ OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void);
5461
*/
5562
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);
5663

64+
#if OPAL_OFI_IMPORT_MONITOR_SUPPORT
65+
/*
66+
* @param buf (IN) Pointer to the start of the allocation
67+
* @param length (IN) Length of the allocation
68+
* @param cbdata (IN) Data passed to memory hooks when callback
69+
* was registered
70+
* @param from_alloc (IN) True if the callback is caused by a call to the
71+
* general allocation routines (malloc, calloc, free,
72+
* etc.) or directly from the user (mmap, munmap, etc.)
73+
*
74+
* Callback function triggered when memory is about to be freed.
75+
* The callback will be triggered according to
76+
* the note in opal_mem_hooks_register_release().
77+
*
78+
*/
79+
OPAL_DECLSPEC void opal_common_ofi_mem_release_cb(void *buf, size_t length, void *cbdata, bool from_alloc);
80+
#endif /* OPAL_OFI_IMPORT_MONITOR_SUPPORT */
81+
82+
/*
83+
* Initializes common objects for libfabric
84+
*/
85+
OPAL_DECLSPEC int opal_common_ofi_init(void);
86+
87+
/*
88+
* Cleans up common objects for libfabric
89+
*/
90+
OPAL_DECLSPEC int opal_common_ofi_fini(void);
91+
5792
END_C_DECLS
5893

5994
struct fi_info* opal_mca_common_ofi_select_provider(struct fi_info *provider_list,

0 commit comments

Comments
 (0)