Skip to content

Commit 6685213

Browse files
authored
Merge pull request #10365 from jjhursey/v4.1-fix-ucx-common
common/ucx: fix variable registration
2 parents 31b9088 + d79d5e8 commit 6685213

File tree

2 files changed

+78
-57
lines changed

2 files changed

+78
-57
lines changed

opal/mca/common/ucx/common_ucx.c

Lines changed: 77 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
4+
* Copyright (c) 2019 Intel, Inc. All rights reserved.
5+
* Copyright (c) 2019 Research Organization for Information Science
6+
* and Technology (RIST). All rights reserved.
7+
* Copyright (c) 2021 Triad National Security, LLC. All rights
8+
* reserved.
9+
* Copyright (c) 2022 Google, LLC. All rights reserved.
10+
* Copyright (c) 2022 IBM Corporation. All rights reserved.
311
* $COPYRIGHT$
412
*
513
* Additional copyrights may follow
@@ -25,14 +33,16 @@
2533

2634
extern mca_base_framework_t opal_memory_base_framework;
2735

28-
opal_common_ucx_module_t opal_common_ucx = {
29-
.verbose = 0,
36+
opal_common_ucx_module_t opal_common_ucx =
37+
{
3038
.progress_iterations = 100,
31-
.registered = 0,
32-
.opal_mem_hooks = 1,
33-
.tls = NULL
39+
.opal_mem_hooks = 1,
40+
.tls = NULL,
41+
.devices = NULL,
3442
};
3543

44+
static opal_mutex_t opal_common_ucx_mutex = OPAL_MUTEX_STATIC_INIT;
45+
3646
static void opal_common_ucx_mem_release_cb(void *buf, size_t length,
3747
void *cbdata, bool from_alloc)
3848
{
@@ -41,60 +51,70 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length,
4151

4252
OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component)
4353
{
44-
static const char *default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc";
45-
static const char *default_devices = "mlx*";
46-
static int registered = 0;
47-
static int hook_index;
48-
static int verbose_index;
49-
static int progress_index;
50-
static int tls_index;
51-
static int devices_index;
52-
53-
if (!registered) {
54-
verbose_index = mca_base_var_register("opal", "opal_common", "ucx", "verbose",
55-
"Verbose level of the UCX components",
56-
MCA_BASE_VAR_TYPE_INT, NULL, 0,
57-
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
58-
MCA_BASE_VAR_SCOPE_LOCAL,
59-
&opal_common_ucx.verbose);
60-
progress_index = mca_base_var_register("opal", "opal_common", "ucx", "progress_iterations",
61-
"Set number of calls of internal UCX progress "
62-
"calls per opal_progress call",
63-
MCA_BASE_VAR_TYPE_INT, NULL, 0,
64-
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
65-
MCA_BASE_VAR_SCOPE_LOCAL,
66-
&opal_common_ucx.progress_iterations);
67-
hook_index = mca_base_var_register("opal", "opal_common", "ucx", "opal_mem_hooks",
68-
"Use OPAL memory hooks, instead of UCX internal "
69-
"memory hooks", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
70-
OPAL_INFO_LVL_3,
54+
char *default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc";
55+
char *default_devices = "mlx*";
56+
int hook_index;
57+
int verbose_index;
58+
int progress_index;
59+
int tls_index;
60+
int devices_index;
61+
62+
OPAL_THREAD_LOCK(&opal_common_ucx_mutex);
63+
64+
/* It is harmless to re-register variables, so go ahead and re-register. */
65+
verbose_index = mca_base_var_register("opal", "opal_common", "ucx", "verbose",
66+
"Verbose level of the UCX components",
67+
MCA_BASE_VAR_TYPE_INT, NULL, 0,
68+
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
69+
MCA_BASE_VAR_SCOPE_LOCAL, &opal_common_ucx.verbose);
70+
progress_index = mca_base_var_register("opal", "opal_common", "ucx", "progress_iterations",
71+
"Set number of calls of internal UCX progress "
72+
"calls per opal_progress call",
73+
MCA_BASE_VAR_TYPE_INT, NULL, 0,
74+
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
7175
MCA_BASE_VAR_SCOPE_LOCAL,
72-
&opal_common_ucx.opal_mem_hooks);
73-
74-
opal_common_ucx.tls = malloc(sizeof(*opal_common_ucx.tls));
76+
&opal_common_ucx.progress_iterations);
77+
hook_index = mca_base_var_register("opal", "opal_common", "ucx", "opal_mem_hooks",
78+
"Use OPAL memory hooks, instead of UCX internal "
79+
"memory hooks",
80+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_3,
81+
MCA_BASE_VAR_SCOPE_LOCAL,
82+
&opal_common_ucx.opal_mem_hooks);
83+
84+
if (NULL == opal_common_ucx.tls) {
85+
// Extra level of string indirection needed to make ompi_info
86+
// happy since it will unload this library before the MCA base
87+
// cleans up the MCA vars. This will cause the string to go
88+
// out of scope unless we place the pointer to it on the heap.
89+
opal_common_ucx.tls = (char **) malloc(sizeof(char *));
7590
*opal_common_ucx.tls = strdup(default_tls);
76-
tls_index = mca_base_var_register("opal", "opal_common", "ucx", "tls",
77-
"List of UCX transports which should be supported on the system, to enable "
78-
"selecting the UCX component. Special values: any (any available). "
79-
"A '^' prefix negates the list. "
80-
"For example, in order to exclude on shared memory and TCP transports, "
81-
"please set to '^posix,sysv,self,tcp,cma,knem,xpmem'.",
82-
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
83-
OPAL_INFO_LVL_3,
84-
MCA_BASE_VAR_SCOPE_LOCAL,
85-
opal_common_ucx.tls);
86-
87-
opal_common_ucx.devices = malloc(sizeof(*opal_common_ucx.devices));
91+
}
92+
93+
tls_index = mca_base_var_register(
94+
"opal", "opal_common", "ucx", "tls",
95+
"List of UCX transports which should be supported on the system, to enable "
96+
"selecting the UCX component. Special values: any (any available). "
97+
"A '^' prefix negates the list. "
98+
"For example, in order to exclude on shared memory and TCP transports, "
99+
"please set to '^posix,sysv,self,tcp,cma,knem,xpmem'.",
100+
MCA_BASE_VAR_TYPE_STRING, NULL, 0,
101+
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
102+
MCA_BASE_VAR_SCOPE_LOCAL,
103+
opal_common_ucx.tls);
104+
105+
if (NULL == opal_common_ucx.devices) {
106+
opal_common_ucx.devices = (char **) malloc(sizeof(char *));
88107
*opal_common_ucx.devices = strdup(default_devices);
89-
devices_index = mca_base_var_register("opal", "opal_common", "ucx", "devices",
90-
"List of device driver pattern names, which, if supported by UCX, will "
91-
"bump its priority above ob1. Special values: any (any available)",
92-
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
93-
OPAL_INFO_LVL_3,
94-
MCA_BASE_VAR_SCOPE_LOCAL,
95-
opal_common_ucx.devices);
96-
registered = 1;
97108
}
109+
devices_index = mca_base_var_register(
110+
"opal", "opal_common", "ucx", "devices",
111+
"List of device driver pattern names, which, if supported by UCX, will "
112+
"bump its priority above ob1. Special values: any (any available)",
113+
MCA_BASE_VAR_TYPE_STRING, NULL, 0,
114+
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
115+
MCA_BASE_VAR_SCOPE_LOCAL,
116+
opal_common_ucx.devices);
117+
98118
if (component) {
99119
mca_base_var_register_synonym(verbose_index, component->mca_project_name,
100120
component->mca_type_name,
@@ -230,7 +250,7 @@ opal_common_ucx_support_level(ucp_context_h context)
230250
int ret;
231251
#endif
232252

233-
is_any_tl = !strcmp(*opal_common_ucx.tls, "any");
253+
is_any_tl = !strcmp(*opal_common_ucx.tls, "any");
234254
is_any_device = !strcmp(*opal_common_ucx.devices, "any");
235255

236256
/* Check for special value "any" */
@@ -242,7 +262,7 @@ opal_common_ucx_support_level(ucp_context_h context)
242262

243263
#if HAVE_DECL_OPEN_MEMSTREAM
244264
/* Split transports list */
245-
negate = ('^' == (*opal_common_ucx.tls)[0]);
265+
negate = ('^' == (*opal_common_ucx.tls)[0]);
246266
tl_list = opal_argv_split(*opal_common_ucx.tls + (negate ? 1 : 0), ',');
247267
if (tl_list == NULL) {
248268
MCA_COMMON_UCX_VERBOSE(1, "failed to split tl list '%s', ucx is disabled",

opal/mca/common/ucx/common_ucx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/*
22
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
33
* All rights reserved.
4+
* Copyright (c) 2022 IBM Corporation. All rights reserved.
45
* $COPYRIGHT$
56
*
67
* Additional copyrights may follow

0 commit comments

Comments
 (0)