Skip to content

Commit 92950ee

Browse files
authored
Merge pull request #10229 from awlauria/fix_variable_regis
v5.0.x: common/ucx: fix variable registration
2 parents e852ad0 + 800852a commit 92950ee

File tree

2 files changed

+62
-88
lines changed

2 files changed

+62
-88
lines changed

opal/mca/common/ucx/common_ucx.c

Lines changed: 60 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
34
* Copyright (c) 2019 Intel, Inc. All rights reserved.
45
* Copyright (c) 2019 Research Organization for Information Science
56
* and Technology (RIST). All rights reserved.
67
* Copyright (c) 2021 Triad National Security, LLC. All rights
78
* reserved.
9+
* Copyright (c) 2022 Google, LLC. All rights reserved.
810
*
911
* $COPYRIGHT$
1012
*
@@ -31,11 +33,11 @@
3133

3234
extern mca_base_framework_t opal_memory_base_framework;
3335

34-
opal_common_ucx_module_t opal_common_ucx = {.verbose = 0,
35-
.progress_iterations = 100,
36-
.registered = 0,
37-
.opal_mem_hooks = 1,
38-
.tls = NULL};
36+
opal_common_ucx_module_t opal_common_ucx =
37+
{
38+
.progress_iterations = 100,
39+
.opal_mem_hooks = 1,
40+
};
3941

4042
static opal_mutex_t opal_common_ucx_mutex = OPAL_MUTEX_STATIC_INIT;
4143

@@ -46,87 +48,59 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length, void *cbdat
4648

4749
OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component)
4850
{
49-
static const char *default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc";
50-
static const char *default_devices = "mlx*";
51-
static int hook_index;
52-
static int verbose_index;
53-
static int progress_index;
54-
static int tls_index;
55-
static int devices_index;
56-
int param;
51+
char *default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc";
52+
char *default_devices = "mlx*";
53+
int hook_index;
54+
int verbose_index;
55+
int progress_index;
56+
int tls_index;
57+
int devices_index;
5758

5859
OPAL_THREAD_LOCK(&opal_common_ucx_mutex);
5960

60-
param = mca_base_var_find("opal", "opal_common", "ucx", "verbose");
61-
if (0 > param) {
62-
verbose_index = mca_base_var_register("opal", "opal_common", "ucx", "verbose",
63-
"Verbose level of the UCX components",
64-
MCA_BASE_VAR_TYPE_INT, NULL, 0,
65-
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
66-
MCA_BASE_VAR_SCOPE_LOCAL, &opal_common_ucx.verbose);
67-
}
68-
69-
param = mca_base_var_find("opal", "opal_common", "ucx", "progress_iterations");
70-
if (0 > param) {
71-
progress_index = mca_base_var_register("opal", "opal_common", "ucx", "progress_iterations",
72-
"Set number of calls of internal UCX progress "
73-
"calls per opal_progress call",
74-
MCA_BASE_VAR_TYPE_INT, NULL, 0,
75-
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
76-
MCA_BASE_VAR_SCOPE_LOCAL,
77-
&opal_common_ucx.progress_iterations);
78-
}
79-
80-
param = mca_base_var_find("opal", "opal_common", "ucx", "opal_mem_hooks");
81-
if (0 > param) {
82-
hook_index = mca_base_var_register("opal", "opal_common", "ucx", "opal_mem_hooks",
83-
"Use OPAL memory hooks, instead of UCX internal "
84-
"memory hooks",
85-
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_3,
61+
/* It is harmless to re-register variables so go ahead an re-register. */
62+
verbose_index = mca_base_var_register("opal", "opal_common", "ucx", "verbose",
63+
"Verbose level of the UCX components",
64+
MCA_BASE_VAR_TYPE_INT, NULL, 0,
65+
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
66+
MCA_BASE_VAR_SCOPE_LOCAL, &opal_common_ucx.verbose);
67+
progress_index = mca_base_var_register("opal", "opal_common", "ucx", "progress_iterations",
68+
"Set number of calls of internal UCX progress "
69+
"calls per opal_progress call",
70+
MCA_BASE_VAR_TYPE_INT, NULL, 0,
71+
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
8672
MCA_BASE_VAR_SCOPE_LOCAL,
87-
&opal_common_ucx.opal_mem_hooks);
73+
&opal_common_ucx.progress_iterations);
74+
hook_index = mca_base_var_register("opal", "opal_common", "ucx", "opal_mem_hooks",
75+
"Use OPAL memory hooks, instead of UCX internal "
76+
"memory hooks",
77+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_3,
78+
MCA_BASE_VAR_SCOPE_LOCAL,
79+
&opal_common_ucx.opal_mem_hooks);
80+
81+
if (NULL == opal_common_ucx.tls) {
82+
opal_common_ucx.tls = default_tls;
8883
}
8984

90-
param = mca_base_var_find("opal", "opal_common", "ucx", "tls");
91-
if (0 > param) {
92-
93-
/*
94-
* this monkey business is needed because of the way the MCA VARs framework tries to handle pointers to strings
95-
* when destructing the MCA var database. If you don't do something like this,the MCA var framework will try
96-
* to dereference a pointer which itself is no longer a valid address owing to having been previously dlclosed.
97-
* Same for the devices pointer below.
98-
*/
99-
if (NULL == opal_common_ucx.tls) {
100-
opal_common_ucx.tls = malloc(sizeof(*opal_common_ucx.tls));
101-
assert(NULL != opal_common_ucx.tls);
102-
}
103-
*opal_common_ucx.tls = strdup(default_tls);
104-
tls_index = mca_base_var_register(
105-
"opal", "opal_common", "ucx", "tls",
106-
"List of UCX transports which should be supported on the system, to enable "
107-
"selecting the UCX component. Special values: any (any available). "
108-
"A '^' prefix negates the list. "
109-
"For example, in order to exclude on shared memory and TCP transports, "
110-
"please set to '^posix,sysv,self,tcp,cma,knem,xpmem'.",
111-
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
112-
opal_common_ucx.tls);
113-
}
114-
115-
param = mca_base_var_find("opal", "opal_common", "ucx", "devices");
116-
if (0 > param) {
117-
118-
if (NULL == opal_common_ucx.devices) {
119-
opal_common_ucx.devices = malloc(sizeof(*opal_common_ucx.devices));
120-
assert(NULL != opal_common_ucx.devices);
121-
}
122-
*opal_common_ucx.devices = strdup(default_devices);
123-
devices_index = mca_base_var_register(
124-
"opal", "opal_common", "ucx", "devices",
125-
"List of device driver pattern names, which, if supported by UCX, will "
126-
"bump its priority above ob1. Special values: any (any available)",
127-
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
128-
opal_common_ucx.devices);
85+
tls_index = mca_base_var_register(
86+
"opal", "opal_common", "ucx", "tls",
87+
"List of UCX transports which should be supported on the system, to enable "
88+
"selecting the UCX component. Special values: any (any available). "
89+
"A '^' prefix negates the list. "
90+
"For example, in order to exclude on shared memory and TCP transports, "
91+
"please set to '^posix,sysv,self,tcp,cma,knem,xpmem'.",
92+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
93+
&opal_common_ucx.tls);
94+
95+
if (NULL == opal_common_ucx.devices) {
96+
opal_common_ucx.devices = default_devices;
12997
}
98+
devices_index = mca_base_var_register(
99+
"opal", "opal_common", "ucx", "devices",
100+
"List of device driver pattern names, which, if supported by UCX, will "
101+
"bump its priority above ob1. Special values: any (any available)",
102+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
103+
&opal_common_ucx.devices);
130104

131105
if (component) {
132106
mca_base_var_register_synonym(verbose_index, component->mca_project_name,
@@ -258,8 +232,8 @@ OPAL_DECLSPEC opal_common_ucx_support_level_t opal_common_ucx_support_level(ucp_
258232
int ret;
259233
#endif
260234

261-
is_any_tl = !strcmp(*opal_common_ucx.tls, "any");
262-
is_any_device = !strcmp(*opal_common_ucx.devices, "any");
235+
is_any_tl = !strcmp(opal_common_ucx.tls, "any");
236+
is_any_device = !strcmp(opal_common_ucx.devices, "any");
263237

264238
/* Check for special value "any" */
265239
if (is_any_tl && is_any_device) {
@@ -270,19 +244,19 @@ OPAL_DECLSPEC opal_common_ucx_support_level_t opal_common_ucx_support_level(ucp_
270244

271245
#if HAVE_DECL_OPEN_MEMSTREAM
272246
/* Split transports list */
273-
negate = ('^' == (*opal_common_ucx.tls)[0]);
274-
tl_list = opal_argv_split(*opal_common_ucx.tls + (negate ? 1 : 0), ',');
247+
negate = ('^' == (opal_common_ucx.tls)[0]);
248+
tl_list = opal_argv_split(opal_common_ucx.tls + (negate ? 1 : 0), ',');
275249
if (tl_list == NULL) {
276250
MCA_COMMON_UCX_VERBOSE(1, "failed to split tl list '%s', ucx is disabled",
277-
*opal_common_ucx.tls);
251+
opal_common_ucx.tls);
278252
goto out;
279253
}
280254

281255
/* Split devices list */
282-
device_list = opal_argv_split(*opal_common_ucx.devices, ',');
256+
device_list = opal_argv_split(opal_common_ucx.devices, ',');
283257
if (device_list == NULL) {
284258
MCA_COMMON_UCX_VERBOSE(1, "failed to split devices list '%s', ucx is disabled",
285-
*opal_common_ucx.devices);
259+
opal_common_ucx.devices);
286260
goto out_free_tl_list;
287261
}
288262

opal/mca/common/ucx/common_ucx.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ typedef struct opal_common_ucx_module {
9090
int progress_iterations;
9191
int registered;
9292
bool opal_mem_hooks;
93-
char **tls;
94-
char **devices;
93+
char *tls;
94+
char *devices;
9595
} opal_common_ucx_module_t;
9696

9797
typedef struct opal_common_ucx_del_proc {

0 commit comments

Comments
 (0)