1
+ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
1
2
/*
2
3
* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
3
4
* Copyright (c) 2019 Intel, Inc. All rights reserved.
4
5
* Copyright (c) 2019 Research Organization for Information Science
5
6
* and Technology (RIST). All rights reserved.
6
7
* Copyright (c) 2021 Triad National Security, LLC. All rights
7
8
* reserved.
9
+ * Copyright (c) 2022 Google, LLC. All rights reserved.
8
10
*
9
11
* $COPYRIGHT$
10
12
*
@@ -33,13 +35,11 @@ static int use_safety_valve = 0;
33
35
34
36
extern mca_base_framework_t opal_memory_base_framework ;
35
37
36
- opal_common_ucx_module_t opal_common_ucx = {.verbose = 0 ,
37
- .progress_iterations = 100 ,
38
- .registered = 0 ,
39
- .opal_mem_hooks = 1 ,
40
- .tls = NULL };
41
-
42
- static opal_mutex_t opal_common_ucx_mutex = OPAL_MUTEX_STATIC_INIT ;
38
+ opal_common_ucx_module_t opal_common_ucx =
39
+ {
40
+ .progress_iterations = 100 ,
41
+ .opal_mem_hooks = 1 ,
42
+ };
43
43
44
44
static void opal_common_ucx_mem_release_cb (void * buf , size_t length , void * cbdata , bool from_alloc )
45
45
{
@@ -48,87 +48,57 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length, void *cbdat
48
48
49
49
OPAL_DECLSPEC void opal_common_ucx_mca_var_register (const mca_base_component_t * component )
50
50
{
51
- static const char * default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc" ;
52
- static const char * default_devices = "mlx*" ;
53
- static int hook_index ;
54
- static int verbose_index ;
55
- static int progress_index ;
56
- static int tls_index ;
57
- static int devices_index ;
58
- int param ;
59
-
60
- OPAL_THREAD_LOCK (& opal_common_ucx_mutex );
61
-
62
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "verbose" );
63
- if (0 > param ) {
64
- verbose_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "verbose" ,
65
- "Verbose level of the UCX components" ,
66
- MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
67
- MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
68
- MCA_BASE_VAR_SCOPE_LOCAL , & opal_common_ucx .verbose );
69
- }
70
-
71
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "progress_iterations" );
72
- if (0 > param ) {
73
- progress_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "progress_iterations" ,
74
- "Set number of calls of internal UCX progress "
75
- "calls per opal_progress call" ,
76
- MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
77
- MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
78
- MCA_BASE_VAR_SCOPE_LOCAL ,
79
- & opal_common_ucx .progress_iterations );
80
- }
81
-
82
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "opal_mem_hooks" );
83
- if (0 > param ) {
84
- hook_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "opal_mem_hooks" ,
85
- "Use OPAL memory hooks, instead of UCX internal "
86
- "memory hooks" ,
87
- MCA_BASE_VAR_TYPE_BOOL , NULL , 0 , 0 , OPAL_INFO_LVL_3 ,
51
+ char * default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc" ;
52
+ char * default_devices = "mlx*" ;
53
+ int hook_index ;
54
+ int verbose_index ;
55
+ int progress_index ;
56
+ int tls_index ;
57
+ int devices_index ;
58
+
59
+ /* It is harmless to re-register variables so go ahead an re-register. */
60
+ verbose_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "verbose" ,
61
+ "Verbose level of the UCX components" ,
62
+ MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
63
+ MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
64
+ MCA_BASE_VAR_SCOPE_LOCAL , & opal_common_ucx .verbose );
65
+ progress_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "progress_iterations" ,
66
+ "Set number of calls of internal UCX progress "
67
+ "calls per opal_progress call" ,
68
+ MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
69
+ MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
88
70
MCA_BASE_VAR_SCOPE_LOCAL ,
89
- & opal_common_ucx .opal_mem_hooks );
71
+ & opal_common_ucx .progress_iterations );
72
+ hook_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "opal_mem_hooks" ,
73
+ "Use OPAL memory hooks, instead of UCX internal "
74
+ "memory hooks" ,
75
+ MCA_BASE_VAR_TYPE_BOOL , NULL , 0 , 0 , OPAL_INFO_LVL_3 ,
76
+ MCA_BASE_VAR_SCOPE_LOCAL ,
77
+ & opal_common_ucx .opal_mem_hooks );
78
+
79
+ if (NULL == opal_common_ucx .tls ) {
80
+ opal_common_ucx .tls = default_tls ;
90
81
}
91
82
92
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "tls" );
93
- if (0 > param ) {
94
-
95
- /*
96
- * this monkey business is needed because of the way the MCA VARs framework tries to handle pointers to strings
97
- * when destructing the MCA var database. If you don't do something like this,the MCA var framework will try
98
- * to dereference a pointer which itself is no longer a valid address owing to having been previously dlclosed.
99
- * Same for the devices pointer below.
100
- */
101
- if (NULL == opal_common_ucx .tls ) {
102
- opal_common_ucx .tls = malloc (sizeof (* opal_common_ucx .tls ));
103
- assert (NULL != opal_common_ucx .tls );
104
- }
105
- * opal_common_ucx .tls = strdup (default_tls );
106
- tls_index = mca_base_var_register (
107
- "opal" , "opal_common" , "ucx" , "tls" ,
108
- "List of UCX transports which should be supported on the system, to enable "
109
- "selecting the UCX component. Special values: any (any available). "
110
- "A '^' prefix negates the list. "
111
- "For example, in order to exclude on shared memory and TCP transports, "
112
- "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'." ,
113
- MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 , OPAL_INFO_LVL_3 , MCA_BASE_VAR_SCOPE_LOCAL ,
114
- opal_common_ucx .tls );
115
- }
116
-
117
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "devices" );
118
- if (0 > param ) {
119
-
120
- if (NULL == opal_common_ucx .devices ) {
121
- opal_common_ucx .devices = malloc (sizeof (* opal_common_ucx .devices ));
122
- assert (NULL != opal_common_ucx .devices );
123
- }
124
- * opal_common_ucx .devices = strdup (default_devices );
125
- devices_index = mca_base_var_register (
126
- "opal" , "opal_common" , "ucx" , "devices" ,
127
- "List of device driver pattern names, which, if supported by UCX, will "
128
- "bump its priority above ob1. Special values: any (any available)" ,
129
- MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 , OPAL_INFO_LVL_3 , MCA_BASE_VAR_SCOPE_LOCAL ,
130
- opal_common_ucx .devices );
83
+ tls_index = mca_base_var_register (
84
+ "opal" , "opal_common" , "ucx" , "tls" ,
85
+ "List of UCX transports which should be supported on the system, to enable "
86
+ "selecting the UCX component. Special values: any (any available). "
87
+ "A '^' prefix negates the list. "
88
+ "For example, in order to exclude on shared memory and TCP transports, "
89
+ "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'." ,
90
+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 , OPAL_INFO_LVL_3 , MCA_BASE_VAR_SCOPE_LOCAL ,
91
+ & opal_common_ucx .tls );
92
+
93
+ if (NULL == opal_common_ucx .devices ) {
94
+ opal_common_ucx .devices = default_devices ;
131
95
}
96
+ devices_index = mca_base_var_register (
97
+ "opal" , "opal_common" , "ucx" , "devices" ,
98
+ "List of device driver pattern names, which, if supported by UCX, will "
99
+ "bump its priority above ob1. Special values: any (any available)" ,
100
+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 , OPAL_INFO_LVL_3 , MCA_BASE_VAR_SCOPE_LOCAL ,
101
+ & opal_common_ucx .devices );
132
102
133
103
if (component ) {
134
104
mca_base_var_register_synonym (verbose_index , component -> mca_project_name ,
@@ -147,8 +117,6 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *
147
117
component -> mca_type_name , component -> mca_component_name ,
148
118
"devices" , 0 );
149
119
}
150
-
151
- OPAL_THREAD_UNLOCK (& opal_common_ucx_mutex );
152
120
}
153
121
154
122
OPAL_DECLSPEC void opal_common_ucx_mca_register (void )
@@ -261,8 +229,8 @@ OPAL_DECLSPEC opal_common_ucx_support_level_t opal_common_ucx_support_level(ucp_
261
229
int ret ;
262
230
#endif
263
231
264
- is_any_tl = !strcmp (* opal_common_ucx .tls , "any" );
265
- is_any_device = !strcmp (* opal_common_ucx .devices , "any" );
232
+ is_any_tl = !strcmp (opal_common_ucx .tls , "any" );
233
+ is_any_device = !strcmp (opal_common_ucx .devices , "any" );
266
234
267
235
/* Check for special value "any" */
268
236
if (is_any_tl && is_any_device ) {
@@ -273,19 +241,19 @@ OPAL_DECLSPEC opal_common_ucx_support_level_t opal_common_ucx_support_level(ucp_
273
241
274
242
#if HAVE_DECL_OPEN_MEMSTREAM
275
243
/* Split transports list */
276
- negate = ('^' == (* opal_common_ucx .tls )[0 ]);
277
- tl_list = opal_argv_split (* opal_common_ucx .tls + (negate ? 1 : 0 ), ',' );
244
+ negate = ('^' == (opal_common_ucx .tls )[0 ]);
245
+ tl_list = opal_argv_split (opal_common_ucx .tls + (negate ? 1 : 0 ), ',' );
278
246
if (tl_list == NULL ) {
279
247
MCA_COMMON_UCX_VERBOSE (1 , "failed to split tl list '%s', ucx is disabled" ,
280
- * opal_common_ucx .tls );
248
+ opal_common_ucx .tls );
281
249
goto out ;
282
250
}
283
251
284
252
/* Split devices list */
285
- device_list = opal_argv_split (* opal_common_ucx .devices , ',' );
253
+ device_list = opal_argv_split (opal_common_ucx .devices , ',' );
286
254
if (device_list == NULL ) {
287
255
MCA_COMMON_UCX_VERBOSE (1 , "failed to split devices list '%s', ucx is disabled" ,
288
- * opal_common_ucx .devices );
256
+ opal_common_ucx .devices );
289
257
goto out_free_tl_list ;
290
258
}
291
259
0 commit comments