1
+ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
1
2
/*
2
3
* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
4
+ * Copyright (c) 2019 Intel, Inc. All rights reserved.
5
+ * Copyright (c) 2019 Research Organization for Information Science
6
+ * and Technology (RIST). All rights reserved.
7
+ * Copyright (c) 2021 Triad National Security, LLC. All rights
8
+ * reserved.
9
+ * Copyright (c) 2022 Google, LLC. All rights reserved.
10
+ * Copyright (c) 2022 IBM Corporation. All rights reserved.
3
11
* $COPYRIGHT$
4
12
*
5
13
* Additional copyrights may follow
25
33
26
34
extern mca_base_framework_t opal_memory_base_framework ;
27
35
28
- opal_common_ucx_module_t opal_common_ucx = {
29
- . verbose = 0 ,
36
+ opal_common_ucx_module_t opal_common_ucx =
37
+ {
30
38
.progress_iterations = 100 ,
31
- .registered = 0 ,
32
- .opal_mem_hooks = 1 ,
33
- .tls = NULL
39
+ .opal_mem_hooks = 1 ,
40
+ .tls = NULL ,
41
+ .devices = NULL ,
34
42
};
35
43
44
+ static opal_mutex_t opal_common_ucx_mutex = OPAL_MUTEX_STATIC_INIT ;
45
+
36
46
static void opal_common_ucx_mem_release_cb (void * buf , size_t length ,
37
47
void * cbdata , bool from_alloc )
38
48
{
@@ -41,60 +51,70 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length,
41
51
42
52
OPAL_DECLSPEC void opal_common_ucx_mca_var_register (const mca_base_component_t * component )
43
53
{
44
- static const char * default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc" ;
45
- static const char * default_devices = "mlx*" ;
46
- static int registered = 0 ;
47
- static int hook_index ;
48
- static int verbose_index ;
49
- static int progress_index ;
50
- static int tls_index ;
51
- static int devices_index ;
52
-
53
- if (!registered ) {
54
- verbose_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "verbose" ,
55
- "Verbose level of the UCX components" ,
56
- MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
57
- MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
58
- MCA_BASE_VAR_SCOPE_LOCAL ,
59
- & opal_common_ucx .verbose );
60
- progress_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "progress_iterations" ,
61
- "Set number of calls of internal UCX progress "
62
- "calls per opal_progress call" ,
63
- MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
64
- MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
65
- MCA_BASE_VAR_SCOPE_LOCAL ,
66
- & opal_common_ucx .progress_iterations );
67
- hook_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "opal_mem_hooks" ,
68
- "Use OPAL memory hooks, instead of UCX internal "
69
- "memory hooks" , MCA_BASE_VAR_TYPE_BOOL , NULL , 0 , 0 ,
70
- OPAL_INFO_LVL_3 ,
54
+ char * default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc" ;
55
+ char * default_devices = "mlx*" ;
56
+ int hook_index ;
57
+ int verbose_index ;
58
+ int progress_index ;
59
+ int tls_index ;
60
+ int devices_index ;
61
+
62
+ OPAL_THREAD_LOCK (& opal_common_ucx_mutex );
63
+
64
+ /* It is harmless to re-register variables so go ahead and re-register. */
65
+ verbose_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "verbose" ,
66
+ "Verbose level of the UCX components" ,
67
+ MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
68
+ MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
69
+ MCA_BASE_VAR_SCOPE_LOCAL , & opal_common_ucx .verbose );
70
+ progress_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "progress_iterations" ,
71
+ "Set number of calls of internal UCX progress "
72
+ "calls per opal_progress call" ,
73
+ MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
74
+ MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
71
75
MCA_BASE_VAR_SCOPE_LOCAL ,
72
- & opal_common_ucx .opal_mem_hooks );
73
-
74
- opal_common_ucx .tls = malloc (sizeof (* opal_common_ucx .tls ));
76
+ & opal_common_ucx .progress_iterations );
77
+ hook_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "opal_mem_hooks" ,
78
+ "Use OPAL memory hooks, instead of UCX internal "
79
+ "memory hooks" ,
80
+ MCA_BASE_VAR_TYPE_BOOL , NULL , 0 , 0 , OPAL_INFO_LVL_3 ,
81
+ MCA_BASE_VAR_SCOPE_LOCAL ,
82
+ & opal_common_ucx .opal_mem_hooks );
83
+
84
+ if (NULL == opal_common_ucx .tls ) {
85
+ // Extra level of string indirection needed to make ompi_info
86
+ // happy since it will unload this library before the MCA base
87
+ // cleans up the MCA vars. This will cause the string to go
88
+ // out of scope unless we place the pointer to it on the heap.
89
+ opal_common_ucx .tls = (char * * ) malloc (sizeof (char * ));
75
90
* opal_common_ucx .tls = strdup (default_tls );
76
- tls_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "tls" ,
77
- "List of UCX transports which should be supported on the system, to enable "
78
- "selecting the UCX component. Special values: any (any available). "
79
- "A '^' prefix negates the list. "
80
- "For example, in order to exclude on shared memory and TCP transports, "
81
- "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'." ,
82
- MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
83
- OPAL_INFO_LVL_3 ,
84
- MCA_BASE_VAR_SCOPE_LOCAL ,
85
- opal_common_ucx .tls );
86
-
87
- opal_common_ucx .devices = malloc (sizeof (* opal_common_ucx .devices ));
91
+ }
92
+
93
+ tls_index = mca_base_var_register (
94
+ "opal" , "opal_common" , "ucx" , "tls" ,
95
+ "List of UCX transports which should be supported on the system, to enable "
96
+ "selecting the UCX component. Special values: any (any available). "
97
+ "A '^' prefix negates the list. "
98
+ "For example, in order to exclude on shared memory and TCP transports, "
99
+ "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'." ,
100
+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 ,
101
+ MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
102
+ MCA_BASE_VAR_SCOPE_LOCAL ,
103
+ opal_common_ucx .tls );
104
+
105
+ if (NULL == opal_common_ucx .devices ) {
106
+ opal_common_ucx .devices = (char * * ) malloc (sizeof (char * ));
88
107
* opal_common_ucx .devices = strdup (default_devices );
89
- devices_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "devices" ,
90
- "List of device driver pattern names, which, if supported by UCX, will "
91
- "bump its priority above ob1. Special values: any (any available)" ,
92
- MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
93
- OPAL_INFO_LVL_3 ,
94
- MCA_BASE_VAR_SCOPE_LOCAL ,
95
- opal_common_ucx .devices );
96
- registered = 1 ;
97
108
}
109
+ devices_index = mca_base_var_register (
110
+ "opal" , "opal_common" , "ucx" , "devices" ,
111
+ "List of device driver pattern names, which, if supported by UCX, will "
112
+ "bump its priority above ob1. Special values: any (any available)" ,
113
+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 ,
114
+ MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
115
+ MCA_BASE_VAR_SCOPE_LOCAL ,
116
+ opal_common_ucx .devices );
117
+
98
118
if (component ) {
99
119
mca_base_var_register_synonym (verbose_index , component -> mca_project_name ,
100
120
component -> mca_type_name ,
@@ -230,7 +250,7 @@ opal_common_ucx_support_level(ucp_context_h context)
230
250
int ret ;
231
251
#endif
232
252
233
- is_any_tl = !strcmp (* opal_common_ucx .tls , "any" );
253
+ is_any_tl = !strcmp (* opal_common_ucx .tls , "any" );
234
254
is_any_device = !strcmp (* opal_common_ucx .devices , "any" );
235
255
236
256
/* Check for special value "any" */
@@ -242,7 +262,7 @@ opal_common_ucx_support_level(ucp_context_h context)
242
262
243
263
#if HAVE_DECL_OPEN_MEMSTREAM
244
264
/* Split transports list */
245
- negate = ('^' == (* opal_common_ucx .tls )[0 ]);
265
+ negate = ('^' == (* opal_common_ucx .tls )[0 ]);
246
266
tl_list = opal_argv_split (* opal_common_ucx .tls + (negate ? 1 : 0 ), ',' );
247
267
if (tl_list == NULL ) {
248
268
MCA_COMMON_UCX_VERBOSE (1 , "failed to split tl list '%s', ucx is disabled" ,
0 commit comments