Skip to content

Commit a94101f

Browse files
committed
Mostly another hack around PML selection: this allows CM to select itself if an
MTL is available; if not, OB1 is used. It still prevents DR and OB1 from stomping on each other, though. This commit was SVN r13481.
1 parent 4e506e6 commit a94101f

File tree

8 files changed

+89
-47
lines changed

8 files changed

+89
-47
lines changed

ompi/mca/btl/mx/btl_mx_component.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -377,8 +377,10 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
377377

378378
/* First check if MX is available ... */
379379
if( MX_SUCCESS != (status = mx_init()) ) {
380-
opal_output( 0, "mca_btl_mx_component_init: mx_init() failed with status = %d (%s)\n",
381-
status, mx_strerror(status) );
380+
if(MX_ALREADY_INITIALIZED != status) {
381+
opal_output( 0, "mca_btl_mx_component_init: mx_init() failed with status = %d (%s)\n",
382+
status, mx_strerror(status) );
383+
}
382384
mca_pml_base_modex_send(&mca_btl_mx_component.super.btl_version,
383385
NULL, 0);
384386
return NULL;

ompi/mca/mtl/mx/mtl_mx_component.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,14 @@ ompi_mtl_mx_component_init(bool enable_progress_threads,
126126
/* initialize the mx library */
127127
mx_return = mx_init();
128128

129-
if(mx_return!=MX_SUCCESS) {
130-
opal_output(ompi_mtl_base_output,
131-
"Error in mx_init (error %s)\n",
132-
mx_strerror(mx_return));
133-
return NULL;
129+
if(MX_SUCCESS != mx_return){
130+
if(MX_ALREADY_INITIALIZED != mx_return) {
131+
opal_output(ompi_mtl_base_output,
132+
"Error in mx_init (error %s)\n",
133+
mx_strerror(mx_return));
134+
} else {
135+
return NULL;
136+
}
134137
}
135138

136139
ret = ompi_mtl_mx_module_init();

ompi/mca/pml/base/base.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
#include "opal/mca/mca.h"
2525
#include "ompi/mca/pml/pml.h"
26-
26+
#include "ompi/class/ompi_pointer_array.h"
2727

2828
/*
2929
* Global functions for the PML
@@ -46,7 +46,7 @@ OMPI_DECLSPEC extern int mca_pml_base_output;
4646
OMPI_DECLSPEC extern opal_list_t mca_pml_base_components_available;
4747
OMPI_DECLSPEC extern mca_pml_base_component_t mca_pml_base_selected_component;
4848
OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml;
49-
OMPI_DECLSPEC extern char* mca_pml_base_pml;
49+
OMPI_DECLSPEC extern ompi_pointer_array_t mca_pml_base_pml;
5050

5151
#if defined(c_plusplus) || defined(__cplusplus)
5252
}

ompi/mca/pml/base/pml_base_open.c

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,15 @@ mca_pml_base_module_t mca_pml = {
6969

7070
opal_list_t mca_pml_base_components_available;
7171
mca_pml_base_component_t mca_pml_base_selected_component;
72-
char *mca_pml_base_pml;
72+
ompi_pointer_array_t mca_pml_base_pml;
7373

7474
/**
7575
* Function for finding and opening either all MCA components, or the one
7676
* that was specifically requested via a MCA parameter.
7777
*/
7878
int mca_pml_base_open(void)
7979
{
80-
char* default_pml;
80+
char* default_pml = NULL;
8181

8282
/* Open up all available components */
8383

@@ -95,26 +95,32 @@ int mca_pml_base_open(void)
9595

9696
/**
9797
* Right now our selection of BTLs is completely broken. If we have
98-
* multiple PMLs we will open all BTLs several times, leading to
98+
* multiple PMLs that use BTLs, then we will open all BTLs several times, leading to
9999
* undefined behaviors. The simplest solution, at least until we
100-
* figure out the correct way to do it, is to force a default value
101-
* in the mca_pml_base_pml global.
100+
* figure out the correct way to do it, is to force a default PML that
101+
* uses BTLs, plus any other PMLs that do not use BTLs, into the mca_pml_base_pml array.
102102
*/
103+
104+
OBJ_CONSTRUCT(&mca_pml_base_pml, ompi_pointer_array_t);
105+
103106
#if MCA_pml_DIRECT_CALL
104-
default_pml = stringify(MCA_pml_DIRECT_CALL_COMPONENT);
107+
ompi_pointer_array_add(&mca_pml_base_pml,
108+
stringify(MCA_pml_DIRECT_CALL_COMPONENT));
105109
#else
106-
default_pml = "ob1";
110+
111+
112+
mca_base_param_reg_string_name("pml", NULL,
113+
"Specify a specific PML to use",
114+
false, false, "", &default_pml);
115+
116+
if(0 == strlen(default_pml)){
117+
ompi_pointer_array_add(&mca_pml_base_pml, strdup("ob1"));
118+
ompi_pointer_array_add(&mca_pml_base_pml, strdup("cm"));
119+
} else {
120+
ompi_pointer_array_add(&mca_pml_base_pml, strdup(default_pml));
121+
}
107122
#endif
108123

109-
mca_base_param_lookup_string(
110-
mca_base_param_register_string("pml",
111-
NULL,
112-
NULL,
113-
NULL,
114-
default_pml),
115-
&mca_pml_base_pml);
116-
if( NULL == mca_pml_base_pml )
117-
mca_pml_base_pml = default_pml;
118-
119124
return OMPI_SUCCESS;
125+
120126
}

ompi/mca/pml/base/pml_base_select.c

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,15 @@ typedef struct opened_component_t {
4747
int mca_pml_base_select(bool enable_progress_threads,
4848
bool enable_mpi_threads)
4949
{
50-
int priority = 0, best_priority = 0;
50+
int i, priority = 0, best_priority = 0;
51+
bool skip_pml = false;
5152
opal_list_item_t *item = NULL;
5253
mca_base_component_list_item_t *cli = NULL;
5354
mca_pml_base_component_t *component = NULL, *best_component = NULL;
5455
mca_pml_base_module_t *module = NULL, *best_module = NULL;
5556
opal_list_t opened;
5657
opened_component_t *om = NULL;
57-
58+
5859
/* Traverse the list of available components; call their init
5960
functions. */
6061

@@ -67,16 +68,22 @@ int mca_pml_base_select(bool enable_progress_threads,
6768
item = opal_list_get_next(item) ) {
6869
cli = (mca_base_component_list_item_t *) item;
6970
component = (mca_pml_base_component_t *) cli->cli_component;
70-
71+
skip_pml = false;
7172
/* if there is an include list - item must be in the list to be included */
72-
if( (NULL != mca_pml_base_pml) &&
73-
(strcmp(component->pmlm_version.mca_component_name, mca_pml_base_pml) != 0) ) {
74-
opal_output_verbose( 10, mca_pml_base_output,
75-
"select: component %s not in the include list",
76-
component->pmlm_version.mca_component_name );
73+
for( i = 0; i < ompi_pointer_array_get_size(&mca_pml_base_pml); i++) {
74+
if((strcmp(component->pmlm_version.mca_component_name,
75+
(char *) ompi_pointer_array_get_item(&mca_pml_base_pml, i)) != 0)) {
76+
opal_output_verbose( 10, mca_pml_base_output,
77+
"select: component %s not in the include list",
78+
component->pmlm_version.mca_component_name );
79+
skip_pml = true;
80+
} else {
81+
skip_pml = false;
82+
}
83+
}
84+
if(skip_pml) {
7785
continue;
7886
}
79-
8087
if (NULL == component->pmlm_init) {
8188
opal_output_verbose( 10, mca_pml_base_output,
8289
"select: no init function; ignoring component %s",
@@ -87,6 +94,7 @@ int mca_pml_base_select(bool enable_progress_threads,
8794
"select: initializing %s component %s",
8895
component->pmlm_version.mca_type_name,
8996
component->pmlm_version.mca_component_name );
97+
priority = best_priority;
9098
module = component->pmlm_init(&priority, enable_progress_threads,
9199
enable_mpi_threads);
92100
if (NULL == module) {
@@ -113,16 +121,21 @@ int mca_pml_base_select(bool enable_progress_threads,
113121
}
114122

115123
/* Finished querying all components. Check for the bozo case. */
116-
124+
117125
if( NULL == best_component ) {
118126
opal_show_help("help-mca-base.txt", "find-available:none-found", true, "pml");
119-
if( NULL != mca_pml_base_pml ) {
120-
orte_errmgr.error_detected(1, "PML %s cannot be selected", mca_pml_base_pml, NULL);
121-
} else {
127+
for( i = 0; i < ompi_pointer_array_get_size(&mca_pml_base_pml); i++) {
128+
orte_errmgr.error_detected(1, "PML %s cannot be selected", (char*) ompi_pointer_array_get_item(&mca_pml_base_pml, i), NULL);
129+
}
130+
if(0 == i) {
122131
orte_errmgr.error_detected(2, "No pml component available. This shouldn't happen.", NULL);
123132
}
124133
}
125-
134+
135+
opal_output_verbose( 10, mca_pml_base_output,
136+
"selected %s best priority %d\n",
137+
best_component->pmlm_version.mca_component_name, best_priority);
138+
126139
/* Finalize all non-selected components */
127140

128141
for (item = opal_list_remove_first(&opened);

ompi/mca/pml/cm/pml_cm_component.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ mca_pml_base_component_1_0_0_t mca_pml_cm_component = {
6161
static int free_list_num = 0;
6262
static int free_list_max = 0;
6363
static int free_list_inc = 0;
64-
static int default_priority = 0;
64+
static int default_priority = 2;
6565

6666
static int
6767
mca_pml_cm_component_open(void)
@@ -100,7 +100,7 @@ mca_pml_cm_component_open(void)
100100
"CM PML selection priority",
101101
false,
102102
false,
103-
1,
103+
2,
104104
&default_priority);
105105

106106
return OMPI_SUCCESS;
@@ -120,13 +120,20 @@ mca_pml_cm_component_init(int* priority,
120120
bool enable_mpi_threads)
121121
{
122122
int ret;
123-
123+
if((*priority) > default_priority) {
124+
*priority = default_priority;
125+
return NULL;
126+
}
124127
*priority = default_priority;
125-
128+
opal_output_verbose( 10, 0,
129+
"in cm pml priority is %d\n", *priority);
126130
/* find a useable MTL */
127131
ret = ompi_mtl_base_select(enable_progress_threads, enable_mpi_threads);
128-
if (OMPI_SUCCESS != ret) return NULL;
129-
132+
if (OMPI_SUCCESS != ret) {
133+
*priority = -1;
134+
return NULL;
135+
}
136+
130137
/* update our tag / context id max values based on MTL
131138
information */
132139
ompi_pml_cm.super.pml_max_contextid = ompi_mtl->mtl_max_contextid;

ompi/mca/pml/dr/pml_dr_component.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,12 @@ mca_pml_base_module_t* mca_pml_dr_component_init(int* priority,
192192
bool enable_progress_threads,
193193
bool enable_mpi_threads)
194194
{
195+
if((*priority) > mca_pml_dr.priority) {
196+
*priority = mca_pml_dr.priority;
197+
return NULL;
198+
}
195199
*priority = mca_pml_dr.priority;
196-
200+
197201
/* buffered send */
198202
if(OMPI_SUCCESS != mca_pml_base_bsend_init(enable_mpi_threads)) {
199203
opal_output(0, "mca_pml_dr_component_init: mca_pml_bsend_init failed\n");

ompi/mca/pml/ob1/pml_ob1_component.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,13 @@ mca_pml_base_module_t* mca_pml_ob1_component_init(int* priority,
219219
bool enable_progress_threads,
220220
bool enable_mpi_threads)
221221
{
222+
opal_output_verbose( 10, 0,
223+
"in ob1, my priority is %d\n", mca_pml_ob1.priority);
224+
225+
if((*priority) > mca_pml_ob1.priority) {
226+
*priority = mca_pml_ob1.priority;
227+
return NULL;
228+
}
222229
*priority = mca_pml_ob1.priority;
223230

224231
/* buffered send */

0 commit comments

Comments
 (0)