Skip to content

Commit 71ec545

Browse files
committed
Merge pull request #1037 from rhc54/topic/singleton
Fix singleton operations when running under a SLURM allocation.
2 parents 0f23037 + 363f62a commit 71ec545

File tree

6 files changed

+42
-39
lines changed

6 files changed

+42
-39
lines changed

opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,6 +241,11 @@ int PMIx_Init(pmix_proc_t *proc)
241241
return PMIX_SUCCESS;
242242
}
243243

244+
/* if we don't see the required info, then we cannot init */
245+
if (NULL == getenv("PMIX_NAMESPACE")) {
246+
return PMIX_ERR_INVALID_NAMESPACE;
247+
}
248+
244249
/* setup the globals */
245250
pmix_globals_init();
246251
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);

opal/mca/pmix/s1/pmix_s1_component.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ static int pmix_s1_component_register(void)
9090
static int pmix_s1_component_query(mca_base_module_t **module, int *priority)
9191
{
9292
/* disqualify ourselves if we are not under slurm */
93-
if (NULL == getenv("SLURM_JOBID")) {
93+
if (NULL == getenv("SLURM_STEP_NUM_TASKS")) {
9494
*priority = 0;
9595
*module = NULL;
9696
return OPAL_ERROR;

opal/mca/pmix/s2/pmix_s2_component.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ static int pmix_s2_component_query(mca_base_module_t **module, int *priority)
9090
{
9191
/* disqualify ourselves if we are not under slurm, and
9292
* if they didn't set mpi=pmix2 */
93-
if (NULL == getenv("SLURM_JOBID") ||
93+
if (NULL == getenv("SLURM_STEP_NUM_TASKS") ||
9494
NULL == getenv("PMI_FD")) {
9595
*priority = 0;
9696
*module = NULL;

orte/mca/ess/pmi/ess_pmi_component.c

Lines changed: 17 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -71,33 +71,24 @@ static int pmi_component_query(mca_base_module_t **module, int *priority)
7171

7272
/* all APPS must use pmix */
7373
if (ORTE_PROC_IS_APP) {
74-
/* open and setup pmix */
75-
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
76-
ORTE_ERROR_LOG(ret);
77-
*priority = -1;
78-
*module = NULL;
79-
return ret;
80-
}
81-
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
82-
/* don't error log this as it might not be an error at all */
83-
*priority = -1;
84-
*module = NULL;
85-
(void) mca_base_framework_close(&opal_pmix_base_framework);
86-
return ret;
87-
}
88-
/* initialize the selected module */
89-
if (OPAL_SUCCESS != (ret = opal_pmix.init())) {
90-
/* cannot run */
91-
*priority = -1;
92-
*module = NULL;
93-
(void) mca_base_framework_close(&opal_pmix_base_framework);
94-
return ret;
74+
if (NULL == opal_pmix.initialized) {
75+
/* open and setup pmix */
76+
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
77+
ORTE_ERROR_LOG(ret);
78+
*priority = -1;
79+
*module = NULL;
80+
return ret;
81+
}
82+
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
83+
/* don't error log this as it might not be an error at all */
84+
*priority = -1;
85+
*module = NULL;
86+
(void) mca_base_framework_close(&opal_pmix_base_framework);
87+
return ret;
88+
}
9589
}
96-
if (!opal_pmix.initialized()) {
97-
/* we may have everything setup, but we are not
98-
* in a PMIx environment and so we need to disqualify
99-
* ourselves - we are likely a singleton and will
100-
* pick things up from there */
90+
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
91+
/* we cannot be in a PMI environment */
10192
*priority = -1;
10293
*module = NULL;
10394
return ORTE_ERROR;

orte/mca/ess/pmi/ess_pmi_module.c

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,12 @@ static int rte_init(void)
9797
goto error;
9898
}
9999

100-
/* we don't have to call pmix.init because the pmix select did it */
100+
/* initialize the selected module */
101+
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
102+
/* we cannot run */
103+
error = "pmix init";
104+
goto error;
105+
}
101106
u32ptr = &u32;
102107
u16ptr = &u16;
103108

orte/mca/ess/singleton/ess_singleton_module.c

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -165,18 +165,20 @@ static int rte_init(void)
165165
}
166166

167167
/* open and setup pmix */
168-
if (OPAL_SUCCESS != (rc = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
169-
ORTE_ERROR_LOG(rc);
170-
return rc;
171-
}
172-
if (OPAL_SUCCESS != (rc = opal_pmix_base_select())) {
173-
ORTE_ERROR_LOG(rc);
174-
return rc;
168+
if (NULL == opal_pmix.initialized) {
169+
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
170+
error = "opening pmix";
171+
goto error;
172+
}
173+
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
174+
error = "select pmix";
175+
goto error;
176+
}
175177
}
176178
/* initialize the selected module */
177-
if (OPAL_SUCCESS != (rc = opal_pmix.init())) {
178-
ORTE_ERROR_LOG(rc);
179-
return rc;
179+
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
180+
error = "init pmix";
181+
goto error;
180182
}
181183

182184
/* pmix.init set our process name down in the OPAL layer,

0 commit comments

Comments
 (0)