Skip to content

Fix singleton operations when running under a SLURM allocation. #1037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 19, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,11 @@ int PMIx_Init(pmix_proc_t *proc)
return PMIX_SUCCESS;
}

/* if we don't see the required info, then we cannot init */
if (NULL == getenv("PMIX_NAMESPACE")) {
return PMIX_ERR_INVALID_NAMESPACE;
}

/* setup the globals */
pmix_globals_init();
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);
Expand Down
2 changes: 1 addition & 1 deletion opal/mca/pmix/s1/pmix_s1_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static int pmix_s1_component_register(void)
static int pmix_s1_component_query(mca_base_module_t **module, int *priority)
{
/* disqualify ourselves if we are not under slurm */
if (NULL == getenv("SLURM_JOBID")) {
if (NULL == getenv("SLURM_STEP_NUM_TASKS")) {
*priority = 0;
*module = NULL;
return OPAL_ERROR;
Expand Down
2 changes: 1 addition & 1 deletion opal/mca/pmix/s2/pmix_s2_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static int pmix_s2_component_query(mca_base_module_t **module, int *priority)
{
/* disqualify ourselves if we are not under slurm, or
* if they didn't set mpi=pmi2 (in which case PMI_FD is not set) */
if (NULL == getenv("SLURM_JOBID") ||
if (NULL == getenv("SLURM_STEP_NUM_TASKS") ||
NULL == getenv("PMI_FD")) {
*priority = 0;
*module = NULL;
Expand Down
43 changes: 17 additions & 26 deletions orte/mca/ess/pmi/ess_pmi_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,33 +71,24 @@ static int pmi_component_query(mca_base_module_t **module, int *priority)

/* all APPS must use pmix */
if (ORTE_PROC_IS_APP) {
/* open and setup pmix */
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
*priority = -1;
*module = NULL;
return ret;
}
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
/* don't error log this as it might not be an error at all */
*priority = -1;
*module = NULL;
(void) mca_base_framework_close(&opal_pmix_base_framework);
return ret;
}
/* initialize the selected module */
if (OPAL_SUCCESS != (ret = opal_pmix.init())) {
/* cannot run */
*priority = -1;
*module = NULL;
(void) mca_base_framework_close(&opal_pmix_base_framework);
return ret;
if (NULL == opal_pmix.initialized) {
/* open and setup pmix */
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
*priority = -1;
*module = NULL;
return ret;
}
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
/* don't error log this as it might not be an error at all */
*priority = -1;
*module = NULL;
(void) mca_base_framework_close(&opal_pmix_base_framework);
return ret;
}
}
if (!opal_pmix.initialized()) {
/* we may have everything setup, but we are not
* in a PMIx environment and so we need to disqualify
* ourselves - we are likely a singleton and will
* pick things up from there */
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
/* we cannot be in a PMI environment */
*priority = -1;
*module = NULL;
return ORTE_ERROR;
Expand Down
7 changes: 6 additions & 1 deletion orte/mca/ess/pmi/ess_pmi_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,12 @@ static int rte_init(void)
goto error;
}

/* we don't have to call pmix.init because the pmix select did it */
/* initialize the selected module */
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
/* we cannot run */
error = "pmix init";
goto error;
}
u32ptr = &u32;
u16ptr = &u16;

Expand Down
22 changes: 12 additions & 10 deletions orte/mca/ess/singleton/ess_singleton_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,18 +165,20 @@ static int rte_init(void)
}

/* open and setup pmix */
if (OPAL_SUCCESS != (rc = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (OPAL_SUCCESS != (rc = opal_pmix_base_select())) {
ORTE_ERROR_LOG(rc);
return rc;
if (NULL == opal_pmix.initialized) {
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
error = "opening pmix";
goto error;
}
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
error = "select pmix";
goto error;
}
}
/* initialize the selected module */
if (OPAL_SUCCESS != (rc = opal_pmix.init())) {
ORTE_ERROR_LOG(rc);
return rc;
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
error = "init pmix";
goto error;
}

/* pmix.init set our process name down in the OPAL layer,
Expand Down