Skip to content

Commit b643852

Browse files
author
Ralph Castain
committed
Properly terminate the job when executable not found
Signed-off-by: Ralph Castain <[email protected]>
1 parent d83d2be commit b643852

File tree

2 files changed

+17 -6 lines changed

2 files changed

+17 -6 lines changed

orte/mca/odls/base/odls_base_default_fns.c

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -940,7 +940,6 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
940940
* to this place as our default directory
941941
*/
942942
getcwd(basedir, sizeof(basedir));
943-
944943
/* find the jobdat for this job */
945944
if (NULL == (jobdat = orte_get_job_data_object(job))) {
946945
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
@@ -1144,6 +1143,17 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
11441143
opal_argv_free(argvptr);
11451144
}
11461145
if (ORTE_SUCCESS != rc) {
1146+
/* cycle through children to find those for this jobid */
1147+
for (idx=0; idx < orte_local_children->size; idx++) {
1148+
if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) {
1149+
continue;
1150+
}
1151+
if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID) &&
1152+
j == (int)child->app_idx) {
1153+
child->exit_code = rc;
1154+
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
1155+
}
1156+
}
11471157
goto GETOUT;
11481158
}
11491159

orte/tools/prun/prun.c

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -194,18 +194,19 @@ static void evhandler(int status,
194194
}
195195
}
196196

197-
/* we _always_ have to execute the evhandler callback or
198-
* else the event progress engine will hang */
199-
if (NULL != cbfunc) {
200-
cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata);
201-
}
202197
/* only terminate if this was our job - keep in mind that we
203198
* can get notifications of job termination prior to our spawn
204199
* having completed! */
205200
if (!fired && (myjobid != ORTE_JOBID_INVALID && jobid == myjobid)) {
206201
fired = true;
207202
active = false;
208203
}
204+
205+
/* we _always_ have to execute the evhandler callback or
206+
* else the event progress engine will hang */
207+
if (NULL != cbfunc) {
208+
cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata);
209+
}
209210
}
210211

211212

0 commit comments

Comments
 (0)