@@ -235,79 +235,56 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
235235 "%s ras:base:allocate nothing found in module - proceeding to hostfile" ,
236236 ORTE_NAME_PRINT (ORTE_PROC_MY_NAME )));
237237
238- /* nothing was found, or no active module was alive. Our next
239- * option is to look for a hostfile and assign our global
240- * pool from there.
241- *
242- * Individual hostfile names, if given, are included
243- * in the app_contexts for this job. We therefore need to
244- * retrieve the app_contexts for the job, and then cycle
245- * through them to see if anything is there. The parser will
246- * add the nodes found in each hostfile to our list - i.e.,
247- * the resulting list contains the UNION of all nodes specified
248- * in hostfiles from across all app_contexts
249- *
250- * We then continue to add any hosts provided by dash-host and
251- * the default hostfile, if we have it. We will then filter out
252- * all the non-desired hosts (i.e., those not specified by
253- * -host and/or -hostfile) when we start the mapping process
254- *
255- * Note that any relative node syntax found in the hostfiles will
256- * generate an error in this scenario, so only non-relative syntax
257- * can be present
258- */
259- if (NULL != orte_default_hostfile ) {
238+ /* nothing was found, or no active module was alive. We first see
239+ * if we were given a rankfile - if so, use it as the hosts will be
240+ * taken from the mapping */
241+ if (NULL != orte_rankfile ) {
260242 OPAL_OUTPUT_VERBOSE ((5 , orte_ras_base_framework .framework_output ,
261- "%s ras:base:allocate parsing default hostfile %s" ,
243+ "%s ras:base:allocate parsing rankfile %s" ,
262244 ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
263- orte_default_hostfile ));
245+ orte_rankfile ));
264246
265- /* a default hostfile was provided - parse it */
247+ /* a rankfile was provided - parse it */
266248 if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes (& nodes ,
267- orte_default_hostfile ))) {
249+ orte_rankfile ))) {
268250 OBJ_DESTRUCT (& nodes );
269251 ORTE_FORCED_TERMINATE (ORTE_ERROR_DEFAULT_EXIT_CODE );
270252 OBJ_RELEASE (caddy );
271253 return ;
272254 }
273255 }
274256
275- if (NULL != orte_rankfile ) {
276- OPAL_OUTPUT_VERBOSE ((5 , orte_ras_base_framework .framework_output ,
277- "%s ras:base:allocate parsing rankfile %s" ,
278- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
279- orte_rankfile ));
280-
281- /* a rankfile was provided - parse it */
282- if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes (& nodes ,
283- orte_rankfile ))) {
284- OBJ_DESTRUCT (& nodes );
257+ /* if something was found in the rankfile, we use that as our global
258+ * pool - set it and we are done
259+ */
260+ if (!opal_list_is_empty (& nodes )) {
261+ /* store the results in the global resource pool - this removes the
262+ * list items
263+ */
264+ if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert (& nodes , jdata ))) {
265+ ORTE_ERROR_LOG (rc );
285266 ORTE_FORCED_TERMINATE (ORTE_ERROR_DEFAULT_EXIT_CODE );
286267 OBJ_RELEASE (caddy );
287268 return ;
288269 }
270+ /* rankfile is considered equivalent to an RM allocation */
271+ if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE (orte_rmaps_base .mapping ))) {
272+ ORTE_SET_MAPPING_DIRECTIVE (orte_rmaps_base .mapping , ORTE_MAPPING_NO_OVERSUBSCRIBE );
273+ }
274+ /* cleanup */
275+ OBJ_DESTRUCT (& nodes );
276+ goto DISPLAY ;
289277 }
278+
279+ /* if a dash-host has been provided, aggregate across all the
280+ * app_contexts. Any hosts the user wants to add via comm_spawn
281+ * can be added using the add_host option */
290282 for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
291283 if (NULL == (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
292284 continue ;
293285 }
294- if (orte_get_attribute (& app -> attributes , ORTE_APP_HOSTFILE , (void * * )& hosts , OPAL_STRING )) {
295- OPAL_OUTPUT_VERBOSE ((5 , orte_ras_base_framework .framework_output ,
296- "%s ras:base:allocate adding hostfile %s" ,
297- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), hosts ));
298-
299- /* hostfile was specified - parse it and add it to the list */
300- if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes (& nodes , hosts ))) {
301- free (hosts );
302- OBJ_DESTRUCT (& nodes );
303- /* set an error event */
304- ORTE_FORCED_TERMINATE (ORTE_ERROR_DEFAULT_EXIT_CODE );
305- OBJ_RELEASE (caddy );
306- return ;
307- }
308- free (hosts );
309- } else if (!orte_soft_locations &&
310- orte_get_attribute (& app -> attributes , ORTE_APP_DASH_HOST , (void * * )& hosts , OPAL_STRING )) {
286+ if (!orte_soft_locations &&
287+ orte_get_attribute (& app -> attributes , ORTE_APP_DASH_HOST , (void * * )& hosts , OPAL_STRING )) {
311288 /* if we are using soft locations, then any dash-host would
312289 * just include desired nodes and not required. We don't want
313290 * to pick them up here as this would mean the request was
@@ -329,7 +306,7 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
329306 }
330307 }
331308
332- /* if something was found in the hostfile (s), we use that as our global
309+ /* if something was found in the dash-host (s), we use that as our global
333310 * pool - set it and we are done
334311 */
335312 if (!opal_list_is_empty (& nodes )) {
@@ -347,25 +324,79 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
347324 goto DISPLAY ;
348325 }
349326
350- OPAL_OUTPUT_VERBOSE ((5 , orte_ras_base_framework .framework_output ,
351- "%s ras:base:allocate nothing found in hostfiles - checking for rankfile" ,
352- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME )));
327+ /* Our next option is to look for a hostfile and assign our global
328+ * pool from there.
329+ *
330+ * Individual hostfile names, if given, are included
331+ * in the app_contexts for this job. We therefore need to
332+ * retrieve the app_contexts for the job, and then cycle
333+ * through them to see if anything is there. The parser will
334+ * add the nodes found in each hostfile to our list - i.e.,
335+ * the resulting list contains the UNION of all nodes specified
336+ * in hostfiles from across all app_contexts
337+ *
338+ * Note that any relative node syntax found in the hostfiles will
339+ * generate an error in this scenario, so only non-relative syntax
340+ * can be present
341+ */
342+ for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
343+ if (NULL == (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
344+ continue ;
345+ }
346+ if (orte_get_attribute (& app -> attributes , ORTE_APP_HOSTFILE , (void * * )& hosts , OPAL_STRING )) {
347+ OPAL_OUTPUT_VERBOSE ((5 , orte_ras_base_framework .framework_output ,
348+ "%s ras:base:allocate adding hostfile %s" ,
349+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), hosts ));
350+
351+ /* hostfile was specified - parse it and add it to the list */
352+ if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes (& nodes , hosts ))) {
353+ free (hosts );
354+ OBJ_DESTRUCT (& nodes );
355+ /* set an error event */
356+ ORTE_FORCED_TERMINATE (ORTE_ERROR_DEFAULT_EXIT_CODE );
357+ OBJ_RELEASE (caddy );
358+ return ;
359+ }
360+ free (hosts );
361+ }
362+ }
353363
354- /* Our next option is to look for a rankfile - if one was provided, we
355- * will use its nodes to create a default allocation pool
364+ /* if something was found in the hostfile(s), we use that as our global
365+ * pool - set it and we are done
356366 */
357- if (NULL != orte_rankfile ) {
358- /* check the rankfile for node information */
359- if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes (& nodes ,
360- orte_rankfile ))) {
367+ if (!opal_list_is_empty (& nodes )) {
368+ /* store the results in the global resource pool - this removes the
369+ * list items
370+ */
371+ if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert (& nodes , jdata ))) {
361372 ORTE_ERROR_LOG (rc );
373+ ORTE_FORCED_TERMINATE (ORTE_ERROR_DEFAULT_EXIT_CODE );
374+ OBJ_RELEASE (caddy );
375+ return ;
376+ }
377+ /* cleanup */
378+ OBJ_DESTRUCT (& nodes );
379+ goto DISPLAY ;
380+ }
381+
382+ /* if nothing was found so far, then look for a default hostfile */
383+ if (NULL != orte_default_hostfile ) {
384+ OPAL_OUTPUT_VERBOSE ((5 , orte_ras_base_framework .framework_output ,
385+ "%s ras:base:allocate parsing default hostfile %s" ,
386+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
387+ orte_default_hostfile ));
388+
389+ /* a default hostfile was provided - parse it */
390+ if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes (& nodes ,
391+ orte_default_hostfile ))) {
362392 OBJ_DESTRUCT (& nodes );
363393 ORTE_FORCED_TERMINATE (ORTE_ERROR_DEFAULT_EXIT_CODE );
364394 OBJ_RELEASE (caddy );
365- return ;
395+ return ;
366396 }
367397 }
368- /* if something was found in rankfile, we use that as our global
398+
399+ /* if something was found in the default hostfile, we use that as our global
369400 * pool - set it and we are done
370401 */
371402 if (!opal_list_is_empty (& nodes )) {
@@ -378,18 +409,13 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
378409 OBJ_RELEASE (caddy );
379410 return ;
380411 }
381- /* rankfile is considered equivalent to an RM allocation */
382- if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE (orte_rmaps_base .mapping ))) {
383- ORTE_SET_MAPPING_DIRECTIVE (orte_rmaps_base .mapping , ORTE_MAPPING_NO_OVERSUBSCRIBE );
384- }
385412 /* cleanup */
386413 OBJ_DESTRUCT (& nodes );
387414 goto DISPLAY ;
388415 }
389416
390-
391417 OPAL_OUTPUT_VERBOSE ((5 , orte_ras_base_framework .framework_output ,
392- "%s ras:base:allocate nothing found in rankfile - inserting current node" ,
418+ "%s ras:base:allocate nothing found in hostfiles - inserting current node" ,
393419 ORTE_NAME_PRINT (ORTE_PROC_MY_NAME )));
394420
395421 addlocal :
0 commit comments