Skip to content

Commit 55357ce

Browse files
Author: Ralph Castain
Merge pull request #5406 from rhc54/topic/dyn
Control inheritance of launch directives by child jobs
2 parents 8b09010 + 6b6e63a commit 55357ce

File tree

3 files changed: 40 additions & 23 deletions

orte/mca/rmaps/base/base.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -71,6 +71,8 @@ typedef struct {
7171
orte_ranking_policy_t ranking;
7272
/* device specification for min distance mapping */
7373
char *device;
74+
/* whether or not child jobs should inherit launch directives */
75+
bool inherit;
7476
} orte_rmaps_base_t;
7577

7678
/**

orte/mca/rmaps/base/rmaps_base_frame.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ static bool rmaps_base_display_devel_map = false;
6969
static bool rmaps_base_display_diffable_map = false;
7070
static char *rmaps_base_topo_file = NULL;
7171
static char *rmaps_dist_device = NULL;
72+
static bool rmaps_base_inherit = false;
7273

7374
static int orte_rmaps_base_register(mca_base_register_flag_t flags)
7475
{
@@ -223,6 +224,12 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags)
223224
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
224225
MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_topo_file);
225226

227+
rmaps_base_inherit = false;
228+
(void) mca_base_var_register("orte", "rmaps", "base", "inherit",
229+
"Whether child jobs shall inherit launch directives",
230+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
231+
OPAL_INFO_LVL_9,
232+
MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_inherit);
226233

227234
return ORTE_SUCCESS;
228235
}
@@ -254,6 +261,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
254261
orte_rmaps_base.mapping = 0;
255262
orte_rmaps_base.ranking = 0;
256263
orte_rmaps_base.device = NULL;
264+
orte_rmaps_base.inherit = rmaps_base_inherit;
257265

258266
/* if a topology file was given, then set our topology
259267
* from it. Even though our actual topology may differ,

orte/mca/rmaps/base/rmaps_base_map_job.c

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
5454
orte_job_t *parent;
5555
orte_vpid_t nprocs;
5656
orte_app_context_t *app;
57+
bool inherit = false;
5758

5859
ORTE_ACQUIRE_OBJECT(caddy);
5960
jdata = caddy->jdata;
@@ -64,32 +65,36 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
6465
"mca:rmaps: mapping job %s",
6566
ORTE_JOBID_PRINT(jdata->jobid));
6667

67-
if (NULL == jdata->map->ppr && NULL != orte_rmaps_base.ppr) {
68-
jdata->map->ppr = strdup(orte_rmaps_base.ppr);
68+
/* if this is a dynamic job launch and they didn't explicitly
69+
* request inheritance, then don't inherit the launch directives */
70+
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, NULL, OPAL_NAME)) {
71+
inherit = orte_rmaps_base.inherit;
72+
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
73+
"mca:rmaps: dynamic job %s %s inherit launch directives",
74+
ORTE_JOBID_PRINT(jdata->jobid),
75+
inherit ? "will" : "will not");
76+
} else {
77+
/* initial launch always takes on MCA params */
78+
inherit = true;
79+
}
80+
81+
if (inherit) {
82+
if (NULL == jdata->map->ppr && NULL != orte_rmaps_base.ppr) {
83+
jdata->map->ppr = strdup(orte_rmaps_base.ppr);
84+
}
85+
if (0 == jdata->map->cpus_per_rank) {
86+
jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank;
87+
}
6988
}
7089
if (NULL != jdata->map->ppr) {
7190
/* get the procs/object */
7291
ppx = strtoul(jdata->map->ppr, NULL, 10);
7392
if (NULL != strstr(jdata->map->ppr, "node")) {
7493
pernode = true;
75-
} else {
76-
pernode = false;
77-
}
78-
} else {
79-
if (orte_rmaps_base_pernode) {
80-
ppx = 1;
81-
pernode = true;
82-
} else if (0 < orte_rmaps_base_n_pernode) {
83-
ppx = orte_rmaps_base_n_pernode;
84-
pernode = true;
85-
} else if (0 < orte_rmaps_base_n_persocket) {
86-
ppx = orte_rmaps_base_n_persocket;
94+
} else if (NULL != strstr(jdata->map->ppr, "socket")) {
8795
persocket = true;
8896
}
8997
}
90-
if (0 == jdata->map->cpus_per_rank) {
91-
jdata->map->cpus_per_rank = orte_rmaps_base.cpus_per_rank;
92-
}
9398

9499
/* compute the number of procs and check validity */
95100
nprocs = 0;
@@ -151,12 +156,13 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
151156
"mca:rmaps: setting mapping policies for job %s nprocs %d",
152157
ORTE_JOBID_PRINT(jdata->jobid), (int)nprocs);
153158

154-
if (!jdata->map->display_map) {
159+
if (inherit && !jdata->map->display_map) {
155160
jdata->map->display_map = orte_rmaps_base.display_map;
156161
}
162+
157163
/* set the default mapping policy IFF it wasn't provided */
158164
if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
159-
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
165+
if (inherit && (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
160166
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
161167
"mca:rmaps mapping given by MCA param");
162168
jdata->map->mapping = orte_rmaps_base.mapping;
@@ -216,12 +222,13 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
216222

217223
/* check for no-use-local directive */
218224
if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
219-
if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
225+
if (inherit && (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
220226
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
221227
}
222228
}
223229

224-
/* ditto for rank policy */
230+
/* we don't have logic to determine default rank policy, so
231+
* just inherit it if they didn't give us one */
225232
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
226233
jdata->map->ranking = orte_rmaps_base.ranking;
227234
}
@@ -230,7 +237,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
230237
* already (e.g., during the call to comm_spawn), then we don't
231238
* override it */
232239
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
233-
if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
240+
if (inherit && OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
234241
/* if the user specified a default binding policy via
235242
* MCA param, then we use it - this can include a directive
236243
* to overload */

0 commit comments

Comments
 (0)