Skip to content

Commit c26ed7d

Browse files
author
Ralph Castain
committed
Fix comm_spawn when ORTE progress thread is enabled by ensuring that all operations on the global list of active collectives are done in events to avoid conflicts.
This commit was SVN r27658.
1 parent 3e1b13b commit c26ed7d

File tree

4 files changed

+105
-48
lines changed

4 files changed

+105
-48
lines changed

orte/mca/grpcomm/bad/grpcomm_bad_module.c

Lines changed: 72 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ static int xcast(orte_jobid_t job,
5454
orte_rml_tag_t tag);
5555
static int bad_allgather(orte_grpcomm_collective_t *coll);
5656
static int bad_barrier(orte_grpcomm_collective_t *coll);
57+
static int bad_modex(orte_grpcomm_collective_t *modex);
5758

5859
/* Module def */
5960
orte_grpcomm_base_module_t orte_grpcomm_bad_module = {
@@ -62,7 +63,7 @@ orte_grpcomm_base_module_t orte_grpcomm_bad_module = {
6263
xcast,
6364
bad_allgather,
6465
bad_barrier,
65-
orte_grpcomm_base_modex
66+
bad_modex
6667
};
6768

6869
/**
@@ -133,29 +134,14 @@ static int xcast(orte_jobid_t job,
133134
return rc;
134135
}
135136

136-
137-
static int bad_barrier(orte_grpcomm_collective_t *coll)
137+
static void process_barrier(int fd, short args, void *cbdata)
138138
{
139+
orte_grpcomm_caddy_t *caddy = (orte_grpcomm_caddy_t*)cbdata;
140+
orte_grpcomm_collective_t *coll = caddy->op;
139141
int rc;
140142
opal_buffer_t *buf;
141143
orte_namelist_t *nm;
142144

143-
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
144-
"%s grpcomm:bad entering barrier",
145-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
146-
147-
/* if I am alone, just execute the callback */
148-
if (1 == orte_process_info.num_procs) {
149-
coll->active = false;
150-
if (NULL != coll->cbfunc) {
151-
coll->cbfunc(NULL, coll->cbdata);
152-
}
153-
return ORTE_SUCCESS;
154-
}
155-
156-
/* mark the collective as active */
157-
coll->active = true;
158-
159145
/* setup the collective */
160146
opal_list_append(&orte_grpcomm_base.active_colls, &coll->super);
161147

@@ -183,41 +169,47 @@ static int bad_barrier(orte_grpcomm_collective_t *coll)
183169
ORTE_ERROR_LOG(rc);
184170
OBJ_RELEASE(buf);
185171
opal_list_remove_item(&orte_grpcomm_base.active_colls, &coll->super);
186-
return rc;
172+
return;
187173
}
188174

189175
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
190176
"%s grpcomm:bad barrier underway",
191177
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
192-
193-
return rc;
194178
}
195179

196-
static int bad_allgather(orte_grpcomm_collective_t *gather)
180+
static int bad_barrier(orte_grpcomm_collective_t *coll)
197181
{
198-
int rc;
199-
opal_buffer_t *buf;
200-
orte_namelist_t *nm;
201-
opal_list_item_t *item;
202-
203182
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
204-
"%s grpcomm:bad entering allgather",
183+
"%s grpcomm:bad entering barrier",
205184
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
206185

207-
/* if I am alone and nobody else is participating, then
208-
* nothing really to do
209-
*/
210-
if (1 == orte_process_info.num_procs &&
211-
0 == opal_list_get_size(&gather->participants)) {
212-
gather->active = false;
213-
if (NULL != gather->cbfunc) {
214-
gather->cbfunc(&gather->buffer, gather->cbdata);
186+
/* if I am alone, just execute the callback */
187+
if (1 == orte_process_info.num_procs) {
188+
coll->active = false;
189+
if (NULL != coll->cbfunc) {
190+
coll->cbfunc(NULL, coll->cbdata);
215191
}
216192
return ORTE_SUCCESS;
217193
}
218194

219195
/* mark the collective as active */
220-
gather->active = true;
196+
coll->active = true;
197+
198+
/* push it into the event library for processing as
199+
* we will be accessing global lists
200+
*/
201+
ORTE_GRPCOMM_ACTIVATE(coll, process_barrier);
202+
return ORTE_SUCCESS;
203+
}
204+
205+
static void process_allgather(int fd, short args, void *cbdata)
206+
{
207+
orte_grpcomm_caddy_t *caddy = (orte_grpcomm_caddy_t*)cbdata;
208+
orte_grpcomm_collective_t *gather = caddy->op;
209+
int rc;
210+
opal_buffer_t *buf;
211+
orte_namelist_t *nm;
212+
opal_list_item_t *item;
221213

222214
/* if this is an original request, then record the collective */
223215
if (NULL == gather->next_cb) {
@@ -250,7 +242,7 @@ static int bad_allgather(orte_grpcomm_collective_t *gather)
250242
ORTE_ERROR_LOG(rc);
251243
OBJ_RELEASE(buf);
252244
opal_list_remove_item(&orte_grpcomm_base.active_colls, &gather->super);
253-
return rc;
245+
return;
254246
}
255247
} else {
256248
/* send directly to each participant - note that this will
@@ -274,15 +266,54 @@ static int bad_allgather(orte_grpcomm_collective_t *gather)
274266
ORTE_ERROR_LOG(rc);
275267
OBJ_RELEASE(buf);
276268
opal_list_remove_item(&orte_grpcomm_base.active_colls, &gather->super);
277-
return rc;
269+
return;
278270
}
279271
}
280-
return ORTE_SUCCESS;
272+
return;
281273
}
282274

283275
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
284276
"%s grpcomm:bad allgather underway",
285277
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
278+
}
279+
280+
static int bad_allgather(orte_grpcomm_collective_t *gather)
281+
{
282+
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
283+
"%s grpcomm:bad entering allgather",
284+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
285+
286+
/* if I am alone and nobody else is participating, then
287+
* nothing really to do
288+
*/
289+
if (1 == orte_process_info.num_procs &&
290+
0 == opal_list_get_size(&gather->participants)) {
291+
gather->active = false;
292+
if (NULL != gather->cbfunc) {
293+
gather->cbfunc(&gather->buffer, gather->cbdata);
294+
}
295+
return ORTE_SUCCESS;
296+
}
286297

298+
/* mark the collective as active */
299+
gather->active = true;
300+
301+
/* push it into the event library for processing as
302+
* we will be accessing global lists
303+
*/
304+
ORTE_GRPCOMM_ACTIVATE(gather, process_allgather);
305+
return ORTE_SUCCESS;
306+
}
307+
308+
static int bad_modex(orte_grpcomm_collective_t *modex)
309+
{
310+
/* mark the collective as active */
311+
modex->active = true;
312+
313+
/* we need to get this into the event library
314+
* to avoid race conditions with modex data arriving
315+
* from other sources via the RML
316+
*/
317+
ORTE_GRPCOMM_ACTIVATE(modex, orte_grpcomm_base_modex);
287318
return ORTE_SUCCESS;
288319
}

orte/mca/grpcomm/base/base.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,28 @@ typedef struct {
6666
#endif
6767
} orte_grpcomm_base_t;
6868

69+
typedef struct {
70+
opal_object_t super;
71+
opal_event_t ev;
72+
orte_grpcomm_collective_t *op;
73+
} orte_grpcomm_caddy_t;
74+
OBJ_CLASS_DECLARATION(orte_grpcomm_caddy_t);
75+
76+
#define ORTE_GRPCOMM_ACTIVATE(o, cb) \
77+
do { \
78+
orte_grpcomm_caddy_t *caddy; \
79+
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output, \
80+
"%s ACTIVATING GRCPCOMM OP %d at %s:%d", \
81+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
82+
(o)->id, __FILE__, __LINE__)); \
83+
caddy = OBJ_NEW(orte_grpcomm_caddy_t); \
84+
caddy->op = (o); \
85+
opal_event_set(orte_event_base, &caddy->ev, -1, \
86+
OPAL_EV_WRITE, (cb), caddy); \
87+
opal_event_set_priority(&caddy->ev, ORTE_MSG_PRI); \
88+
opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); \
89+
} while(0);
90+
6991
ORTE_DECLSPEC extern orte_grpcomm_base_t orte_grpcomm_base;
7092

7193
ORTE_DECLSPEC orte_grpcomm_collective_t* orte_grpcomm_base_setup_collective(orte_grpcomm_coll_id_t id);
@@ -82,7 +104,7 @@ ORTE_DECLSPEC void orte_grpcomm_base_rollup_recv(int status, orte_process_name_t
82104
/* modex support */
83105
ORTE_DECLSPEC void orte_grpcomm_base_store_peer_modex(opal_buffer_t *rbuf, void *cbdata);
84106
ORTE_DECLSPEC void orte_grpcomm_base_store_modex(opal_buffer_t *rbuf, void *cbdata);
85-
ORTE_DECLSPEC int orte_grpcomm_base_modex(orte_grpcomm_collective_t *modex);
107+
ORTE_DECLSPEC void orte_grpcomm_base_modex(int fd, short args, void *cbdata);
86108
ORTE_DECLSPEC int orte_grpcomm_base_pack_modex_entries(opal_buffer_t *buf);
87109
ORTE_DECLSPEC int orte_grpcomm_base_update_modex_entries(orte_process_name_t *proc_name,
88110
opal_buffer_t *rbuf);

orte/mca/grpcomm/base/grpcomm_base_modex.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,10 @@ orte_grpcomm_coll_id_t orte_grpcomm_base_get_coll_id(void)
6262

6363

6464
/*************** MODEX SECTION **************/
65-
int orte_grpcomm_base_modex(orte_grpcomm_collective_t *modex)
65+
void orte_grpcomm_base_modex(int fd, short args, void *cbdata)
6666
{
67+
orte_grpcomm_caddy_t *caddy = (orte_grpcomm_caddy_t*)cbdata;
68+
orte_grpcomm_collective_t *modex = caddy->op;
6769
int rc;
6870
orte_namelist_t *nm;
6971
opal_list_item_t *item;
@@ -76,7 +78,6 @@ int orte_grpcomm_base_modex(orte_grpcomm_collective_t *modex)
7678

7779
if (0 == opal_list_get_size(&modex->participants)) {
7880
/* record the collective */
79-
modex->active = true;
8081
modex->next_cbdata = modex;
8182
opal_list_append(&orte_grpcomm_base.active_colls, &modex->super);
8283

@@ -138,7 +139,6 @@ int orte_grpcomm_base_modex(orte_grpcomm_collective_t *modex)
138139
/* now add the modex to the global list of active collectives */
139140
modex->next_cb = orte_grpcomm_base_store_peer_modex;
140141
modex->next_cbdata = modex;
141-
modex->active = true;
142142
opal_list_append(&orte_grpcomm_base.active_colls, &modex->super);
143143

144144
/* this is not amongst our peers, but rather between a select
@@ -215,10 +215,10 @@ int orte_grpcomm_base_modex(orte_grpcomm_collective_t *modex)
215215
"%s grpcomm:base:modex: modex posted",
216216
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
217217

218-
return ORTE_SUCCESS;
218+
return;
219219

220220
cleanup:
221-
return rc;
221+
return;
222222
}
223223

224224
void orte_grpcomm_base_store_peer_modex(opal_buffer_t *rbuf, void *cbdata)

orte/mca/grpcomm/base/grpcomm_base_open.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12-
* Copyright (c) 2011 Los Alamos National Security, LLC.
12+
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
1313
* All rights reserved.
1414
* $COPYRIGHT$
1515
*
@@ -138,3 +138,7 @@ OBJ_CLASS_INSTANCE(orte_grpcomm_collective_t,
138138
opal_list_item_t,
139139
collective_constructor,
140140
collective_destructor);
141+
142+
OBJ_CLASS_INSTANCE(orte_grpcomm_caddy_t,
143+
opal_object_t,
144+
NULL, NULL);

0 commit comments

Comments
 (0)