Skip to content

Commit 4f104be

Browse files
authored
Merge pull request #3723 from rhc54/cmr30x/orte
Update ORTE and PMIx-related components to release status
2 parents c595cea + a98d8f4 commit 4f104be

File tree

199 files changed

+6971
-5257
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

199 files changed

+6971
-5257
lines changed

ompi/dpm/dpm.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
165165
sizeof(ompi_proc_t *));
166166
for (i=0 ; i<group->grp_proc_count ; i++) {
167167
if (NULL == (proc_list[i] = ompi_group_peer_lookup(group,i))) {
168-
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
169-
rc = ORTE_ERR_NOT_FOUND;
168+
OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND);
169+
rc = OMPI_ERR_NOT_FOUND;
170170
free(proc_list);
171171
goto exit;
172172
}
@@ -672,10 +672,10 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
672672
for (i = 0; i < count; ++i) {
673673
app = OBJ_NEW(opal_pmix_app_t);
674674
if (NULL == app) {
675-
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
675+
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
676676
OPAL_LIST_DESTRUCT(&apps);
677677
opal_progress_event_users_decrement();
678-
return ORTE_ERR_OUT_OF_RESOURCE;
678+
return OMPI_ERR_OUT_OF_RESOURCE;
679679
}
680680
/* add the app to the job data */
681681
opal_list_append(&apps, &app->super);
@@ -900,9 +900,9 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
900900
ompi_info_get (array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag);
901901
if ( flag ) {
902902
if (0 == strcmp(stdin_target, "all")) {
903-
ui32 = ORTE_VPID_WILDCARD;
903+
ui32 = OPAL_VPID_WILDCARD;
904904
} else if (0 == strcmp(stdin_target, "none")) {
905-
ui32 = ORTE_VPID_INVALID;
905+
ui32 = OPAL_VPID_INVALID;
906906
} else {
907907
ui32 = strtoul(stdin_target, NULL, 10);
908908
}
@@ -918,7 +918,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
918918
*/
919919
if ( !have_wdir ) {
920920
if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) {
921-
ORTE_ERROR_LOG(rc);
921+
OMPI_ERROR_LOG(rc);
922922
OPAL_LIST_DESTRUCT(&apps);
923923
opal_progress_event_users_decrement();
924924
return rc;

opal/include/opal/constants.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ enum {
9696
OPAL_ERR_PROC_MIGRATE = (OPAL_ERR_BASE - 65),
9797
OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66),
9898
OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67),
99-
OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68)
99+
OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68),
100+
OPAL_ERR_MODEL_DECLARED = (OPAL_ERR_BASE - 69)
100101
};
101102

102103
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)

opal/mca/pmix/base/base.h

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
#include "opal_config.h"
1616
#include "opal/types.h"
17-
17+
#include "opal/threads/threads.h"
1818
#include "opal/mca/mca.h"
1919
#include "opal/mca/base/mca_base_framework.h"
2020

@@ -55,13 +55,133 @@ OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info,
5555

5656
OPAL_DECLSPEC void opal_pmix_base_set_evbase(opal_event_base_t *evbase);
5757

58+
#define opal_pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread)
59+
typedef pthread_cond_t opal_pmix_condition_t;
60+
#define opal_pmix_condition_broadcast(a) pthread_cond_broadcast(a)
61+
#define opal_pmix_condition_signal(a) pthread_cond_signal(a)
62+
#define OPAL_PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER
63+
64+
typedef struct {
65+
opal_mutex_t mutex;
66+
opal_pmix_condition_t cond;
67+
volatile bool active;
68+
} opal_pmix_lock_t;
69+
70+
5871
typedef struct {
5972
opal_event_base_t *evbase;
6073
int timeout;
74+
int initialized;
75+
opal_pmix_lock_t lock;
6176
} opal_pmix_base_t;
6277

6378
extern opal_pmix_base_t opal_pmix_base;
6479

80+
#define OPAL_PMIX_CONSTRUCT_LOCK(l) \
81+
do { \
82+
OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \
83+
pthread_cond_init(&(l)->cond, NULL); \
84+
(l)->active = true; \
85+
} while(0)
86+
87+
#define OPAL_PMIX_DESTRUCT_LOCK(l) \
88+
do { \
89+
OBJ_DESTRUCT(&(l)->mutex); \
90+
pthread_cond_destroy(&(l)->cond); \
91+
} while(0)
92+
93+
94+
#if OPAL_ENABLE_DEBUG
95+
#define OPAL_PMIX_ACQUIRE_THREAD(lck) \
96+
do { \
97+
opal_mutex_lock(&(lck)->mutex); \
98+
if (opal_debug_threads) { \
99+
opal_output(0, "Waiting for thread %s:%d", \
100+
__FILE__, __LINE__); \
101+
} \
102+
while ((lck)->active) { \
103+
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
104+
} \
105+
if (opal_debug_threads) { \
106+
opal_output(0, "Thread obtained %s:%d", \
107+
__FILE__, __LINE__); \
108+
} \
109+
(lck)->active = true; \
110+
} while(0)
111+
#else
112+
#define OPAL_PMIX_ACQUIRE_THREAD(lck) \
113+
do { \
114+
opal_mutex_lock(&(lck)->mutex); \
115+
while ((lck)->active) { \
116+
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
117+
} \
118+
(lck)->active = true; \
119+
} while(0)
120+
#endif
121+
122+
123+
#if OPAL_ENABLE_DEBUG
124+
#define OPAL_PMIX_WAIT_THREAD(lck) \
125+
do { \
126+
opal_mutex_lock(&(lck)->mutex); \
127+
if (opal_debug_threads) { \
128+
opal_output(0, "Waiting for thread %s:%d", \
129+
__FILE__, __LINE__); \
130+
} \
131+
while ((lck)->active) { \
132+
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
133+
} \
134+
if (opal_debug_threads) { \
135+
opal_output(0, "Thread obtained %s:%d", \
136+
__FILE__, __LINE__); \
137+
} \
138+
OPAL_ACQUIRE_OBJECT(&lck); \
139+
opal_mutex_unlock(&(lck)->mutex); \
140+
} while(0)
141+
#else
142+
#define OPAL_PMIX_WAIT_THREAD(lck) \
143+
do { \
144+
opal_mutex_lock(&(lck)->mutex); \
145+
while ((lck)->active) { \
146+
opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
147+
} \
148+
OPAL_ACQUIRE_OBJECT(lck); \
149+
opal_mutex_unlock(&(lck)->mutex); \
150+
} while(0)
151+
#endif
152+
153+
154+
#if OPAL_ENABLE_DEBUG
155+
#define OPAL_PMIX_RELEASE_THREAD(lck) \
156+
do { \
157+
if (opal_debug_threads) { \
158+
opal_output(0, "Releasing thread %s:%d", \
159+
__FILE__, __LINE__); \
160+
} \
161+
(lck)->active = false; \
162+
opal_pmix_condition_broadcast(&(lck)->cond); \
163+
opal_mutex_unlock(&(lck)->mutex); \
164+
} while(0)
165+
#else
166+
#define OPAL_PMIX_RELEASE_THREAD(lck) \
167+
do { \
168+
assert(0 != opal_mutex_trylock(&(lck)->mutex)); \
169+
(lck)->active = false; \
170+
opal_pmix_condition_broadcast(&(lck)->cond); \
171+
opal_mutex_unlock(&(lck)->mutex); \
172+
} while(0)
173+
#endif
174+
175+
176+
#define OPAL_PMIX_WAKEUP_THREAD(lck) \
177+
do { \
178+
opal_mutex_lock(&(lck)->mutex); \
179+
(lck)->active = false; \
180+
OPAL_POST_OBJECT(lck); \
181+
opal_pmix_condition_broadcast(&(lck)->cond); \
182+
opal_mutex_unlock(&(lck)->mutex); \
183+
} while(0)
184+
65185
END_C_DECLS
66186

67187
#endif

opal/mca/pmix/base/pmix_base_fns.c

Lines changed: 11 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -92,39 +92,6 @@ int opal_pmix_base_notify_event(int status,
9292
return OPAL_SUCCESS;
9393
}
9494

95-
struct lookup_caddy_t {
96-
volatile bool active;
97-
int status;
98-
opal_pmix_pdata_t *pdat;
99-
};
100-
101-
/******** DATA EXCHANGE ********/
102-
static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata)
103-
{
104-
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
105-
cd->status = status;
106-
if (OPAL_SUCCESS == status && NULL != data) {
107-
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
108-
if (NULL != p) {
109-
cd->pdat->proc = p->proc;
110-
if (p->value.type == cd->pdat->value.type) {
111-
if (NULL != cd->pdat->value.key) {
112-
free(cd->pdat->value.key);
113-
}
114-
(void)opal_value_xfer(&cd->pdat->value, &p->value);
115-
}
116-
}
117-
}
118-
cd->active = false;
119-
}
120-
121-
static void opcbfunc(int status, void *cbdata)
122-
{
123-
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
124-
cd->status = status;
125-
cd->active = false;
126-
}
127-
12895
int opal_pmix_base_exchange(opal_value_t *indat,
12996
opal_pmix_pdata_t *outdat,
13097
int timeout)
@@ -133,8 +100,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
133100
opal_list_t ilist, mlist;
134101
opal_value_t *info;
135102
opal_pmix_pdata_t *pdat;
136-
struct lookup_caddy_t caddy;
137-
char **keys;
138103

139104
/* protect the incoming value */
140105
opal_dss.copy((void**)&info, indat, OPAL_VALUE);
@@ -148,31 +113,10 @@ int opal_pmix_base_exchange(opal_value_t *indat,
148113
opal_list_append(&ilist, &info->super);
149114

150115
/* publish it with "session" scope */
151-
if (NULL == opal_pmix.publish_nb) {
152-
rc = opal_pmix.publish(&ilist);
153-
OPAL_LIST_DESTRUCT(&ilist);
154-
if (OPAL_SUCCESS != rc) {
155-
OPAL_ERROR_LOG(rc);
156-
return rc;
157-
}
158-
} else {
159-
caddy.status = -1;
160-
caddy.active = true;
161-
caddy.pdat = NULL;
162-
rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy);
163-
if (OPAL_SUCCESS != rc) {
164-
OPAL_ERROR_LOG(rc);
165-
OPAL_LIST_DESTRUCT(&ilist);
166-
return rc;
167-
}
168-
while (caddy.active) {
169-
usleep(10);
170-
}
171-
OPAL_LIST_DESTRUCT(&ilist);
172-
if (OPAL_SUCCESS != caddy.status) {
173-
OPAL_ERROR_LOG(caddy.status);
174-
return caddy.status;
175-
}
116+
rc = opal_pmix.publish(&ilist);
117+
OPAL_LIST_DESTRUCT(&ilist);
118+
if (OPAL_SUCCESS != rc) {
119+
return rc;
176120
}
177121

178122
/* lookup the other side's info - if a non-blocking form
@@ -206,45 +150,20 @@ int opal_pmix_base_exchange(opal_value_t *indat,
206150

207151
/* if a non-blocking version of lookup isn't
208152
* available, then use the blocking version */
209-
if (NULL == opal_pmix.lookup_nb) {
210-
OBJ_CONSTRUCT(&ilist, opal_list_t);
211-
opal_list_append(&ilist, &pdat->super);
212-
rc = opal_pmix.lookup(&ilist, &mlist);
213-
OPAL_LIST_DESTRUCT(&mlist);
153+
OBJ_CONSTRUCT(&ilist, opal_list_t);
154+
opal_list_append(&ilist, &pdat->super);
155+
rc = opal_pmix.lookup(&ilist, &mlist);
156+
OPAL_LIST_DESTRUCT(&mlist);
157+
if (OPAL_SUCCESS != rc) {
214158
OPAL_LIST_DESTRUCT(&ilist);
215-
if (OPAL_SUCCESS != rc) {
216-
OPAL_ERROR_LOG(rc);
217-
return rc;
218-
}
219-
} else {
220-
caddy.status = -1;
221-
caddy.active = true;
222-
caddy.pdat = pdat;
223-
keys = NULL;
224-
opal_argv_append_nosize(&keys, pdat->value.key);
225-
rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy);
226-
if (OPAL_SUCCESS != rc) {
227-
OPAL_ERROR_LOG(rc);
228-
OPAL_LIST_DESTRUCT(&mlist);
229-
opal_argv_free(keys);
230-
return rc;
231-
}
232-
while (caddy.active) {
233-
usleep(10);
234-
}
235-
opal_argv_free(keys);
236-
OPAL_LIST_DESTRUCT(&mlist);
237-
if (OPAL_SUCCESS != caddy.status) {
238-
OPAL_ERROR_LOG(caddy.status);
239-
return caddy.status;
240-
}
159+
return rc;
241160
}
242161

243162
/* pass back the result */
244163
outdat->proc = pdat->proc;
245164
free(outdat->value.key);
246165
rc = opal_value_xfer(&outdat->value, &pdat->value);
247-
OBJ_RELEASE(pdat);
166+
OPAL_LIST_DESTRUCT(&ilist);
248167
return rc;
249168
}
250169

opal/mca/pmix/base/pmix_base_frame.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "opal/constants.h"
1414

1515
#include "opal/mca/mca.h"
16+
#include "opal/threads/thread_usage.h"
1617
#include "opal/util/argv.h"
1718
#include "opal/util/output.h"
1819
#include "opal/mca/base/base.h"
@@ -35,7 +36,16 @@ opal_pmix_base_module_t opal_pmix = { 0 };
3536
bool opal_pmix_collect_all_data = true;
3637
int opal_pmix_verbose_output = -1;
3738
bool opal_pmix_base_async_modex = false;
38-
opal_pmix_base_t opal_pmix_base = {0};
39+
opal_pmix_base_t opal_pmix_base = {
40+
.evbase = NULL,
41+
.timeout = 0,
42+
.initialized = 0,
43+
.lock = {
44+
.mutex = OPAL_MUTEX_STATIC_INIT,
45+
.cond = OPAL_PMIX_CONDITION_STATIC_INIT,
46+
.active = false
47+
}
48+
};
3949

4050
static int opal_pmix_base_frame_register(mca_base_register_flag_t flags)
4151
{

opal/mca/pmix/cray/pmix_cray.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
static char cray_pmi_version[128];
3636

37-
static int cray_init(void);
37+
static int cray_init(opal_list_t *ilist);
3838
static int cray_fini(void);
3939
static int cray_initialized(void);
4040
static int cray_abort(int flat, const char *msg,
@@ -282,7 +282,7 @@ static void cray_get_more_info(void)
282282
return;
283283
}
284284

285-
static int cray_init(void)
285+
static int cray_init(opal_list_t *ilist)
286286
{
287287
int i, spawned, size, rank, appnum, my_node;
288288
int rc, ret = OPAL_ERROR;

0 commit comments

Comments
 (0)