Skip to content

Commit e161a5e

Browse files
hppritcharhc54
authored and committed
examples: add a test to simulate open mpi group
usage. The current algorithm in Open MPI for supporting MPI 4 Sessions makes use of an extended CID (communicator ID) concept. This approach is leading to problems supporting Sessions for PMLs that can't handle exCID information easily. A much simpler approach to handling the exCID exchange mechanism is demonstrated in this PR. By making use of the PMIx dmodex feature in combination with PMIx Group functionality, it should be much easier to support MPI Sessions in PMLs other than OB1. Signed-off-by: Howard Pritchard <[email protected]>
1 parent 1c1d4e1 commit e161a5e

File tree

3 files changed

+239
-1
lines changed

3 files changed

+239
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ examples/pubi
8484
examples/server
8585
examples/tool
8686
examples/group
87+
examples/group_dmodex
8788
examples/asyncgroup
8889
examples/hello
8990
examples/bad_exit

examples/Makefile.am

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ headers = examples.h
2424

2525
AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix
2626

27-
noinst_PROGRAMS = client client2 dmodex dynamic fault pub pubi tool debugger debuggerd alloc jctrl group asyncgroup hello nodeinfo abi_no_init abi_with_init group_lcl_cid
27+
noinst_PROGRAMS = client client2 dmodex dynamic fault pub pubi \
28+
tool debugger debuggerd alloc jctrl group group_dmodex asyncgroup \
29+
hello nodeinfo abi_no_init abi_with_init group_lcl_cid
2830

2931
if !WANT_HIDDEN
3032
# these examples use internal symbols
@@ -84,6 +86,10 @@ group_SOURCES = group.c examples.h
8486
group_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
8587
group_LDADD = $(top_builddir)/src/libpmix.la
8688

89+
group_dmodex_SOURCES = group_dmodex.c examples.h
90+
group_dmodex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
91+
group_dmodex_LDADD = $(top_builddir)/src/libpmix.la
92+
8793
asyncgroup_SOURCES = asyncgroup.c examples.h
8894
asyncgroup_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
8995
asyncgroup_LDADD = $(top_builddir)/src/libpmix.la

examples/group_dmodex.c

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
/*
2+
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3+
* University Research and Technology
4+
* Corporation. All rights reserved.
5+
* Copyright (c) 2004-2011 The University of Tennessee and The University
6+
* of Tennessee Research Foundation. All rights
7+
* reserved.
8+
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* University of Stuttgart. All rights reserved.
10+
* Copyright (c) 2004-2005 The Regents of the University of California.
11+
* All rights reserved.
12+
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
13+
* All rights reserved.
14+
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
15+
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
16+
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
17+
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
18+
* Copyright (c) 2019 IBM Corporation. All rights reserved.
19+
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
20+
* Copyright (c) 2022 Triad National Security, LLC.
21+
* All rights reserved.
22+
*
23+
* $COPYRIGHT$
24+
*
25+
* Additional copyrights may follow
26+
*
27+
* $HEADER$
28+
*
29+
*/
30+
31+
/*
32+
* This test simulates the way Open MPI uses the PMIx_Group_construct to
33+
* implement MPI4 functions:
34+
* - MPI_Comm_create_from_group
35+
* - MPI_Intercomm_create_from_groups
36+
*/
37+
38+
#include <pthread.h>
39+
#include <stdbool.h>
40+
#include <stdio.h>
41+
#include <stdlib.h>
42+
#include <time.h>
43+
#include <unistd.h>
44+
45+
#include <pmix.h>
46+
#include "examples.h"
47+
48+
static pmix_proc_t myproc;
49+
static uint32_t get_timeout = 600; /* default 600 secs to get remote data */
50+
51+
static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status,
52+
const pmix_proc_t *source, pmix_info_t info[], size_t ninfo,
53+
pmix_info_t results[], size_t nresults,
54+
pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata)
55+
{
56+
EXAMPLES_HIDE_UNUSED_PARAMS(evhdlr_registration_id, source,
57+
info, ninfo, results, nresults,
58+
cbfunc, cbdata);
59+
60+
fprintf(stderr, "Client %s:%d NOTIFIED with status %d\n", myproc.nspace, myproc.rank, status);
61+
}
62+
63+
/* Generic operation-completion callback: record the reported status in the
 * caller's lock object and wake the waiting thread. */
static void op_callbk(pmix_status_t status, void *cbdata)
{
    mylock_t *waiter = (mylock_t *) cbdata;

    fprintf(stderr, "Client %s:%d OP CALLBACK CALLED WITH STATUS %d\n", myproc.nspace, myproc.rank,
            status);
    waiter->status = status;
    DEBUG_WAKEUP_THREAD(waiter);
}
72+
73+
/* Invoked when registration of the default event handler completes:
 * stash the registration status in the caller's lock and release it. */
static void errhandler_reg_callbk(pmix_status_t status, size_t errhandler_ref, void *cbdata)
{
    mylock_t *waiter = (mylock_t *) cbdata;

    fprintf(stderr,
            "Client %s:%d ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu\n",
            myproc.nspace, myproc.rank, status, (unsigned long) errhandler_ref);
    waiter->status = status;
    DEBUG_WAKEUP_THREAD(waiter);
}
83+
84+
int main(int argc, char **argv)
85+
{
86+
int rc;
87+
size_t n;
88+
pmix_value_t *val = NULL;
89+
pmix_value_t value;
90+
pmix_proc_t proc, *procs;
91+
uint32_t nprocs;
92+
mylock_t lock;
93+
pmix_info_t *results, info, tinfo;
94+
size_t nresults, cid;
95+
char tmp[1024];
96+
97+
EXAMPLES_HIDE_UNUSED_PARAMS(argc, argv);
98+
99+
/* init us */
100+
if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
101+
fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %s\n", myproc.nspace, myproc.rank,
102+
PMIx_Error_string(rc));
103+
exit(0);
104+
}
105+
fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
106+
107+
PMIX_PROC_CONSTRUCT(&proc);
108+
PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD);
109+
110+
/* get our job size */
111+
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) {
112+
fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %s\n", myproc.nspace,
113+
myproc.rank, PMIx_Error_string(rc));
114+
goto done;
115+
}
116+
nprocs = val->data.uint32;
117+
PMIX_VALUE_RELEASE(val);
118+
fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs);
119+
120+
/* register our default errhandler */
121+
DEBUG_CONSTRUCT_LOCK(&lock);
122+
PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, errhandler_reg_callbk,
123+
(void *) &lock);
124+
DEBUG_WAIT_THREAD(&lock);
125+
rc = lock.status;
126+
DEBUG_DESTRUCT_LOCK(&lock);
127+
if (PMIX_SUCCESS != rc) {
128+
goto done;
129+
}
130+
131+
/* call fence to sync */
132+
PMIX_PROC_CONSTRUCT(&proc);
133+
PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD);
134+
if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) {
135+
fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank,
136+
rc);
137+
goto done;
138+
}
139+
140+
PMIX_PROC_CREATE(procs, nprocs);
141+
for (n = 0; n < nprocs; n++) {
142+
PMIX_PROC_LOAD(&procs[n], myproc.nspace, n);
143+
}
144+
PMIX_INFO_LOAD(&info, PMIX_GROUP_ASSIGN_CONTEXT_ID, NULL, PMIX_BOOL);
145+
rc = PMIx_Group_construct("ourgroup", procs, nprocs, &info, 1, &results, &nresults);
146+
if (PMIX_SUCCESS != rc) {
147+
fprintf(stderr, "Client ns %s rank %d: PMIx_Group_construct failed: %s\n",
148+
myproc.nspace, myproc.rank, PMIx_Error_string(rc));
149+
goto done;
150+
}
151+
/* we should have a single results object */
152+
if (NULL != results) {
153+
cid = 0;
154+
PMIX_VALUE_GET_NUMBER(rc, &results[0].value, cid, size_t);
155+
fprintf(stderr, "%d Group construct complete with status %s KEY %s CID %ld\n",
156+
myproc.rank, PMIx_Error_string(rc), results[0].key, cid);
157+
} else {
158+
fprintf(stderr, "%d Group construct complete, but no CID returned\n", myproc.rank);
159+
goto done;
160+
}
161+
PMIX_PROC_FREE(procs, nprocs);
162+
163+
/*
164+
* put some data
165+
*/
166+
(void) snprintf(tmp, 1024, "%s-%lu-%d-remote", myproc.nspace, cid, myproc.rank);
167+
value.type = PMIX_UINT64;
168+
value.data.uint64 = 1234UL + (unsigned long) myproc.rank;
169+
if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, tmp, &value))) {
170+
fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace,
171+
myproc.rank, rc);
172+
goto done;
173+
}
174+
175+
/* commit the data to the server */
176+
if (PMIX_SUCCESS != (rc = PMIx_Commit())) {
177+
fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace,
178+
myproc.rank, rc);
179+
goto done;
180+
}
181+
182+
/*
183+
* destruct the group
184+
*/
185+
rc = PMIx_Group_destruct("ourgroup", NULL, 0);
186+
if (PMIX_SUCCESS != rc) {
187+
fprintf(stderr, "Client ns %s rank %d: PMIx_Group_destruct failed: %s\n", myproc.nspace,
188+
myproc.rank, PMIx_Error_string(rc));
189+
goto done;
190+
}
191+
192+
PMIX_INFO_CONSTRUCT(&tinfo);
193+
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &get_timeout, PMIX_UINT32);
194+
for (n = 0; n < nprocs; n++) {
195+
proc.rank = n;
196+
(void)snprintf(tmp, 1024, "%s-%lu-%d-remote", myproc.nspace, cid, (int)n);
197+
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, &tinfo, 1, &val))) {
198+
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n",
199+
myproc.nspace, (int)n, tmp, rc);
200+
goto done;
201+
}
202+
if (PMIX_UINT64 != val->type) {
203+
fprintf(stderr, "%s:%d: PMIx_Get Key %s returned wrong type: %d\n",
204+
myproc.nspace, myproc.rank, tmp, val->type);
205+
PMIX_VALUE_RELEASE(val);
206+
goto done;
207+
}
208+
if ((1234UL + (unsigned long)n) != val->data.uint64) {
209+
fprintf(stderr, "%s:%d: PMIx_Get Key %s returned wrong value: %lu\n",
210+
myproc.nspace, myproc.rank, tmp, (unsigned long)val->data.uint64);
211+
PMIX_VALUE_RELEASE(val);
212+
goto done;
213+
}
214+
PMIX_VALUE_RELEASE(val);
215+
}
216+
217+
done:
218+
/* finalize us */
219+
DEBUG_CONSTRUCT_LOCK(&lock);
220+
PMIx_Deregister_event_handler(1, op_callbk, &lock);
221+
DEBUG_WAIT_THREAD(&lock);
222+
DEBUG_DESTRUCT_LOCK(&lock);
223+
224+
if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
225+
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace,
226+
myproc.rank, PMIx_Error_string(rc));
227+
}
228+
fprintf(stderr, "%s:%d COMPLETE\n", myproc.nspace, myproc.rank);
229+
fflush(stderr);
230+
return (0);
231+
}

0 commit comments

Comments
 (0)