Skip to content

Commit 9850832

Browse files
author
Ralph Castain
authored
Merge pull request #3273 from rhc54/topic/pmix2.0
Update to PMIx v2.0alpha
2 parents b373670 + 2cc5fea commit 9850832

18 files changed

+1127
-459
lines changed

opal/mca/pmix/pmix2x/pmix/NEWS

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
1+
Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
2+
Copyright (c) 2017 IBM Corporation. All rights reserved.
23
$COPYRIGHT$
34

45
Additional copyrights may follow
@@ -23,6 +24,32 @@ current release as well as the "stable" bug fix release branch.
2324
Master (not on release branches yet)
2425
------------------------------------
2526

27+
1.2.2 -- 21 March 2017
28+
----------------------
29+
- Compiler fix for Sun/Oracle CC (PR #322)
30+
- Fix missing include (PR #326)
31+
- Improve error checking around posix_fallocate (PR #329)
32+
- Fix possible memory corruption (PR #331)
33+
34+
35+
1.2.1 -- 21 Feb. 2017
36+
----------------------
37+
- dstore: Fix data corruption bug in key overwrite cases
38+
- dstore: Performance and scalability fixes
39+
- sm: Use posix_fallocate() before mmap
40+
- pmi1/pmi2: Restore support
41+
- dstore: Fix extension slot size allocation (Issue #280)
42+
43+
44+
1.2.0 -- 14 Dec. 2016
45+
----------------------
46+
- Add shared memory data storage (dstore) option. Default: enabled
47+
Configure option: --disable-dstore
48+
- PMIx_Commit performance improvements
49+
- Disable errhandler support
50+
- Keep job info in the shared memory dstore
51+
- PMIx_Get performance and memory improvements
52+
2653
1.1.5
2754
-----
2855
- Add pmix_version.h to support direct detection of PMIx library version

opal/mca/pmix/pmix2x/pmix/VERSION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@ release=0
2323
# The only requirement is that it must be entirely printable ASCII
2424
# characters and have no white space.
2525

26-
greek=
26+
greek=a1
2727

2828
# If repo_rev is empty, then the repository version number will be
2929
# obtained during "make dist" via the "git describe --tags --always"
3030
# command, or with the date (if "git describe" fails) in the form of
3131
# "date<date>".
3232

33-
repo_rev=git4cdd5e0
33+
repo_rev=gitc442ba8
3434

3535
# If tarball_version is not empty, it is used as the version string in
3636
# the tarball filename, regardless of all other versions listed in
@@ -44,7 +44,7 @@ tarball_version=
4444

4545
# The date when this release was created
4646

47-
date="Mar 11, 2017"
47+
date="Apr 02, 2017"
4848

4949
# The shared library version of each of PMIx's public libraries.
5050
# These versions are maintained in accordance with the "Library

opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ dnl All rights reserved.
1313
dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
1414
dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
1515
dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
16-
dnl Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
16+
dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
17+
dnl Copyright (c) 2017 Research Organization for Information Science
18+
dnl and Technology (RIST). All rights reserved.
1719
dnl
1820
dnl $COPYRIGHT$
1921
dnl
@@ -278,7 +280,7 @@ for val in ${$1}; do
278280
# http://www.open-mpi.org/community/lists/devel/2012/08/11362.php).
279281

280282
case $val in
281-
-Xclang)
283+
-Xclang|-Xg)
282284
pmix_found=0
283285
pmix_i=`expr $pmix_count + 1`
284286
;;
@@ -366,7 +368,7 @@ AC_DEFUN([PMIX_FLAGS_UNIQ],[
366368
# https://github.com/open-mpi/ompi/issues/324).
367369

368370
case $val in
369-
-Xclang)
371+
-Xclang|-Xg)
370372
pmix_found=0
371373
pmix_i=`expr $pmix_count + 1`
372374
;;

opal/mca/pmix/pmix2x/pmix/examples/Makefile.am

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix
2323

24-
noinst_PROGRAMS = client dmodex dynamic fault pub tool debugger debuggerd alloc
24+
noinst_PROGRAMS = client dmodex dynamic fault pub tool debugger debuggerd alloc jctrl
2525
if !WANT_HIDDEN
2626
# these examples use internal symbols
2727
# use --disable-visibility
@@ -40,11 +40,14 @@ debuggerd_SOURCES = debuggerd.c
4040
debuggerd_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
4141
debuggerd_LDADD = $(top_builddir)/src/libpmix.la
4242

43-
4443
alloc_SOURCES = alloc.c
4544
alloc_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
4645
alloc_LDADD = $(top_builddir)/src/libpmix.la
4746

47+
jctrl_SOURCES = jctrl.c
48+
jctrl_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
49+
jctrl_LDADD = $(top_builddir)/src/libpmix.la
50+
4851
dmodex_SOURCES = dmodex.c
4952
dmodex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
5053
dmodex_LDADD = $(top_builddir)/src/libpmix.la
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
/*
2+
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3+
* University Research and Technology
4+
* Corporation. All rights reserved.
5+
* Copyright (c) 2004-2011 The University of Tennessee and The University
6+
* of Tennessee Research Foundation. All rights
7+
* reserved.
8+
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* University of Stuttgart. All rights reserved.
10+
* Copyright (c) 2004-2005 The Regents of the University of California.
11+
* All rights reserved.
12+
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
13+
* All rights reserved.
14+
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
15+
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
16+
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
17+
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
18+
* $COPYRIGHT$
19+
*
20+
* Additional copyrights may follow
21+
*
22+
* $HEADER$
23+
*
24+
*/
25+
26+
#define _GNU_SOURCE
27+
#include <stdbool.h>
28+
#include <stdio.h>
29+
#include <stdlib.h>
30+
#include <unistd.h>
31+
#include <time.h>
32+
#include <signal.h>
33+
34+
#include <pmix.h>
35+
36+
static pmix_proc_t myproc;
37+
38+
/* this is the event notification function we pass down below
39+
* when registering for general events - i.e.,, the default
40+
* handler. We don't technically need to register one, but it
41+
* is usually good practice to catch any events that occur */
42+
static void notification_fn(size_t evhdlr_registration_id,
43+
pmix_status_t status,
44+
const pmix_proc_t *source,
45+
pmix_info_t info[], size_t ninfo,
46+
pmix_info_t results[], size_t nresults,
47+
pmix_event_notification_cbfunc_fn_t cbfunc,
48+
void *cbdata)
49+
{
50+
if (NULL != cbfunc) {
51+
cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
52+
}
53+
}
54+
55+
/* event handler registration is done asynchronously because it
56+
* may involve the PMIx server registering with the host RM for
57+
* external events. So we provide a callback function that returns
58+
* the status of the request (success or an error), plus a numerical index
59+
* to the registered event. The index is used later on to deregister
60+
* an event handler - if we don't explicitly deregister it, then the
61+
* PMIx server will do so when it see us exit */
62+
static void evhandler_reg_callbk(pmix_status_t status,
63+
size_t evhandler_ref,
64+
void *cbdata)
65+
{
66+
volatile int *active = (volatile int*)cbdata;
67+
68+
if (PMIX_SUCCESS != status) {
69+
fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
70+
myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref);
71+
}
72+
*active = status;
73+
}
74+
75+
static void infocbfunc(pmix_status_t status,
76+
pmix_info_t *info, size_t ninfo,
77+
void *cbdata,
78+
pmix_release_cbfunc_t release_fn,
79+
void *release_cbdata)
80+
{
81+
volatile int *active = (volatile int*)cbdata;
82+
83+
/* release the caller */
84+
if (NULL != release_fn) {
85+
release_fn(release_cbdata);
86+
}
87+
88+
*active = status;
89+
}
90+
91+
int main(int argc, char **argv)
92+
{
93+
int rc;
94+
pmix_value_t value;
95+
pmix_value_t *val = &value;
96+
pmix_proc_t proc;
97+
uint32_t nprocs, n;
98+
pmix_info_t *info, *iptr;
99+
bool flag;
100+
volatile int active;
101+
pmix_data_array_t *dptr;
102+
103+
/* init us - note that the call to "init" includes the return of
104+
* any job-related info provided by the RM. */
105+
if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
106+
fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc);
107+
exit(0);
108+
}
109+
fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
110+
111+
112+
/* register our default event handler - again, this isn't strictly
113+
* required, but is generally good practice */
114+
active = -1;
115+
PMIx_Register_event_handler(NULL, 0, NULL, 0,
116+
notification_fn, evhandler_reg_callbk, (void*)&active);
117+
while (-1 == active) {
118+
sleep(1);
119+
}
120+
if (0 != active) {
121+
fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank);
122+
exit(active);
123+
}
124+
125+
/* job-related info is found in our nspace, assigned to the
126+
* wildcard rank as it doesn't relate to a specific rank. Setup
127+
* a name to retrieve such values */
128+
PMIX_PROC_CONSTRUCT(&proc);
129+
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
130+
proc.rank = PMIX_RANK_WILDCARD;
131+
132+
/* get our universe size */
133+
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) {
134+
fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc);
135+
goto done;
136+
}
137+
nprocs = val->data.uint32;
138+
PMIX_VALUE_RELEASE(val);
139+
fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs);
140+
141+
/* inform the RM that we are preemptible, and that our checkpoint methods are
142+
* "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */
143+
PMIX_INFO_CREATE(info, 2);
144+
flag = true;
145+
PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL);
146+
/* can't use "load" to load a pmix_data_array_t */
147+
(void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN);
148+
info[1].value.type = PMIX_DATA_ARRAY;
149+
dptr = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t));
150+
info[1].value.data.darray = dptr;
151+
dptr->type = PMIX_INFO;
152+
dptr->size = 2;
153+
PMIX_INFO_CREATE(dptr->array, dptr->size);
154+
rc = SIGUSR2;
155+
iptr = (pmix_info_t*)dptr->array;
156+
PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT);
157+
rc = PMIX_JCTRL_CHECKPOINT;
158+
PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS);
159+
160+
/* since this is informational and not a requested operation, the target parameter
161+
* doesn't mean anything and can be ignored */
162+
active = -1;
163+
if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&active))) {
164+
fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
165+
goto done;
166+
}
167+
while (-1 == active) {
168+
sleep(1);
169+
}
170+
PMIX_INFO_FREE(info, 2);
171+
if (0 != active) {
172+
fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
173+
exit(active);
174+
}
175+
176+
/* now request that this process be monitored using heartbeats */
177+
PMIX_INFO_CREATE(iptr, 1);
178+
PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER);
179+
180+
PMIX_INFO_CREATE(info, 3);
181+
PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING);
182+
n = 5; // require a heartbeat every 5 seconds
183+
PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32);
184+
n = 2; // two heartbeats can be missed before declaring us "stalled"
185+
PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32);
186+
187+
/* make the request */
188+
active = -1;
189+
if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT,
190+
info, 3, infocbfunc, (void*)&active))) {
191+
fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
192+
goto done;
193+
}
194+
while (-1 == active) {
195+
sleep(1);
196+
}
197+
PMIX_INFO_FREE(iptr, 1);
198+
PMIX_INFO_FREE(info, 3);
199+
if (0 != active) {
200+
fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
201+
exit(active);
202+
}
203+
204+
/* send a heartbeat */
205+
PMIx_Heartbeat();
206+
207+
/* call fence to synchronize with our peers - no need to
208+
* collect any info as we didn't "put" anything */
209+
PMIX_INFO_CREATE(info, 1);
210+
flag = false;
211+
PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL);
212+
if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) {
213+
fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc);
214+
goto done;
215+
}
216+
PMIX_INFO_FREE(info, 1);
217+
218+
219+
done:
220+
/* finalize us */
221+
fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
222+
if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
223+
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
224+
} else {
225+
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
226+
}
227+
fflush(stderr);
228+
return(0);
229+
}

opal/mca/pmix/pmix2x/pmix/include/pmix.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -523,8 +523,14 @@ pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t
523523
pmix_info_cbfunc_t cbfunc, void *cbdata);
524524

525525
/* define a special macro to simplify sending of a heartbeat */
526-
#define PMIx_Heartbeat() \
527-
PMIx_Process_monitor_nb(PMIX_SEND_HEARTBEAT, NULL, 0, NULL, NULL)
526+
/* PMIx_Heartbeat() expands to a statement (not an expression): it builds
 * a temporary pmix_info_t carrying PMIX_SEND_HEARTBEAT, passes it as the
 * "monitor" argument of PMIx_Process_monitor_nb with no directives and
 * no completion callback, and then destructs the temporary. The status
 * returned by PMIx_Process_monitor_nb is discarded.
 * NOTE(review): the temporary is destructed immediately after a
 * non-blocking call - confirm the library does not reference it after
 * PMIx_Process_monitor_nb returns. */
#define PMIx_Heartbeat()                                                        \
    do {                                                                        \
        pmix_info_t _hb;                                                        \
        PMIX_INFO_CONSTRUCT(&_hb);                                              \
        PMIX_INFO_LOAD(&_hb, PMIX_SEND_HEARTBEAT, NULL, PMIX_POINTER);          \
        PMIx_Process_monitor_nb(&_hb, PMIX_SUCCESS, NULL, 0, NULL, NULL);       \
        PMIX_INFO_DESTRUCT(&_hb);                                               \
    } while(0)
528534

529535
#if defined(c_plusplus) || defined(__cplusplus)
530536
}

0 commit comments

Comments
 (0)