diff --git a/ompi/errhandler/errcode-internal.h b/ompi/errhandler/errcode-internal.h index 745098b5a0d..1a910ea53d5 100644 --- a/ompi/errhandler/errcode-internal.h +++ b/ompi/errhandler/errcode-internal.h @@ -14,6 +14,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -69,7 +70,7 @@ static inline int ompi_errcode_get_mpi_code(int errcode) it */ for (i = 0; i < ompi_errcode_intern_lastused; i++) { errc = (ompi_errcode_intern_t *)opal_pointer_array_get_item(&ompi_errcodes_intern, i); - if (errc->code == errcode) { + if (NULL != errc && errc->code == errcode) { ret = errc->mpi_code; break; } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 38b82da30c4..97ece1dae1e 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -17,7 +17,7 @@ * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. diff --git a/opal/mca/btl/usnic/btl_usnic_hwloc.c b/opal/mca/btl/usnic/btl_usnic_hwloc.c index a435a8a4043..0429b388e51 100644 --- a/opal/mca/btl/usnic/btl_usnic_hwloc.c +++ b/opal/mca/btl/usnic/btl_usnic_hwloc.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,8 +41,8 @@ static int get_distance_matrix(void) * responsible for freeing it. */ if (NULL == matrix) { - matrix = hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology, - HWLOC_OBJ_NODE); + matrix = (struct hwloc_distances_s*)hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology, + HWLOC_OBJ_NODE); } return (NULL == matrix) ? OPAL_ERROR : OPAL_SUCCESS; diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index fcc5f6d4ad0..aba3b8210dc 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012-2017 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -2047,8 +2047,9 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t * hwloc_obj_t root = NULL; int depth; unsigned i; -#endif +#else unsigned distances_nr = 0; +#endif for (device_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_OS_DEVICE, 0); device_obj; device_obj = hwloc_get_next_osdev(topo, device_obj)) { if (device_obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS @@ -2070,7 +2071,7 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t * /* find distance matrix for all numa nodes */ #if HWLOC_API_VERSION < 0x20000 - distances = hwloc_get_whole_distance_matrix_by_type(topo, HWLOC_OBJ_NODE); + distances = (struct hwloc_distances_s*)hwloc_get_whole_distance_matrix_by_type(topo, HWLOC_OBJ_NODE); if (NULL == distances) { /* we can try to find distances under group object. This info can be there. 
*/ depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NODE); diff --git a/opal/mca/pmix/pmix2x/pmix/LICENSE b/opal/mca/pmix/pmix2x/pmix/LICENSE index f9e6f047910..06f1248793d 100644 --- a/opal/mca/pmix/pmix2x/pmix/LICENSE +++ b/opal/mca/pmix/pmix2x/pmix/LICENSE @@ -26,7 +26,7 @@ Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. Copyright (c) 2006-2010 The University of Houston. All rights reserved. Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. Copyright (c) 2007-2008 UT-Battelle, LLC. All rights reserved. -Copyright (c) 2007-2010 IBM Corporation. All rights reserved. +Copyright (c) 2007-2018 IBM Corporation. All rights reserved. Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing Centre, Federal Republic of Germany Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany @@ -36,7 +36,7 @@ Copyright (c) 2008-2009 Institut National de Recherche en Informatique. All rights reserved. Copyright (c) 2007 Lawrence Livermore National Security, LLC. All rights reserved. -Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. +Copyright (c) 2007-2018 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. @@ -45,7 +45,7 @@ Copyright (c) 2010 ARM ltd. All rights reserved. Copyright (c) 2010-2011 Alex Brick . All rights reserved. Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights reserved. -Copyright (c) 2013-2014 Intel, Inc. All rights reserved. +Copyright (c) 2013-2018 Intel, Inc. All rights reserved. Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. 
$COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index 13e5bd7257a..7904bdd8c80 100644 --- a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -21,7 +21,7 @@ example, a bug might be fixed in the master, and then moved to the current release as well as the "stable" bug fix release branch. -2.1.2 -- TBD +2.1.2 -- 6 July 2018 ---------------------- - Added PMIX_VERSION_RELEASE string to pmix_version.h - Added PMIX_SPAWNED and PMIX_PARENT_ID keys to all procs @@ -32,6 +32,7 @@ current release as well as the "stable" bug fix release branch. shared memory region upon first connection - Fix potential deadlock in PMIx_server_init in an error case - Fix uninitialized variable +- Fix several memory and file descriptor leaks 2.1.1 -- 5 Mar 2018 diff --git a/opal/mca/pmix/pmix2x/pmix/README b/opal/mca/pmix/pmix2x/pmix/README index 1baa1069877..22b0ec55bb8 100644 --- a/opal/mca/pmix/pmix2x/pmix/README +++ b/opal/mca/pmix/pmix2x/pmix/README @@ -15,7 +15,7 @@ Copyright (c) 2007 Myricom, Inc. All rights reserved. Copyright (c) 2008 IBM Corporation. All rights reserved. Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2011 University of Houston. All rights reserved. -Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +Copyright (c) 2013-2018 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -25,13 +25,14 @@ $HEADER$ =========================================================================== When submitting questions and problems, be sure to include as much -extra information as possible. This web page details all the -information that we request in order to provide assistance: +extra information as possible. 
The issues template on the +GitHub repo provides directions: - http://pmix.github.io/pmix/community/help/ + http://github.com/pmix/pmix/issues The best way to report bugs, send comments, or ask questions is to -sign up on the PMIx mailing list, which is hosted by GoogleGroups: +open an issue on the repo. Alternatively, you are welcome to sign +up for the developer/user mailing list: pmix@googlegroups.com @@ -48,7 +49,7 @@ Thanks for your time. More information is available in the PMIx FAQ: - http://pmix.github.io/pmix/faq/ + https://pmix.org/support We are in early days, so please be patient - info will grow as questions are addressed. @@ -63,7 +64,7 @@ General notes - The majority of PMIx's documentation is here in this file, the included man pages, and on the web site FAQ - (http://pmix.github.io/pmix/faq). This will eventually be + (https://pmix.org/support). This will eventually be supplemented with cohesive installation and user documentation files. - Systems that have been tested are: @@ -312,7 +313,7 @@ Common Questions Many common questions about building and using PMIx are answered on the FAQ: - http://pmix.github.io/pmix/faq/ + https://pmix.org/support =========================================================================== @@ -323,10 +324,10 @@ Found a bug? Got a question? Want to make a suggestion? Want to contribute to PMIx? Please let us know! When submitting questions and problems, be sure to include as much -extra information as possible. This web page details all the -information that we request in order to provide assistance: +extra information as possible. Again, the issues template is your +friend in this regard! - http://pmix.github.io/pmix/community/help/ + https://github.com/pmix/pmix/issues Questions and comments should generally be sent to the PMIx mailing list (pmix@googlegroups.com). Because of spam, only @@ -338,4 +339,4 @@ user's list: https://groups.google.com/d/forum/pmix -Make today an PMIx day! +Make today a PMIx day! 
diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 608758a68f5..61fb5c2aaf8 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -23,14 +23,14 @@ release=2 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek=rc1 +greek= # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitc9312a8 +repo_rev=git8b0bc1f # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jun 18, 2018" +date="Jul 06, 2018" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec b/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec index e356527c669..1b36ff5488c 100644 --- a/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec +++ b/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec @@ -192,7 +192,7 @@ Summary: An extended/exascale implementation of PMI Name: %{?_name:%{_name}}%{!?_name:pmix} -Version: 2.1.2rc1 +Version: 2.1.2 Release: 1%{?dist} License: BSD Group: Development/Libraries diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.c index e736ecca444..039a16765e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.c @@ -2879,6 +2879,8 @@ static pmix_status_t dstore_del_nspace(const char* nspace) ns_map_t *ns_map; session_t *session_tbl = NULL; ns_track_elem_t *trk = NULL; + int dstor_track_idx; + size_t session_tbl_idx; PMIX_OUTPUT_VERBOSE((10, 
pmix_gds_base_framework.framework_output, "%s:%d:%s delete nspace `%s`", __FILE__, __LINE__, __func__, nspace)); @@ -2887,7 +2889,8 @@ static pmix_status_t dstore_del_nspace(const char* nspace) rc = PMIX_ERR_NOT_AVAILABLE; return rc; } - + dstor_track_idx = ns_map_data->track_idx; + session_tbl_idx = ns_map_data->tbl_idx; size = pmix_value_array_get_size(_ns_map_array); ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); @@ -2899,19 +2902,6 @@ static pmix_status_t dstore_del_nspace(const char* nspace) continue; } in_use++; - break; - } - } - - if(ns_map_data->track_idx >= 0) { - trk = pmix_value_array_get_item(_ns_track_array, ns_map_data->track_idx); - if((ns_map_data->track_idx + 1) > (int)pmix_value_array_get_size(_ns_track_array)) { - rc = PMIX_ERR_VALUE_OUT_OF_BOUNDS; - PMIX_ERROR_LOG(rc); - goto exit; - } - if (true == trk->in_use) { - PMIX_DESTRUCT(trk); } } @@ -2919,10 +2909,22 @@ static pmix_status_t dstore_del_nspace(const char* nspace) * session record can only be deleted once all references are gone */ if (!in_use) { session_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); - PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, - "%s:%d:%s delete session for jobuid: %d", __FILE__, __LINE__, __func__, session_tbl[ns_map_data->tbl_idx].jobuid)); - _esh_session_release(&session_tbl[ns_map_data->tbl_idx]); + "%s:%d:%s delete session for jobuid: %d", + __FILE__, __LINE__, __func__, session_tbl[session_tbl_idx].jobuid)); + size = pmix_value_array_get_size(_ns_track_array); + if (size && (dstor_track_idx >= 0)) { + if((dstor_track_idx + 1) > size) { + rc = PMIX_ERR_VALUE_OUT_OF_BOUNDS; + PMIX_ERROR_LOG(rc); + goto exit; + } + trk = pmix_value_array_get_item(_ns_track_array, dstor_track_idx); + if (true == trk->in_use) { + PMIX_DESTRUCT(trk); + } + } + _esh_session_release(&session_tbl[session_tbl_idx]); } exit: return rc; diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am 
b/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am index 966b71f027b..11b535d125c 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -21,64 +21,79 @@ AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix -noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex test_pmix simptool simpdie simplegacy +headers = simptest.h -simptest_SOURCES = \ +noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex \ + test_pmix simptool simpdie simplegacy stability quietclient + +simptest_SOURCES = $(headers) \ simptest.c simptest_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simptest_LDADD = \ $(top_builddir)/src/libpmix.la -simpclient_SOURCES = \ +simpclient_SOURCES = $(headers) \ simpclient.c simpclient_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpclient_LDADD = \ $(top_builddir)/src/libpmix.la -simppub_SOURCES = \ +simppub_SOURCES = $(headers) \ simppub.c simppub_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simppub_LDADD = \ $(top_builddir)/src/libpmix.la -simpdmodex_SOURCES = \ +simpdmodex_SOURCES = $(headers) \ simpdmodex.c simpdmodex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpdmodex_LDADD = \ $(top_builddir)/src/libpmix.la -simpft_SOURCES = \ +simpft_SOURCES = $(headers) \ simpft.c simpft_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpft_LDADD = \ $(top_builddir)/src/libpmix.la -simpdyn_SOURCES = \ +simpdyn_SOURCES = $(headers) \ simpdyn.c simpdyn_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpdyn_LDADD = \ $(top_builddir)/src/libpmix.la -test_pmix_SOURCES = \ +test_pmix_SOURCES = 
$(headers) \ test_pmix.c test_pmix_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) test_pmix_LDADD = \ $(top_builddir)/src/libpmix.la -simptool_SOURCES = \ +simptool_SOURCES = $(headers) \ simptool.c simptool_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simptool_LDADD = \ $(top_builddir)/src/libpmix.la -simpdie_SOURCES = \ +simpdie_SOURCES = $(headers) \ simpdie.c simpdie_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simpdie_LDADD = \ $(top_builddir)/src/libpmix.la -simplegacy_SOURCES = \ +simplegacy_SOURCES = $(headers) \ simplegacy.c simplegacy_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simplegacy_LDADD = \ $(top_builddir)/src/libpmix.la + +stability_SOURCES = $(headers) \ + stability.c +stability_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) +stability_LDADD = \ + $(top_builddir)/src/libpmix.la + +quietclient_SOURCES = $(headers) \ + quietclient.c +quietclient_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) +quietclient_LDADD = \ + $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/quietclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/quietclient.c new file mode 100644 index 00000000000..674490b855f --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/quietclient.c @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +/****** FUNCTIONS TESTED ****/ +/* + * PMIx_Init + * PMIx_Get + * PMIx_Register_event_handler + * PMIx_Store_internal + * PMIx_Put + * PMIx_Commit + * PMIx_Fence + * PMIx_Finalize + */ + +#include +#include + +#include +#include +#include +#include + +#include "src/class/pmix_object.h" +#include "src/util/output.h" +#include "src/util/printf.h" + +#define MAXCNT 1 + +static volatile bool completed = false; +static pmix_proc_t myproc; + +static void notification_fn(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + completed = true; +} + +static void errhandler_reg_callbk(pmix_status_t status, + size_t errhandler_ref, + void *cbdata) +{ + volatile bool *active = (volatile bool*)cbdata; + + *active = false; +} + +/* this is an event notification function that we explicitly request + * be called when the PMIX_MODEL_DECLARED notification is issued. + * We could catch it in the general event notification function and test + * the status to see if the status matched, but it often is simpler + * to declare a use-specific notification callback point. 
In this case, + * we are asking to know whenever a model is declared as a means + * of testing server self-notification */ +static void model_callback(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } +} + +/* event handler registration is done asynchronously */ +static void model_registration_callback(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + *active = false; +} + +int main(int argc, char **argv) +{ + int rc; + pmix_value_t value; + pmix_value_t *val = &value; + char *tmp; + pmix_proc_t proc; + uint32_t nprocs, n; + int cnt, j; + volatile bool active; + pmix_info_t *iptr; + size_t ninfo; + pmix_status_t code; + + /* init us and declare we are a test programming model */ + PMIX_INFO_CREATE(iptr, 2); + PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); + PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + exit(rc); + } + PMIX_INFO_FREE(iptr, 2); + + /* test something */ + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + exit(rc); + } + PMIX_VALUE_RELEASE(val); + + /* register a handler specifically for when 
models declare */ + active = true; + ninfo = 1; + PMIX_INFO_CREATE(iptr, ninfo); + PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); + code = PMIX_MODEL_DECLARED; + PMIx_Register_event_handler(&code, 1, iptr, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (active) { + usleep(10); + } + PMIX_INFO_FREE(iptr, ninfo); + + /* register our errhandler */ + active = true; + PMIx_Register_event_handler(NULL, 0, NULL, 0, + notification_fn, errhandler_reg_callbk, (void*)&active); + while (active) { + usleep(10); + } + + /* get our universe size */ + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + nprocs = val->data.uint32; + PMIX_VALUE_RELEASE(val); + + /* put a few values */ + (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); + value.type = PMIX_UINT32; + value.data.uint32 = 1234; + if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Store_internal failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + + for (cnt=0; cnt < MAXCNT; cnt++) { + (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); + value.type = PMIX_UINT64; + value.data.uint64 = 1234; + if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Put internal failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + + (void)asprintf(&tmp, "%s-%d-remote-%d", myproc.nspace, myproc.rank, cnt); + value.type = PMIX_STRING; + value.data.string = "1234"; + if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Put internal 
failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + + if (PMIX_SUCCESS != (rc = PMIx_Commit())) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Commit failed: %s", + myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); + goto done; + } + + /* call fence to ensure the data is received */ + PMIX_PROC_CONSTRUCT(&proc); + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Fence failed: %s", + myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); + goto done; + } + + /* check the returned data */ + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + for (j=0; j <= cnt; j++) { + for (n=0; n < nprocs; n++) { + proc.rank = n; + (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", + myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); + continue; + } + if (NULL == val) { + pmix_output(0, "Client ns %s rank %d: NULL value returned", + myproc.nspace, myproc.rank); + break; + } + if (PMIX_UINT64 != val->type) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + if (1234 != val->data.uint64) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + PMIX_VALUE_RELEASE(val); + free(tmp); + + if (n != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with 
a PMIX_REMOTE scope */ + continue; + } + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, myproc.rank, j, tmp); + PMIX_VALUE_RELEASE(val); + free(tmp); + } + } + } + } + + /* now get the data blob for myself */ + if (PMIX_SUCCESS == (rc = PMIx_Get(&myproc, NULL, NULL, 0, &val))) { + if (PMIX_DATA_ARRAY != val->type) { + pmix_output(0, "Client ns %s rank %d did not return an array for its internal modex blob", + myproc.nspace, myproc.rank); + PMIX_VALUE_RELEASE(val); + } else if (PMIX_INFO != val->data.darray->type) { + pmix_output(0, "Client ns %s rank %d returned an internal modex array of type %s instead of PMIX_INFO", + myproc.nspace, myproc.rank, PMIx_Data_type_string(val->data.darray->type)); + PMIX_VALUE_RELEASE(val); + } else if (0 == val->data.darray->size) { + pmix_output(0, "Client ns %s rank %d returned an internal modex array of zero length", + myproc.nspace, myproc.rank); + PMIX_VALUE_RELEASE(val); + } else { + PMIX_VALUE_RELEASE(val); + } + } else { + pmix_output(0, "Client ns %s rank %d internal modex blob FAILED with error %s(%d)", + myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc); + } + + done: + /* finalize us */ + if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { + fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + } + fflush(stderr); + return(rc); +} diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.h b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.h new file mode 100644 index 00000000000..89f1c00dd3d --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include +#include +#include +#include +#include + +typedef struct { + pthread_mutex_t mutex; + pthread_cond_t cond; + volatile bool active; + pmix_status_t status; +} mylock_t; + +#define DEBUG_CONSTRUCT_LOCK(l) \ + do { \ + pthread_mutex_init(&(l)->mutex, NULL); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + (l)->status = PMIX_SUCCESS; \ + } while(0) + +#define DEBUG_DESTRUCT_LOCK(l) \ + do { \ + pthread_mutex_destroy(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + +#define DEBUG_WAIT_THREAD(lck) \ + do { \ + pthread_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + pthread_cond_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + pthread_mutex_unlock(&(lck)->mutex); \ + } while(0) + +#define DEBUG_WAKEUP_THREAD(lck) \ + do { \ + pthread_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + pthread_cond_broadcast(&(lck)->cond); \ + pthread_mutex_unlock(&(lck)->mutex); \ + } while(0) diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/stability.c b/opal/mca/pmix/pmix2x/pmix/test/simple/stability.c new file mode 100644 index 00000000000..914dc49b37e --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/stability.c @@ -0,0 +1,881 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include PMIX_EVENT_HEADER + +#include "src/class/pmix_list.h" +#include "src/util/pmix_environ.h" +#include "src/util/output.h" +#include "src/util/printf.h" +#include "src/util/argv.h" + +#include "simptest.h" + +static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t abort_fn(const pmix_proc_t *proc, void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t dmodex_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t publish_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t spawn_fn(const 
pmix_proc_t *proc, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t notify_event(pmix_status_t code, + const pmix_proc_t *source, + pmix_data_range_t range, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t query_fn(pmix_proc_t *proct, + pmix_query_t *queries, size_t nqueries, + pmix_info_cbfunc_t cbfunc, + void *cbdata); +static void tool_connect_fn(pmix_info_t *info, size_t ninfo, + pmix_tool_connection_cbfunc_t cbfunc, + void *cbdata); +static void log_fn(const pmix_proc_t *client, + const pmix_info_t data[], size_t ndata, + const pmix_info_t directives[], size_t ndirs, + pmix_op_cbfunc_t cbfunc, void *cbdata); + +static pmix_server_module_t mymodule = { + .client_connected = connected, + .client_finalized = finalized, + .abort = abort_fn, + .fence_nb = fencenb_fn, + .direct_modex = dmodex_fn, + .publish = publish_fn, + .lookup = lookup_fn, + .unpublish = unpublish_fn, + .spawn = spawn_fn, + .connect = connect_fn, + .disconnect = disconnect_fn, + .register_events = register_event_fn, + .deregister_events = deregister_events, + .notify_event = notify_event, + .query = query_fn, + .tool_connected = tool_connect_fn, + .log = log_fn +}; + +typedef struct { + pmix_list_item_t super; + pmix_pdata_t pdata; +} pmix_locdat_t; 
+PMIX_CLASS_INSTANCE(pmix_locdat_t, + pmix_list_item_t, + NULL, NULL); + + /* Tracker for one asynchronous server operation: holds the lock the initiator waits on, the info array handed to the library, and the upstream callback (plus its cbdata) to forward the result to */ +typedef struct { + pmix_object_t super; + mylock_t lock; + pmix_event_t ev; + pmix_proc_t caller; + pmix_info_t *info; + size_t ninfo; + pmix_op_cbfunc_t cbfunc; + pmix_spawn_cbfunc_t spcbfunc; + pmix_release_cbfunc_t relcbfunc; + void *cbdata; +} myxfer_t; + /* constructor: no payload and no callbacks until the user fills them in */ +static void xfcon(myxfer_t *p) +{ + DEBUG_CONSTRUCT_LOCK(&p->lock); + p->info = NULL; + p->ninfo = 0; + p->cbfunc = NULL; + p->spcbfunc = NULL; + /* previously left uninitialized */ + p->relcbfunc = NULL; + p->cbdata = NULL; +} + /* destructor: frees the ninfo entries of the attached info array, if any */ +static void xfdes(myxfer_t *p) +{ + DEBUG_DESTRUCT_LOCK(&p->lock); + if (NULL != p->info) { + PMIX_INFO_FREE(p->info, p->ninfo); + } +} +PMIX_CLASS_INSTANCE(myxfer_t, + pmix_object_t, + xfcon, xfdes); + + /* One forked client: its pid and the wait status stored by wait_signal_callback() */ +typedef struct { + pmix_list_item_t super; + int exit_code; + pid_t pid; +} wait_tracker_t; +PMIX_CLASS_INSTANCE(wait_tracker_t, + pmix_list_item_t, + NULL, NULL); + + /* wakeup counts the clients still running; main() polls it and wait_signal_callback() decrements it */ +static volatile int wakeup; +static int exit_code = 0; +static pmix_list_t pubdata; +static pmix_event_t handler; +static pmix_list_t children; +static bool istimeouttest = false; + +static void set_namespace(int nprocs, char *ranks, char *nspace, + pmix_op_cbfunc_t cbfunc, myxfer_t *x); +static void errhandler(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata); +static void wait_signal_callback(int fd, short event, void *arg); +static void errhandler_reg_callbk (pmix_status_t status, + size_t errhandler_ref, + void *cbdata); + + /* Completion callback for operations main() waits on: forwards the result upstream (if anyone is chained) and wakes the thread blocked on the tracker lock; the waiter releases the tracker */ +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + myxfer_t *x = (myxfer_t*)cbdata; + + /* release the caller, if necessary - pass on the real status rather than a hard-coded success, as abcbfunc does */ + if (NULL != x->cbfunc) { + x->cbfunc(status, x->cbdata); + } + DEBUG_WAKEUP_THREAD(&x->lock); +} + +int main(int argc, char **argv) +{ + char **client_env=NULL; + char **client_argv=NULL; + char *tmp, **atmp, *executable=NULL, *nspace; + int rc, nprocs=1, n, k; + uid_t
myuid; + gid_t mygid; + pid_t pid; + myxfer_t *x; + pmix_proc_t proc; + wait_tracker_t *child; + pmix_info_t *info; + size_t ninfo; + mylock_t mylock; + int ncycles=1, m, delay=0; + + /* smoke test */ + if (PMIX_SUCCESS != 0) { + fprintf(stderr, "ERROR IN COMPUTING CONSTANTS: PMIX_SUCCESS = %d\n", PMIX_SUCCESS); + exit(1); + } + + fprintf(stderr, "Testing version %s\n", PMIx_Get_version()); + + /* see if we were passed the number of procs to run or + * the executable to use */ + for (n=1; n < argc; n++) { + if (0 == strcmp("-n", argv[n]) && + NULL != argv[n+1]) { + nprocs = strtol(argv[n+1], NULL, 10); + ++n; // step over the argument + } else if (0 == strcmp("-e", argv[n]) && + NULL != argv[n+1]) { + executable = strdup(argv[n+1]); + /* check for timeout test */ + if (NULL != strstr(executable, "quietclient")) { + istimeouttest = true; + } + for (k=n+2; NULL != argv[k]; k++) { + pmix_argv_append_nosize(&client_argv, argv[k]); + } + n += k; + } else if ((0 == strcmp("-reps", argv[n]) || + 0 == strcmp("--reps", argv[n])) && + NULL != argv[n+1]) { + ncycles = strtol(argv[n+1], NULL, 10); + } else if ((0 == strcmp("-sleep", argv[n]) || + 0 == strcmp("--sleep", argv[n])) && + NULL != argv[n+1]) { + delay = strtol(argv[n+1], NULL, 10); + } else if (0 == strcmp("-h", argv[n])) { + /* print the options and exit */ + fprintf(stderr, "usage: simptest \n"); + fprintf(stderr, " -n N Number of clients to run\n"); + fprintf(stderr, " -e foo Name of the client executable to run (default: simpclient\n"); + fprintf(stderr, " -reps N Cycle for N repetitions"); + exit(0); + } + } + if (NULL == executable) { + executable = strdup("./quietclient"); + } + /* setup the server library and tell it to support tool connections */ + ninfo = 2; + + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_SERVER_TOOL_SUPPORT, NULL, PMIX_BOOL); + PMIX_INFO_LOAD(&info[1], PMIX_USOCK_DISABLE, NULL, PMIX_BOOL); + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, ninfo))) { + 
fprintf(stderr, "Init failed with error %d\n", rc); + return rc; + } + PMIX_INFO_FREE(info, ninfo); + + /* register the default errhandler - errhandler_reg_callbk stores the registration status in mylock and wakes this thread */ + DEBUG_CONSTRUCT_LOCK(&mylock); + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "SIMPTEST-DEFAULT", PMIX_STRING); + PMIx_Register_event_handler(NULL, 0, info, ninfo, + errhandler, errhandler_reg_callbk, (void*)&mylock); + DEBUG_WAIT_THREAD(&mylock); + PMIX_INFO_FREE(info, ninfo); + if (PMIX_SUCCESS != mylock.status) { + exit(mylock.status); + } + DEBUG_DESTRUCT_LOCK(&mylock); + + /* setup the pub data, in case it is used */ + PMIX_CONSTRUCT(&pubdata, pmix_list_t); + + /* setup to see sigchld on the forked tests - wait_signal_callback reaps the children and records their status */ + PMIX_CONSTRUCT(&children, pmix_list_t); + event_assign(&handler, pmix_globals.evbase, SIGCHLD, + EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); + event_add(&handler, NULL); + + for (m=0; m < ncycles; m++) { + fprintf(stderr, "Running cycle %d\n", m); + /* we have a single namespace for all clients - build the comma-separated list of ranks 0..nprocs-1 for it */ + atmp = NULL; + for (n=0; n < nprocs; n++) { + asprintf(&tmp, "%d", n); + pmix_argv_append_nosize(&atmp, tmp); + free(tmp); + } + tmp = pmix_argv_join(atmp, ','); + pmix_argv_free(atmp); + /* each cycle registers its own namespace, named foobar followed by the cycle index */ asprintf(&nspace, "foobar%d", m); + (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + x = PMIX_NEW(myxfer_t); + set_namespace(nprocs, tmp, nspace, opcbfunc, x); + + + /* set common argv and env. NOTE(review): client_argv is freed and reset to NULL at the end of every cycle, so the client arguments parsed from -e seem to reach only the first cycle - confirm this is intended */ + client_env = pmix_argv_copy(environ); + pmix_argv_prepend_nosize(&client_argv, executable); + + wakeup = nprocs; + myuid = getuid(); + mygid = getgid(); + + /* if the nspace registration hasn't completed yet, + * wait for it here */ + DEBUG_WAIT_THREAD(&x->lock); + free(tmp); + free(nspace); + PMIX_RELEASE(x); + + /* fork/exec the test */ + for (n = 0; n < nprocs; n++) { + proc.rank = n; + if (PMIX_SUCCESS != (rc = PMIx_server_setup_fork(&proc, &client_env))) { + fprintf(stderr, "Server fork setup failed with error %d\n", rc); + PMIx_server_finalize(); + return rc; + } + x =
PMIX_NEW(myxfer_t); + if (PMIX_SUCCESS != (rc = PMIx_server_register_client(&proc, myuid, mygid, + NULL, opcbfunc, x))) { + fprintf(stderr, "Server register client failed with error %d\n", rc); + PMIx_server_finalize(); + return rc; + } + /* don't fork/exec the client until we know it is registered + * so we avoid a potential race condition in the server */ + DEBUG_WAIT_THREAD(&x->lock); + PMIX_RELEASE(x); + pid = fork(); + if (pid < 0) { + fprintf(stderr, "Fork failed\n"); + PMIx_server_finalize(); + return -1; + } + child = PMIX_NEW(wait_tracker_t); + child->pid = pid; + pmix_list_append(&children, &child->super); + + if (pid == 0) { + execve(executable, client_argv, client_env); + /* Does not return */ + exit(0); + } + } + pmix_argv_free(client_argv); + client_argv = NULL; + pmix_argv_free(client_env); + client_env = NULL; + + /* hang around until the client(s) finalize */ + while (0 < wakeup) { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 100000; + nanosleep(&ts, NULL); + } + + /* see if anyone exited with non-zero status */ + n=0; + PMIX_LIST_FOREACH(child, &children, wait_tracker_t) { + if (0 != child->exit_code) { + fprintf(stderr, "Child %d exited with status %d - test FAILED\n", n, child->exit_code); + goto done; + } + ++n; + } + + /* deregister the clients */ + for (n = 0; n < nprocs; n++) { + proc.rank = n; + x = PMIX_NEW(myxfer_t); + PMIx_server_deregister_client(&proc, opcbfunc, x); + DEBUG_WAIT_THREAD(&x->lock); + PMIX_RELEASE(x); + } + /* deregister the nspace */ + x = PMIX_NEW(myxfer_t); + PMIx_server_deregister_nspace(proc.nspace, opcbfunc, x); + DEBUG_WAIT_THREAD(&x->lock); + PMIX_RELEASE(x); + + PMIX_LIST_DESTRUCT(&children); + PMIX_CONSTRUCT(&children, pmix_list_t); + + sleep(delay); + } + + done: + /* deregister the event handlers */ + PMIx_Deregister_event_handler(0, NULL, NULL); + + /* release any pub data */ + PMIX_LIST_DESTRUCT(&pubdata); + + free(executable); + + /* finalize the server library */ + if (PMIX_SUCCESS != (rc = 
PMIx_server_finalize())) { + fprintf(stderr, "Finalize failed with error %d\n", rc); + exit_code = rc; + } + + if (0 == exit_code) { + fprintf(stderr, "Test finished OK!\n"); + } else { + fprintf(stderr, "TEST FAILED WITH ERROR %d\n", exit_code); + } + + return exit_code; +} + +static void set_namespace(int nprocs, char *ranks, char *nspace, + pmix_op_cbfunc_t cbfunc, myxfer_t *x) +{ + char *regex, *ppn; + char hostname[PMIX_MAXHOSTNAMELEN]; + + gethostname(hostname, sizeof(hostname)); + x->ninfo = 7; + + PMIX_INFO_CREATE(x->info, x->ninfo); + (void)strncpy(x->info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[0].value.type = PMIX_UINT32; + x->info[0].value.data.uint32 = nprocs; + + (void)strncpy(x->info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); + x->info[1].value.type = PMIX_UINT32; + x->info[1].value.data.uint32 = 0; + + (void)strncpy(x->info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); + x->info[2].value.type = PMIX_UINT32; + x->info[2].value.data.uint32 = nprocs; + + (void)strncpy(x->info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); + x->info[3].value.type = PMIX_STRING; + x->info[3].value.data.string = strdup(ranks); + + PMIx_generate_regex(hostname, ®ex); + (void)strncpy(x->info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + x->info[4].value.type = PMIX_STRING; + x->info[4].value.data.string = regex; + + PMIx_generate_ppn(ranks, &ppn); + (void)strncpy(x->info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + x->info[5].value.type = PMIX_STRING; + x->info[5].value.data.string = ppn; + + (void)strncpy(x->info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); + x->info[6].value.type = PMIX_UINT32; + x->info[6].value.data.uint32 = nprocs; + + PMIx_server_register_nspace(nspace, nprocs, x->info, x->ninfo, + cbfunc, x); +} + +static void errhandler(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + return; +} 
+ +static void errhandler_reg_callbk (pmix_status_t status, + size_t errhandler_ref, + void *cbdata) +{ + mylock_t *lock = (mylock_t*)cbdata; + + lock->status = status; + DEBUG_WAKEUP_THREAD(lock); +} + +static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + return PMIX_SUCCESS; +} +static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + /* ensure we call the cbfunc so the proc can exit! */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + return PMIX_SUCCESS; +} + +static void abcbfunc(pmix_status_t status, void *cbdata) +{ + myxfer_t *x = (myxfer_t*)cbdata; + + /* be sure to release the caller */ + if (NULL != x->cbfunc) { + x->cbfunc(status, x->cbdata); + } + PMIX_RELEASE(x); +} + +static pmix_status_t abort_fn(const pmix_proc_t *proc, + void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t rc; + myxfer_t *x; + + /* instead of aborting the specified procs, notify them + * (if they have registered their errhandler) */ + + /* use the myxfer_t object to ensure we release + * the caller when notification has been queued */ + x = PMIX_NEW(myxfer_t); + (void)strncpy(x->caller.nspace, proc->nspace, PMIX_MAX_NSLEN); + x->caller.rank = proc->rank; + + PMIX_INFO_CREATE(x->info, 2); + (void)strncpy(x->info[0].key, "DARTH", PMIX_MAX_KEYLEN); + x->info[0].value.type = PMIX_INT8; + x->info[0].value.data.int8 = 12; + (void)strncpy(x->info[1].key, "VADER", PMIX_MAX_KEYLEN); + x->info[1].value.type = PMIX_DOUBLE; + x->info[1].value.data.dval = 12.34; + x->cbfunc = cbfunc; + x->cbdata = cbdata; + + if (PMIX_SUCCESS != (rc = PMIx_Notify_event(status, &x->caller, + PMIX_RANGE_NAMESPACE, + x->info, 2, + abcbfunc, x))) { + pmix_output(0, "SERVER: FAILED NOTIFY ERROR %d", (int)rc); + } 
+ + return PMIX_SUCCESS; +} + + /* fence_nb hook: with a single server there is nothing to exchange, so the contributed data is returned unchanged; free is passed as the release function for data */ +static pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + /* pass the provided data back to each participating proc */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, data, ndata, cbdata, free, data); + } + return PMIX_SUCCESS; +} + + /* direct_modex hook: reports PMIX_ERR_NOT_FOUND, except in the timeout test where it returns without ever calling cbfunc */ +static pmix_status_t dmodex_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + /* if this is a timeout test, then do nothing */ + if (istimeouttest) { + return PMIX_SUCCESS; + } + + /* we don't have any data for remote procs as this + * test only runs one server - so report accordingly */ + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); + } + return PMIX_SUCCESS; +} + + /* publish hook: copies every info entry, tagged with the publisher's nspace/rank, onto the pubdata list and then acknowledges */ +static pmix_status_t publish_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_locdat_t *p; + size_t n; + + for (n=0; n < ninfo; n++) { + p = PMIX_NEW(pmix_locdat_t); + (void)strncpy(p->pdata.proc.nspace, proc->nspace, PMIX_MAX_NSLEN); + p->pdata.proc.rank = proc->rank; + (void)strncpy(p->pdata.key, info[n].key, PMIX_MAX_KEYLEN); + pmix_value_xfer(&p->pdata.value, (pmix_value_t*)&info[n].value); + pmix_list_append(&pubdata, &p->super); + } + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + return PMIX_SUCCESS; +} + + /* lookup hook: for each requested key, copies the first matching record from pubdata and returns the matches through cbfunc; the status passed to cbfunc is PMIX_ERR_NOT_FOUND when nothing matched */ +static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata) +{ + pmix_locdat_t *p, *p2; + pmix_list_t results; + size_t i, n; + pmix_pdata_t *pd = NULL; + pmix_status_t ret = PMIX_ERR_NOT_FOUND; + + PMIX_CONSTRUCT(&results, pmix_list_t); + + for (n=0; NULL != keys[n]; n++) { + PMIX_LIST_FOREACH(p, &pubdata, pmix_locdat_t) { + if (0 == strncmp(keys[n], p->pdata.key, PMIX_MAX_KEYLEN)) { + p2 =
PMIX_NEW(pmix_locdat_t); + (void)strncpy(p2->pdata.proc.nspace, p->pdata.proc.nspace, PMIX_MAX_NSLEN); + p2->pdata.proc.rank = p->pdata.proc.rank; + (void)strncpy(p2->pdata.key, p->pdata.key, PMIX_MAX_KEYLEN); + pmix_value_xfer(&p2->pdata.value, &p->pdata.value); + pmix_list_append(&results, &p2->super); + break; + } + } + } + /* flatten the matches into the array form the callback expects */ + if (0 < (n = pmix_list_get_size(&results))) { + ret = PMIX_SUCCESS; + PMIX_PDATA_CREATE(pd, n); + for (i=0; i < n; i++) { + p = (pmix_locdat_t*)pmix_list_remove_first(&results); + if (p) { + (void)strncpy(pd[i].proc.nspace, p->pdata.proc.nspace, PMIX_MAX_NSLEN); + pd[i].proc.rank = p->pdata.proc.rank; + (void)strncpy(pd[i].key, p->pdata.key, PMIX_MAX_KEYLEN); + pmix_value_xfer(&pd[i].value, &p->pdata.value); + /* the item is off the list now, so the list destructor + * below will not free it - release it here */ + PMIX_RELEASE(p); + } + } + } + PMIX_LIST_DESTRUCT(&results); + if (NULL != cbfunc) { + cbfunc(ret, pd, n, cbdata); + } + if (0 < n) { + PMIX_PDATA_FREE(pd, n); + } + return PMIX_SUCCESS; +} + + /* unpublish hook: removes the first pubdata record matching each given key, then acknowledges */ +static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_locdat_t *p, *p2; + size_t n; + + for (n=0; NULL != keys[n]; n++) { + PMIX_LIST_FOREACH_SAFE(p, p2, &pubdata, pmix_locdat_t) { + if (0 == strncmp(keys[n], p->pdata.key, PMIX_MAX_KEYLEN)) { + pmix_list_remove_item(&pubdata, &p->super); + PMIX_RELEASE(p); + break; + } + } + } + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + return PMIX_SUCCESS; +} + + /* Completion callback for the namespace registration started by spawn_fn: tells the requester the name of the spawned namespace and disposes of the tracker */ +static void spcbfunc(pmix_status_t status, void *cbdata) +{ + myxfer_t *x = (myxfer_t*)cbdata; + + if (NULL != x->spcbfunc) { + x->spcbfunc(PMIX_SUCCESS, "DYNSPACE", x->cbdata); + } + /* spawn_fn allocated the tracker and nobody else holds it */ + PMIX_RELEASE(x); +} + + /* spawn hook: fakes a launch by registering a two-proc namespace called DYNSPACE; the requester is answered from spcbfunc */ +static pmix_status_t spawn_fn(const pmix_proc_t *proc, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata) +{ + myxfer_t *x; + + /* in practice, we would pass this request to the local + * resource manager for launch, and then have that server + * execute our callback function.
For now, we will fake + * the spawn and just pretend */ + + /* must register the nspace for the new procs before + * we return to the caller */ + x = PMIX_NEW(myxfer_t); + x->spcbfunc = cbfunc; + x->cbdata = cbdata; + + set_namespace(2, "0,1", "DYNSPACE", spcbfunc, x); + + return PMIX_SUCCESS; +} + + /* number of connect requests seen so far (incremented by connect_fn) */ +static int numconnects = 0; + + /* connect hook: counts the request and acknowledges immediately */ +static pmix_status_t connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + + /* in practice, we would pass this request to the local + * resource manager for handling */ + + numconnects++; + + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + + return PMIX_SUCCESS; +} + + /* disconnect hook: acknowledges immediately */ +static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + + /* in practice, we would pass this request to the local + * resource manager for handling */ + + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + + return PMIX_SUCCESS; +} + + /* register_events hook: acknowledges immediately */ +static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + return PMIX_SUCCESS; +} + + /* deregister_events hook: no-op. NOTE(review): unlike register_event_fn this returns PMIX_SUCCESS without invoking cbfunc - confirm the library does not wait for that callback */ +static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_SUCCESS; +} + + /* notify_event hook: no-op. NOTE(review): cbfunc is never invoked here either - confirm against the server module contract */ +static pmix_status_t notify_event(pmix_status_t code, + const pmix_proc_t *source, + pmix_data_range_t range, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_SUCCESS; +} + + /* info array plus its length; not referenced by any code visible in this part of the file */ +typedef struct query_data_t { + pmix_info_t *data; + size_t ndata; +} query_data_t; + + /* query hook: answers each query with its index rendered as a string, keyed by the query's first key; fails with PMIX_ERROR when no callback is supplied */ +static pmix_status_t query_fn(pmix_proc_t *proct, + pmix_query_t *queries, size_t nqueries, + pmix_info_cbfunc_t cbfunc, + void *cbdata) +{ + size_t n; + pmix_info_t *info; + + if (NULL == cbfunc) { + return PMIX_ERROR; + } + /* keep
this simple */ + PMIX_INFO_CREATE(info, nqueries); + for (n=0; n < nqueries; n++) { + (void)strncpy(info[n].key, queries[n].keys[0], PMIX_MAX_KEYLEN); + info[n].value.type = PMIX_STRING; + if (0 > asprintf(&info[n].value.data.string, "%d", (int)n)) { + return PMIX_ERROR; + } + } + cbfunc(PMIX_SUCCESS, info, nqueries, cbdata, NULL, NULL); + return PMIX_SUCCESS; +} + +static void tool_connect_fn(pmix_info_t *info, size_t ninfo, + pmix_tool_connection_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_proc_t proc; + + /* just pass back an arbitrary nspace */ + (void)strncpy(proc.nspace, "TOOL", PMIX_MAX_NSLEN); + proc.rank = 0; + + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, &proc, cbdata); + } +} + +static void log_fn(const pmix_proc_t *client, + const pmix_info_t data[], size_t ndata, + const pmix_info_t directives[], size_t ndirs, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } +} + +static void wait_signal_callback(int fd, short event, void *arg) +{ + pmix_event_t *sig = (pmix_event_t*) arg; + int status; + pid_t pid; + wait_tracker_t *t2; + + if (SIGCHLD != event_get_signal(sig)) { + return; + } + + /* we can have multiple children leave but only get one + * sigchild callback, so reap all the waitpids until we + * don't get anything valid back */ + while (1) { + pid = waitpid(-1, &status, WNOHANG); + if (-1 == pid && EINTR == errno) { + /* try it again */ + continue; + } + /* if we got garbage, then nothing we can do */ + if (pid <= 0) { + return; + } + + /* we are already in an event, so it is safe to access the list */ + PMIX_LIST_FOREACH(t2, &children, wait_tracker_t) { + if (pid == t2->pid) { + t2->exit_code = status; + /* found it! 
*/ + if (0 != status && 0 == exit_code) { + exit_code = status; + } + --wakeup; + break; + } + } + } +} diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index 6505ac7272b..38dacaa9c2e 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. diff --git a/orte/mca/iof/base/iof_base_frame.c b/orte/mca/iof/base/iof_base_frame.c index bae0b99c279..3e6584fd788 100644 --- a/orte/mca/iof/base/iof_base_frame.c +++ b/orte/mca/iof/base/iof_base_frame.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -109,7 +109,7 @@ static int orte_iof_base_close(void) */ static int orte_iof_base_open(mca_base_open_flag_t flags) { - int rc, xmlfd; + int xmlfd; /* daemons do not need to do this as they do not write out stdout/err */ if (!ORTE_PROC_IS_DAEMON) { diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 8d178a46cf9..da844a5352c 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -121,6 +121,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, opal_list_t *modex; int n; + /* get the job data pointer */ if (NULL == (jdata = orte_get_job_data_object(job))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 39850edf171..633eb5de7ed 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index 4e0db3db890..f36286938f5 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -12,7 +12,7 @@ * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -327,6 +327,7 @@ int orte_proc_info_finalize(void) orte_process_info.proc_type = ORTE_PROC_TYPE_NONE; opal_argv_free(orte_process_info.aliases); + orte_process_info.aliases = NULL; init = false; return ORTE_SUCCESS;