diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index 92407af895d..37b73104486 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -227,12 +227,6 @@ AC_DEFUN([OPAL_CHECK_PMI],[ AC_DEFUN([OPAL_CHECK_PMIX],[ - opal_pmix_ext_CPPFLAGS= - opal_pmix_ext_LDFLAGS= - opal_pmix_ext_LIBS= - - OPAL_VAR_SCOPE_PUSH([pmix_ext_install_dir opal_pmix_CPPFLAGS_save opal_pmix_LDFLAGS_save opal_pmix_LIBS_save opal_pmix_LD_LIBRARY_PATH_save]) - AC_ARG_WITH([pmix], [AC_HELP_STRING([--with-pmix(=DIR)], [Build PMIx support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of PMIx. "external" forces Open MPI to use an external installation of PMIx. Supplying a valid directory name also forces Open MPI to use an external installation of PMIx, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI does not support --without-pmix.])]) @@ -246,7 +240,7 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AC_MSG_CHECKING([if user requested external PMIx support($with_pmix)]) AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"], [AC_MSG_RESULT([no]) - opal_external_pmix_happy="no"], + opal_external_pmix_happy=no], [AC_MSG_RESULT([yes]) # check for external pmix lib */ AS_IF([test "$with_pmix" = "external"], @@ -255,80 +249,6 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ # Make sure we have the headers and libs in the correct location OPAL_CHECK_WITHDIR([external-pmix], [$pmix_ext_install_dir/include], [pmix.h]) OPAL_CHECK_WITHDIR([external-libpmix], [$pmix_ext_install_dir/lib], [libpmix.*]) - AC_MSG_CHECKING([if external component can be used]) - OPAL_CHECK_PACKAGE([opal_pmix_ext], - [pmix.h], - [pmix], - [PMIx_Init], - [], - [$pmix_ext_install_dir], - [$pmix_ext_install_dir/lib], - [AC_MSG_RESULT([PMIx external support will be built]) - opal_external_pmix_happy=yes], - 
[opal_external_pmix_happy="no" - AC_MSG_RESULT([no]) - AC_MSG_WARN([External PMIx support was requested but failed]) - AC_MSG_WARN([as explained above.]) - AC_MSG_ERROR([Cannot continue])]) - # Check the version - opal_external_pmix_version="unknown" - opal_pmix_CPPFLAGS_save=$CPPFLAGS - opal_pmix_LDFLAGS_save=$LDFLAGS - opal_pmix_LIBS_save=$LIBS - LD_LIBRARY_PATH_orig=$opal_pmix_LD_LIBRARY_PATH_save - - CPPFLAGS=$opal_pmix_ext_CPPFLAGS - LDFLAGS=$opal_pmix_ext_LDFLAGS - LIBS=$opal_pmix_ext_LIBS - LD_LIBRARY_PATH=$pmix_ext_install_dir/lib:$LD_LIBRARY_PATH - export LD_LIBRARY_PATH - - AC_MSG_CHECKING([PMIx library version]) - AC_RUN_IFELSE([ - AC_LANG_SOURCE([ -#include -#include -#include -#include - -int main(int argc, char **argv) -{ - const char * version = NULL; - FILE *f = NULL; + opal_external_pmix_happy=yes]) - f = fopen("conftestval", "w"); - if( !f ) exit(1); - version = PMIx_Get_version(); - fprintf(f, "%s", version); - fclose(f); - - return 0; -} - ])], [ - eval opal_external_pmix_version=`cat conftestval` - AC_MSG_RESULT([$opal_external_pmix_version]) - ], [ - LD_LIBRARY_PATH=$opal_pmix_LD_LIBRARY_PATH_save - export LD_LIBRARY_PATH - opal_external_pmix_happy="no" - AC_MSG_ERROR([External PMIx support requested but could not build/run a test program. Aborting]) - ], [ - LD_LIBRARY_PATH=$opal_pmix_LD_LIBRARY_PATH_save - export LD_LIBRARY_PATH - opal_external_pmix_happy="no" - AC_MSG_ERROR([External PMIx disabled for cross compile. 
Aborting]) - ]) - CPPFLAGS=$opal_pmix_CPPFLAGS_save - LDFLAGS=$opal_pmix_LDFLAGS_save - LIBS=$opal_pmix_LIBS_save - LD_LIBRARY_PATH=$opal_pmix_LD_LIBRARY_PATH_save - - opal_external_pmix_happy="yes" - ]) - - AC_SUBST(opal_pmix_ext_CPPFLAGS) - AC_SUBST(opal_pmix_ext_LDFLAGS) - AC_SUBST(opal_pmix_ext_LIBS) - - OPAL_VAR_SCOPE_POP ]) diff --git a/opal/mca/pmix/external/Makefile.am b/opal/mca/pmix/ext114/Makefile.am similarity index 51% rename from opal/mca/pmix/external/Makefile.am rename to opal/mca/pmix/ext114/Makefile.am index a6f2dfd5bfd..4248f8fbe1b 100644 --- a/opal/mca/pmix/external/Makefile.am +++ b/opal/mca/pmix/ext114/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. # Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014-2015 Mellanox Technologies, Inc. # All rights reserved. @@ -22,25 +22,25 @@ sources = \ # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # (for static builds).
-if MCA_BUILD_opal_pmix_external_DSO +if MCA_BUILD_opal_pmix_ext114_DSO component_noinst = -component_install = mca_pmix_external.la +component_install = mca_pmix_ext114.la else -component_noinst = libmca_pmix_external.la +component_noinst = libmca_pmix_ext114.la component_install = endif mcacomponentdir = $(opallibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_pmix_external_la_SOURCES = $(sources) -mca_pmix_external_la_CFLAGS = -mca_pmix_external_la_CPPFLAGS = $(opal_pmix_ext_CPPFLAGS) -mca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext_LDFLAGS) -mca_pmix_external_la_LIBADD = $(opal_pmix_ext_LIBS) +mca_pmix_ext114_la_SOURCES = $(sources) +mca_pmix_ext114_la_CFLAGS = +mca_pmix_ext114_la_CPPFLAGS = $(opal_pmix_ext114_CPPFLAGS) +mca_pmix_ext114_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext114_LDFLAGS) +mca_pmix_ext114_la_LIBADD = $(opal_pmix_ext114_LIBS) noinst_LTLIBRARIES = $(component_noinst) -libmca_pmix_external_la_SOURCES =$(sources) -libmca_pmix_external_la_CFLAGS = -libmca_pmix_external_la_CPPFLAGS = $(opal_pmix_ext_CPPFLAGS) -libmca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext_LDFLAGS) -libmca_pmix_external_la_LIBADD = $(opal_pmix_ext_LIBS) +libmca_pmix_ext114_la_SOURCES =$(sources) +libmca_pmix_ext114_la_CFLAGS = +libmca_pmix_ext114_la_CPPFLAGS = $(opal_pmix_ext114_CPPFLAGS) +libmca_pmix_ext114_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext114_LDFLAGS) +libmca_pmix_ext114_la_LIBADD = $(opal_pmix_ext114_LIBS) diff --git a/opal/mca/pmix/ext114/configure.m4 b/opal/mca/pmix/ext114/configure.m4 new file mode 100644 index 00000000000..7b66af18a71 --- /dev/null +++ b/opal/mca/pmix/ext114/configure.m4 @@ -0,0 +1,73 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. 
All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2014-2015 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_opal_pmix_ext114_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_opal_pmix_ext114_CONFIG],[ + AC_CONFIG_FILES([opal/mca/pmix/ext114/Makefile]) + + AC_REQUIRE([OPAL_CHECK_PMIX]) + + AS_IF([test "$opal_external_pmix_happy" = "yes"], + [AS_IF([test "$opal_event_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) + AC_MSG_WARN([LIBRARY.
THIS LIBRARY MUST POINT TO THE SAME ONE USED]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + AS_IF([test "$opal_hwloc_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL HWLOC]) + AC_MSG_WARN([LIBRARY THIS LIBRARY MUST POINT TO THE SAME ONE USED ]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + external_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi/$opal_pmix_external_basedir/pmix -I${includedir}/openmpi/$opal_pmix_external_basedir/pmix/include' + # check for the 1.1.4 version by looking for a function + # which was later removed + AC_MSG_CHECKING([if external component is version 1.1.4]) + OPAL_CHECK_PACKAGE([opal_pmix_ext114], + [pmix.h], + [pmix], + [PMIx_Register_errhandler], + [-lpmix], + [$pmix_ext_install_dir], + [$pmix_ext_install_dir/lib], + [AC_MSG_RESULT([yes]) + opal_pmix_external_114_happy=yes], + [AC_MSG_RESULT([no]) + opal_pmix_external_114_happy=no]) + + AC_SUBST(opal_pmix_ext114_CPPFLAGS) + AC_SUBST(opal_pmix_ext114_LDFLAGS) + AC_SUBST(opal_pmix_ext114_LIBS) + + AS_IF([test "$opal_pmix_external_114_happy" = "yes"], + [$1 + # need to set the wrapper flags for static builds + pmix_ext114_WRAPPER_EXTRA_LDFLAGS="$opal_pmix_ext114_LDFLAGS" + pmix_ext114_WRAPPER_EXTRA_LIBS="$opal_pmix_ext114_LIBS"], + [$2])], + [$2]) +])dnl diff --git a/opal/mca/pmix/external/pmix_ext.c b/opal/mca/pmix/ext114/pmix_ext.c similarity index 97% rename from opal/mca/pmix/external/pmix_ext.c rename to opal/mca/pmix/ext114/pmix_ext.c index ec98f2f0955..0d8d3121534 100644 --- a/opal/mca/pmix/external/pmix_ext.c +++ b/opal/mca/pmix/ext114/pmix_ext.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. 
All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. @@ -48,7 +48,7 @@ static const char *pmix1_get_nspace(opal_jobid_t jobid); static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace); -const opal_pmix_base_module_t opal_pmix_external_module = { +const opal_pmix_base_module_t opal_pmix_ext114_module = { /* client APIs */ .init = pmix1_client_init, .finalize = pmix1_client_finalize, @@ -99,7 +99,7 @@ static const char *pmix1_get_nspace(opal_jobid_t jobid) { opal_pmix1_jobid_trkr_t *jptr; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { return jptr->nspace; } @@ -112,7 +112,7 @@ static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace) opal_pmix1_jobid_trkr_t *jptr; /* if we don't already have it, add this to our jobid tracker */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { return; } @@ -120,7 +120,7 @@ static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace) jptr = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); jptr->jobid = jobid; - opal_list_append(&mca_pmix_external_component.jobids, &jptr->super); + opal_list_append(&mca_pmix_ext114_component.jobids, &jptr->super); } pmix_status_t pmix1_convert_opalrc(int rc) diff --git a/opal/mca/pmix/external/pmix_ext.h b/opal/mca/pmix/ext114/pmix_ext.h similarity index 96% rename from opal/mca/pmix/external/pmix_ext.h rename to opal/mca/pmix/ext114/pmix_ext.h index c4f9f30a5cc..44d87ecb38b 100644 --- a/opal/mca/pmix/external/pmix_ext.h +++ b/opal/mca/pmix/ext114/pmix_ext.h @@ -1,5 +1,5 @@ 
/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science @@ -11,8 +11,8 @@ * $HEADER$ */ -#ifndef MCA_PMIX_EXTERNAL_H -#define MCA_PMIX_EXTERNAL_H +#ifndef MCA_PMIX_EXT114_H +#define MCA_PMIX_EXT114_H #include "opal_config.h" @@ -38,11 +38,11 @@ typedef struct { opal_pmix_base_component_t super; opal_list_t jobids; bool native_launch; -} mca_pmix_external_component_t; +} mca_pmix_ext114_component_t; -OPAL_DECLSPEC extern mca_pmix_external_component_t mca_pmix_external_component; +OPAL_DECLSPEC extern mca_pmix_ext114_component_t mca_pmix_ext114_component; -OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_external_module; +OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_ext114_module; /**** INTERNAL OBJECTS ****/ typedef struct { diff --git a/opal/mca/pmix/external/pmix_ext_client.c b/opal/mca/pmix/ext114/pmix_ext_client.c similarity index 93% rename from opal/mca/pmix/external/pmix_ext_client.c rename to opal/mca/pmix/ext114/pmix_ext_client.c index 9f91a7e527a..81b193a7e7f 100644 --- a/opal/mca/pmix/external/pmix_ext_client.c +++ b/opal/mca/pmix/ext114/pmix_ext_client.c @@ -113,11 +113,7 @@ int pmix1_client_init(void) putenv(dbgvalue); } -#ifdef OPAL_PMIX_VERSION_11 rc = PMIx_Init(&my_proc); -#else - rc = PMIx_Init(&my_proc, NULL, 0); -#endif if (PMIX_SUCCESS != rc) { return pmix1_convert_rc(rc); } @@ -126,7 +122,7 @@ int pmix1_client_init(void) if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ - mca_pmix_external_component.native_launch = true; + mca_pmix_ext114_component.native_launch = true; opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace); } else { /* we were launched by someone else, so make the @@ -138,7 +134,7 @@ int 
pmix1_client_init(void) job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN); job->jobid = pname.jobid; - opal_list_append(&mca_pmix_external_component.jobids, &job->super); + opal_list_append(&mca_pmix_ext114_component.jobids, &job->super); pname.vpid = my_proc.rank; opal_proc_set_name(&pname); @@ -159,12 +155,7 @@ int pmix1_client_finalize(void) /* deregister the errhandler */ PMIx_Deregister_errhandler(errhdler_ref, NULL, NULL); -#ifdef OPAL_PMIX_VERSION_11 rc = PMIx_Finalize(); -#else - rc = PMIx_Finalize(NULL, 0); -#endif - return pmix1_convert_rc(rc); } @@ -197,7 +188,7 @@ int pmix1_abort(int flag, const char *msg, /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == ptr->name.jobid) { job = jptr; break; @@ -232,7 +223,7 @@ int pmix1_store_local(const opal_process_name_t *proc, opal_value_t *val) /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == proc->jobid) { job = jptr; break; @@ -298,7 +289,7 @@ int pmix1_fence(opal_list_t *procs, int collect_data) /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == ptr->name.jobid) { job = jptr; break; @@ -360,7 +351,7 @@ int pmix1_fencenb(opal_list_t *procs, int collect_data, /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, 
&mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == ptr->name.jobid) { job = jptr; break; @@ -445,7 +436,7 @@ int pmix1_get(const opal_process_name_t *proc, const char *key, /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == proc->jobid) { job = jptr; break; @@ -550,7 +541,7 @@ int pmix1_getnb(const opal_process_name_t *proc, const char *key, /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == proc->jobid) { job = jptr; break; @@ -701,7 +692,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info) /* transfer the data back */ n=0; OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { - if (mca_pmix_external_component.native_launch) { + if (mca_pmix_ext114_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(&d->proc.jobid, pdata[n].proc.nspace); @@ -712,7 +703,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info) } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == d->proc.jobid) { job = jptr; break; @@ -722,7 +713,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info) job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, pdata[n].proc.nspace, PMIX_MAX_NSLEN); 
job->jobid = d->proc.jobid; - opal_list_append(&mca_pmix_external_component.jobids, &job->super); + opal_list_append(&mca_pmix_ext114_component.jobids, &job->super); } if (PMIX_RANK_WILDCARD == pdata[n].proc.rank) { d->proc.vpid = OPAL_VPID_WILDCARD; @@ -764,7 +755,7 @@ static void lk_cbfunc(pmix_status_t status, for (n=0; n < ndata; n++) { d = OBJ_NEW(opal_pmix_pdata_t); opal_list_append(&results, &d->super); - if (mca_pmix_external_component.native_launch) { + if (mca_pmix_ext114_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(&d->proc.jobid, data[n].proc.nspace); @@ -775,7 +766,7 @@ static void lk_cbfunc(pmix_status_t status, } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == d->proc.jobid) { job = jptr; break; @@ -785,7 +776,7 @@ static void lk_cbfunc(pmix_status_t status, job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, data[n].proc.nspace, PMIX_MAX_NSLEN); job->jobid = d->proc.jobid; - opal_list_append(&mca_pmix_external_component.jobids, &job->super); + opal_list_append(&mca_pmix_ext114_component.jobids, &job->super); } if (PMIX_RANK_WILDCARD == data[n].proc.rank) { d->proc.vpid = OPAL_VPID_WILDCARD; @@ -949,7 +940,7 @@ int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); if (PMIX_SUCCESS == ret) { - if (mca_pmix_external_component.native_launch) { + if (mca_pmix_ext114_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(jobid, nspace); @@ -962,7 +953,7 @@ int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) job = 
OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = *jobid; - opal_list_append(&mca_pmix_external_component.jobids, &job->super); + opal_list_append(&mca_pmix_ext114_component.jobids, &job->super); } PMIX_APP_FREE(papps, napps); @@ -979,7 +970,7 @@ static void spcbfunc(pmix_status_t status, rc = pmix1_convert_rc(status); if (PMIX_SUCCESS == status) { - if (mca_pmix_external_component.native_launch) { + if (mca_pmix_ext114_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(&jobid, nspace); @@ -992,7 +983,7 @@ static void spcbfunc(pmix_status_t status, job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; - opal_list_append(&mca_pmix_external_component.jobids, &job->super); + opal_list_append(&mca_pmix_ext114_component.jobids, &job->super); } op->spcbfunc(rc, jobid, op->cbdata); @@ -1070,7 +1061,7 @@ int pmix1_connect(opal_list_t *procs) /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == ptr->name.jobid) { job = jptr; break; @@ -1123,7 +1114,7 @@ int pmix1_connectnb(opal_list_t *procs, OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { /* look thru our list of jobids and find the * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (job->jobid == ptr->name.jobid) { (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); break; @@ -1162,7 +1153,7 @@ int pmix1_disconnect(opal_list_t *procs) OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { /* look thru our list of jobids and find the * 
corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (job->jobid == ptr->name.jobid) { (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); break; @@ -1210,7 +1201,7 @@ int pmix1_disconnectnb(opal_list_t *procs, OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { /* look thru our list of jobids and find the * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (job->jobid == ptr->name.jobid) { (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); break; @@ -1245,7 +1236,7 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, nspace = NULL; } else { job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { job = jptr; break; @@ -1264,7 +1255,7 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, for (n=0; n < nprocs; n++) { nm = OBJ_NEW(opal_namelist_t); opal_list_append(procs, &nm->super); - if (mca_pmix_external_component.native_launch) { + if (mca_pmix_ext114_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ opal_convert_string_to_jobid(&nm->name.jobid, array[n].nspace); @@ -1275,7 +1266,7 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == nm->name.jobid) { job = jptr; break; @@ -1285,7 +1276,7 @@ int 
pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; - opal_list_append(&mca_pmix_external_component.jobids, &job->super); + opal_list_append(&mca_pmix_ext114_component.jobids, &job->super); } nm->name.vpid = array[n].rank; } @@ -1305,7 +1296,7 @@ int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist) /* look thru our list of jobids and find the * corresponding nspace */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { job = jptr; break; diff --git a/opal/mca/pmix/external/pmix_ext_component.c b/opal/mca/pmix/ext114/pmix_ext_component.c similarity index 82% rename from opal/mca/pmix/external/pmix_ext_component.c rename to opal/mca/pmix/ext114/pmix_ext_component.c index c250fe0703f..65d807bc3e9 100644 --- a/opal/mca/pmix/external/pmix_ext_component.c +++ b/opal/mca/pmix/ext114/pmix_ext_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -24,10 +24,10 @@ #include "pmix_ext.h" /* - * Public string showing the pmix external component version number + * Public string showing the pmix ext114 component version number */ -const char *opal_pmix_external_component_version_string = - "OPAL external pmix MCA component version " OPAL_VERSION; +const char *opal_pmix_ext114_component_version_string = + "OPAL external pmix1.1.4 MCA component version " OPAL_VERSION; /* * Local function @@ -42,7 +42,7 @@ static int external_component_query(mca_base_module_t **module, int *priority); * and pointers to our public functions in it */ -mca_pmix_external_component_t mca_pmix_external_component = { +mca_pmix_ext114_component_t mca_pmix_ext114_component = { { /* First, the mca_component_t struct containing meta information about the component itself */ @@ -55,7 +55,7 @@ mca_pmix_external_component_t mca_pmix_external_component = { /* Component name and version */ - .mca_component_name = "external", + .mca_component_name = "ext114", MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, OPAL_RELEASE_VERSION), @@ -76,13 +76,13 @@ mca_pmix_external_component_t mca_pmix_external_component = { static int external_open(void) { - OBJ_CONSTRUCT(&mca_pmix_external_component.jobids, opal_list_t); + OBJ_CONSTRUCT(&mca_pmix_ext114_component.jobids, opal_list_t); return OPAL_SUCCESS; } static int external_close(void) { - OPAL_LIST_DESTRUCT(&mca_pmix_external_component.jobids); + OPAL_LIST_DESTRUCT(&mca_pmix_ext114_component.jobids); return OPAL_SUCCESS; } @@ -100,6 +100,6 @@ static int external_component_query(mca_base_module_t **module, int *priority) /* we could be a server, so we still need to be considered */ *priority = 5; } - *module = (mca_base_module_t *)&opal_pmix_external_module; + *module = (mca_base_module_t *)&opal_pmix_ext114_module; return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/external/pmix_ext_server_north.c b/opal/mca/pmix/ext114/pmix_ext_server_north.c similarity index 100% 
rename from opal/mca/pmix/external/pmix_ext_server_north.c rename to opal/mca/pmix/ext114/pmix_ext_server_north.c diff --git a/opal/mca/pmix/external/pmix_ext_server_south.c b/opal/mca/pmix/ext114/pmix_ext_server_south.c similarity index 97% rename from opal/mca/pmix/external/pmix_ext_server_south.c rename to opal/mca/pmix/ext114/pmix_ext_server_south.c index 777ee11a5aa..31e1f18299e 100644 --- a/opal/mca/pmix/external/pmix_ext_server_south.c +++ b/opal/mca/pmix/ext114/pmix_ext_server_south.c @@ -215,7 +215,7 @@ int pmix1_server_register_nspace(opal_jobid_t jobid, job = OBJ_NEW(opal_pmix1_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; - opal_list_append(&mca_pmix_external_component.jobids, &job->super); + opal_list_append(&mca_pmix_ext114_component.jobids, &job->super); /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -268,12 +268,12 @@ void pmix1_server_deregister_nspace(opal_jobid_t jobid) opal_pmix1_jobid_trkr_t *jptr; /* if we don't already have it, we can ignore this */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == jobid) { /* found it - tell the server to deregister */ PMIx_server_deregister_nspace(jptr->nspace); /* now get rid of it from our list */ - opal_list_remove_item(&mca_pmix_external_component.jobids, &jptr->super); + opal_list_remove_item(&mca_pmix_ext114_component.jobids, &jptr->super); OBJ_RELEASE(jptr); return; } @@ -312,7 +312,7 @@ void pmix1_server_deregister_client(const opal_process_name_t *proc) pmix_proc_t p; /* if we don't already have it, we can ignore this */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_external_component.jobids, opal_pmix1_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext114_component.jobids, opal_pmix1_jobid_trkr_t) { if (jptr->jobid == proc->jobid) { /* found it - tell the server to deregister */ 
(void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); diff --git a/opal/mca/pmix/ext20/Makefile.am b/opal/mca/pmix/ext20/Makefile.am new file mode 100644 index 00000000000..3a62d32452b --- /dev/null +++ b/opal/mca/pmix/ext20/Makefile.am @@ -0,0 +1,46 @@ +# +# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + pmix_ext20.h \ + pmix_ext20_component.c \ + pmix_ext20.c \ + pmix_ext20_client.c \ + pmix_ext20_server_south.c \ + pmix_ext20_server_north.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_opal_pmix_ext20_DSO +component_noinst = +component_install = mca_pmix_ext20.la +else +component_noinst = libmca_pmix_ext20.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pmix_ext20_la_SOURCES = $(sources) +mca_pmix_ext20_la_CFLAGS = +mca_pmix_ext20_la_CPPFLAGS = $(opal_pmix_ext20_CPPFLAGS) +mca_pmix_ext20_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext20_LDFLAGS) +mca_pmix_ext20_la_LIBADD = $(opal_pmix_ext20_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pmix_ext20_la_SOURCES =$(sources) +libmca_pmix_ext20_la_CFLAGS = +libmca_pmix_ext20_la_CPPFLAGS = $(opal_pmix_ext20_CPPFLAGS) +libmca_pmix_ext20_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext20_LDFLAGS) +libmca_pmix_ext20_la_LIBADD = $(opal_pmix_ext20_LIBS) diff --git a/opal/mca/pmix/external/configure.m4 b/opal/mca/pmix/ext20/configure.m4 similarity index 66% rename from opal/mca/pmix/external/configure.m4 rename to opal/mca/pmix/ext20/configure.m4 index 9787db78fcf..b818e699586 100644 --- a/opal/mca/pmix/external/configure.m4 +++ b/opal/mca/pmix/ext20/configure.m4 @@ -13,7 +13,7 @@ # Copyright (c)
2011-2013 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2016 Intel, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2014-2015 Mellanox Technologies, Inc. @@ -25,10 +25,10 @@ # $HEADER$ # -# MCA_pmix_external_CONFIG([action-if-found], [action-if-not-found]) +# MCA_opal_pmix_ext20_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_opal_pmix_external_CONFIG],[ - AC_CONFIG_FILES([opal/mca/pmix/external/Makefile]) +AC_DEFUN([MCA_opal_pmix_ext20_CONFIG],[ + AC_CONFIG_FILES([opal/mca/pmix/ext20/Makefile]) AC_REQUIRE([OPAL_CHECK_PMIX]) @@ -44,18 +44,27 @@ AC_DEFUN([MCA_opal_pmix_external_CONFIG],[ AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) external_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi/$opal_pmix_external_basedir/pmix -I${includedir}/openmpi/$opal_pmix_external_basedir/pmix/include' - # check the version - AC_MSG_CHECKING([pmix version]) - OPAL_CHECK_VERSION([opal_pmix_external], - [$opal_external_pmix_version], - ["2.0"], - [opal_external_pmix_version_flag=1.1], - [opal_external_pmix_version_flag=2.0], - [opal_external_pmix_version_flag=2.0], - [opal_external_pmix_version_flag=2.0]) - AC_MSG_RESULT([$opal_external_pmix_version]) - AS_IF([test "$opal_external_pmix_version_flag" = "1.1"], - [AC_DEFINE([OPAL_PMIX_VERSION_11], [1], [PMIx external version])]) - $1], + # check for the 2.0 series by looking for a function + # that was added in that series + AC_MSG_CHECKING([if external component is series 2.0]) + OPAL_CHECK_PACKAGE([opal_pmix_ext20], + [pmix.h], + [pmix], + [PMIx_Register_event_handler], + [-lpmix], +
[$pmix_ext_install_dir], + [$pmix_ext_install_dir/lib], + [AC_MSG_RESULT([yes]) + opal_pmix_ext20_happy=yes], + [AC_MSG_RESULT([no]) + opal_pmix_ext20_happy=no]) + + AC_SUBST(opal_pmix_ext20_CPPFLAGS) + AC_SUBST(opal_pmix_ext20_LDFLAGS) + AC_SUBST(opal_pmix_ext20_LIBS) + + AS_IF([test "$opal_pmix_ext20_happy" = "yes"], + [$1], + [$2])], [$2]) ])dnl diff --git a/opal/mca/pmix/ext20/pmix_ext20.c b/opal/mca/pmix/ext20/pmix_ext20.c new file mode 100644 index 00000000000..11c939d2a60 --- /dev/null +++ b/opal/mca/pmix/ext20/pmix_ext20.c @@ -0,0 +1,495 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/dss/dss.h" +#include "opal/mca/event/event.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress_threads.h" +#include "opal/util/argv.h" +#include "opal/util/error.h" +#include "opal/util/output.h" +#include "opal/util/proc.h" +#include "opal/util/show_help.h" + +#include "pmix_ext20.h" +#include "opal/mca/pmix/base/base.h" +#include "opal/mca/pmix/pmix_types.h" + +#include + +/**** C.O.M.M.O.N I.N.T.E.R.F.A.C.E.S ****/ + +/* These are functions used by both client and server to + * access common functions in the embedded PMIx library */ + +static const char *pmix1_get_nspace(opal_jobid_t jobid); +static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace); + +const opal_pmix_base_module_t opal_pmix_ext20_module = { + /* client APIs */ + .init = pmix1_client_init, + 
.finalize = pmix1_client_finalize, + .initialized = pmix1_initialized, + .abort = pmix1_abort, + .commit = pmix1_commit, + .fence = pmix1_fence, + .fence_nb = pmix1_fencenb, + .put = pmix1_put, + .get = pmix1_get, + .get_nb = pmix1_getnb, + .publish = pmix1_publish, + .publish_nb = pmix1_publishnb, + .lookup = pmix1_lookup, + .lookup_nb = pmix1_lookupnb, + .unpublish = pmix1_unpublish, + .unpublish_nb = pmix1_unpublishnb, + .spawn = pmix1_spawn, + .spawn_nb = pmix1_spawnnb, + .connect = pmix1_connect, + .connect_nb = pmix1_connectnb, + .disconnect = pmix1_disconnect, + .disconnect_nb = pmix1_disconnectnb, + .resolve_peers = pmix1_resolve_peers, + .resolve_nodes = pmix1_resolve_nodes, + /* server APIs */ + .server_init = pmix1_server_init, + .server_finalize = pmix1_server_finalize, + .generate_regex = pmix1_server_gen_regex, + .generate_ppn = pmix1_server_gen_ppn, + .server_register_nspace = pmix1_server_register_nspace, + .server_deregister_nspace = pmix1_server_deregister_nspace, + .server_register_client = pmix1_server_register_client, + .server_deregister_client = pmix1_server_deregister_client, + .server_setup_fork = pmix1_server_setup_fork, + .server_dmodex_request = pmix1_server_dmodex, + .server_notify_error = pmix1_server_notify_error, + /* utility APIs */ + .get_version = PMIx_Get_version, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = pmix1_store_local, + .get_nspace = pmix1_get_nspace, + .register_jobid = pmix1_register_jobid +}; + +static const char *pmix1_get_nspace(opal_jobid_t jobid) +{ + opal_pmix1_jobid_trkr_t *jptr; + + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return jptr->nspace; + } + } + return NULL; +} + +static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace) +{ + opal_pmix1_jobid_trkr_t *jptr; + + /* if we don't already have it, add this to our jobid tracker */ 
+ OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return; + } + } + jptr = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); + jptr->jobid = jobid; + opal_list_append(&mca_pmix_ext20_component.jobids, &jptr->super); +} + +pmix_status_t pmix1_convert_opalrc(int rc) +{ + switch (rc) { + case OPAL_ERR_NOT_IMPLEMENTED: + case OPAL_ERR_NOT_SUPPORTED: + return PMIX_ERR_NOT_SUPPORTED; + + case OPAL_ERR_NOT_FOUND: + return PMIX_ERR_NOT_FOUND; + + case OPAL_ERR_PERM: + case OPAL_ERR_UNREACH: + case OPAL_ERR_SERVER_NOT_AVAIL: + return PMIX_ERR_UNREACH; + + case OPAL_ERR_BAD_PARAM: + return PMIX_ERR_BAD_PARAM; + + case OPAL_ERR_OUT_OF_RESOURCE: + return PMIX_ERR_OUT_OF_RESOURCE; + + case OPAL_ERR_DATA_VALUE_NOT_FOUND: + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + + case OPAL_ERR_TIMEOUT: + return PMIX_ERR_TIMEOUT; + + case OPAL_ERR_WOULD_BLOCK: + return PMIX_ERR_WOULD_BLOCK; + + case OPAL_EXISTS: + return PMIX_EXISTS; + + case OPAL_ERROR: + return PMIX_ERROR; + case OPAL_SUCCESS: + return PMIX_SUCCESS; + default: + return PMIX_ERROR; + } +} + +int pmix1_convert_rc(pmix_status_t rc) +{ + switch (rc) { + case PMIX_ERR_NOT_SUPPORTED: + return OPAL_ERR_NOT_SUPPORTED; + + case PMIX_ERR_NOT_FOUND: + return OPAL_ERR_NOT_FOUND; + + case PMIX_ERR_OUT_OF_RESOURCE: + return OPAL_ERR_OUT_OF_RESOURCE; + + case PMIX_ERR_INIT: + return OPAL_ERROR; + + case PMIX_ERR_BAD_PARAM: + return OPAL_ERR_BAD_PARAM; + + case PMIX_ERR_UNREACH: + case PMIX_ERR_NO_PERMISSIONS: + return OPAL_ERR_UNREACH; + + case PMIX_ERR_TIMEOUT: + return OPAL_ERR_TIMEOUT; + + case PMIX_ERR_WOULD_BLOCK: + return OPAL_ERR_WOULD_BLOCK; + + case PMIX_ERR_LOST_CONNECTION_TO_SERVER: + case PMIX_ERR_LOST_PEER_CONNECTION: + case PMIX_ERR_LOST_CONNECTION_TO_CLIENT: + return OPAL_ERR_COMM_FAILURE; + + case PMIX_EXISTS: + return OPAL_EXISTS; + + case PMIX_ERROR: + return OPAL_ERROR; + case PMIX_SUCCESS: + return OPAL_SUCCESS; + 
default: + return OPAL_ERROR; + } +} + +pmix_scope_t pmix1_convert_opalscope(opal_pmix_scope_t scope) { + switch(scope) { + case OPAL_PMIX_LOCAL: + return PMIX_LOCAL; + case OPAL_PMIX_REMOTE: + return PMIX_REMOTE; + case OPAL_PMIX_GLOBAL: + return PMIX_GLOBAL; + default: + return PMIX_SCOPE_UNDEF; + } +} + +void pmix1_value_load(pmix_value_t *v, + opal_value_t *kv) +{ + switch(kv->type) { + case OPAL_UNDEF: + v->type = PMIX_UNDEF; + opal_output(0, "TYPE WAS UNDEF"); + break; + case OPAL_BOOL: + v->type = PMIX_BOOL; + memcpy(&(v->data.flag), &kv->data.flag, 1); + break; + case OPAL_BYTE: + v->type = PMIX_BYTE; + memcpy(&(v->data.byte), &kv->data.byte, 1); + break; + case OPAL_STRING: + v->type = PMIX_STRING; + if (NULL != kv->data.string) { + v->data.string = strdup(kv->data.string); + } else { + v->data.string = NULL; + } + break; + case OPAL_SIZE: + v->type = PMIX_SIZE; + v->data.size = (size_t)kv->data.size; + break; + case OPAL_PID: + v->type = PMIX_PID; + memcpy(&(v->data.pid), &kv->data.pid, sizeof(pid_t)); + break; + case OPAL_INT: + v->type = PMIX_INT; + memcpy(&(v->data.integer), &kv->data.integer, sizeof(int)); + break; + case OPAL_INT8: + v->type = PMIX_INT8; + memcpy(&(v->data.int8), &kv->data.int8, 1); + break; + case OPAL_INT16: + v->type = PMIX_INT16; + memcpy(&(v->data.int16), &kv->data.int16, 2); + break; + case OPAL_INT32: + v->type = PMIX_INT32; + memcpy(&(v->data.int32), &kv->data.int32, 4); + break; + case OPAL_INT64: + v->type = PMIX_INT64; + memcpy(&(v->data.int64), &kv->data.int64, 8); + break; + case OPAL_UINT: + v->type = PMIX_UINT; + memcpy(&(v->data.uint), &kv->data.uint, sizeof(int)); + break; + case OPAL_UINT8: + v->type = PMIX_UINT8; + memcpy(&(v->data.uint8), &kv->data.uint8, 1); + break; + case OPAL_UINT16: + v->type = PMIX_UINT16; + memcpy(&(v->data.uint16), &kv->data.uint16, 2); + break; + case OPAL_UINT32: + v->type = PMIX_UINT32; + memcpy(&(v->data.uint32), &kv->data.uint32, 4); + break; + case OPAL_UINT64: + v->type = 
PMIX_UINT64; + memcpy(&(v->data.uint64), &kv->data.uint64, 8); + break; + case OPAL_FLOAT: + v->type = PMIX_FLOAT; + memcpy(&(v->data.fval), &kv->data.fval, sizeof(float)); + break; + case OPAL_DOUBLE: + v->type = PMIX_DOUBLE; + memcpy(&(v->data.dval), &kv->data.dval, sizeof(double)); + break; + case OPAL_TIMEVAL: + v->type = PMIX_TIMEVAL; + memcpy(&(v->data.tv), &kv->data.tv, sizeof(struct timeval)); + break; + case OPAL_BYTE_OBJECT: + v->type = PMIX_BYTE_OBJECT; + if (NULL != kv->data.bo.bytes) { + v->data.bo.bytes = (char*)malloc(kv->data.bo.size); + memcpy(v->data.bo.bytes, kv->data.bo.bytes, kv->data.bo.size); + v->data.bo.size = (size_t)kv->data.bo.size; + } else { + v->data.bo.bytes = NULL; + v->data.bo.size = 0; + } + break; + default: + /* silence warnings */ + break; + } +} + +int pmix1_value_unload(opal_value_t *kv, + const pmix_value_t *v) +{ + int rc=OPAL_SUCCESS; + + + switch(v->type) { + case PMIX_UNDEF: + rc = OPAL_ERR_UNKNOWN_DATA_TYPE; + break; + case PMIX_BOOL: + kv->type = OPAL_BOOL; + memcpy(&kv->data.flag, &(v->data.flag), 1); + break; + case PMIX_BYTE: + kv->type = OPAL_BYTE; + memcpy(&kv->data.byte, &(v->data.byte), 1); + break; + case PMIX_STRING: + kv->type = OPAL_STRING; + if (NULL != v->data.string) { + kv->data.string = strdup(v->data.string); + } + break; + case PMIX_SIZE: + kv->type = OPAL_SIZE; + kv->data.size = (int)v->data.size; + break; + case PMIX_PID: + kv->type = OPAL_PID; + memcpy(&kv->data.pid, &(v->data.pid), sizeof(pid_t)); + break; + case PMIX_INT: + kv->type = OPAL_INT; + memcpy(&kv->data.integer, &(v->data.integer), sizeof(int)); + break; + case PMIX_INT8: + kv->type = OPAL_INT8; + memcpy(&kv->data.int8, &(v->data.int8), 1); + break; + case PMIX_INT16: + kv->type = OPAL_INT16; + memcpy(&kv->data.int16, &(v->data.int16), 2); + break; + case PMIX_INT32: + kv->type = OPAL_INT32; + memcpy(&kv->data.int32, &(v->data.int32), 4); + break; + case PMIX_INT64: + kv->type = OPAL_INT64; + memcpy(&kv->data, &(v->data.int64), 8); + 
break; + case PMIX_UINT: + kv->type = OPAL_UINT; + memcpy(&kv->data, &(v->data.uint), sizeof(int)); + break; + case PMIX_UINT8: + kv->type = OPAL_UINT8; + memcpy(&kv->data, &(v->data.uint8), 1); + break; + case PMIX_UINT16: + kv->type = OPAL_UINT16; + memcpy(&kv->data, &(v->data.uint16), 2); + break; + case PMIX_UINT32: + kv->type = OPAL_UINT32; + memcpy(&kv->data, &(v->data.uint32), 4); + break; + case PMIX_UINT64: + kv->type = OPAL_UINT64; + memcpy(&kv->data, &(v->data.uint64), 8); + break; + case PMIX_FLOAT: + kv->type = OPAL_FLOAT; + memcpy(&kv->data, &(v->data.fval), sizeof(float)); + break; + case PMIX_DOUBLE: + kv->type = OPAL_DOUBLE; + memcpy(&kv->data, &(v->data.dval), sizeof(double)); + break; + case PMIX_TIMEVAL: + kv->type = OPAL_TIMEVAL; + memcpy(&kv->data, &(v->data.tv), sizeof(struct timeval)); + break; + case PMIX_BYTE_OBJECT: + kv->type = OPAL_BYTE_OBJECT; + if (NULL != v->data.bo.bytes && 0 < v->data.bo.size) { + kv->data.bo.bytes = (uint8_t*)malloc(v->data.bo.size); + memcpy(kv->data.bo.bytes, v->data.bo.bytes, v->data.bo.size); + kv->data.bo.size = (int)v->data.bo.size; + } else { + kv->data.bo.bytes = NULL; + kv->data.bo.size = 0; + } + break; + default: + /* silence warnings */ + rc = OPAL_ERROR; + break; + } + return rc; +} + + +/**** INSTANTIATE INTERNAL CLASSES ****/ +OBJ_CLASS_INSTANCE(opal_pmix1_jobid_trkr_t, + opal_list_item_t, + NULL, NULL); + +static void opcon(pmix1_opcaddy_t *p) +{ + memset(&p->p, 0, sizeof(pmix_proc_t)); + p->procs = NULL; + p->nprocs = 0; + p->error_procs = NULL; + p->nerror_procs = 0; + p->info = NULL; + p->ninfo = 0; + p->apps = NULL; + p->sz = 0; + p->opcbfunc = NULL; + p->mdxcbfunc = NULL; + p->valcbfunc = NULL; + p->lkcbfunc = NULL; + p->spcbfunc = NULL; + p->cbdata = NULL; +} +static void opdes(pmix1_opcaddy_t *p) +{ + if (NULL != p->procs) { + PMIX_PROC_FREE(p->procs, p->nprocs); + } + if (NULL != p->error_procs) { + PMIX_PROC_FREE(p->error_procs, p->nerror_procs); + } + if (NULL != p->info) { + 
PMIX_INFO_FREE(p->info, p->sz); + } + if (NULL != p->apps) { + PMIX_APP_FREE(p->apps, p->sz); + } +} +OBJ_CLASS_INSTANCE(pmix1_opcaddy_t, + opal_object_t, + opcon, opdes); + +static void ocadcon(pmix1_opalcaddy_t *p) +{ + OBJ_CONSTRUCT(&p->procs, opal_list_t); + OBJ_CONSTRUCT(&p->info, opal_list_t); + OBJ_CONSTRUCT(&p->apps, opal_list_t); + p->opcbfunc = NULL; + p->dmdxfunc = NULL; + p->mdxcbfunc = NULL; + p->lkupcbfunc = NULL; + p->spwncbfunc = NULL; + p->cbdata = NULL; + p->odmdxfunc = NULL; + p->ocbdata = NULL; +} +static void ocaddes(pmix1_opalcaddy_t *p) +{ + OPAL_LIST_DESTRUCT(&p->procs); + OPAL_LIST_DESTRUCT(&p->info); + OPAL_LIST_DESTRUCT(&p->apps); +} +OBJ_CLASS_INSTANCE(pmix1_opalcaddy_t, + opal_object_t, + ocadcon, ocaddes); diff --git a/opal/mca/pmix/ext20/pmix_ext20.h b/opal/mca/pmix/ext20/pmix_ext20.h new file mode 100644 index 00000000000..4d2a82cd9c4 --- /dev/null +++ b/opal/mca/pmix/ext20/pmix_ext20.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_PMIX_EXT20_H +#define MCA_PMIX_EXT20_H + +#include "opal_config.h" + +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_SYS_UN_H +#include +#endif + +#include "opal/mca/mca.h" +#include "opal/mca/event/event.h" +#include "opal/util/proc.h" + +#include "opal/mca/pmix/pmix.h" +#include "pmix_server.h" +#include "pmix_server.h" +#include "pmix/pmix_common.h" + +BEGIN_C_DECLS + +typedef struct { + opal_pmix_base_component_t super; + opal_list_t jobids; + bool native_launch; +} mca_pmix_ext20_component_t; + +OPAL_DECLSPEC extern mca_pmix_ext20_component_t mca_pmix_ext20_component; + +OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_ext20_module; + +/**** INTERNAL OBJECTS ****/ +typedef struct { + opal_list_item_t super; + opal_jobid_t jobid; + char nspace[PMIX_MAX_NSLEN + 1]; +} opal_pmix1_jobid_trkr_t; +OBJ_CLASS_DECLARATION(opal_pmix1_jobid_trkr_t); + +typedef struct { + opal_object_t super; + pmix_proc_t p; + pmix_proc_t *procs; + size_t nprocs; + pmix_proc_t *error_procs; + size_t nerror_procs; + pmix_info_t *info; + size_t ninfo; + pmix_app_t *apps; + size_t sz; + opal_pmix_op_cbfunc_t opcbfunc; + opal_pmix_modex_cbfunc_t mdxcbfunc; + opal_pmix_value_cbfunc_t valcbfunc; + opal_pmix_lookup_cbfunc_t lkcbfunc; + opal_pmix_spawn_cbfunc_t spcbfunc; + void *cbdata; +} pmix1_opcaddy_t; +OBJ_CLASS_DECLARATION(pmix1_opcaddy_t); + +typedef struct { + opal_object_t super; + opal_list_t procs; + opal_list_t info; + opal_list_t apps; + pmix_op_cbfunc_t opcbfunc; + pmix_dmodex_response_fn_t dmdxfunc; + pmix_modex_cbfunc_t mdxcbfunc; + pmix_lookup_cbfunc_t lkupcbfunc; + pmix_spawn_cbfunc_t spwncbfunc; + void *cbdata; + opal_pmix_release_cbfunc_t odmdxfunc; + void *ocbdata; +} pmix1_opalcaddy_t; +OBJ_CLASS_DECLARATION(pmix1_opalcaddy_t); + + +/**** CLIENT FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix1_client_init(void); +OPAL_MODULE_DECLSPEC int 
pmix1_client_finalize(void); +OPAL_MODULE_DECLSPEC int pmix1_initialized(void); +OPAL_MODULE_DECLSPEC int pmix1_abort(int flag, const char *msg, + opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix1_commit(void); +OPAL_MODULE_DECLSPEC int pmix1_fence(opal_list_t *procs, int collect_data); +OPAL_MODULE_DECLSPEC int pmix1_fencenb(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_put(opal_pmix_scope_t scope, + opal_value_t *val); +OPAL_MODULE_DECLSPEC int pmix1_get(const opal_process_name_t *proc, const char *key, + opal_list_t *info, opal_value_t **val); +OPAL_MODULE_DECLSPEC int pmix1_getnb(const opal_process_name_t *proc, const char *key, + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_publish(opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix1_publishnb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_lookup(opal_list_t *data, opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix1_lookupnb(char **keys, opal_list_t *info, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_unpublish(char **keys, opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix1_unpublishnb(char **keys, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid); +OPAL_MODULE_DECLSPEC int pmix1_spawnnb(opal_list_t *job_info, opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_connect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix1_connectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_disconnect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix1_disconnectnb(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_resolve_peers(const char 
*nodename, opal_jobid_t jobid, + opal_list_t *procs); +OPAL_MODULE_DECLSPEC int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist); + +/**** COMMON FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix1_store_local(const opal_process_name_t *proc, + opal_value_t *val); + +/**** SERVER SOUTHBOUND FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC int pmix1_server_init(opal_pmix_server_module_t *module, + opal_list_t *info); +OPAL_MODULE_DECLSPEC int pmix1_server_finalize(void); +OPAL_MODULE_DECLSPEC int pmix1_server_gen_regex(const char *input, char **regex); +OPAL_MODULE_DECLSPEC int pmix1_server_gen_ppn(const char *input, char **ppn); +OPAL_MODULE_DECLSPEC int pmix1_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC void pmix1_server_deregister_nspace(opal_jobid_t jobid); +OPAL_MODULE_DECLSPEC int pmix1_server_register_client(const opal_process_name_t *proc, + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); +OPAL_MODULE_DECLSPEC void pmix1_server_deregister_client(const opal_process_name_t *proc); +OPAL_MODULE_DECLSPEC int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env); +OPAL_MODULE_DECLSPEC int pmix1_server_dmodex(const opal_process_name_t *proc, + opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_server_notify_error(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + + +/**** COMPONENT UTILITY FUNCTIONS ****/ +OPAL_MODULE_DECLSPEC pmix_status_t pmix1_convert_opalrc(int rc); +OPAL_MODULE_DECLSPEC int pmix1_convert_rc(pmix_status_t rc); +OPAL_MODULE_DECLSPEC pmix_scope_t pmix1_convert_opalscope(opal_pmix_scope_t scope); +OPAL_MODULE_DECLSPEC void pmix1_value_load(pmix_value_t *v, + opal_value_t *kv); +OPAL_MODULE_DECLSPEC int pmix1_value_unload(opal_value_t *kv, + const pmix_value_t *v); + 
+END_C_DECLS + +#endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/ext20/pmix_ext20_client.c b/opal/mca/pmix/ext20/pmix_ext20_client.c new file mode 100644 index 00000000000..14d2b97b238 --- /dev/null +++ b/opal/mca/pmix/ext20/pmix_ext20_client.c @@ -0,0 +1,1318 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/hash_string.h" +#include "opal/util/argv.h" +#include "opal/util/proc.h" + +#include "opal/mca/pmix/base/base.h" +#include "pmix_ext20.h" +#include "pmix.h" + +static pmix_proc_t my_proc; +static char *dbgvalue=NULL; +static size_t errhdler_ref = 0; + +#define PMIX_WAIT_FOR_COMPLETION(a) \ + do { \ + while ((a)) { \ + usleep(10); \ + } \ + } while (0) + + +static void completion_handler (void *cbdata) { + bool *active = (bool *)cbdata; + *active = false; +} + +static void myerr(size_t evhdlr_registration_id, + pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_info_t info[], size_t ninfo, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + int rc; + opal_list_t plist, ilist; + opal_namelist_t *nm; + opal_value_t *iptr; + size_t n; + volatile bool active; + + /* convert the incoming status */ + rc = pmix1_convert_rc(status); + + /* convert the array of procs */ + OBJ_CONSTRUCT(&plist, opal_list_t); + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + nm->name.jobid = strtoul(procs[n].nspace, NULL, 10); + nm->name.vpid = procs[n].rank; + opal_list_append(&plist, 
&nm->super);
+    }
+
+    /* convert the array of info */
+    OBJ_CONSTRUCT(&ilist, opal_list_t);
+    for (n=0; n < ninfo; n++) {
+        iptr = OBJ_NEW(opal_value_t);
+        iptr->key = strdup(info[n].key);
+        pmix1_value_unload(iptr, &info[n].value);
+        /* info values belong on ilist - plist carries only the
+         * opal_namelist_t entries built from procs[] above, and
+         * ilist is what is passed as the info list below */
+        opal_list_append(&ilist, &iptr->super);
+    }
+
+    /* call the base errhandler */
+    active = true;
+    opal_pmix_base_errhandler(rc, &plist, &ilist, completion_handler, (void *)&active);
+    PMIX_WAIT_FOR_COMPLETION(active);
+
+    OPAL_LIST_DESTRUCT(&plist);
+    OPAL_LIST_DESTRUCT(&ilist);
+}
+
+/* Callback handed to PMIx_Register_event_handler: stores the returned
+ * handler reference in errhdler_ref (passed to
+ * PMIx_Deregister_event_handler at finalize) and logs the status. */
+static void errreg_cbfunc (pmix_status_t status,
+                           size_t errhandler_ref,
+                           void *cbdata)
+{
+    errhdler_ref = errhandler_ref;
+    opal_output_verbose(5, opal_pmix_base_framework.framework_output,
+                        "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu",
+                        status, (unsigned long)errhandler_ref);
+}
+
+/* Initialize the PMIx client side of this component.
+ *
+ * Exports PMIX_DEBUG when the framework verbosity is positive, calls
+ * PMIx_Init, derives the OPAL jobid from the returned nspace, records
+ * the jobid/nspace pair in the component's jobid list, sets the OPAL
+ * process name and registers myerr as the default event handler.
+ *
+ * Returns the converted PMIx_Init status on failure, else OPAL_SUCCESS. */
+int pmix1_client_init(void)
+{
+    opal_process_name_t pname;
+    pmix_status_t rc;
+    int dbg;
+    opal_pmix1_jobid_trkr_t *job;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client init");
+
+    if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) {
+        asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
+        putenv(dbgvalue);
+    }
+
+    rc = PMIx_Init(&my_proc, NULL, 0);
+    if (PMIX_SUCCESS != rc) {
+        return pmix1_convert_rc(rc);
+    }
+
+    /* store our jobid and rank */
+    if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
+        /* if we were launched by the OMPI RTE, then
+         * the jobid is in a special format - so get it */
+        mca_pmix_ext20_component.native_launch = true;
+        opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
+    } else {
+        /* we were launched by someone else, so make the
+         * jobid just be the hash of the nspace */
+        OPAL_HASH_STR(my_proc.nspace, pname.jobid);
+    }
+    /* insert this into our list of jobids - it will be the
+     * first, and so we'll check it first */
+    job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
+    (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN);
+    /* strncpy does not terminate when the source fills all
+     * PMIX_MAX_NSLEN bytes; nspace is PMIX_MAX_NSLEN + 1 long */
+    job->nspace[PMIX_MAX_NSLEN] = '\0';
+    job->jobid =
pname.jobid;
+    opal_list_append(&mca_pmix_ext20_component.jobids, &job->super);
+
+    pname.vpid = my_proc.rank;
+    opal_proc_set_name(&pname);
+
+    /* register the default event handler */
+    PMIx_Register_event_handler(NULL, 0, NULL, 0, myerr, errreg_cbfunc, NULL);
+    return OPAL_SUCCESS;
+
+}
+
+/* Deregister the default event handler (by the reference saved in
+ * errhdler_ref) and finalize the PMIx client.  Returns the converted
+ * PMIx_Finalize status. */
+int pmix1_client_finalize(void)
+{
+    pmix_status_t rc;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client finalize");
+
+    /* deregister the default event handler */
+    PMIx_Deregister_event_handler(errhdler_ref, NULL, NULL);
+
+    rc = PMIx_Finalize(NULL, 0);
+    return pmix1_convert_rc(rc);
+}
+
+/* Thin wrapper: returns whatever PMIx_Initialized() reports. */
+int pmix1_initialized(void)
+{
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client initialized");
+
+    return PMIx_Initialized();
+}
+
+/* Request an abort of the given procs via PMIx_Abort.
+ *
+ * flag/msg are passed through unchanged.  procs (may be NULL or empty)
+ * is a list of opal_namelist_t that is converted to a pmix_proc_t
+ * array by mapping each jobid to its registered nspace.
+ *
+ * Returns OPAL_ERR_NOT_FOUND if a jobid has no registered nspace,
+ * otherwise the converted PMIx_Abort status. */
+int pmix1_abort(int flag, const char *msg,
+                opal_list_t *procs)
+{
+    pmix_status_t rc;
+    pmix_proc_t *parray=NULL;
+    size_t n, cnt=0;
+    opal_namelist_t *ptr;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client abort");
+
+    /* convert the list of procs to an array
+     * of pmix_proc_t */
+    if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) {
+        PMIX_PROC_CREATE(parray, cnt);
+        n=0;
+        OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+            /* look thru our list of jobids and find the
+             * corresponding nspace */
+            job = NULL;
+            OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+                if (jptr->jobid == ptr->name.jobid) {
+                    job = jptr;
+                    break;
+                }
+            }
+            if (NULL == job) {
+                /* don't leak the partially filled array */
+                PMIX_PROC_FREE(parray, cnt);
+                return OPAL_ERR_NOT_FOUND;
+            }
+            (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+            parray[n].rank = ptr->name.vpid;
+            ++n;
+        }
+    }
+
+    /* call the library abort */
+    rc = PMIx_Abort(flag, msg, parray, cnt);
+
+    /* release the array */
+    PMIX_PROC_FREE(parray, cnt);
+
+    return pmix1_convert_rc(rc);
+}
+
+/* Store val under val->key for proc (or for ourselves when proc is
+ * NULL) via PMIx_Store_internal.  Returns OPAL_ERR_NOT_FOUND when
+ * proc's jobid has no registered nspace, else the converted status. */
+int pmix1_store_local(const opal_process_name_t *proc, opal_value_t *val)
+{
+    pmix_value_t kv;
+
pmix_status_t rc;
+    pmix_proc_t p;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    /* p lives on the stack: zero it so nspace stays NUL-terminated even
+     * when strncpy copies a full PMIX_MAX_NSLEN bytes (assumes
+     * pmix_proc_t.nspace is PMIX_MAX_NSLEN + 1 bytes - TODO confirm) */
+    memset(&p, 0, sizeof(p));
+
+    if (NULL != proc) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == proc->jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
+        p.rank = proc->vpid;
+    } else {
+        /* use our name */
+        (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
+        p.rank = OPAL_PROC_MY_NAME.vpid;
+    }
+
+    PMIX_VALUE_CONSTRUCT(&kv);
+    pmix1_value_load(&kv, val);
+
+    rc = PMIx_Store_internal(&p, val->key, &kv);
+    PMIX_VALUE_DESTRUCT(&kv);
+
+    return pmix1_convert_rc(rc);
+}
+
+/* Thin wrapper: returns the converted PMIx_Commit status. */
+int pmix1_commit(void)
+{
+    pmix_status_t rc;
+
+    rc = PMIx_Commit();
+    return pmix1_convert_rc(rc);
+}
+
+/* Completion callback for the non-blocking operations that carry a
+ * pmix1_opcaddy_t: forwards the converted status to the caller's
+ * opcbfunc (if any) and releases the caddy. */
+static void opcbfunc(pmix_status_t status, void *cbdata)
+{
+    pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata;
+
+    if (NULL != op->opcbfunc) {
+        op->opcbfunc(pmix1_convert_rc(status), op->cbdata);
+    }
+    OBJ_RELEASE(op);
+}
+
+/* Blocking fence across procs (NULL or empty list passes a NULL array
+ * to PMIx_Fence).  When collect_data is non-zero a single
+ * PMIX_COLLECT_DATA info flag is passed along.
+ *
+ * Returns OPAL_ERR_NOT_FOUND if a jobid has no registered nspace,
+ * otherwise the converted PMIx_Fence status. */
+int pmix1_fence(opal_list_t *procs, int collect_data)
+{
+    pmix_status_t rc;
+    pmix_proc_t *parray=NULL;
+    size_t n, cnt=0;
+    opal_namelist_t *ptr;
+    pmix_info_t info, *iptr;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client fence");
+
+    /* convert the list of procs to an array
+     * of pmix_proc_t */
+    if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) {
+        PMIX_PROC_CREATE(parray, cnt);
+        n=0;
+        OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+            /* look thru our list of jobids and find the
+             * corresponding nspace */
+            job = NULL;
+            OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+                if (jptr->jobid == ptr->name.jobid) {
+                    job = jptr;
+                    break;
+                }
+            }
+            if (NULL == job) {
+                /* don't leak the partially filled array */
+                PMIX_PROC_FREE(parray, cnt);
+                return OPAL_ERR_NOT_FOUND;
+            }
+
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+            parray[n].rank = ptr->name.vpid;
+            ++n;
+        }
+    }
+    if (collect_data) {
+        PMIX_INFO_CONSTRUCT(&info);
+        (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN);
+        info.value.type = PMIX_BOOL;
+        info.value.data.flag = true;
+        iptr = &info;
+        n = 1;
+    } else {
+        iptr = NULL;
+        n = 0;
+    }
+
+    /* call the library function */
+    rc = PMIx_Fence(parray, cnt, iptr, n);
+
+    /* release the array */
+    PMIX_PROC_FREE(parray, cnt);
+    if (NULL != iptr) {
+        PMIX_INFO_DESTRUCT(&info);
+    }
+
+    return pmix1_convert_rc(rc);
+
+}
+
+/* Non-blocking fence across procs.  The converted proc array is owned
+ * by a pmix1_opcaddy_t so it is freed when the caddy is released;
+ * cbfunc/cbdata are invoked from opcbfunc on completion.
+ *
+ * Returns OPAL_ERR_NOT_FOUND if a jobid has no registered nspace,
+ * otherwise the converted PMIx_Fence_nb status. */
+int pmix1_fencenb(opal_list_t *procs, int collect_data,
+                  opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_status_t rc;
+    pmix_proc_t *parray=NULL;
+    size_t n, cnt=0;
+    opal_namelist_t *ptr;
+    pmix1_opcaddy_t *op;
+    pmix_info_t info, *iptr;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client fence_nb");
+
+    /* convert the list of procs to an array
+     * of pmix_proc_t */
+    if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) {
+        PMIX_PROC_CREATE(parray, cnt);
+        n=0;
+        OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+            /* look thru our list of jobids and find the
+             * corresponding nspace */
+            job = NULL;
+            OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+                if (jptr->jobid == ptr->name.jobid) {
+                    job = jptr;
+                    break;
+                }
+            }
+            if (NULL == job) {
+                /* the caddy does not own parray yet, so free it here */
+                PMIX_PROC_FREE(parray, cnt);
+                return OPAL_ERR_NOT_FOUND;
+            }
+            (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+            parray[n].rank = ptr->name.vpid;
+            ++n;
+        }
+    }
+
+    if (collect_data) {
+        PMIX_INFO_CONSTRUCT(&info);
+        (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN);
+        info.value.type = PMIX_BOOL;
+        info.value.data.flag = true;
+        /* NOTE(review): info is on this function's stack but is handed
+         * to the non-blocking PMIx_Fence_nb - confirm the library
+         * consumes it before the call returns */
+        iptr = &info;
+        n = 1;
+    } else {
+        iptr = NULL;
+        n = 0;
+    }
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->opcbfunc = cbfunc;
+    op->cbdata = cbdata;
+    op->procs = parray;
+    op->nprocs = cnt;
+
+    /* call the library function */
+    rc = PMIx_Fence_nb(parray, cnt, iptr, n, opcbfunc, op);
+    if (PMIX_SUCCESS != rc) {
+        OBJ_RELEASE(op);
+    }
+
+    return pmix1_convert_rc(rc);
+
+}
+
+/* Publish val under val->key with the given scope via PMIx_Put.
+ * Returns the converted PMIx_Put status. */
+int pmix1_put(opal_pmix_scope_t opal_scope,
+              opal_value_t *val)
+{
+    pmix_value_t kv;
+    pmix_scope_t pmix_scope = pmix1_convert_opalscope(opal_scope);
+    pmix_status_t rc;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client put");
+
+    PMIX_VALUE_CONSTRUCT(&kv);
+    pmix1_value_load(&kv, val);
+
+    rc = PMIx_Put(pmix_scope, val->key, &kv);
+    PMIX_VALUE_DESTRUCT(&kv);
+    return pmix1_convert_rc(rc);
+}
+
+/* Blocking retrieval of key for proc.
+ *
+ * proc  - target process; NULL means "ourselves", in which case
+ *         OPAL_PMIX_JOBID and OPAL_PMIX_RANK are answered locally and
+ *         any other key is requested with a NULL proc
+ * info  - optional list of opal_value_t directives (may be NULL)
+ * val   - set to NULL first; on success receives a new opal_value_t
+ *         (left NULL when PMIx_Get succeeds but returns no value)
+ *
+ * Returns OPAL_ERR_NOT_FOUND if proc's jobid has no registered nspace,
+ * otherwise OPAL_SUCCESS or the converted/unload status. */
+int pmix1_get(const opal_process_name_t *proc, const char *key,
+              opal_list_t *info, opal_value_t **val)
+{
+    int ret;
+    pmix_value_t *kv;
+    pmix_status_t rc;
+    pmix_proc_t p, *pptr;
+    size_t ninfo, n;
+    pmix_info_t *pinfo;
+    opal_value_t *ival;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "%s PMIx_client get on proc %s key %s",
+                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
+                        (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key);
+
+    /* prep default response */
+    *val = NULL;
+    if (NULL != proc) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == proc->jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            return OPAL_ERR_NOT_FOUND;
+        }
+        /* p lives on the stack: zero it so nspace stays NUL-terminated
+         * even when strncpy copies a full PMIX_MAX_NSLEN bytes (assumes
+         * pmix_proc_t.nspace is PMIX_MAX_NSLEN + 1 bytes - TODO confirm) */
+        memset(&p, 0, sizeof(p));
+        (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
+        p.rank = proc->vpid;
+        pptr = &p;
+    } else {
+        /* if they are asking for our jobid, then return it */
+        if (0 == strcmp(key, OPAL_PMIX_JOBID)) {
+            (*val) = OBJ_NEW(opal_value_t);
+            (*val)->type = OPAL_UINT32;
+            (*val)->data.uint32 = OPAL_PROC_MY_NAME.jobid;
+            return OPAL_SUCCESS;
+        } else if (0 == strcmp(key, OPAL_PMIX_RANK)) {
+            (*val) = OBJ_NEW(opal_value_t);
+            (*val)->type = OPAL_INT;
+            (*val)->data.integer = my_proc.rank;
+            return OPAL_SUCCESS;
+        }
+        pptr = NULL;
+    }
+
+    if (NULL != info) {
+        ninfo = opal_list_get_size(info);
+        if (0 < ninfo) {
+            PMIX_INFO_CREATE(pinfo, ninfo);
+            n=0;
+            OPAL_LIST_FOREACH(ival, info, opal_value_t) {
+                (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN);
+                pmix1_value_load(&pinfo[n].value, ival);
+                /* advance to the next slot - without this every
+                 * entry overwrote pinfo[0] */
+                ++n;
+            }
+        } else {
+            pinfo = NULL;
+        }
+    } else {
+        pinfo = NULL;
+        ninfo = 0;
+    }
+
+    /* pass the request down */
+    rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv);
+    if (PMIX_SUCCESS == rc) {
+        if (NULL == kv) {
+            ret = OPAL_SUCCESS;
+        } else {
+            *val = OBJ_NEW(opal_value_t);
+            ret = pmix1_value_unload(*val, kv);
+            PMIX_VALUE_FREE(kv, 1);
+        }
+    } else {
+        ret = pmix1_convert_rc(rc);
+    }
+    PMIX_INFO_FREE(pinfo, ninfo);
+    return ret;
+}
+
+/* Completion callback for pmix1_getnb: converts the PMIx status and
+ * value to their OPAL forms, hands them to the caller's valcbfunc and
+ * releases the caddy. */
+static void val_cbfunc(pmix_status_t status,
+                       pmix_value_t *kv, void *cbdata)
+{
+    pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata;
+    int rc;
+    opal_value_t val, *v=NULL;
+
+    /* status is a PMIx code and rc is reported to an OPAL callback,
+     * so convert PMIx -> OPAL (as opcbfunc does), not the reverse */
+    rc = pmix1_convert_rc(status);
+    if (PMIX_SUCCESS == status && NULL != kv) {
+        rc = pmix1_value_unload(&val, kv);
+        v = &val;
+    }
+
+    if (NULL != op->valcbfunc) {
+
op->valcbfunc(rc, v, op->cbdata);
+    }
+    OBJ_RELEASE(op);
+}
+
+/*
+ * Non-blocking retrieval of the value stored under "key".
+ *
+ * proc    process whose data is wanted; its jobid must be in the
+ *         component's jobid tracker list or OPAL_ERR_NOT_FOUND is
+ *         returned.  When NULL, our own nspace is used with
+ *         PMIX_RANK_WILDCARD.
+ * key     key to look up
+ * info    optional list of opal_value_t converted to a pmix_info_t
+ *         array held by the caddy
+ * cbfunc  invoked (via val_cbfunc) with the result
+ * cbdata  passed back to cbfunc untouched
+ *
+ * The caddy is released here on every failure path; on success
+ * val_cbfunc releases it.
+ */
+int pmix1_getnb(const opal_process_name_t *proc, const char *key,
+                opal_list_t *info,
+                opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix1_opcaddy_t *op;
+    pmix_status_t rc;
+    size_t n;
+    opal_value_t *ival;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "%s PMIx_client get_nb on proc %s key %s",
+                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
+                        (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key);
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->valcbfunc = cbfunc;
+    op->cbdata = cbdata;
+
+    if (NULL != proc) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == proc->jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            /* the caddy was already allocated - don't leak it */
+            OBJ_RELEASE(op);
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN);
+        op->p.rank = proc->vpid;
+    } else {
+        (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
+        op->p.rank = PMIX_RANK_WILDCARD;
+    }
+
+    if (NULL != info) {
+        op->sz = opal_list_get_size(info);
+        if (0 < op->sz) {
+            PMIX_INFO_CREATE(op->info, op->sz);
+            n=0;
+            OPAL_LIST_FOREACH(ival, info, opal_value_t) {
+                (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN);
+                pmix1_value_load(&op->info[n].value, ival);
+                /* move to the next slot - without this every list
+                 * entry was written over op->info[0] */
+                ++n;
+            }
+        }
+    }
+
+    /* call the library function */
+    rc = PMIx_Get_nb(&op->p, key, op->info, op->sz, val_cbfunc, op);
+    if (PMIX_SUCCESS != rc) {
+        OBJ_RELEASE(op);
+    }
+
+    return pmix1_convert_rc(rc);
+}
+
+int pmix1_publish(opal_list_t *info)
+{
+    pmix_info_t *pinfo;
+    pmix_status_t ret;
+    opal_value_t *iptr;
+    size_t sz, n;
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client publish");
+
+    if (NULL == info) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    sz = opal_list_get_size(info);
+    if (0 < sz) {
+        PMIX_INFO_CREATE(pinfo,
sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, iptr); + ++n; + } + } else { + pinfo = NULL; + } + + ret = PMIx_Publish(pinfo, sz); + + return pmix1_convert_rc(ret); +} + +int pmix1_publishnb(opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + opal_value_t *iptr; + size_t n; + pmix1_opcaddy_t *op; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client publish_nb"); + + if (NULL == info) { + return OPAL_ERR_BAD_PARAM; + } + + /* create the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + op->sz = opal_list_get_size(info); + if (0 < op->sz) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->info[n].value, iptr); + ++n; + } + } + + ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op); + + return pmix1_convert_rc(ret); +} + +int pmix1_lookup(opal_list_t *data, opal_list_t *info) +{ + pmix_pdata_t *pdata; + pmix_info_t *pinfo; + size_t sz, ninfo, n; + int rc; + pmix_status_t ret; + opal_pmix_pdata_t *d; + opal_value_t *iptr; + opal_pmix1_jobid_trkr_t *job, *jptr; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIx_client lookup"); + + if (NULL == data) { + return OPAL_ERR_BAD_PARAM; + } + + sz = opal_list_get_size(data); + PMIX_PDATA_CREATE(pdata, sz); + n=0; + OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { + (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); + } + + if (NULL != info) { + ninfo = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n++].key, iptr->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, iptr); + ++n; + } + } else { + pinfo = NULL; + ninfo = 0; + } + + ret = 
PMIx_Lookup(pdata, sz, pinfo, ninfo); + PMIX_INFO_FREE(pinfo, ninfo); + + if (PMIX_SUCCESS == ret) { + /* transfer the data back */ + n=0; + OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { + if (mca_pmix_ext20_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&d->proc.jobid, pdata[n].proc.nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(pdata[n].proc.nspace, d->proc.jobid); + } + /* if we don't already have it, add this to our jobid tracker */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == d->proc.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, pdata[n].proc.nspace, PMIX_MAX_NSLEN); + job->jobid = d->proc.jobid; + opal_list_append(&mca_pmix_ext20_component.jobids, &job->super); + } + if (PMIX_RANK_WILDCARD == pdata[n].proc.rank) { + d->proc.vpid = OPAL_VPID_WILDCARD; + } else { + d->proc.vpid = pdata[n].proc.rank; + } + rc = pmix1_value_unload(&d->value, &pdata[n].value); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + PMIX_PDATA_FREE(pdata, sz); + return OPAL_ERR_BAD_PARAM; + } + ++n; + } + } + + return pmix1_convert_rc(ret); +} + +static void lk_cbfunc(pmix_status_t status, + pmix_pdata_t data[], size_t ndata, + void *cbdata) +{ + pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; + opal_pmix_pdata_t *d; + opal_list_t results, *r = NULL; + int rc; + size_t n; + opal_pmix1_jobid_trkr_t *job, *jptr; + + if (NULL == op->lkcbfunc) { + OBJ_RELEASE(op); + return; + } + + rc = pmix1_convert_rc(status); + if (OPAL_SUCCESS == rc) { + OBJ_CONSTRUCT(&results, opal_list_t); + for (n=0; n < ndata; n++) { + d = OBJ_NEW(opal_pmix_pdata_t); + opal_list_append(&results, &d->super); + if (mca_pmix_ext20_component.native_launch) { + /* if 
we were launched by the OMPI RTE, then
+                 * the jobid is in a special format - so get it */
+                opal_convert_string_to_jobid(&d->proc.jobid, data[n].proc.nspace);
+            } else {
+                /* we were launched by someone else, so make the
+                 * jobid just be the hash of the nspace */
+                OPAL_HASH_STR(data[n].proc.nspace, d->proc.jobid);
+            }
+            /* if we don't already have it, add this to our jobid tracker */
+            job = NULL;
+            OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+                if (jptr->jobid == d->proc.jobid) {
+                    job = jptr;
+                    break;
+                }
+            }
+            if (NULL == job) {
+                job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
+                (void)strncpy(job->nspace, data[n].proc.nspace, PMIX_MAX_NSLEN);
+                job->jobid = d->proc.jobid;
+                opal_list_append(&mca_pmix_ext20_component.jobids, &job->super);
+            }
+            if (PMIX_RANK_WILDCARD == data[n].proc.rank) {
+                d->proc.vpid = OPAL_VPID_WILDCARD;
+            } else {
+                d->proc.vpid = data[n].proc.rank;
+            }
+            d->value.key = strdup(data[n].key);
+            rc = pmix1_value_unload(&d->value, &data[n].value);
+            if (OPAL_SUCCESS != rc) {
+                rc = OPAL_ERR_BAD_PARAM;
+                OPAL_ERROR_LOG(rc);
+                /* r is still NULL on this path, so the cleanup below
+                 * would skip the partly built list - tear it down here */
+                OPAL_LIST_DESTRUCT(&results);
+                goto release;
+            }
+        }
+        r = &results;
+    }
+  release:
+    /* execute the callback */
+    op->lkcbfunc(rc, r, op->cbdata);
+
+    if (NULL != r) {
+        OPAL_LIST_DESTRUCT(&results);
+    }
+    OBJ_RELEASE(op);
+}
+
+/*
+ * Non-blocking lookup of the given keys.
+ *
+ * keys    passed straight through to PMIx_Lookup_nb
+ * info    optional list of opal_value_t converted into the caddy's
+ *         pmix_info_t array
+ * cbfunc  receives the results through lk_cbfunc
+ * cbdata  passed back to cbfunc untouched
+ *
+ * Returns the converted PMIx_Lookup_nb status.  If the call is rejected
+ * the caddy is released here, as pmix1_getnb() does.
+ */
+int pmix1_lookupnb(char **keys, opal_list_t *info,
+                   opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_status_t ret;
+    pmix1_opcaddy_t *op;
+    opal_value_t *iptr;
+    size_t n;
+
+
+    opal_output_verbose(1, opal_pmix_base_framework.framework_output,
+                        "PMIx_client lookup_nb");
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->lkcbfunc = cbfunc;
+    op->cbdata = cbdata;
+
+    if (NULL != info) {
+        op->sz = opal_list_get_size(info);
+        if (0 < op->sz) {
+            PMIX_INFO_CREATE(op->info, op->sz);
+            n=0;
+            OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
+                (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN);
+                pmix1_value_load(&op->info[n].value, iptr);
+                ++n;
+            }
+        }
+    }
+
+    ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op);
+    if (PMIX_SUCCESS != ret) {
+        OBJ_RELEASE(op);
+    }
+
+    return pmix1_convert_rc(ret);
+}
+
+/*
+ * Blocking unpublish of the given keys.
+ *
+ * keys  passed straight through to PMIx_Unpublish
+ * info  optional list of opal_value_t converted to a temporary
+ *       pmix_info_t array that is freed before returning
+ *
+ * Returns the converted PMIx_Unpublish status.
+ */
+int pmix1_unpublish(char **keys, opal_list_t *info)
+{
+    pmix_status_t ret;
+    size_t ninfo, n;
+    pmix_info_t *pinfo;
+    opal_value_t *iptr;
+
+    if (NULL != info) {
+        ninfo = opal_list_get_size(info);
+        PMIX_INFO_CREATE(pinfo, ninfo);
+        n=0;
+        OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
+            /* key and value must land in the same slot, and the index
+             * must advance exactly once per list entry; the previous
+             * n++ / ++n pair skipped slots and ran past the array */
+            (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN);
+            pmix1_value_load(&pinfo[n].value, iptr);
+            ++n;
+        }
+    } else {
+        pinfo = NULL;
+        ninfo = 0;
+    }
+
+    ret = PMIx_Unpublish(keys, pinfo, ninfo);
+    PMIX_INFO_FREE(pinfo, ninfo);
+
+    return pmix1_convert_rc(ret);
+}
+
+/*
+ * Non-blocking unpublish of the given keys.
+ *
+ * Same argument conversion as pmix1_lookupnb(); completion is reported
+ * through opcbfunc.  If PMIx_Unpublish_nb rejects the call the caddy is
+ * released here.
+ */
+int pmix1_unpublishnb(char **keys, opal_list_t *info,
+                      opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_status_t ret;
+    pmix1_opcaddy_t *op;
+    opal_value_t *iptr;
+    size_t n;
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->opcbfunc = cbfunc;
+    op->cbdata = cbdata;
+
+    if (NULL != info) {
+        op->sz = opal_list_get_size(info);
+        if (0 < op->sz) {
+            PMIX_INFO_CREATE(op->info, op->sz);
+            n=0;
+            OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
+                (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN);
+                pmix1_value_load(&op->info[n].value, iptr);
+                ++n;
+            }
+        }
+    }
+
+    ret = PMIx_Unpublish_nb(keys, op->info, op->sz, opcbfunc, op);
+    if (PMIX_SUCCESS != ret) {
+        OBJ_RELEASE(op);
+    }
+
+    return pmix1_convert_rc(ret);
+}
+
+int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid)
+{
+    pmix_status_t ret;
+    pmix_info_t *pinfo = NULL;
+    pmix_app_t *papps;
+    size_t napps, n, m, ninfo = 0;
+    char nspace[PMIX_MAX_NSLEN+1];
+    opal_value_t *info;
+    opal_pmix_app_t *app;
+    opal_pmix1_jobid_trkr_t *job;
+
+    if (NULL != job_info && 0 < (ninfo = opal_list_get_size(job_info))) {
+        PMIX_INFO_CREATE(pinfo, ninfo);
+        n=0;
+        OPAL_LIST_FOREACH(info, job_info, opal_value_t) {
+            (void)strncpy(pinfo[n].key, info->key, PMIX_MAX_KEYLEN);
+            pmix1_value_load(&pinfo[n].value, info);
+            ++n;
+        }
+    }
+
+    napps = opal_list_get_size(apps); +
PMIX_APP_CREATE(papps, napps); + n=0; + OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { + papps[n].cmd = strdup(app->cmd); + papps[n].argc = app->argc; + papps[n].argv = opal_argv_copy(app->argv); + papps[n].env = opal_argv_copy(app->env); + papps[n].maxprocs = app->maxprocs; + if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) { + PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo); + m=0; + OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { + (void)strncpy(papps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&papps[n].info[m].value, info); + ++m; + } + } + ++n; + } + + ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); + if (PMIX_SUCCESS == ret) { + if (mca_pmix_ext20_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(jobid, nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(nspace, *jobid); + } + /* add this to our jobid tracker */ + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = *jobid; + opal_list_append(&mca_pmix_ext20_component.jobids, &job->super); + } + PMIX_APP_FREE(papps, napps); + + return pmix1_convert_rc(ret); +} + +static void spcbfunc(pmix_status_t status, + char *nspace, void *cbdata) +{ + pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; + int rc; + opal_jobid_t jobid=OPAL_JOBID_INVALID; + opal_pmix1_jobid_trkr_t *job; + + rc = pmix1_convert_rc(status); + if (PMIX_SUCCESS == status) { + if (mca_pmix_ext20_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&jobid, nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(nspace, jobid); + } + /* add this to our jobid tracker */ + job = 
OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_ext20_component.jobids, &job->super); + } + + op->spcbfunc(rc, jobid, op->cbdata); + OBJ_RELEASE(op); +} + +int pmix1_spawnnb(opal_list_t *job_info, opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata) +{ + pmix_status_t ret; + pmix1_opcaddy_t *op; + size_t n, m; + opal_value_t *info; + opal_pmix_app_t *app; + + /* create the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->spcbfunc = cbfunc; + op->cbdata = cbdata; + + if (NULL != job_info && 0 < (op->ninfo = opal_list_get_size(job_info))) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(info, job_info, opal_value_t) { + (void)strncpy(op->info[n].key, info->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->info[n].value, info); + ++n; + } + } + + op->sz = opal_list_get_size(apps); + PMIX_APP_CREATE(op->apps, op->sz); + n=0; + OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { + op->apps[n].cmd = strdup(app->cmd); + op->apps[n].argc = app->argc; + op->apps[n].argv = opal_argv_copy(app->argv); + op->apps[n].env = opal_argv_copy(app->env); + op->apps[n].maxprocs = app->maxprocs; + if (0 < (op->apps[n].ninfo = opal_list_get_size(&app->info))) { + PMIX_INFO_CREATE(op->apps[n].info, op->apps[n].ninfo); + m=0; + OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { + (void)strncpy(op->apps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->apps[n].info[m].value, info); + ++m; + } + } + ++n; + } + + ret = PMIx_Spawn_nb(op->info, op->ninfo, op->apps, op->sz, spcbfunc, op); + + return pmix1_convert_rc(ret); +} + +int pmix1_connect(opal_list_t *procs) +{ + pmix_status_t ret; + pmix_proc_t *parray=NULL; + size_t n, cnt=0; + opal_namelist_t *ptr; + opal_pmix1_jobid_trkr_t *job, *jptr; + + /* protect against bozo error */ + if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + return OPAL_ERR_BAD_PARAM; + } + + /* convert the list of 
procs to an array
+     * of pmix_proc_t */
+    PMIX_PROC_CREATE(parray, cnt);
+    n=0;
+    OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == ptr->name.jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
+            /* don't leak the array built so far */
+            PMIX_PROC_FREE(parray, cnt);
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+        if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
+            parray[n].rank = PMIX_RANK_WILDCARD;
+        } else {
+            parray[n].rank = ptr->name.vpid;
+        }
+        ++n;
+    }
+
+    ret = PMIx_Connect(parray, cnt, NULL, 0);
+    PMIX_PROC_FREE(parray, cnt);
+
+    return pmix1_convert_rc(ret);
+}
+
+/*
+ * Non-blocking connect of the processes named on "procs".
+ *
+ * procs   list of opal_namelist_t; NULL or empty -> OPAL_ERR_BAD_PARAM.
+ *         Every jobid must be in the component's jobid tracker list,
+ *         otherwise OPAL_ERR_NOT_FOUND is returned (same contract as
+ *         pmix1_connect).
+ * cbfunc  completion callback stored in the caddy
+ * cbdata  passed back to cbfunc untouched
+ *
+ * Returns the converted PMIx_Connect_nb status.  The caddy is released
+ * here on every failure path.
+ */
+int pmix1_connectnb(opal_list_t *procs,
+                    opal_pmix_op_cbfunc_t cbfunc,
+                    void *cbdata)
+{
+    pmix_status_t ret;
+    size_t n, cnt=0;
+    opal_namelist_t *ptr;
+    pmix1_opcaddy_t *op;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    /* protect against bozo error */
+    if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->opcbfunc = cbfunc;
+    op->cbdata = cbdata;
+    op->nprocs = cnt;
+
+    /* convert the list of procs to an array
+     * of pmix_proc_t */
+    PMIX_PROC_CREATE(op->procs, op->nprocs);
+    n=0;
+    OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == ptr->name.jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            /* an unknown jobid used to go down with an empty nspace;
+             * report it instead, as pmix1_connect does */
+            OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
+            OBJ_RELEASE(op);
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+        if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
+            op->procs[n].rank = PMIX_RANK_WILDCARD;
+        } else {
+            op->procs[n].rank = ptr->name.vpid;
+        }
+        ++n;
+    }
+
+    ret = PMIx_Connect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op);
+    if (PMIX_SUCCESS != ret) {
+        OBJ_RELEASE(op);
+    }
+
+    return pmix1_convert_rc(ret);
+}
+
+/*
+ * Blocking disconnect of the processes named on "procs".
+ *
+ * procs  list of opal_namelist_t; NULL or empty -> OPAL_ERR_BAD_PARAM.
+ *        Every jobid must be in the component's jobid tracker list,
+ *        otherwise OPAL_ERR_NOT_FOUND is returned.
+ *
+ * Returns the converted PMIx_Disconnect status.
+ */
+int pmix1_disconnect(opal_list_t *procs)
+{
+    pmix_status_t ret;
+    pmix_proc_t *parray=NULL;
+    size_t n, cnt=0;
+    opal_namelist_t *ptr;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    /* protect against bozo error */
+    if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    /* convert the list of procs to an array
+     * of pmix_proc_t */
+    PMIX_PROC_CREATE(parray, cnt);
+    n=0;
+    OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == ptr->name.jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            /* an unknown jobid used to go down with an empty nspace;
+             * report it instead, as pmix1_connect does */
+            OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
+            PMIX_PROC_FREE(parray, cnt);
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+        if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
+            parray[n].rank = PMIX_RANK_WILDCARD;
+        } else {
+            parray[n].rank = ptr->name.vpid;
+        }
+        ++n;
+    }
+
+    ret = PMIx_Disconnect(parray, cnt, NULL, 0);
+    PMIX_PROC_FREE(parray, cnt);
+
+    return pmix1_convert_rc(ret);
+}
+
+/*
+ * Non-blocking disconnect of the processes named on "procs".
+ *
+ * Argument handling matches pmix1_connectnb(): NULL or empty list ->
+ * OPAL_ERR_BAD_PARAM, unknown jobid -> OPAL_ERR_NOT_FOUND (the caddy is
+ * released first).
+ */
+int pmix1_disconnectnb(opal_list_t *procs,
+                       opal_pmix_op_cbfunc_t cbfunc,
+                       void *cbdata)
+{
+    pmix_status_t ret;
+    size_t n, cnt=0;
+    opal_namelist_t *ptr;
+    pmix1_opcaddy_t *op;
+    opal_pmix1_jobid_trkr_t *job, *jptr;
+
+    /* protect against bozo error */
+    if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    /* create the caddy */
+    op = OBJ_NEW(pmix1_opcaddy_t);
+    op->opcbfunc = cbfunc;
+    op->cbdata = cbdata;
+    op->nprocs = cnt;
+
+    /* convert the list of procs to an array
+     * of pmix_proc_t */
+    PMIX_PROC_CREATE(op->procs, op->nprocs);
+    n=0;
+    OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
+        /* look thru our list of jobids and find the
+         * corresponding nspace */
+        job = NULL;
+        OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) {
+            if (jptr->jobid == ptr->name.jobid) {
+                job = jptr;
+                break;
+            }
+        }
+        if (NULL == job) {
+            /* an unknown jobid used to go down with an empty nspace;
+             * report it instead, as pmix1_connect does */
+            OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
+            OBJ_RELEASE(op);
+            return OPAL_ERR_NOT_FOUND;
+        }
+        (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN);
+        if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
+            op->procs[n].rank =
PMIX_RANK_WILDCARD; + } else { + op->procs[n].rank = ptr->name.vpid; + } + ++n; + } + + ret = PMIx_Disconnect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); + + return pmix1_convert_rc(ret); +} + + +int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, + opal_list_t *procs) +{ + char *nspace; + pmix_proc_t *array=NULL; + size_t nprocs, n; + opal_namelist_t *nm; + int rc; + pmix_status_t ret; + opal_pmix1_jobid_trkr_t *job, *jptr; + + if (OPAL_JOBID_WILDCARD == jobid) { + nspace = NULL; + } else { + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + nspace = job->nspace; + } + + ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs); + rc = pmix1_convert_rc(ret); + + if (NULL != array && 0 < nprocs) { + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(procs, &nm->super); + if (mca_pmix_ext20_component.native_launch) { + /* if we were launched by the OMPI RTE, then + * the jobid is in a special format - so get it */ + opal_convert_string_to_jobid(&nm->name.jobid, array[n].nspace); + } else { + /* we were launched by someone else, so make the + * jobid just be the hash of the nspace */ + OPAL_HASH_STR(array[n].nspace, nm->name.jobid); + } + /* if we don't already have it, add this to our jobid tracker */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == nm->name.jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_ext20_component.jobids, &job->super); + } + nm->name.vpid = array[n].rank; + } + } + PMIX_PROC_FREE(array, nprocs); + + return rc; +} + +int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist) +{ + pmix_status_t ret; 
+ char *nspace=NULL; + opal_pmix1_jobid_trkr_t *job, *jptr; + + if (OPAL_JOBID_WILDCARD != jobid) { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + job = jptr; + break; + } + } + if (NULL == job) { + return OPAL_ERR_NOT_FOUND; + } + nspace = job->nspace; + } + + ret = PMIx_Resolve_nodes(nspace, nodelist); + + return pmix1_convert_rc(ret);; +} diff --git a/opal/mca/pmix/ext20/pmix_ext20_component.c b/opal/mca/pmix/ext20/pmix_ext20_component.c new file mode 100644 index 00000000000..52377e549ab --- /dev/null +++ b/opal/mca/pmix/ext20/pmix_ext20_component.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/class/opal_list.h" +#include "opal/util/proc.h" +#include "opal/mca/pmix/pmix.h" +#include "pmix_ext20.h" + +/* + * Public string showing the pmix external component version number + */ +const char *opal_pmix_ext20_component_version_string = + "OPAL external pmix2.0 MCA component version " OPAL_VERSION; + +/* + * Local function + */ +static int external_open(void); +static int external_close(void); +static int external_component_query(mca_base_module_t **module, int *priority); + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +mca_pmix_ext20_component_t mca_pmix_ext20_component = { + { + /* First, the mca_component_t struct containing meta information + about the component itself */ + + .base_version = { + /* Indicate that we are a pmix v1.1.0 component (which also + implies a specific MCA version) */ + + OPAL_PMIX_BASE_VERSION_2_0_0, + + /* Component name and version */ + + .mca_component_name = "ext20", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + + /* Component open and close functions */ + + .mca_open_component = external_open, + .mca_close_component = external_close, + .mca_query_component = external_component_query, + }, + /* Next the MCA v1.0.0 component meta data */ + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } + }, + .native_launch = false +}; + +static int external_open(void) +{ + OBJ_CONSTRUCT(&mca_pmix_ext20_component.jobids, opal_list_t); + return OPAL_SUCCESS; +} + +static int external_close(void) +{ + OPAL_LIST_DESTRUCT(&mca_pmix_ext20_component.jobids); + return OPAL_SUCCESS; +} + + +static int external_component_query(mca_base_module_t **module, int *priority) +{ + char *t, *id; + + /* see if a PMIx server is present */ + if (NULL != (t = getenv("PMIX_SERVER_URI")) || + NULL != 
(id = getenv("PMIX_ID"))) { + /* if PMIx is present, then we are a client and need to use it */ + *priority = 100; + } else { + /* we could be a server, so we still need to be considered */ + *priority = 5; + } + *module = (mca_base_module_t *)&opal_pmix_ext20_module; + return OPAL_SUCCESS; +} diff --git a/opal/mca/pmix/ext20/pmix_ext20_server_north.c b/opal/mca/pmix/ext20/pmix_ext20_server_north.c new file mode 100644 index 00000000000..705f594f6b0 --- /dev/null +++ b/opal/mca/pmix/ext20/pmix_ext20_server_north.c @@ -0,0 +1,796 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/dss/dss.h" +#include "opal/mca/event/event.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress_threads.h" +#include "opal/util/argv.h" +#include "opal/util/error.h" +#include "opal/util/output.h" +#include "opal/util/proc.h" +#include "opal/util/show_help.h" +#include "opal/mca/pmix/base/base.h" +#include "pmix_ext20.h" + +#include "pmix.h" +#include "pmix_server.h" + +/**** N.O.R.T.H.B.O.U.N.D I.N.T.E.R.F.A.C.E.S ****/ + +/* These are the interfaces used by the embedded PMIx server + * to call up into ORTE for service requests */ + + static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object); + static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); + static pmix_status_t 
server_abort_fn(const pmix_proc_t *proc, void *server_object,
+                                      int status, const char msg[],
+                                      pmix_proc_t procs[], size_t nprocs,
+                                      pmix_op_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs,
+                                        const pmix_info_t info[], size_t ninfo,
+                                        char *data, size_t ndata,
+                                        pmix_modex_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc,
+                                           const pmix_info_t info[], size_t ninfo,
+                                           pmix_modex_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_publish_fn(const pmix_proc_t *proc,
+                                        const pmix_info_t info[], size_t ninfo,
+                                        pmix_op_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys,
+                                       const pmix_info_t info[], size_t ninfo,
+                                       pmix_lookup_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys,
+                                          const pmix_info_t info[], size_t ninfo,
+                                          pmix_op_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_spawn_fn(const pmix_proc_t *proc,
+                                      const pmix_info_t job_info[], size_t ninfo,
+                                      const pmix_app_t apps[], size_t napps,
+                                      pmix_spawn_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs,
+                                        const pmix_info_t info[], size_t ninfo,
+                                        pmix_op_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs,
+                                           const pmix_info_t info[], size_t ninfo,
+                                           pmix_op_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes,
+                                             const pmix_info_t info[], size_t ninfo,
+                                             pmix_op_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes,
+                                               pmix_op_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_notify_event(pmix_status_t code,
+                                          pmix_proc_t procs[], size_t nprocs,
+                                          pmix_info_t info[], size_t ninfo,
+                                          pmix_op_cbfunc_t cbfunc, void *cbdata);
+ static pmix_status_t server_listener_fn(int listening_sd,
+                                         pmix_connection_cbfunc_t cbfunc);
+
+ /* Table of upcall entry points: every member is set to the static
+  * server_* function of the matching name declared above. */
+ pmix_server_module_t mymodule = {
+    .client_connected = server_client_connected_fn,
+    .client_finalized = server_client_finalized_fn,
+    .abort = server_abort_fn,
+    .fence_nb = server_fencenb_fn,
+    .direct_modex = server_dmodex_req_fn,
+    .publish = server_publish_fn,
+    .lookup = server_lookup_fn,
+    .unpublish = server_unpublish_fn,
+    .spawn = server_spawn_fn,
+    .connect = server_connect_fn,
+    .disconnect = server_disconnect_fn,
+    .register_events = server_register_events,
+    .deregister_events = server_deregister_events,
+    .notify_event = server_notify_event,
+    .listener = server_listener_fn
+};
+
+/* OPAL-level server module the upcalls below forward to.  It starts out
+ * NULL and every upcall checks for that before using it; it is not
+ * assigned anywhere in this part of the file. */
+opal_pmix_server_module_t *host_module = NULL;
+
+
+/*
+ * Completion callback handed to the host module by the upcalls below.
+ * If the caddy carries a PMIx op callback, it is invoked with the OPAL
+ * status converted by pmix1_convert_opalrc() and the saved cbdata; the
+ * caddy is then released in all cases.
+ */
+static void opal_opcbfunc(int status, void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata;
+
+    if (NULL != opalcaddy->opcbfunc) {
+        opalcaddy->opcbfunc(pmix1_convert_opalrc(status), opalcaddy->cbdata);
+    }
+    OBJ_RELEASE(opalcaddy);
+}
+
+/*
+ * Upcall: a client has connected.
+ *
+ * Returns PMIX_SUCCESS without doing anything when there is no host
+ * module or it has no client_connected entry.  Otherwise the PMIx
+ * nspace/rank is converted to an opal_process_name_t and passed to the
+ * host, and the host's return code is converted back to a PMIx status.
+ */
+static pmix_status_t server_client_connected_fn(const pmix_proc_t *p, void *server_object)
+{
+    int rc;
+    opal_process_name_t proc;
+
+    if (NULL == host_module || NULL == host_module->client_connected) {
+        return PMIX_SUCCESS;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) {
+        return pmix1_convert_opalrc(rc);
+    }
+    proc.vpid = p->rank;
+
+    /* pass it up */
+    rc = host_module->client_connected(&proc, server_object);
+    return pmix1_convert_opalrc(rc);
+}
+
+/*
+ * Upcall: a client has finalized.
+ *
+ * Returns PMIX_SUCCESS right away (cbfunc is not invoked on that path)
+ * when there is no host module or it has no client_finalized entry.
+ */
+static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* server_object,
+                                                pmix_op_cbfunc_t cbfunc, void *cbdata)
+{
+    int rc;
+    pmix1_opalcaddy_t *opalcaddy;
+    opal_process_name_t proc;
+
+    if (NULL == host_module || NULL == host_module->client_finalized) {
+        return PMIX_SUCCESS;
+    }
+
+    /* convert the nspace/rank to an opal_process_name_t */
+    if (OPAL_SUCCESS != (rc =
opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix1_convert_opalrc(rc); + } + proc.vpid = p->rank; + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* pass it up */ + rc = host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + return pmix1_convert_opalrc(rc); +} + +static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + size_t n; + opal_namelist_t *nm; + opal_process_name_t proc; + int rc; + pmix1_opalcaddy_t *opalcaddy; + + if (NULL == host_module || NULL == host_module->abort) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix1_convert_opalrc(rc); + } + proc.vpid = p->rank; + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_proc_t to the list of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == procs[n].rank) { + nm->name.vpid = OPAL_VPID_WILDCARD; + } else { + nm->name.vpid = procs[n].rank; + } + } + + /* pass it up */ + rc = host_module->abort(&proc, server_object, status, msg, + &opalcaddy->procs, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + return pmix1_convert_opalrc(rc); +} + +static void _data_release(void *cbdata) +{ + pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata; + + 
if (NULL != opalcaddy->odmdxfunc) {
+        opalcaddy->odmdxfunc(opalcaddy->ocbdata);
+    }
+    OBJ_RELEASE(opalcaddy);
+}
+
+/* Modex-data callback handed to the host module's fence_nb and
+ * direct_modex entry points.
+ *
+ * status               - completion code reported by the host (OPAL int)
+ * data, sz             - returned modex blob and its length
+ * cbdata               - the pmix1_opalcaddy_t created by the requester
+ * relcbfunc, relcbdata - release hook for the blob; stashed in the caddy
+ *                        and invoked later from _data_release
+ *
+ * The caddy is released by _data_release when a PMIx callback is
+ * registered, or right here when there is none. */
+static void opmdx_response(int status, const char *data, size_t sz, void *cbdata,
+                           opal_pmix_release_cbfunc_t relcbfunc, void *relcbdata)
+{
+    pmix_status_t rc;
+    pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata;
+
+    /* status is an OPAL code headed for a PMIx callback, so it needs the
+     * OPAL->PMIx converter, as in opal_opcbfunc/opal_lkupcbfunc/opal_spncbfunc */
+    rc = pmix1_convert_opalrc(status);
+    if (NULL != opalcaddy->mdxcbfunc) {
+        opalcaddy->odmdxfunc = relcbfunc;
+        opalcaddy->ocbdata = relcbdata;
+        opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata,
+                             _data_release, opalcaddy);
+    } else {
+        /* NOTE(review): relcbfunc is never invoked on this path - confirm
+         * the host does not rely on it to reclaim "data" */
+        OBJ_RELEASE(opalcaddy);
+    }
+}
+
+static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs,
+                                       const pmix_info_t info[], size_t ninfo,
+                                       char *data, size_t ndata,
+                                       pmix_modex_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix1_opalcaddy_t *opalcaddy;
+    size_t n;
+    opal_namelist_t *nm;
+    opal_value_t *iptr;
+    int rc;
+
+    if (NULL == host_module || NULL == host_module->fence_nb) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
+    opalcaddy->mdxcbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the array of pmix_proc_t to the list of procs */
+    for (n=0; n < nprocs; n++) {
+        nm = OBJ_NEW(opal_namelist_t);
+        opal_list_append(&opalcaddy->procs, &nm->super);
+        if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+        if (PMIX_RANK_WILDCARD == procs[n].rank) {
+            nm->name.vpid = OPAL_VPID_WILDCARD;
+        } else {
+            nm->name.vpid = procs[n].rank;
+        }
+    }
+
+    /* convert the array of pmix_info_t to the list of info */
+    for (n=0; n < ninfo; n++) {
+        iptr = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &iptr->super);
+        iptr->key = strdup(info[n].key);
+        if (OPAL_SUCCESS != (rc = pmix1_value_unload(iptr, &info[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix1_convert_opalrc(rc);
+        }
+    }
+
+    /* pass it up */
+    rc = 
host_module->fence_nb(&opalcaddy->procs, &opalcaddy->info, + data, ndata, opmdx_response, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + return pmix1_convert_opalrc(rc); +} + +static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix1_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_value_t *iptr; + size_t n; + + if (NULL == host_module || NULL == host_module->direct_modex) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix1_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->mdxcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_info_t to the list of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix1_value_unload(iptr, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = host_module->direct_modex(&proc, &opalcaddy->info, opmdx_response, opalcaddy); + if (OPAL_SUCCESS != rc && OPAL_ERR_IN_PROCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + if (OPAL_ERR_IN_PROCESS == rc) { + rc = OPAL_SUCCESS; + } + return pmix1_convert_opalrc(rc); +} + +static pmix_status_t server_publish_fn(const pmix_proc_t *p, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + size_t n; + pmix1_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_value_t *oinfo; + + if (NULL == host_module || NULL == host_module->publish) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* 
convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix1_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the info array */ + for (n=0; n < ninfo; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = host_module->publish(&proc, &opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix1_convert_opalrc(rc); +} + +static void opal_lkupcbfunc(int status, + opal_list_t *data, + void *cbdata) +{ + pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata; + pmix_status_t rc; + pmix_pdata_t *d=NULL; + size_t nd=0, n; + opal_pmix_pdata_t *p; + + if (NULL != opalcaddy->lkupcbfunc) { + rc = pmix1_convert_opalrc(status); + /* convert any returned data */ + if (NULL != data) { + nd = opal_list_get_size(data); + PMIX_PDATA_CREATE(d, nd); + n=0; + OPAL_LIST_FOREACH(p, data, opal_pmix_pdata_t) { + /* convert the jobid */ + (void)opal_snprintf_jobid(d[n].proc.nspace, PMIX_MAX_NSLEN, p->proc.jobid); + d[n].proc.rank = p->proc.vpid; + (void)strncpy(d[n].key, p->value.key, PMIX_MAX_KEYLEN); + pmix1_value_load(&d[n].value, &p->value); + } + } + opalcaddy->lkupcbfunc(rc, d, nd, opalcaddy->cbdata); + } + OBJ_RELEASE(opalcaddy); +} + +static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix1_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + 
opal_value_t *iptr; + size_t n; + + if (NULL == host_module || NULL == host_module->lookup) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix1_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->lkupcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_info_t to the list of info */ + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix1_value_unload(iptr, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = host_module->lookup(&proc, keys, &opalcaddy->info, opal_lkupcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix1_convert_opalrc(rc); +} + + +static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix1_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_value_t *iptr; + size_t n; + + if (NULL == host_module || NULL == host_module->unpublish) { + return PMIX_SUCCESS; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix1_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_info_t to the list of info */ + for (n=0; n < ninfo; n++) { 
+ iptr = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix1_value_unload(iptr, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = host_module->unpublish(&proc, keys, &opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix1_convert_opalrc(rc); +} + +static void opal_spncbfunc(int status, opal_jobid_t jobid, void *cbdata) +{ + pmix1_opalcaddy_t *opalcaddy = (pmix1_opalcaddy_t*)cbdata; + pmix_status_t rc; + char nspace[PMIX_MAX_NSLEN]; + + if (NULL != opalcaddy->spwncbfunc) { + rc = pmix1_convert_opalrc(status); + /* convert the jobid */ + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); + opalcaddy->spwncbfunc(rc, nspace, opalcaddy->cbdata); + } + OBJ_RELEASE(opalcaddy); +} + +static pmix_status_t server_spawn_fn(const pmix_proc_t *p, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata) +{ + pmix1_opalcaddy_t *opalcaddy; + opal_process_name_t proc; + opal_pmix_app_t *app; + opal_value_t *oinfo; + size_t k, n; + int rc; + + if (NULL == host_module || NULL == host_module->spawn) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* convert the nspace/rank to an opal_process_name_t */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { + return pmix1_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == p->rank) { + proc.vpid = OPAL_VPID_WILDCARD; + } else { + proc.vpid = p->rank; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->spwncbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the job info */ + for (k=0; k < ninfo; k++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(job_info[k].key); + if (OPAL_SUCCESS != (rc = 
pmix1_value_unload(oinfo, &job_info[k].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + + /* convert the apps */ + for (n=0; n < napps; n++) { + app = OBJ_NEW(opal_pmix_app_t); + opal_list_append(&opalcaddy->apps, &app->super); + if (NULL != apps[n].cmd) { + app->cmd = strdup(apps[n].cmd); + } + app->argc = apps[n].argc; + if (NULL != apps[n].argv) { + app->argv = opal_argv_copy(apps[n].argv); + } + if (NULL != apps[n].env) { + app->env = opal_argv_copy(apps[n].env); + } + app->maxprocs = apps[n].maxprocs; + for (k=0; k < apps[n].ninfo; k++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&app->info, &oinfo->super); + oinfo->key = strdup(apps[n].info[k].key); + if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &apps[n].info[k].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + } + + /* pass it up */ + rc = host_module->spawn(&proc, &opalcaddy->info, &opalcaddy->apps, opal_spncbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + OBJ_RELEASE(opalcaddy); + } + + return pmix1_convert_opalrc(rc); +} + + +static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix1_opalcaddy_t *opalcaddy; + opal_namelist_t *nm; + size_t n; + opal_value_t *oinfo; + + if (NULL == host_module || NULL == host_module->connect) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_proc_t to the list of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == procs[n].rank) { + 
nm->name.vpid = OPAL_VPID_WILDCARD; + } else { + nm->name.vpid = procs[n].rank; + } + } + + /* convert the info */ + for (n=0; n < ninfo; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = host_module->connect(&opalcaddy->procs, &opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix1_convert_opalrc(rc); +} + + +static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + int rc; + pmix1_opalcaddy_t *opalcaddy; + opal_namelist_t *nm; + size_t n; + opal_value_t *oinfo; + + if (NULL == host_module || NULL == host_module->disconnect) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the array of pmix_proc_t to the list of procs */ + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + if (PMIX_RANK_WILDCARD == procs[n].rank) { + nm->name.vpid = OPAL_VPID_WILDCARD; + } else { + nm->name.vpid = procs[n].rank; + } + } + + /* convert the info */ + for (n=0; n < ninfo; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = host_module->disconnect(&opalcaddy->procs, 
&opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix1_convert_opalrc(rc); +} + +static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + pmix1_opalcaddy_t *opalcaddy; + size_t n; + opal_value_t *oinfo; + int rc; + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix1_opalcaddy_t); + opalcaddy->opcbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the info */ + for (n=0; n < ninfo; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + oinfo->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix1_value_unload(oinfo, &info[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix1_convert_opalrc(rc); + } + } + + /* pass it up */ + rc = host_module->register_events(&opalcaddy->info, opal_opcbfunc, opalcaddy); + if (OPAL_SUCCESS != rc) { + OBJ_RELEASE(opalcaddy); + } + + return pmix1_convert_opalrc(rc); +} + +static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_ERR_NOT_SUPPORTED; +} + +static pmix_status_t server_notify_event(pmix_status_t code, + pmix_proc_t procs[], size_t nprocs, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_ERR_NOT_SUPPORTED; +} + +static pmix_status_t server_listener_fn(int listening_sd, + pmix_connection_cbfunc_t cbfunc) +{ + int rc; + + if (NULL == host_module || NULL == host_module->listener) { + return PMIX_ERR_NOT_SUPPORTED; + } + + rc = host_module->listener(listening_sd, cbfunc); + return pmix1_convert_opalrc(rc); +} diff --git a/opal/mca/pmix/ext20/pmix_ext20_server_south.c b/opal/mca/pmix/ext20/pmix_ext20_server_south.c new file mode 100644 index 00000000000..af37525ee1b --- /dev/null +++ b/opal/mca/pmix/ext20/pmix_ext20_server_south.c @@ -0,0 +1,457 @@ +/* -*- Mode: C; 
c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2016 Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014      Mellanox Technologies, Inc.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+#include "opal/constants.h"
+#include "opal/types.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "opal/dss/dss.h"
+#include "opal/mca/event/event.h"
+#include "opal/mca/hwloc/base/base.h"
+#include "opal/runtime/opal.h"
+#include "opal/runtime/opal_progress_threads.h"
+#include "opal/util/argv.h"
+#include "opal/util/error.h"
+#include "opal/util/output.h"
+#include "opal/util/proc.h"
+#include "opal/util/show_help.h"
+#include "opal/mca/pmix/base/base.h"
+#include "pmix_ext20.h"
+
+#include "pmix.h"
+#include "pmix_server.h"
+
+/****    S.O.U.T.H.B.O.U.N.D   I.N.T.E.R.F.A.C.E.S     ****/
+
+/* These are the interfaces used by the OMPI/ORTE/OPAL layer to call
+ * down into the embedded PMIx server. 
*/ + +extern pmix_server_module_t mymodule; +extern opal_pmix_server_module_t *host_module; +static char *dbgvalue=NULL; +static size_t errhdler_ref = 0; + +#define PMIX_WAIT_FOR_COMPLETION(a) \ + do { \ + while ((a)) { \ + usleep(10); \ + } \ + } while (0) + +static void completion_handler (void *cbdata) { + bool *active = (bool *)cbdata; + *active = false; +} + +static void myerr(size_t evhdlr_registration_id, + pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_info_t info[], size_t ninfo, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + int rc; + opal_list_t plist, ilist; + opal_namelist_t *nm; + opal_value_t *iptr; + size_t n; + volatile bool active; + + /* convert the incoming status */ + rc = pmix1_convert_rc(status); + + /* convert the array of procs */ + OBJ_CONSTRUCT(&plist, opal_list_t); + for (n=0; n < nprocs; n++) { + nm = OBJ_NEW(opal_namelist_t); + nm->name.jobid = strtoul(procs[n].nspace, NULL, 10); + nm->name.vpid = procs[n].rank; + opal_list_append(&plist, &nm->super); + } + + /* convert the array of info */ + OBJ_CONSTRUCT(&ilist, opal_list_t); + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + iptr->key = strdup(info[n].key); + pmix1_value_unload(iptr, &info[n].value); + opal_list_append(&plist, &iptr->super); + } + + /* call the base errhandler */ + active = true; + opal_pmix_base_errhandler(rc, &plist, &ilist, completion_handler, (void *)&active); + PMIX_WAIT_FOR_COMPLETION(active); + + OPAL_LIST_DESTRUCT(&plist); + OPAL_LIST_DESTRUCT(&ilist); +} + +static void errreg_cbfunc (pmix_status_t status, + size_t errhandler_ref, + void *cbdata) +{ + errhdler_ref = errhandler_ref; + opal_output_verbose(5, opal_pmix_base_framework.framework_output, + "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu", + status, (unsigned long)errhandler_ref); +} + +int pmix1_server_init(opal_pmix_server_module_t *module, + opal_list_t *info) +{ + pmix_status_t rc; + int dbg; + opal_value_t *kv; 
+ pmix_info_t *pinfo; + size_t sz, n; + + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } + + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, kv); + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { + PMIX_INFO_FREE(pinfo, sz); + return pmix1_convert_rc(rc); + } + PMIX_INFO_FREE(pinfo, sz); + + /* record the host module */ + host_module = module; + + /* register the default event handler */ + PMIx_Register_event_handler(NULL, 0, NULL, 0, myerr, errreg_cbfunc, NULL); + + return OPAL_SUCCESS; +} + +int pmix1_server_finalize(void) +{ + pmix_status_t rc; + + /* deregister the default event handler */ + PMIx_Deregister_event_handler(errhdler_ref, NULL, NULL); + + rc = PMIx_server_finalize(); + return pmix1_convert_rc(rc); +} + +int pmix1_server_gen_regex(const char *input, char **regex) +{ + pmix_status_t rc; + + rc = PMIx_generate_regex(input, regex); + return pmix1_convert_rc(rc); +} + + +int pmix1_server_gen_ppn(const char *input, char **ppn) +{ + pmix_status_t rc; + + rc = PMIx_generate_ppn(input, ppn); + return pmix1_convert_rc(rc); +} + +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; + + if (NULL != op->opcbfunc) { + op->opcbfunc(pmix1_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + +int pmix1_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + opal_value_t *kv, *k2; + pmix_info_t *pinfo, *pmap; + size_t sz, szmap, m, n; + char nspace[PMIX_MAX_NSLEN]; + pmix_status_t rc; + pmix1_opcaddy_t *op; + 
opal_list_t *pmapinfo; + opal_pmix1_jobid_trkr_t *job; + + /* convert the jobid */ + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); + + /* store this job in our list of known nspaces */ + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); + job->jobid = jobid; + opal_list_append(&mca_pmix_ext20_component.jobids, &job->super); + + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) { + pinfo[n].value.type = PMIX_INFO_ARRAY; + /* the value contains a list of values - convert + * that list to another array */ + pmapinfo = (opal_list_t*)kv->data.ptr; + szmap = opal_list_get_size(pmapinfo); + PMIX_INFO_CREATE(pmap, szmap); + pinfo[n].value.data.array.array = (struct pmix_info_t*)pmap; + pinfo[n].value.data.array.size = szmap; + m = 0; + OPAL_LIST_FOREACH(k2, pmapinfo, opal_value_t) { + (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pmap[m].value, k2); + ++m; + } + } else { + pmix1_value_load(&pinfo[n].value, kv); + } + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + + /* setup the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->info = pinfo; + op->sz = sz; + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz, + opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix1_convert_rc(rc); +} + +void pmix1_server_deregister_nspace(opal_jobid_t jobid) +{ + opal_pmix1_jobid_trkr_t *jptr; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + /* found it - tell the server to deregister */ + PMIx_server_deregister_nspace(jptr->nspace); + /* now get rid 
of it from our list */ + opal_list_remove_item(&mca_pmix_ext20_component.jobids, &jptr->super); + OBJ_RELEASE(jptr); + return; + } + } +} + +int pmix1_server_register_client(const opal_process_name_t *proc, + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_status_t rc; + pmix1_opcaddy_t *op; + + /* setup the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + /* convert the jobid */ + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); + op->p.rank = proc->vpid; + + rc = PMIx_server_register_client(&op->p, uid, gid, server_object, + opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix1_convert_rc(rc); +} + +void pmix1_server_deregister_client(const opal_process_name_t *proc) +{ + opal_pmix1_jobid_trkr_t *jptr; + pmix_proc_t p; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext20_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + /* found it - tell the server to deregister */ + (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); + p.rank = proc->vpid; + PMIx_server_deregister_client(&p); + return; + } + } +} + + +int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env) +{ + pmix_status_t rc; + pmix_proc_t p; + + /* convert the jobid */ + (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); + p.rank = proc->vpid; + + rc = PMIx_server_setup_fork(&p, env); + return pmix1_convert_rc(rc); +} + +/* this is the call back up from the embedded PMIx server that + * will contain the returned data. 
Note that the embedded server + * "owns" the data and will free it upon return from this function */ +static void dmdx_response(pmix_status_t status, char *data, size_t sz, void *cbdata) +{ + int rc; + pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; + + rc = pmix1_convert_rc(status); + if (NULL != op->mdxcbfunc) { + op->mdxcbfunc(rc, data, sz, op->cbdata, NULL, NULL); + } + OBJ_RELEASE(op); +} + +int pmix1_server_dmodex(const opal_process_name_t *proc, + opal_pmix_modex_cbfunc_t cbfunc, void *cbdata) +{ + pmix1_opcaddy_t *op; + pmix_status_t rc; + + /* setup the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->mdxcbfunc = cbfunc; + op->cbdata = cbdata; + + /* convert the jobid */ + (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); + op->p.rank = proc->vpid; + + /* find the internally-cached data for this proc */ + rc = PMIx_server_dmodex_request(&op->p, dmdx_response, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix1_convert_rc(rc); +} + +int pmix1_server_notify_error(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, psz, esz, n; + pmix_proc_t *ps, *eps; + pmix_status_t rc; + pmix1_opcaddy_t *op; + opal_namelist_t *nm; + + /* convert the list of procs */ + if (NULL != procs) { + psz = opal_list_get_size(procs); + PMIX_PROC_CREATE(ps, psz); + n = 0; + OPAL_LIST_FOREACH(nm, procs, opal_namelist_t) { + (void)opal_snprintf_jobid(ps[n].nspace, PMIX_MAX_NSLEN, nm->name.jobid); + ps[n].rank = (int)nm->name.vpid; + ++n; + } + } else { + psz = 0; + ps = NULL; + } + if (NULL != error_procs) { + esz = opal_list_get_size(error_procs); + PMIX_PROC_CREATE(eps, esz); + n = 0; + OPAL_LIST_FOREACH(nm, error_procs, opal_namelist_t) { + (void)opal_snprintf_jobid(eps[n].nspace, PMIX_MAX_NSLEN, nm->name.jobid); + eps[n].rank = (int)nm->name.vpid; + ++n; + } + } else { + esz = 0; + eps = NULL; + } + + /* convert 
the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, kv); + } + } else { + sz = 0; + pinfo = NULL; + } + + /* setup the caddy */ + op = OBJ_NEW(pmix1_opcaddy_t); + op->procs = ps; + op->nprocs = psz; + op->error_procs = eps; + op->nerror_procs = esz; + op->info = pinfo; + op->sz = sz; + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + rc = pmix1_convert_opalrc(status); + rc = PMIx_Notify_event(rc, ps, psz, eps, esz, + pinfo, sz, opcbfunc, op); + if (PMIX_SUCCESS != rc) { + OBJ_RELEASE(op); + } + return pmix1_convert_rc(rc); +} diff --git a/opal/mca/pmix/pmix_server.h b/opal/mca/pmix/pmix_server.h index 28d6c4fdc46..e7e73ff8c33 100644 --- a/opal/mca/pmix/pmix_server.h +++ b/opal/mca/pmix/pmix_server.h @@ -162,17 +162,25 @@ typedef int (*opal_pmix_server_disconnect_fn_t)(opal_list_t *procs, opal_list_t * manager may have access to events beyond process failure. In cases where * the client application requests to be notified of such events, the request * will be passed to the PMIx server, which in turn shall pass the request to - * the resource manager. The list of opal_value_t will describe the - * desired events */ + * the resource manager. The list of opal_value_t will provide the OPAL + * error codes corresponding to the desired events */ typedef int (*opal_pmix_server_register_events_fn_t)(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -/* Deregister from the specified events. */ +/* Deregister from the specified events. 
The list of opal_value_t will provide the OPAL + * error codes corresponding to the desired events */ typedef int (*opal_pmix_server_deregister_events_fn_t)(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Notify the specified processes of an event generated either by + * the PMIx server itself, or by one of its local clients. The RTE + * is requested to pass the notification to each PMIx server that + * hosts one or more of the specified processes */ +typedef int (*opal_pmix_server_notify_fn_t)(int code, opal_list_t *procs, opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + /* Callback function for incoming connection requests from * local clients */ typedef void (*opal_pmix_connection_cbfunc_t)(int incoming_sd); @@ -204,6 +212,7 @@ typedef struct opal_pmix_server_module_1_0_0_t { opal_pmix_server_disconnect_fn_t disconnect; opal_pmix_server_register_events_fn_t register_events; opal_pmix_server_deregister_events_fn_t deregister_events; + opal_pmix_server_notify_fn_t notify_event; opal_pmix_server_listener_fn_t listener; } opal_pmix_server_module_t;