diff --git a/.gitignore b/.gitignore index b3524bc3ff3..f5fbba37f6a 100644 --- a/.gitignore +++ b/.gitignore @@ -306,6 +306,7 @@ opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h opal/mca/pmix/pmix112/pmix/include/private/autogen/config.h opal/mca/pmix/pmix112/pmix/include/private/autogen/config.h.in opal/mca/pmix/pmix112/pmix/include/pmix/autogen/config.h.in +opal/mca/pmix/pmix112/pmix/include/pmix_version.h opal/tools/opal-checkpoint/opal-checkpoint opal/tools/opal-checkpoint/opal-checkpoint.1 diff --git a/config/ompi_find_mpi_aint_count_offset.m4 b/config/ompi_find_mpi_aint_count_offset.m4 index b84dad355c6..d16bcc79766 100644 --- a/config/ompi_find_mpi_aint_count_offset.m4 +++ b/config/ompi_find_mpi_aint_count_offset.m4 @@ -17,6 +17,7 @@ # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -29,7 +30,7 @@ AC_DEFUN([OMPI_FIND_MPI_AINT_COUNT_OFFSET],[ _OMPI_FIND_MPI_COUNT_TYPE _OMPI_FIND_MPI_OFFSET_TYPE - if test "$ompi_fortran_happy" == "1" && \ + if test "$ompi_fortran_happy" = "1" && \ test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS"; then _OMPI_FIND_MPI_INTEGER_KIND _OMPI_FIND_MPI_ADDRESS_KIND diff --git a/config/opal_check_cray_pmi.m4 b/config/opal_check_cray_pmi.m4 index 9789aba050a..8e3dfee58f3 100644 --- a/config/opal_check_cray_pmi.m4 +++ b/config/opal_check_cray_pmi.m4 @@ -13,7 +13,7 @@ dnl All rights reserved. dnl Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights dnl reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. dnl Copyright (c) 2014-2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ @@ -50,7 +50,7 @@ AC_DEFUN([OPAL_CHECK_CRAY_PMI_EXPLICIT],[ # this logic assumes knowledge about all the dependencies of the Cray PMI library, # something that Cray doesn't generally document # - AS_IF([test "$enable_static" == "yes"], + AS_IF([test "$enable_static" = "yes"], [AS_IF([test -d /usr/lib/alps], [AC_MSG_RESULT([Detected presense of /usr/lib/alps]) CRAY_PMI_LDFLAGS="$CRAY_PMI_LDFLAGS -L/usr/lib/alps -lalpslli -lalpsutil" diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index c839a23786b..bb1df6453cf 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -13,9 +13,10 @@ # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. # Copyright (c) 2014-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -226,11 +227,7 @@ AC_DEFUN([OPAL_CHECK_PMI],[ AC_DEFUN([OPAL_CHECK_PMIX],[ - opal_pmix_ext_CPPFLAGS= - opal_pmix_ext_LDFLAGS= - opal_pmix_ext_LIBS= - - OPAL_VAR_SCOPE_PUSH([pmix_ext_install_dir]) + OPAL_VAR_SCOPE_PUSH([opal_external_pmix_save_CPPFLAGS opal_external_pmix_save_LDFLAGS opal_external_pmix_save_LIBS]) AC_ARG_WITH([pmix], [AC_HELP_STRING([--with-pmix(=DIR)], @@ -245,30 +242,93 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AC_MSG_CHECKING([if user requested external PMIx support($with_pmix)]) AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"], [AC_MSG_RESULT([no]) - opal_external_pmix_happy="no"], + opal_external_pmix_happy=no], + [AC_MSG_RESULT([yes]) # check for external pmix lib */ AS_IF([test "$with_pmix" = "external"], [pmix_ext_install_dir=/usr], [pmix_ext_install_dir=$with_pmix]) - AC_MSG_CHECKING([if external component can be used]) - OPAL_CHECK_PACKAGE([opal_pmix_ext], - [pmix.h], - [pmix], - [PMIx_Init], - [], - [$pmix_ext_install_dir], - [], - [AC_MSG_RESULT([PMIx external support will be built]) - opal_external_pmix_happy=yes], - [AC_MSG_RESULT([no]) - AC_MSG_WARN([External PMIx support was requested but failed]) - AC_MSG_WARN([as explained above.]) - AC_MSG_ERROR([Cannot continue])]) - ]) - AC_SUBST(opal_pmix_ext_CPPFLAGS) - AC_SUBST(opal_pmix_ext_LDFLAGS) - AC_SUBST(opal_pmix_ext_LIBS) + + # Make sure we have the headers and libs in the correct location + OPAL_CHECK_WITHDIR([external-pmix], [$pmix_ext_install_dir/include], [pmix.h]) + OPAL_CHECK_WITHDIR([external-libpmix], [$pmix_ext_install_dir/lib], [libpmix.*]) + + # check the version + opal_external_pmix_save_CPPFLAGS=$CPPFLAGS + opal_external_pmix_save_LDFLAGS=$LDFLAGS + opal_external_pmix_save_LIBS=$LIBS + + # if the pmix_version.h file does not exist, then + # this must be from a pre-1.1.5 version + AC_MSG_CHECKING([PMIx version]) + CPPFLAGS="-I$pmix_ext_install_dir/include $CPPFLAGS" + AS_IF([test "x`ls 
$pmix_ext_install_dir/include/pmix_version.h 2> /dev/null`" = "x"], + [AC_MSG_RESULT([version file not found - assuming v1.1.4]) + opal_external_pmix_version_found=1 + opal_external_pmix_version=114], + [AC_MSG_RESULT([version file found]) + opal_external_pmix_version_found=0]) + + # if it does exist, then we need to parse it to find + # the actual release series + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 3x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR != 3L) + #error "not version 3" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=3X + opal_external_pmix_version_found=1 + AC_MSG_WARN([This version of Open MPI does not support PMIx version 2.x and later]) + AC_MSG_ERROR([Cannot continue])], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 2x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR != 2L) + #error "not version 2" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=2X + opal_external_pmix_version_found=1 + AC_MSG_WARN([This version of Open MPI does not support PMIx version 2.x and later]) + AC_MSG_ERROR([Cannot continue])], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "$opal_external_pmix_version_found" = "0"], + [AC_MSG_CHECKING([version 1x]) + AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #include + #if (PMIX_VERSION_MAJOR != 1L) + #error "not version 1" + #endif + ], [])], + [AC_MSG_RESULT([found]) + opal_external_pmix_version=1X + opal_external_pmix_version_found=1], + [AC_MSG_RESULT([not found])])]) + + AS_IF([test "x$opal_external_pmix_version" = "x"], + [AC_MSG_WARN([External PMIx support requested, but version]) + AC_MSG_WARN([information of the external lib could not]) + AC_MSG_WARN([be detected]) + AC_MSG_ERROR([cannot continue])]) + + CPPFLAGS=$opal_external_pmix_save_CPPFLAGS + LDFLAGS=$opal_external_pmix_save_LDFLAGS + 
LIBS=$opal_external_pmix_save_LIBS + + opal_external_pmix_CPPFLAGS="-I$pmix_ext_install_dir/include" + opal_external_pmix_LDFLAGS=-L$pmix_ext_install_dir/lib + opal_external_pmix_LIBS=-lpmix + opal_external_pmix_happy=yes]) OPAL_VAR_SCOPE_POP ]) diff --git a/configure.ac b/configure.ac index 689c2430eb9..eefecef2b1a 100644 --- a/configure.ac +++ b/configure.ac @@ -253,6 +253,7 @@ m4_ifdef([project_oshmem], OPAL_CONFIGURE_OPTIONS OPAL_CHECK_OS_FLAVORS OPAL_CHECK_CUDA +OPAL_CHECK_PMIX m4_ifdef([project_orte], [ORTE_CONFIGURE_OPTIONS]) m4_ifdef([project_ompi], [OMPI_CONFIGURE_OPTIONS]) m4_ifdef([project_oshmem], [OSHMEM_CONFIGURE_OPTIONS]) diff --git a/opal/mca/pmix/external/Makefile.am b/opal/mca/pmix/external/Makefile.am index 770850c6e3f..2b56cc619fc 100644 --- a/opal/mca/pmix/external/Makefile.am +++ b/opal/mca/pmix/external/Makefile.am @@ -34,15 +34,15 @@ mcacomponentdir = $(opallibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_pmix_external_la_SOURCES = $(sources) mca_pmix_external_la_CFLAGS = -mca_pmix_external_la_CPPFLAGS = $(opal_pmix_ext_CPPFLAGS) -mca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext_LDFLAGS) -mca_pmix_external_la_LIBADD = $(opal_pmix_ext_LIBS) \ +mca_pmix_external_la_CPPFLAGS = $(opal_pmix_external_CPPFLAGS) +mca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_external_LDFLAGS) +mca_pmix_external_la_LIBADD = $(opal_pmix_external_LIBS) \ $(OPAL_TOP_BUILDDIR)/opal/mca/event/lib@OPAL_LIB_PREFIX@mca_event.la \ $(OPAL_TOP_BUILDDIR)/opal/mca/hwloc/lib@OPAL_LIB_PREFIX@mca_hwloc.la noinst_LTLIBRARIES = $(component_noinst) libmca_pmix_external_la_SOURCES =$(sources) libmca_pmix_external_la_CFLAGS = -libmca_pmix_external_la_CPPFLAGS = $(opal_pmix_ext_CPPFLAGS) -libmca_pmix_external_la_LDFLAGS = -module -avoid-version $(opal_pmix_ext_LDFLAGS) -libmca_pmix_external_la_LIBADD = $(opal_pmix_ext_LIBS) +libmca_pmix_external_la_CPPFLAGS = $(opal_pmix_external_CPPFLAGS) +libmca_pmix_external_la_LDFLAGS = -module 
-avoid-version $(opal_pmix_external_LDFLAGS) +libmca_pmix_external_la_LIBADD = $(opal_pmix_external_LIBS) diff --git a/opal/mca/pmix/external/configure.m4 b/opal/mca/pmix/external/configure.m4 index 779be3431c7..df3b022a51c 100644 --- a/opal/mca/pmix/external/configure.m4 +++ b/opal/mca/pmix/external/configure.m4 @@ -18,6 +18,7 @@ # and Technology (RIST). All rights reserved. # Copyright (c) 2014-2015 Mellanox Technologies, Inc. # All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -30,16 +31,41 @@ AC_DEFUN([MCA_opal_pmix_external_CONFIG],[ AC_CONFIG_FILES([opal/mca/pmix/external/Makefile]) - AC_REQUIRE([OPAL_CHECK_PMIX]) - AS_IF([test "$opal_external_pmix_happy" = "yes"], - [AS_IF([test "$opal_event_external_want" != "yes" || test "$opal_hwloc_external_support" != "yes"], + [AS_IF([test "$opal_event_external_support" != "yes"], [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) - AC_MSG_WARN([AND EXTERNAL HWLOC LIBRARIES. THESE LIBRARIES MUST POINT]) - AC_MSG_WARN([TO THE SAME ONES USED TO BUILD PMIX OR ELSE UNPREDICTABLE]) - AC_MSG_WARN([BEHAVIOR MAY RESULT]) + AC_MSG_WARN([LIBRARY. 
THIS LIBRARY MUST POINT TO THE SAME ONE USED]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + AS_IF([test "$opal_hwloc_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL HWLOC]) + AC_MSG_WARN([LIBRARY THIS LIBRARY MUST POINT TO THE SAME ONE USED ]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) - external_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi/$opal_pmix_external_basedir/pmix -I${includedir}/openmpi/$opal_pmix_external_basedir/pmix/include' - $1], + + # check for the 1.1.4 version + AC_MSG_CHECKING([if external component is version 1.1.4 or compatible]) + AS_IF([test "$opal_external_pmix_version" = "11" || + test "$opal_external_pmix_version" = "114" || + test "$opal_external_pmix_version" = "1X"], + [AC_MSG_RESULT([yes]) + opal_pmix_external_11_happy=yes], + [AC_MSG_RESULT([no]) + opal_pmix_external_11_happy=no]) + + AS_IF([test "$opal_pmix_external_11_happy" = "yes"], + [$1 + # need to set the wrapper flags for static builds + pmix_external_WRAPPER_EXTRA_LDFLAGS=$opal_external_pmix_LDFLAGS + pmix_external_WRAPPER_EXTRA_LIBS=$opal_external_pmix_LIBS], + [$2])], [$2]) + + opal_pmix_external_CPPFLAGS=$opal_external_pmix_CPPFLAGS + opal_pmix_external_LDFLAGS=$opal_external_pmix_LDFLAGS + opal_pmix_external_LIBS=$opal_external_pmix_LIBS + + AC_SUBST([opal_pmix_external_CPPFLAGS]) + AC_SUBST([opal_pmix_external_LDFLAGS]) + AC_SUBST([opal_pmix_external_LIBS]) ])dnl diff --git a/opal/mca/pmix/pmix112/configure.m4 b/opal/mca/pmix/pmix112/configure.m4 index 517d1b7a225..b2ea209dd53 100644 --- a/opal/mca/pmix/pmix112/configure.m4 +++ b/opal/mca/pmix/pmix112/configure.m4 @@ -16,6 +16,7 @@ # Copyright (c) 2013-2015 Intel, Inc. All rights reserved. 
# Copyright (c) 2015-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -38,7 +39,20 @@ AC_DEFUN([MCA_opal_pmix_pmix112_CONFIG],[ opal_pmix_pmix112_save_LDFLAGS=$LDFLAGS opal_pmix_pmix112_save_LIBS=$LIBS - opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" + AC_ARG_ENABLE([pmix-dstore], + [AC_HELP_STRING([--enable-pmix-dstore], + [Enable PMIx shared memory data store (default: disabled)])]) + AC_MSG_CHECKING([if PMIx shared memory data store is enabled]) + # the --enable-pmix-dstore value is stored by configure in enable_pmix_dstore + if test "$enable_pmix_dstore" = "yes"; then + AC_MSG_RESULT([yes]) + opal_pmix_pmix_sm_flag=--enable-dstore + else + AC_MSG_RESULT([no (disabled)]) + opal_pmix_pmix_sm_flag=--disable-dstore + fi + + opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ $opal_pmix_pmix_sm_flag --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" AS_IF([test "$enable_debug" = "yes"], [opal_pmix_pmix112_args="--enable-debug $opal_pmix_pmix112_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], diff --git a/opal/mca/pmix/pmix112/pmix/Makefile.am b/opal/mca/pmix/pmix112/pmix/Makefile.am index 7ca274d258c..500240b5916 100644 --- a/opal/mca/pmix/pmix112/pmix/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2015 Intel, Inc. All rights reserved +# Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved # $COPYRIGHT$ # # Additional copyrights may follow @@ -34,6 +34,7 @@ dist_pmixdata_DATA = if ! PMIX_EMBEDDED_MODE dist_pmixdata_DATA += contrib/pmix-valgrind.supp + man_MANS = \ man/man3/pmix_init.3 \ man/man3/pmix_finalize.3 \ @@ -59,6 +60,11 @@ include src/server/Makefile.am include src/sec/Makefile.am include src/common/Makefile.am +if WANT_DSTORE +include src/sm/Makefile.am +include src/dstore/Makefile.am +endif + if PMIX_EMBEDDED_MODE noinst_LTLIBRARIES = libpmix.la libpmix_la_SOURCES = $(headers) $(sources) diff --git a/opal/mca/pmix/pmix112/pmix/NEWS b/opal/mca/pmix/pmix112/pmix/NEWS index 9e05bc788c5..16b4ad8f86c 100644 --- a/opal/mca/pmix/pmix112/pmix/NEWS +++ b/opal/mca/pmix/pmix112/pmix/NEWS @@ -1,4 +1,5 @@ Copyright (c) 2015-2016 Intel, Inc. All rights reserved. +Copyright (c) 2016 IBM Corporation. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -20,8 +21,27 @@ example, a bug might be fixed in the master, and then moved to the current release as well as the "stable" bug fix release branch. -Master (not on release branches yet) ------------------------------------- +1.2.0 +----- +- Add shared memory data storage (dstore) option. 
Default: enabled + Configure option: --disable-dstore +- PMIx_Commit performance improvements +- Disable errhandler support +- Keep job info in the shared memory dstore +- PMIx_Get performance and memory improvements + + + +1.1.5 +----- +- Add pmix_version.h to support direct detection of PMIx library version +- Fix support for Solaris 10 by using abstract version of strnlen +- Fix native security module for Solaris by using getpeerucred in + that environment +- Ensure man pages don't get installed in embedded builds +- Pass temporary directory locations in info keys instead of + the environment + 1.1.4 ----- diff --git a/opal/mca/pmix/pmix112/pmix/VERSION b/opal/mca/pmix/pmix112/pmix/VERSION index a0b5894235b..1bda44c1099 100644 --- a/opal/mca/pmix/pmix112/pmix/VERSION +++ b/opal/mca/pmix/pmix112/pmix/VERSION @@ -4,6 +4,7 @@ # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. # This is the VERSION file for PMIx, describing the precise # version of PMIx in this distribution. The various components of @@ -14,8 +15,8 @@ # ... major=1 -minor=1 -release=5 +minor=2 +release=0 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -23,14 +24,14 @@ release=5 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek=a1 +greek= # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=git9ae61b8 +repo_rev=git33736edb # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +45,7 @@ tarball_version= # The date when this release was created -date="Aug 23, 2016" +date="Dec 14, 2016" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix112/pmix/config/pmix.m4 b/opal/mca/pmix/pmix112/pmix/config/pmix.m4 index 45cf99a12f9..87637bfbc4f 100644 --- a/opal/mca/pmix/pmix112/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix112/pmix/config/pmix.m4 @@ -17,8 +17,9 @@ dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. -dnl Copyright (c) 2015-2016 Research Organization for Information Science dnl Copyright (c) 2013-2016 Intel, Inc. All rights reserved +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2016 Mellanox Technologies, Inc. dnl All rights reserved. dnl Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -85,7 +86,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_MSG_RESULT([$PMIX_VERSION]) # Save the breakdown the version information - AC_MSG_CHECKING([for pmix major version]) PMIX_MAJOR_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --major`" if test "$?" != "0"; then AC_MSG_ERROR([Cannot continue]) @@ -94,7 +94,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_DEFINE_UNQUOTED([PMIX_MAJOR_VERSION], ["$PMIX_MAJOR_VERSION"], [The library major version is always available, contrary to VERSION]) - AC_MSG_CHECKING([for pmix minor version]) PMIX_MINOR_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --minor`" if test "$?" 
!= "0"; then AC_MSG_ERROR([Cannot continue]) @@ -103,7 +102,12 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_DEFINE_UNQUOTED([PMIX_MINOR_VERSION], ["$PMIX_MINOR_VERSION"], [The library minor version is always available, contrary to VERSION]) - AC_MSG_CHECKING([for pmix release version]) + pmixmajor=${PMIX_MAJOR_VERSION}L + pmixminor=${PMIX_MINOR_VERSION}L + AC_SUBST(pmixmajor) + AC_SUBST(pmixminor) + AC_CONFIG_FILES(pmix_config_prefix[include/pmix_version.h]) + PMIX_RELEASE_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --release`" if test "$?" != "0"; then AC_MSG_ERROR([Cannot continue]) @@ -318,7 +322,8 @@ AC_DEFUN([PMIX_SETUP_CORE],[ time.h unistd.h \ crt_externs.h signal.h \ ioLib.h sockLib.h hostLib.h limits.h \ - ucred.h]) + sys/statfs.h sys/statvfs.h \ + netdb.h ucred.h]) # Note that sometimes we have , but it doesn't work (e.g., # have both Portland and GNU installed; using pgcc will find GNU's @@ -495,7 +500,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib PMIX_SEARCH_LIBS_CORE([ceil], [m]) - AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep getpeereid getpeerucred strnlen]) + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen]) # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get # confused. On others, it's in the standard library, but stubbed with @@ -753,6 +758,26 @@ AC_DEFINE_UNQUOTED([PMIX_WANT_PRETTY_PRINT_STACKTRACE], [$WANT_PRETTY_PRINT_STACKTRACE], [if want pretty-print stack trace feature]) +# +# Do we want the shared memory datastore usage? 
+# + +AC_MSG_CHECKING([if want shared memory datastore]) +AC_ARG_ENABLE([dstore], + [AC_HELP_STRING([--disable-dstore], + [Using shared memory datastore (default: enabled)])]) +if test "$enable_dstore" = "no" ; then + AC_MSG_RESULT([no]) + WANT_DSTORE=0 +else + AC_MSG_RESULT([yes]) + WANT_DSTORE=1 +fi +AC_DEFINE_UNQUOTED([PMIX_ENABLE_DSTORE], + [$WANT_DSTORE], + [if want shared memory dstore feature]) +AM_CONDITIONAL([WANT_DSTORE],[test "x$enable_dstore" != "xno"]) + # # Ident string # @@ -810,6 +835,7 @@ AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[ AM_CONDITIONAL([PMIX_COMPILE_TIMING], [test "$WANT_TIMING" = "1"]) AM_CONDITIONAL([PMIX_WANT_MUNGE], [test "$pmix_munge_support" = "1"]) AM_CONDITIONAL([PMIX_WANT_SASL], [test "$pmix_sasl_support" = "1"]) + AM_CONDITIONAL([WANT_DSTORE],[test "x$enable_dstore" != "xno"]) ]) pmix_did_am_conditionals=yes ])dnl diff --git a/opal/mca/pmix/pmix112/pmix/examples/dynamic.c b/opal/mca/pmix/pmix112/pmix/examples/dynamic.c index 195c130e623..3da66d4302c 100644 --- a/opal/mca/pmix/pmix112/pmix/examples/dynamic.c +++ b/opal/mca/pmix/pmix112/pmix/examples/dynamic.c @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -45,7 +46,7 @@ int main(int argc, char **argv) uint32_t nprocs; char nsp2[PMIX_MAX_NSLEN+1]; pmix_app_t *app; - char hostname[PMIX_MAXHOSTNAMELEN], dir[1024]; + char hostname[MAXHOSTNAMELEN], dir[1024]; pmix_proc_t *peers; size_t npeers, ntmp=0; char *nodelist; diff --git a/opal/mca/pmix/pmix112/pmix/include/Makefile.am b/opal/mca/pmix/pmix112/pmix/include/Makefile.am index 9bcdf36c97d..9e774defd4c 100644 --- a/opal/mca/pmix/pmix112/pmix/include/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/include/Makefile.am @@ -17,11 +17,11 @@ include_HEADERS = \ include/pmix.h \ include/pmix_server.h \ include/pmi.h \ - include/pmi2.h + include/pmi2.h \ + include/pmix_version.h.in include_pmixdir = $(includedir)/pmix include_pmix_HEADERS = \ - include/pmix/rename.h \ include/pmix/pmix_common.h include_pmix_autogendir = 
$(includedir)/pmix/autogen @@ -32,4 +32,7 @@ include_pmix_autogen_HEADERS = \ nodist_include_pmix_autogen_HEADERS = \ include/pmix/autogen/config.h +nodist_include_HEADERS = \ + include/pmix_version.h + endif ! PMIX_EMBEDDED_MODE diff --git a/opal/mca/pmix/pmix112/pmix/include/pmix.h b/opal/mca/pmix/pmix112/pmix/include/pmix.h index 5c879eacc23..8a1da23ce27 100644 --- a/opal/mca/pmix/pmix112/pmix/include/pmix.h +++ b/opal/mca/pmix/pmix112/pmix/include/pmix.h @@ -46,16 +46,13 @@ #ifndef PMIx_H #define PMIx_H -#include - -/* Symbol transforms */ -#include - /* Structure and constant definitions */ #include -BEGIN_C_DECLS +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif /**** PMIX API ****/ @@ -391,5 +388,8 @@ pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace, * when done with it */ pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist); -END_C_DECLS +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + #endif diff --git a/opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h b/opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h index b3200eaded2..853e8849c63 100644 --- a/opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h +++ b/opal/mca/pmix/pmix112/pmix/include/pmix/pmix_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -46,9 +46,6 @@ #ifndef PMIx_COMMON_H #define PMIx_COMMON_H -#include -#include - #include #include #include @@ -56,8 +53,11 @@ #ifdef HAVE_SYS_TIME_H #include /* for struct timeval */ #endif +#include -BEGIN_C_DECLS +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif /**** PMIX CONSTANTS ****/ @@ -92,6 +92,7 @@ BEGIN_C_DECLS /* identification attributes */ #define PMIX_USERID "pmix.euid" // (uint32_t) effective user id #define PMIX_GRPID "pmix.egid" // (uint32_t) effective group id +#define PMIX_DSTPATH "pmix.dstpath" // (char*) path to dstore files /* attributes for the rendezvous socket */ #define PMIX_SOCKET_MODE "pmix.sockmode" // (uint32_t) POSIX mode_t (9 bits valid) @@ -118,7 +119,7 @@ BEGIN_C_DECLS #define PMIX_NPROC_OFFSET "pmix.offset" // (uint32_t) starting global rank of this job #define PMIX_LOCAL_RANK "pmix.lrank" // (uint16_t) rank on this node within this job #define PMIX_NODE_RANK "pmix.nrank" // (uint16_t) rank on this node spanning all jobs -#define PMIX_LOCALLDR "pmix.lldr" // (uint64_t) opal_identifier of lowest rank on this node within this job +#define PMIX_LOCALLDR "pmix.lldr" // (uint32_t) opal_identifier of lowest rank on this node within this job #define PMIX_APPLDR "pmix.aldr" // (uint32_t) lowest rank in this app within this job /* proc location-related info */ @@ -983,5 +984,8 @@ pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, #define PMIX_VAL_FREE(_v) \ PMIx_free_value_data(_v) -END_C_DECLS +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + #endif diff --git a/opal/mca/pmix/pmix112/pmix/include/pmix_server.h b/opal/mca/pmix/pmix112/pmix/include/pmix_server.h index b2fdacd929f..4d6a358340e 100644 --- a/opal/mca/pmix/pmix112/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix112/pmix/include/pmix_server.h @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science @@ -59,15 +59,12 @@ #ifndef PMIx_SERVER_API_H #define PMIx_SERVER_API_H -#include - -/* Symbol transforms */ -#include - /* Structure and constant definitions */ #include -BEGIN_C_DECLS +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif /**** SERVER FUNCTION-SHIPPED APIs ****/ /* NOTE: for performance purposes, the host server is required to @@ -420,6 +417,8 @@ pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, pmix_dmodex_response_fn_t cbfunc, void *cbdata); -END_C_DECLS +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif #endif diff --git a/opal/mca/pmix/pmix112/pmix/include/pmix_version.h.in b/opal/mca/pmix/pmix112/pmix/include/pmix_version.h.in new file mode 100644 index 00000000000..096b8955820 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/include/pmix_version.h.in @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#ifndef PMIx_VERSION_H +#define PMIx_VERSION_H + +/* define PMIx version */ +#define PMIX_VERSION_MAJOR @pmixmajor@ +#define PMIX_VERSION_MINOR @pmixminor@ + +#endif diff --git a/opal/mca/pmix/pmix112/pmix/src/class/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/class/Makefile.am index a173ff46d97..d40457bcfb9 100644 --- a/opal/mca/pmix/pmix112/pmix/src/class/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/src/class/Makefile.am @@ -26,10 +26,12 @@ headers += \ src/class/pmix_object.h \ src/class/pmix_list.h \ src/class/pmix_pointer_array.h \ - src/class/pmix_hash_table.h + src/class/pmix_hash_table.h \ + src/class/pmix_value_array.h sources += \ src/class/pmix_object.c \ src/class/pmix_list.c \ src/class/pmix_pointer_array.c \ - src/class/pmix_hash_table.c + src/class/pmix_hash_table.c \ + src/class/pmix_value_array.c diff --git a/opal/mca/pmix/pmix112/pmix/src/class/pmix_value_array.c b/opal/mca/pmix/pmix112/pmix/src/class/pmix_value_array.c new file mode 100644 index 00000000000..f46e494c381 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/class/pmix_value_array.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016 Intel, Inc. 
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include "src/class/pmix_value_array.h" + + +static void pmix_value_array_construct(pmix_value_array_t* array) +{ + array->array_items = NULL; + array->array_size = 0; + array->array_item_sizeof = 0; + array->array_alloc_size = 0; +} + +static void pmix_value_array_destruct(pmix_value_array_t* array) +{ + if (NULL != array->array_items) + free(array->array_items); +} + +PMIX_CLASS_INSTANCE( + pmix_value_array_t, + pmix_object_t, + pmix_value_array_construct, + pmix_value_array_destruct +); + + +/* + * Set the number of elements in use, growing the backing store when needed. + * The capacity doubles starting from at least one element, so an array whose + * array_alloc_size is zero still grows instead of looping forever. On + * allocation failure or size overflow the array is left unchanged and + * PMIX_ERR_OUT_OF_RESOURCE is returned. + */ +int pmix_value_array_set_size(pmix_value_array_t* array, size_t size) +{ + size_t new_alloc; + unsigned char *new_items; + +#if PMIX_ENABLE_DEBUG + if(array->array_item_sizeof == 0) { + pmix_output(0, "pmix_value_array_set_size: item size must be initialized"); + return PMIX_ERR_BAD_PARAM; + } +#endif + + if(size > array->array_alloc_size) { + new_alloc = (0 == array->array_alloc_size) ? 1 : array->array_alloc_size; + while(new_alloc < size) { + /* stop before doubling would wrap around to zero */ + if(new_alloc > ((size_t)-1) / 2) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + new_alloc <<= 1; + } + /* reject byte counts that do not fit in a size_t */ + if(0 != array->array_item_sizeof && + new_alloc > ((size_t)-1) / array->array_item_sizeof) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + /* realloc into a temporary so the old buffer survives a failure */ + new_items = (unsigned char *)realloc(array->array_items, + new_alloc * array->array_item_sizeof); + if (NULL == new_items) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + array->array_items = new_items; + array->array_alloc_size = new_alloc; + } + array->array_size = size; + return PMIX_SUCCESS; +} + diff --git a/opal/mca/pmix/pmix112/pmix/src/class/pmix_value_array.h b/opal/mca/pmix/pmix112/pmix/src/class/pmix_value_array.h new file mode 100644 index 00000000000..d78197bf5d6 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/class/pmix_value_array.h @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_VALUE_ARRAY_H +#define PMIX_VALUE_ARRAY_H + +#include + +#include +#ifdef HAVE_STRINGS_H +#include +#endif /* HAVE_STRINGS_H */ + +#include "src/class/pmix_object.h" +#if PMIX_ENABLE_DEBUG +#include "src/util/output.h" +#endif +#include + +BEGIN_C_DECLS + +/* + * @file Array of elements maintained by value. + */ + +struct pmix_value_array_t +{ + pmix_object_t super; + unsigned char* array_items; /* backing storage, (re)allocated with realloc */ + size_t array_item_sizeof; /* size in bytes of one element */ + size_t array_size; /* number of elements currently in use */ + size_t array_alloc_size; /* number of elements the storage can hold */ +}; +typedef struct pmix_value_array_t pmix_value_array_t; + +PMIX_CLASS_DECLARATION(pmix_value_array_t); + +/** + * Initialize the array to hold items by value. This routine must + * be called prior to using the array. + * + * @param array The array to initialize (IN). + * @param item_sizeof The size in bytes of each array element (IN). + * @return PMIX_SUCCESS, or PMIX_ERR_OUT_OF_RESOURCE if the allocation fails + * + * Note that there is no corresponding "finalize" function -- use + * OBJ_DESTRUCT (for stack arrays) or OBJ_RELEASE (for heap arrays) to + * delete it. + */ + +static inline int pmix_value_array_init(pmix_value_array_t *array, size_t item_sizeof) +{ + array->array_item_sizeof = item_sizeof; + array->array_alloc_size = 1; + array->array_size = 0; + array->array_items = (unsigned char*)realloc(array->array_items, item_sizeof * array->array_alloc_size); + return (NULL != array->array_items) ? PMIX_SUCCESS : PMIX_ERR_OUT_OF_RESOURCE; +} + + +/** + * Reserve space in the array for new elements, but do not change the size. + * + * @param array The input array (IN). + * @param size The anticipated size of the array (IN). + * @return PMIX error code. 
+ */
+
+static inline int pmix_value_array_reserve(pmix_value_array_t* array, size_t size)
+{
+    unsigned char *items;
+
+    if (size > array->array_alloc_size) {
+        /* the item size must have been set before storage can be sized */
+        if (0 == array->array_item_sizeof) {
+            return PMIX_ERR_BAD_PARAM;
+        }
+        /* refuse a byte count that does not fit in size_t */
+        if (size > ((size_t)-1) / array->array_item_sizeof) {
+            return PMIX_ERR_OUT_OF_RESOURCE;
+        }
+        /* keep the old pointer until realloc succeeds: on failure the
+         * existing elements stay valid and nothing is leaked */
+        items = (unsigned char*)realloc(array->array_items, array->array_item_sizeof * size);
+        if (NULL == items) {
+            return PMIX_ERR_OUT_OF_RESOURCE;
+        }
+        array->array_items = items;
+        array->array_alloc_size = size;
+    }
+    return PMIX_SUCCESS;
+}
+
+
+
+/**
+ * Retrieves the number of elements in the array.
+ *
+ * @param array The input array (IN).
+ * @return The number of elements currently in use.
+ */
+
+static inline size_t pmix_value_array_get_size(pmix_value_array_t* array)
+{
+    return array->array_size;
+}
+
+
+/**
+ * Set the number of elements in the array.
+ *
+ * @param array The input array (IN).
+ * @param size The new array size.
+ *
+ * @return PMIX error code.
+ *
+ * Note that resizing the array to a smaller size may not change
+ * the underlying memory allocated by the array. However, setting
+ * the size larger than the current allocation will grow it. In either
+ * case, if the routine is successful, pmix_value_array_get_size() will
+ * return the new size.
+ */
+
+int pmix_value_array_set_size(pmix_value_array_t* array, size_t size);
+
+
+/**
+ * Macro to retrieve an item from the array by value.
+ *
+ * @param array The input array (IN).
+ * @param item_type The C datatype of the array item (IN).
+ * @param item_index The array index (IN).
+ *
+ * @returns item The requested item.
+ *
+ * Note that this does not change the size of the array - this macro is
+ * strictly for performance - the user assumes the responsibility of
+ * ensuring the array index is valid (0 <= item index < array size).
+ */
+
+#define PMIX_VALUE_ARRAY_GET_ITEM(array, item_type, item_index) \
+    ((item_type*)((array)->array_items))[item_index]
+
+/**
+ * Retrieve an item from the array by reference.
+ *
+ * @param array The input array (IN).
+ * @param item_index The array index (IN).
+ *
+ * @return ptr Pointer to the requested item, or NULL if the array
+ *             could not be grown to contain it.
+ *
+ * Note that if the specified item_index is not less than the current
+ * array size, the array is grown to satisfy the request.
+ */
+
+static inline void* pmix_value_array_get_item(pmix_value_array_t *array, size_t item_index)
+{
+    if (item_index >= array->array_size) {
+        /* item_index+1 wraps to 0 for the largest size_t value, which
+         * would shrink the array to nothing instead of growing it */
+        if ((size_t)-1 == item_index ||
+            PMIX_SUCCESS != pmix_value_array_set_size(array, item_index + 1)) {
+            return NULL;
+        }
+    }
+    return array->array_items + (item_index * array->array_item_sizeof);
+}
+
+/**
+ * Macro to set an array element by value.
+ *
+ * @param array The input array (IN).
+ * @param item_type The C datatype of the array item (IN).
+ * @param item_index The array index (IN).
+ * @param item_value The new value for the specified index (IN).
+ *
+ * Note that this does not change the size of the array - this macro is
+ * strictly for performance - the user assumes the responsibility of
+ * ensuring the array index is valid (0 <= item index < array size).
+ *
+ * It is safe to free the item after returning from this call; it is
+ * copied into the array by value.
+ */
+
+#define PMIX_VALUE_ARRAY_SET_ITEM(array, item_type, item_index, item_value) \
+    (((item_type*)((array)->array_items))[item_index] = (item_value))
+
+/**
+ * Set an array element by value.
+ *
+ * @param array The input array (IN).
+ * @param item_index The array index (IN).
+ * @param item A pointer to the item, which is copied into
+ * the array.
+ *
+ * @return PMIX error code.
+ *
+ * It is safe to free the item after returning from this call; it is
+ * copied into the array by value.
+ */
+
+static inline int pmix_value_array_set_item(pmix_value_array_t *array, size_t item_index, const void* item)
+{
+    int rc;
+
+    if (item_index >= array->array_size) {
+        /* item_index+1 wraps to 0 for the largest size_t value; growing
+         * to "0" would succeed and the copy below would then land far
+         * outside the storage */
+        if ((size_t)-1 == item_index) {
+            return PMIX_ERR_BAD_PARAM;
+        }
+        rc = pmix_value_array_set_size(array, item_index + 1);
+        if (PMIX_SUCCESS != rc) {
+            return rc;
+        }
+    }
+    memcpy(array->array_items + (item_index * array->array_item_sizeof), item, array->array_item_sizeof);
+    return PMIX_SUCCESS;
+}
+
+
+/**
+ * Appends an item to the end of the array.
+ *
+ * @param array The input array (IN).
+ * @param item A pointer to the item to append, which is copied
+ * into the array.
+ *
+ * @return PMIX error code
+ *
+ * This will grow the array if it is not large enough to contain the
+ * item. It is safe to free the item after returning from this call;
+ * it is copied by value into the array.
+ */
+
+static inline int pmix_value_array_append_item(pmix_value_array_t *array, const void *item)
+{
+    /* writing at index == current size makes set_item grow the array by one */
+    return pmix_value_array_set_item(array, array->array_size, item);
+}
+
+
+/**
+ * Remove a specific item from the array.
+ *
+ * @param array The input array (IN).
+ * @param item_index The index to remove, which must be less than
+ * the current array size (IN).
+ *
+ * @return PMIX_SUCCESS, or PMIX_ERR_BAD_PARAM if item_index is not a
+ * valid index.
+ *
+ * All elements following this index are shifted down.
+ */
+
+static inline int pmix_value_array_remove_item(pmix_value_array_t *array, size_t item_index)
+{
+    /* always validate the index: with an out-of-range index the length
+     * computed for memmove below would wrap around to a huge value */
+    if (item_index >= array->array_size) {
+#if PMIX_ENABLE_DEBUG
+        pmix_output(0, "pmix_value_array_remove_item: invalid index %lu\n", (unsigned long)item_index);
+#endif
+        return PMIX_ERR_BAD_PARAM;
+    }
+    /* slide the tail of the array down over the removed element */
+    memmove(array->array_items+(array->array_item_sizeof * item_index),
+            array->array_items+(array->array_item_sizeof * (item_index+1)),
+            array->array_item_sizeof * (array->array_size - item_index - 1));
+    array->array_size--;
+    return PMIX_SUCCESS;
+}
+
+/**
+ * Get the base pointer of the underlying array.
+ *
+ * @param array The input array (IN).
+ * @param item_type The C datatype of the array (IN).
+ * + * @returns ptr Pointer to the actual array. + * + * This function is helpful when you need to iterate through an + * entire array; simply get the base value of the array and use native + * C to iterate through it manually. This can have better performance + * than looping over PMIX_VALUE_ARRAY_GET_ITEM() and + * PMIX_VALUE_ARRAY_SET_ITEM() because it will [potentially] reduce the + * number of pointer dereferences. + */ + +#define PMIX_VALUE_ARRAY_GET_BASE(array, item_type) \ + ((item_type*) ((array)->array_items)) + +END_C_DECLS + +#endif + diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c index e37aa95c666..2d044c17d5f 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmi1.c @@ -247,9 +247,10 @@ PMIX_EXPORT int PMI_Get_size(int *size) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -257,9 +258,6 @@ PMIX_EXPORT int PMI_Get_size(int *size) return PMI_ERR_INVALID_ARG; } - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ @@ -293,9 +291,10 @@ PMIX_EXPORT int PMI_Get_universe_size(int *size) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -303,9 +302,6 @@ PMIX_EXPORT int PMI_Get_universe_size(int *size) return PMI_ERR_INVALID_ARG; } - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ @@ -327,9 +323,10 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) { pmix_status_t rc = 
PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -337,9 +334,6 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) return PMI_ERR_INVALID_ARG; } - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ @@ -481,6 +475,8 @@ PMIX_EXPORT int PMI_Get_clique_size(int *size) pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -494,7 +490,7 @@ PMIX_EXPORT int PMI_Get_clique_size(int *size) PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); - rc = PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val); + rc = PMIx_Get(&proc, PMIX_LOCAL_SIZE, info, 1, &val); if (PMIX_SUCCESS == rc) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); @@ -511,6 +507,8 @@ PMIX_EXPORT int PMI_Get_clique_ranks(int ranks[], int length) pmix_value_t *val; char **rks; int i; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -518,7 +516,7 @@ PMIX_EXPORT int PMI_Get_clique_ranks(int ranks[], int length) return PMI_ERR_INVALID_ARGS; } - rc = PMIx_Get(&myproc, PMIX_LOCAL_PEERS, NULL, 0, &val); + rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val); if (PMIX_SUCCESS == rc) { /* kv will contain a string of comma-separated * ranks on my node */ diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c index da2e8897b77..347c64da575 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmi2.c @@ -56,9 +56,10 @@ PMIX_EXPORT int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal 
= 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != PMIx_Init(&myproc)) { return PMI2_ERR_INIT; @@ -67,10 +68,6 @@ PMIX_EXPORT int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) /* get the rank */ *rank = myproc.rank; - /* getting internal key requires special rank value */ - memcpy(&proc, &myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ @@ -254,6 +251,9 @@ PMIX_EXPORT int PMI2_Info_GetSize(int *size) pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; + PMI2_CHECK(); @@ -267,7 +267,7 @@ PMIX_EXPORT int PMI2_Info_GetSize(int *size) PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) { + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_LOCAL_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); } @@ -421,6 +421,8 @@ PMIX_EXPORT int PMI2_Info_GetNodeAttr(const char name[], pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_UNDEF; PMI2_CHECK(); @@ -435,7 +437,8 @@ PMIX_EXPORT int PMI2_Info_GetNodeAttr(const char name[], PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); *found = 0; - rc = PMIx_Get(&myproc, name, info, 1, &val); + /* TODO: does PMI2's "name" makes sense to PMIx? 
*/ + rc = PMIx_Get(&proc, name, info, 1, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { rc = PMIX_ERROR; @@ -481,9 +484,10 @@ PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuel { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_UNDEF; PMI2_CHECK(); @@ -491,10 +495,6 @@ PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuel return PMI2_ERR_INVALID_ARG; } - /* getting internal key requires special rank value */ - memcpy(&proc, &myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client.c index 226b84c0ce6..892e81ac4d7 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client.c @@ -63,8 +63,12 @@ static const char pmix_version_string[] = PMIX_VERSION; #include "src/util/progress_threads.h" #include "src/usock/usock.h" #include "src/sec/pmix_sec.h" +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +#include "src/dstore/pmix_dstore.h" +#endif /* PMIX_ENABLE_DSTORE */ #include "pmix_client_ops.h" +#include "src/include/pmix_jobdata.h" #define PMIX_MAX_RETRIES 10 @@ -183,7 +187,9 @@ static void job_data(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, return; } /* decode it */ - pmix_client_process_nspace_blob(pmix_globals.myid.nspace, buf); +#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) + pmix_job_data_htable_store(nspace, buf); +#endif cb->status = PMIX_SUCCESS; cb->active = false; } @@ -363,6 +369,14 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc) pmix_globals.pindex = -1; /* setup the support */ +#if defined(PMIX_ENABLE_DSTORE) && 
(PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS != (rc = pmix_dstore_init(NULL, 0))) { + pmix_output_close(pmix_globals.debug_output); + pmix_output_finalize(); + pmix_class_finalize(); + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + } +#endif /* PMIX_ENABLE_DSTORE */ pmix_bfrop_open(); pmix_usock_init(pmix_client_notify_recv); pmix_sec_init(); @@ -473,6 +487,9 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(void) #endif pmix_bfrop_close(); pmix_sec_finalize(); +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + pmix_dstore_finalize(); +#endif /* PMIX_ENABLE_DSTORE */ pmix_globals_finalize(); @@ -576,6 +593,7 @@ static void _putfn(int sd, short args, void *cbdata) /* shouldn't be possible */ goto done; } + if (PMIX_SUCCESS != (rc = pmix_hash_store(&ns->modex, pmix_globals.myid.rank, kv))) { PMIX_ERROR_LOG(rc); } @@ -736,12 +754,27 @@ static void _peersfn(int sd, short args, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; char **nsprocs=NULL, **nsps=NULL, **tmp; +#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) pmix_nspace_t *nsptr; pmix_nrec_t *nptr; +#endif size_t i; /* cycle across our known nspaces */ tmp = NULL; +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(cb->nspace, PMIX_RANK_WILDCARD, + cb->key, &cb->value))) { + + tmp = pmix_argv_split(cb->value->data.string, ','); + for (i=0; NULL != tmp[i]; i++) { + pmix_argv_append_nosize(&nsps, cb->nspace); + pmix_argv_append_nosize(&nsprocs, tmp[i]); + } + pmix_argv_free(tmp); + tmp = NULL; + } +#else PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { if (0 == strncmp(nsptr->nspace, cb->nspace, PMIX_MAX_NSLEN)) { /* cycle across the nodes in this nspace */ @@ -759,6 +792,7 @@ static void _peersfn(int sd, short args, void *cbdata) } } } +#endif if (0 == (i = pmix_argv_count(nsps))) { /* we don't know this nspace */ rc = PMIX_ERR_NOT_FOUND; @@ -1015,163 +1049,6 @@ static pmix_status_t recv_connect_ack(int sd) 
return PMIX_SUCCESS; } -void pmix_client_process_nspace_blob(const char *nspace, pmix_buffer_t *bptr) -{ - pmix_status_t rc; - int32_t cnt; - int rank; - pmix_kval_t *kptr, *kp2, kv; - pmix_buffer_t buf2; - pmix_byte_object_t *bo; - size_t nnodes, i, j; - pmix_nspace_t *nsptr, *nsptr2; - pmix_nrec_t *nrec, *nr2; - char **procs; - - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: PROCESSING BLOB FOR NSPACE %s", nspace); - - /* cycle across our known nspaces */ - nsptr = NULL; - PMIX_LIST_FOREACH(nsptr2, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(nsptr2->nspace, nspace)) { - nsptr = nsptr2; - break; - } - } - if (NULL == nsptr) { - /* we don't know this nspace - add it */ - nsptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nsptr->nspace, nspace, PMIX_MAX_NSLEN); - pmix_list_append(&pmix_globals.nspaces, &nsptr->super); - } - - /* unpack any info structs provided */ - cnt = 1; - kptr = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, kptr, &cnt, PMIX_KVAL))) { - if (0 == strcmp(kptr->key, PMIX_PROC_BLOB)) { - /* transfer the byte object for unpacking */ - bo = &(kptr->value->data.bo); - PMIX_CONSTRUCT(&buf2, pmix_buffer_t); - PMIX_LOAD_BUFFER(&buf2, bo->bytes, bo->size); - PMIX_RELEASE(kptr); - /* start by unpacking the rank */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &rank, &cnt, PMIX_INT))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&buf2); - return; - } - kp2 = PMIX_NEW(pmix_kval_t); - kp2->key = strdup(PMIX_RANK); - PMIX_VALUE_CREATE(kp2->value, 1); - kp2->value->type = PMIX_INT; - kp2->value->data.integer = rank; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, rank, kp2))) { - PMIX_ERROR_LOG(rc); - } - PMIX_RELEASE(kp2); // maintain accounting - cnt = 1; - kp2 = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(&buf2, kp2, &cnt, PMIX_KVAL))) { - /* this is data provided by a job-level exchange, so store it - * in the job-level data hash_table */ - if 
(PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, rank, kp2))) { - PMIX_ERROR_LOG(rc); - } - PMIX_RELEASE(kp2); // maintain accounting - kp2 = PMIX_NEW(pmix_kval_t); - } - /* cleanup */ - PMIX_DESTRUCT(&buf2); // releases the original kptr data - PMIX_RELEASE(kp2); - } else if (0 == strcmp(kptr->key, PMIX_MAP_BLOB)) { - /* transfer the byte object for unpacking */ - bo = &(kptr->value->data.bo); - PMIX_CONSTRUCT(&buf2, pmix_buffer_t); - PMIX_LOAD_BUFFER(&buf2, bo->bytes, bo->size); - PMIX_RELEASE(kptr); - /* start by unpacking the number of nodes */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &nnodes, &cnt, PMIX_SIZE))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&buf2); - return; - } - /* unpack the list of procs on each node */ - for (i=0; i < nnodes; i++) { - cnt = 1; - PMIX_CONSTRUCT(&kv, pmix_kval_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &kv, &cnt, PMIX_KVAL))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&buf2); - PMIX_DESTRUCT(&kv); - return; - } - /* the name of the node is in the key, and the value is - * a comma-delimited list of procs on that node. 
See if we already - * have this node */ - nrec = NULL; - PMIX_LIST_FOREACH(nr2, &nsptr->nodes, pmix_nrec_t) { - if (0 == strcmp(nr2->name, kv.key)) { - nrec = nr2; - break; - } - } - if (NULL == nrec) { - /* Create a node record and store that list */ - nrec = PMIX_NEW(pmix_nrec_t); - nrec->name = strdup(kv.key); - pmix_list_append(&nsptr->nodes, &nrec->super); - } else { - /* refresh the list */ - if (NULL != nrec->procs) { - free(nrec->procs); - } - } - nrec->procs = strdup(kv.value->data.string); - /* split the list of procs so we can store their - * individual location data */ - procs = pmix_argv_split(nrec->procs, ','); - for (j=0; NULL != procs[j]; j++) { - /* store the hostname for each proc - again, this is - * data obtained via a job-level exchange, so store it - * in the job-level data hash_table */ - kp2 = PMIX_NEW(pmix_kval_t); - kp2->key = strdup(PMIX_HOSTNAME); - kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - kp2->value->type = PMIX_STRING; - kp2->value->data.string = strdup(nrec->name); - rank = strtol(procs[j], NULL, 10); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, rank, kp2))) { - PMIX_ERROR_LOG(rc); - } - PMIX_RELEASE(kp2); // maintain accounting - } - pmix_argv_free(procs); - PMIX_DESTRUCT(&kv); - } - /* cleanup */ - PMIX_DESTRUCT(&buf2); // releases the original kptr data - } else { - /* this is job-level data, so just add it to that hash_table - * with the wildcard rank */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, PMIX_RANK_WILDCARD, kptr))) { - PMIX_ERROR_LOG(rc); - } - /* maintain accounting - but note that the kptr remains - * alive and stored in the hash table! 
So we cannot reuse - * it for some other purpose */ - PMIX_RELEASE(kptr); - } - kptr = PMIX_NEW(pmix_kval_t); - cnt = 1; - } - /* need to release the leftover kptr */ - PMIX_RELEASE(kptr); -} - static pmix_status_t usock_connect(struct sockaddr *addr, int *fd) { int sd=-1; diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_connect.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_connect.c index df9f5287670..fe0cb669f94 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_connect.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_connect.c @@ -55,6 +55,7 @@ #include "src/sec/pmix_sec.h" #include "pmix_client_ops.h" +#include "src/include/pmix_jobdata.h" /* callback for wait completion */ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, @@ -314,7 +315,9 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, continue; } /* extract and process any proc-related info for this nspace */ - pmix_client_process_nspace_blob(nspace, bptr); +#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) + pmix_job_data_htable_store(nspace, bptr); +#endif PMIX_RELEASE(bptr); } if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_get.c index 2a1747793e8..a2d9f4a726a 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_get.c @@ -54,8 +54,12 @@ #include "src/util/progress_threads.h" #include "src/usock/usock.h" #include "src/sec/pmix_sec.h" +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +#include "src/dstore/pmix_dstore.h" +#endif /* PMIX_ENABLE_DSTORE */ #include "pmix_client_ops.h" +#include "src/include/pmix_jobdata.h" static pmix_buffer_t* _pack_get(char *nspace, int rank, const pmix_info_t info[], size_t ninfo, @@ -280,6 +284,34 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t 
*hdr, goto done; } +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; + if ((0 == strncmp(pmix_globals.myid.nspace, nptr->nspace, PMIX_MAX_NSLEN + 1)) && + ((pmix_globals.myid.rank == cb->rank) || (PMIX_RANK_UNDEF == cb->rank))){ + /* if we are asking for data about this process or an undefined process - + check the local hash table first. All the data passed through + PMIx_Put settles there */ + rc = pmix_hash_fetch(&nptr->modex, pmix_globals.myid.rank, cb->key, &val); + assert( (PMIX_SUCCESS == rc) || (PMIX_ERR_PROC_ENTRY_NOT_FOUND == rc) || + (PMIX_ERR_NOT_FOUND == rc) ); + if( PMIX_SUCCESS != rc ){ + if(pmix_globals.myid.rank == cb->rank){ + rc = PMIX_ERR_NOT_FOUND; + } + } + /* in the else case we are supposed to get PMIX_ERR_PROC_ENTRY_NOT_FOUND because + we don't push data from the remote processes into the dstore */ + } + /* try to take it from dstore */ + if( PMIX_ERR_PROC_ENTRY_NOT_FOUND == rc ){ + /* Two options are possible here: + - we are asking for the key from an UNDEF process and the local proc + hasn't pushed this data + - we are asking for the key from a particular process which is not us. 
+ */ + rc = pmix_dstore_fetch(nptr->nspace, cb->rank, cb->key, &val); + } +#else /* we received the entire blob for this process, so * unpack and store it in the modex - this could consist * of buffers from multiple scopes */ @@ -327,8 +359,9 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, } else { rc = PMIX_SUCCESS; } +#endif /* PMIX_ENABLE_DSTORE */ - done: +done: /* if a callback was provided, execute it */ if (NULL != cb && NULL != cb->value_cbfunc) { if (NULL == val) { @@ -349,7 +382,11 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, if (0 == strncmp(nptr->nspace, cb->nspace, PMIX_MAX_NSLEN) && cb->rank == rank) { /* we have the data - see if we can find the key */ val = NULL; +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + rc = pmix_dstore_fetch(nptr->nspace, rank, cb->key, &val); +#else rc = pmix_hash_fetch(&nptr->modex, rank, cb->key, &val); +#endif /* PMIX_ENABLE_DSTORE */ cb->value_cbfunc(rc, val, cb->cbdata); if (NULL != val) { PMIX_VALUE_RELEASE(val); @@ -409,7 +446,11 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* if the rank is WILDCARD, then they want all the job-level info, * so no need to check the modex */ if (PMIX_RANK_WILDCARD != cb->rank) { +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, NULL, &val))) { +#else if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) { +#endif /* PMIX_ENABLE_DSTORE */ pmix_output_verbose(2, pmix_globals.debug_output, "pmix_get[%d]: value retrieved from dstore", __LINE__); /* since we didn't provide them with a key, the hash function @@ -444,7 +485,11 @@ static void _getnbfn(int fd, short flags, void *cbdata) } } /* now get any data from the job-level info */ +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, PMIX_RANK_WILDCARD, NULL, &val))) { +#else 
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, NULL, &val))) { +#endif /* since we didn't provide them with a key, the hash function * must return the results in the pmix_info_array field of the * value */ @@ -490,9 +535,8 @@ static void _getnbfn(int fd, short flags, void *cbdata) return; } - /* the requested data could be in the job-data table, so let's - * just check there first. */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, cb->key, &val))) { +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val))) { /* found it - we are in an event, so we can * just execute the callback */ cb->value_cbfunc(rc, val, cb->cbdata); @@ -503,17 +547,42 @@ static void _getnbfn(int fd, short flags, void *cbdata) PMIX_RELEASE(cb); return; } - if (PMIX_RANK_WILDCARD == cb->rank) { - /* can't be anywhere else */ - cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata); - PMIX_RELEASE(cb); - return; +#endif + + /* the requested data could be in the job-data table, so let's + * just check there first. 
*/ + if (0 == strncmp(cb->key, "pmix", 4)) { +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, PMIX_RANK_WILDCARD, cb->key, &val))) { +#else + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, cb->key, &val))) { +#endif + /* found it - we are in an event, so we can + * just execute the callback */ + cb->value_cbfunc(rc, val, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; + } + if (PMIX_RANK_WILDCARD == cb->rank) { + /* can't be anywhere else */ + cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata); + PMIX_RELEASE(cb); + return; + } } /* it could still be in the job-data table, only stored under its own * rank and not WILDCARD - e.g., this is true of data returned about * ourselves during startup */ +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, cb->key, &val))) { +#else if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val))) { +#endif /* found it - we are in an event, so we can * just execute the callback */ cb->value_cbfunc(rc, val, cb->cbdata); @@ -527,7 +596,13 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* not finding it is not an error - it could be in the * modex hash table, so check it */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, cb->key, &val))) { +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + rc = pmix_dstore_fetch(nptr->nspace, cb->rank, cb->key, &val); +#else + rc = pmix_hash_fetch(&nptr->modex, cb->rank, cb->key, &val); +#endif /* PMIX_ENABLE_DSTORE */ + + if ( PMIX_SUCCESS == rc ) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix_get[%d]: value retrieved from dstore", __LINE__); /* found it - we are in an event, so we can @@ -560,6 +635,15 @@ static void _getnbfn(int fd, short flags, void *cbdata) "Unable to 
locally satisfy request for key=%s for rank = %d, namespace = %s", cb->key, cb->rank, cb->nspace); cb->checked = true; // flag that we are going to check this again + } else if (PMIX_ERR_PROC_ENTRY_NOT_FOUND != rc) { + /* errors are fatal */ + cb->value_cbfunc(rc, NULL, cb->cbdata); + /* protect the data */ + cb->procs = NULL; + cb->key = NULL; + cb->info = NULL; + PMIX_RELEASE(cb); + return; } request: diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_ops.h index 689b04c04d1..d9bfa1ffafb 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_ops.h +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_ops.h @@ -26,8 +26,6 @@ typedef struct { extern pmix_client_globals_t pmix_client_globals; -void pmix_client_process_nspace_blob(const char *nspace, pmix_buffer_t *bptr); - void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo, pmix_notification_fn_t errhandler, pmix_errhandler_reg_cbfunc_t cbfunc, diff --git a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_spawn.c index 93fa6f5eb89..05577f212dd 100644 --- a/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_spawn.c +++ b/opal/mca/pmix/pmix112/pmix/src/client/pmix_client_spawn.c @@ -55,6 +55,7 @@ #include "src/sec/pmix_sec.h" #include "pmix_client_ops.h" +#include "src/include/pmix_jobdata.h" static void wait_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata); @@ -207,7 +208,9 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, if (NULL != n2) { (void)strncpy(nspace, n2, PMIX_MAX_NSLEN); /* extract and process any proc-related info for this nspace */ - pmix_client_process_nspace_blob(nspace, buf); +#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) + pmix_job_data_htable_store(nspace, buf); +#endif free(n2); } } diff --git a/opal/mca/pmix/pmix112/pmix/src/common/Makefile.am 
b/opal/mca/pmix/pmix112/pmix/src/common/Makefile.am index 099a99903e4..0aa7f332f4e 100644 --- a/opal/mca/pmix/pmix112/pmix/src/common/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/src/common/Makefile.am @@ -8,4 +8,5 @@ # sources += \ - src/common/pmix_common.c + src/common/pmix_common.c \ + src/common/pmix_jobdata.c diff --git a/opal/mca/pmix/pmix112/pmix/src/common/pmix_common.c b/opal/mca/pmix/pmix112/pmix/src/common/pmix_common.c index 2956de8273e..f3de30c799c 100644 --- a/opal/mca/pmix/pmix112/pmix/src/common/pmix_common.c +++ b/opal/mca/pmix/pmix112/pmix/src/common/pmix_common.c @@ -32,29 +32,8 @@ PMIX_EXPORT void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo, pmix_errhandler_reg_cbfunc_t cbfunc, void *cbdata) { - if (pmix_globals.init_cntr <= 0) { - return; - } - - /* common err handler registration */ - if (pmix_globals.server) { - /* PMIX server: store the error handler, process info keys and call - * cbfunc with reference to the errhandler */ - pmix_output_verbose(2, pmix_globals.debug_output, - "registering server err handler"); - pmix_server_register_errhandler(info, ninfo, - errhandler, - cbfunc,cbdata); - - } else { - /* PMIX client: store the error handler, process info keys & - * call pmix_server_register_for_events, and call cbfunc with - * reference to the errhandler */ - pmix_output_verbose(2, pmix_globals.debug_output, - "registering client err handler"); - pmix_client_register_errhandler(info, ninfo, - errhandler, - cbfunc, cbdata); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_NOT_SUPPORTED, -1, cbdata); } } @@ -62,28 +41,8 @@ PMIX_EXPORT void PMIx_Deregister_errhandler(int errhandler_ref, pmix_op_cbfunc_t cbfunc, void *cbdata) { - if (pmix_globals.init_cntr <= 0) { - return; - } - - if (errhandler_ref < 0) { - return; - } - - /* common err handler registration */ - if (pmix_globals.server) { - /* PMIX server: store the error handler, process info keys and call - * cbfunc with reference to the errhandler */ - 
pmix_server_deregister_errhandler(errhandler_ref,cbfunc,cbdata); - pmix_output_verbose(2, pmix_globals.debug_output, - "deregistering server err handler"); - } else { - /* PMIX client: store the error handler, process info keys & - * call pmix_server_register_for_events, and call cbfunc with - * reference to the errhandler */ - pmix_client_deregister_errhandler(errhandler_ref, cbfunc, cbdata); - pmix_output_verbose(2, pmix_globals.debug_output, - "deregistering client err handler"); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_NOT_SUPPORTED, cbdata); } } @@ -93,24 +52,6 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_error(pmix_status_t status, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - int rc; - if (pmix_globals.init_cntr <= 0) { - return PMIX_ERR_INIT; - } - - if (pmix_globals.server) { - rc = pmix_server_notify_error(status, procs, nprocs, error_procs, - error_nprocs, info, ninfo, - cbfunc, cbdata); - pmix_output_verbose(0, pmix_globals.debug_output, - "pmix_server_notify_error error =%d, rc=%d", status, rc); - } else { - rc = pmix_client_notify_error(status, procs, nprocs, error_procs, - error_nprocs, info, ninfo, - cbfunc, cbdata); - pmix_output_verbose(0, pmix_globals.debug_output, - "pmix_client_notify_error error =%d, rc=%d", status, rc); - } - return rc; + return PMIX_ERR_NOT_SUPPORTED; } diff --git a/opal/mca/pmix/pmix112/pmix/src/common/pmix_jobdata.c b/opal/mca/pmix/pmix112/pmix/src/common/pmix_jobdata.c new file mode 100644 index 00000000000..7fc5057bc7d --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/common/pmix_jobdata.c @@ -0,0 +1,353 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include +#include +#include +#include "src/include/pmix_globals.h" +#include "src/class/pmix_value_array.h" +#include "src/util/error.h" +#include "src/buffer_ops/internal.h" +#include "src/util/argv.h" +#include "src/util/hash.h" +#include "src/include/pmix_jobdata.h" + +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +#include "src/dstore/pmix_dstore.h" +#endif + +static inline int _add_key_for_rank(int rank, pmix_kval_t *kv, void *cbdata); +static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata); + +static inline int _add_key_for_rank(int rank, pmix_kval_t *kv, void *cbdata) +{ + pmix_job_data_caddy_t *cb = (pmix_job_data_caddy_t*)(cbdata); + pmix_status_t rc = PMIX_SUCCESS; +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + int i, size; + pmix_buffer_t *tmp = NULL; + int cur_rank; + + if (NULL != cb->dstore_fn) { + /* rank WILDCARD contained in the 0 item */ + cur_rank = PMIX_RANK_WILDCARD == rank ? 
0 : rank + 1; + size = pmix_value_array_get_size(cb->bufs); + + if ((cur_rank + 1) <= size) { + tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, cur_rank)); + pmix_bfrop.pack(tmp, kv, 1, PMIX_KVAL); + return rc; + } + if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(cb->bufs, cur_rank + 1))) { + PMIX_ERROR_LOG(rc); + return rc; + } + for (i = size; i < (cur_rank + 1); i++) { + tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, i)); + PMIX_CONSTRUCT(tmp, pmix_buffer_t); + } + pmix_bfrop.pack(tmp, kv, 1, PMIX_KVAL); + } +#endif + if (cb->hstore_fn) { + if (PMIX_SUCCESS != (rc = cb->hstore_fn(&cb->nsptr->internal, rank, kv))) { + PMIX_ERROR_LOG(rc); + } + } + return rc; +} + +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +static inline int _rank_key_dstore_store(void *cbdata) +{ + int rc = PMIX_SUCCESS; + uint32_t i, size; + pmix_buffer_t *tmp; + pmix_job_data_caddy_t *cb = (pmix_job_data_caddy_t*)cbdata; + int rank; + pmix_kval_t *kv = NULL; + + if (NULL == cb->bufs) { + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto exit; + } + kv = PMIX_NEW(pmix_kval_t); + kv->key = strdup("jobinfo"); + PMIX_VALUE_CREATE(kv->value, 1); + kv->value->type = PMIX_BYTE_OBJECT; + + size = pmix_value_array_get_size(cb->bufs); + for (i = 0; i < size; i++) { + tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, i)); + rank = 0 == i ? 
PMIX_RANK_WILDCARD : i - 1; + PMIX_UNLOAD_BUFFER(tmp, kv->value->data.bo.bytes, kv->value->data.bo.size); + if (PMIX_SUCCESS != (rc = cb->dstore_fn(cb->nsptr->nspace, rank, kv))) { + PMIX_ERROR_LOG(rc); + goto exit; + } + } + +exit: + if (NULL != kv) { + PMIX_RELEASE(kv); + } + return rc; +} +#endif + +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +pmix_status_t pmix_job_data_dstore_store(const char *nspace, pmix_buffer_t *bptr) +{ + pmix_job_data_caddy_t *cd = PMIX_NEW(pmix_job_data_caddy_t); + + cd->job_data = bptr; + cd->dstore_fn = pmix_dstore_store; + + return _job_data_store(nspace, cd); +} +#endif + +pmix_status_t pmix_job_data_htable_store(const char *nspace, pmix_buffer_t *bptr) +{ + pmix_job_data_caddy_t *cb = PMIX_NEW(pmix_job_data_caddy_t); + + cb->job_data = bptr; + cb->hstore_fn = pmix_hash_store; + + return _job_data_store(nspace, cb); +} + +static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) +{ + pmix_buffer_t *job_data = ((pmix_job_data_caddy_t*)(cbdata))->job_data; + pmix_job_data_caddy_t *cb = (pmix_job_data_caddy_t*)(cbdata); + pmix_status_t rc = PMIX_SUCCESS; + pmix_nspace_t *nsptr = NULL, *nsptr2 = NULL; + pmix_kval_t *kptr, *kp2, kv; + int32_t cnt; + size_t nnodes; + uint32_t i; +#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) + uint32_t j; +#endif + pmix_nrec_t *nrec, *nr2; + char **procs = NULL; + pmix_byte_object_t *bo; + pmix_buffer_t buf2; + int rank; + char *proc_type_str = pmix_globals.server ? 
+                          "server" : "client";
+
+    pmix_output_verbose(10, pmix_globals.debug_output,
+                        "pmix:%s pmix_jobdata_store %s", proc_type_str, nspace);
+
+    /* check buf data: a NULL buffer has to be rejected before anything
+     * dereferences it (the unpack loop below, and the unpack-pointer
+     * reset at the exit label). cb is released on every path that goes
+     * through the exit label, so release it on this early return too.
+     * A non-NULL buffer - even an empty one - is processed as before. */
+    if (NULL == job_data) {
+        rc = PMIX_ERR_BAD_PARAM;
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(cb);
+        return rc;
+    }
+
+    /* look the namespace up in the global list */
+    PMIX_LIST_FOREACH(nsptr2, &pmix_globals.nspaces, pmix_nspace_t) {
+        if (0 == strcmp(nsptr2->nspace, nspace)) {
+            nsptr = nsptr2;
+            break;
+        }
+    }
+    if (NULL == nsptr) {
+        /* we don't know this nspace - add it */
+        nsptr = PMIX_NEW(pmix_nspace_t);
+        (void)strncpy(nsptr->nspace, nspace, PMIX_MAX_NSLEN);
+        pmix_list_append(&pmix_globals.nspaces, &nsptr->super);
+    }
+    cb->nsptr = nsptr;
+
+#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
+    /* array of per-rank buffers filled by _add_key_for_rank() */
+    if (NULL == (cb->bufs = PMIX_NEW(pmix_value_array_t))) {
+        rc = PMIX_ERR_OUT_OF_RESOURCE;
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    if (PMIX_SUCCESS != (rc = pmix_value_array_init(cb->bufs, sizeof(pmix_buffer_t)))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+#endif
+    cnt = 1;
+    kptr = PMIX_NEW(pmix_kval_t);
+    while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(job_data, kptr, &cnt, PMIX_KVAL)))
+    {
+        if (0 == strcmp(kptr->key, PMIX_PROC_BLOB)) {
+            bo = &(kptr->value->data.bo);
+            PMIX_CONSTRUCT(&buf2, pmix_buffer_t);
+            PMIX_LOAD_BUFFER(&buf2, bo->bytes, bo->size);
+            /* start by unpacking the rank */
+            cnt = 1;
+            if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &rank, &cnt, PMIX_INT))) {
+                PMIX_ERROR_LOG(rc);
+                PMIX_DESTRUCT(&buf2);
+                goto exit;
+            }
+            kp2 = PMIX_NEW(pmix_kval_t);
+            kp2->key = strdup(PMIX_RANK);
+            PMIX_VALUE_CREATE(kp2->value, 1);
+            kp2->value->type = PMIX_INT;
+            kp2->value->data.integer = rank;
+            if (PMIX_SUCCESS != (rc = _add_key_for_rank(rank, kp2, cb))) {
+                PMIX_ERROR_LOG(rc);
+                PMIX_RELEASE(kp2);
+                PMIX_DESTRUCT(&buf2);
+                goto exit;
+            }
+            PMIX_RELEASE(kp2); // maintain accounting
+            cnt = 1;
+            kp2 = PMIX_NEW(pmix_kval_t);
+            while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(&buf2, kp2, &cnt, PMIX_KVAL))) {
+                /* this is data
provided by a job-level exchange, so store it + * in the job-level data hash_table */ + if (PMIX_SUCCESS != (rc = _add_key_for_rank(rank, kp2, cb))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_DESTRUCT(&buf2); + goto exit; + } + PMIX_RELEASE(kp2); // maintain accounting + kp2 = PMIX_NEW(pmix_kval_t); + } + /* cleanup */ + PMIX_DESTRUCT(&buf2); // releases the original kptr data + PMIX_RELEASE(kp2); + } else if (0 == strcmp(kptr->key, PMIX_MAP_BLOB)) { + /* transfer the byte object for unpacking */ + bo = &(kptr->value->data.bo); + PMIX_CONSTRUCT(&buf2, pmix_buffer_t); + PMIX_LOAD_BUFFER(&buf2, bo->bytes, bo->size); + /* start by unpacking the number of nodes */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &nnodes, &cnt, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&buf2); + goto exit; + } + /* unpack the list of procs on each node */ + for (i=0; i < nnodes; i++) { + cnt = 1; + PMIX_CONSTRUCT(&kv, pmix_kval_t); + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &kv, &cnt, PMIX_KVAL))) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&buf2); + PMIX_DESTRUCT(&kv); + goto exit; + } + /* the name of the node is in the key, and the value is + * a comma-delimited list of procs on that node. 
See if we already + * have this node */ + nrec = NULL; + PMIX_LIST_FOREACH(nr2, &nsptr->nodes, pmix_nrec_t) { + if (0 == strcmp(nr2->name, kv.key)) { + nrec = nr2; + break; + } + } + if (NULL == nrec) { + /* Create a node record and store that list */ + nrec = PMIX_NEW(pmix_nrec_t); + nrec->name = strdup(kv.key); + pmix_list_append(&nsptr->nodes, &nrec->super); + } else { + /* refresh the list */ + if (NULL != nrec->procs) { + free(nrec->procs); + } + } + nrec->procs = strdup(kv.value->data.string); + /* split the list of procs so we can store their + * individual location data */ +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS != (rc = _add_key_for_rank(PMIX_RANK_WILDCARD, &kv, cb))) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&kv); + PMIX_DESTRUCT(&buf2); + pmix_argv_free(procs); + goto exit; + } +#else + procs = pmix_argv_split(nrec->procs, ','); + for (j=0; NULL != procs[j]; j++) { + /* store the hostname for each proc - again, this is + * data obtained via a job-level exchange, so store it + * in the job-level data hash_table */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_HOSTNAME); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = strdup(nrec->name); + rank = strtol(procs[j], NULL, 10); + if (PMIX_SUCCESS != (rc = _add_key_for_rank(rank, kp2, cb))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_DESTRUCT(&kv); + PMIX_DESTRUCT(&buf2); + pmix_argv_free(procs); + goto exit; + } + PMIX_RELEASE(kp2); + } + pmix_argv_free(procs); +#endif + PMIX_DESTRUCT(&kv); + } + /* cleanup */ + PMIX_DESTRUCT(&buf2); + } else { + if (PMIX_SUCCESS != (rc = _add_key_for_rank(PMIX_RANK_WILDCARD, kptr, cb))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kptr); + goto exit; + } + } + PMIX_RELEASE(kptr); + kptr = PMIX_NEW(pmix_kval_t); + cnt = 1; + } + /* need to release the leftover kptr */ + PMIX_RELEASE(kptr); + + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + 
PMIX_ERROR_LOG(rc); + goto exit; + } + rc = PMIX_SUCCESS; + +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (NULL != cb->dstore_fn) { + uint32_t size = (uint32_t)pmix_value_array_get_size(cb->bufs); + for (i = 0; i < size; i++) { + if (PMIX_SUCCESS != (rc = _rank_key_dstore_store(cbdata))) { + PMIX_ERROR_LOG(rc); + goto exit; + } + } + } +#endif +exit: +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (NULL != cb->bufs) { + PMIX_RELEASE(cb->bufs); + } +#endif + PMIX_RELEASE(cb); + + /* reset buf unpack ptr */ + job_data->unpack_ptr = job_data->base_ptr; + + return rc; +} diff --git a/opal/mca/pmix/pmix112/pmix/src/dstore/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/dstore/Makefile.am new file mode 100644 index 00000000000..5ad482f884b --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/dstore/Makefile.am @@ -0,0 +1,16 @@ +# Copyright (c) 2015-2016 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ + + +headers += \ + src/dstore/pmix_dstore.h \ + src/dstore/pmix_esh.h + +sources += \ + src/dstore/pmix_dstore.c \ + src/dstore/pmix_esh.c diff --git a/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_dstore.c b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_dstore.c new file mode 100644 index 00000000000..740503c5316 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_dstore.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include
+#include
+#include "src/include/pmix_globals.h"
+
+#include "pmix_dstore.h"
+#include "pmix_esh.h"
+
+
+/*
+ * NULL-terminated list of the dstore modules compiled into the library.
+ * Only the first entry is ever selected (see pmix_dstore_init below).
+ */
+static pmix_dstore_base_module_t *all[] = {
+    &pmix_dstore_esh_module,
+
+    /* Always end the array with a NULL */
+    NULL
+};
+
+/* The module in use. It is zero-initialized, so every hook is NULL and
+ * each status-returning wrapper below answers PMIX_ERR_NOT_SUPPORTED
+ * until pmix_dstore_init() has copied a module into it. */
+pmix_dstore_base_module_t pmix_dstore = {0};
+
+/* Copy the first registered module into pmix_dstore and run its init hook. */
+int pmix_dstore_init(pmix_info_t info[], size_t ninfo)
+{
+    pmix_dstore = *all[0];
+
+    return (NULL != pmix_dstore.init) ? pmix_dstore.init(info, ninfo)
+                                      : PMIX_ERR_NOT_SUPPORTED;
+}
+
+/* Run the active module's finalize hook, if any; its status is ignored. */
+void pmix_dstore_finalize(void)
+{
+    if (NULL != pmix_dstore.finalize) {
+        pmix_dstore.finalize();
+    }
+}
+
+/* Forward a store request for (nspace, rank) to the active module. */
+int pmix_dstore_store(const char *nspace, int rank, pmix_kval_t *kv)
+{
+    return (NULL != pmix_dstore.store) ? pmix_dstore.store(nspace, rank, kv)
+                                       : PMIX_ERR_NOT_SUPPORTED;
+}
+
+/* Forward a fetch request for key of (nspace, rank) to the active module. */
+int pmix_dstore_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs)
+{
+    return (NULL != pmix_dstore.fetch) ? pmix_dstore.fetch(nspace, rank, key, kvs)
+                                       : PMIX_ERR_NOT_SUPPORTED;
+}
+
+/* Forward an environment-patch request for nspace to the active module. */
+int pmix_dstore_patch_env(const char *nspace, char ***env)
+{
+    return (NULL != pmix_dstore.patch_env) ? pmix_dstore.patch_env(nspace, env)
+                                           : PMIX_ERR_NOT_SUPPORTED;
+}
+
+/* Forward a namespace registration to the active module. */
+int pmix_dstore_nspace_add(const char *nspace, pmix_info_t info[], size_t ninfo)
+{
+    return (NULL != pmix_dstore.nspace_add) ? pmix_dstore.nspace_add(nspace, info, ninfo)
+                                            : PMIX_ERR_NOT_SUPPORTED;
+}
+
+/* Forward a namespace removal to the active module. */
+int pmix_dstore_nspace_del(const char *nspace)
+{
+    return (NULL != pmix_dstore.nspace_del) ? pmix_dstore.nspace_del(nspace)
+                                            : PMIX_ERR_NOT_SUPPORTED;
+}
diff --git a/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_dstore.h b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_dstore.h
new file mode 100644
index 
00000000000..f39237652f9
--- /dev/null
+++ b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_dstore.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016 Mellanox Technologies, Inc.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef PMIX_DSTORE_H
+#define PMIX_DSTORE_H
+
+#include
+#include
+#include "src/buffer_ops/buffer_ops.h"
+
+
+BEGIN_C_DECLS
+
+
+int pmix_dstore_init(pmix_info_t info[], size_t ninfo);
+void pmix_dstore_finalize(void);
+int pmix_dstore_store(const char *nspace, int rank, pmix_kval_t *kv);
+
+/*
+ * Return codes of pmix_dstore_fetch():
+ * - PMIX_ERR_BAD_PARAM - bad parameters - can't proceed.
+ * - PMIX_ERROR - fatal error
+ * - PMIX_ERR_NOT_FOUND - we have the BLOB for the process but the
+ * requested key wasn't found there
+ * - PMIX_ERR_PROC_ENTRY_NOT_FOUND - the BLOB for the process wasn't
+ * found - need to request it from the server.
+ */
+int pmix_dstore_fetch(const char *nspace, int rank,
+                      const char *key, pmix_value_t **kvs);
+int pmix_dstore_patch_env(const char *nspace, char ***env);
+int pmix_dstore_nspace_add(const char *nspace, pmix_info_t info[], size_t ninfo);
+int pmix_dstore_nspace_del(const char *nspace);
+
+/**
+ * Initialize the module. Returns an error if the module cannot
+ * run, success if it can and wants to be used.
+ */
+typedef int (*pmix_dstore_base_module_init_fn_t)(pmix_info_t info[], size_t ninfo);
+
+/**
+ * Finalize the module. Tear down any allocated storage, disconnect
+ * from any system support.
+ */
+typedef int (*pmix_dstore_base_module_fini_fn_t)(void);
+
+/**
+* store key/value pair in datastore.
+*
+* @param nspace namespace string
+*
+* @param rank rank.
+*
+* @param kv key/value pair.
+*
+* @return PMIX_SUCCESS on success.
+*/
+typedef int (*pmix_dstore_base_module_store_fn_t)(const char *nspace, int rank, pmix_kval_t *kv);
+
+/**
+* fetch value in datastore.
+*
+* @param nspace namespace string
+*
+* @param rank rank.
+*
+* @param key key.
+* @param kvs output: receives the fetched value on success.
+* @return PMIX_SUCCESS on success.
+*/
+typedef int (*pmix_dstore_base_module_fetch_fn_t)(const char *nspace, int rank, const char *key, pmix_value_t **kvs);
+
+/**
+* patch the environment for the given namespace.
+* NOTE(review): presumably exports the dstore settings to clients - confirm.
+* @param nspace namespace string
+*
+* @param env environment (string array) that the module may modify.
+*
+* @return PMIX_SUCCESS on success.
+*/
+typedef int (*pmix_dstore_base_module_proc_patch_env_fn_t)(const char *nspace, char ***env);
+
+/**
+* add a namespace to the datastore.
+*
+* @param nspace namespace string
+*
+* @param info array of info entries; ninfo is the number of entries.
+*
+* @return PMIX_SUCCESS on success.
+*/
+typedef int (*pmix_dstore_base_module_add_nspace_fn_t)(const char *nspace,
+                                                       pmix_info_t info[],
+                                                       size_t ninfo);
+
+/**
+* finalize nspace.
+*
+* @param nspace namespace string
+*
+* @return PMIX_SUCCESS on success.
+*/
+typedef int (*pmix_dstore_base_module_del_nspace_fn_t)(const char *nspace);
+
+/**
+* structure for dstore modules: a name plus one hook per operation.
+*/
+typedef struct {
+    const char *name;
+    pmix_dstore_base_module_init_fn_t init;
+    pmix_dstore_base_module_fini_fn_t finalize;
+    pmix_dstore_base_module_store_fn_t store;
+    pmix_dstore_base_module_fetch_fn_t fetch;
+    pmix_dstore_base_module_proc_patch_env_fn_t patch_env;
+    pmix_dstore_base_module_add_nspace_fn_t nspace_add;
+    pmix_dstore_base_module_del_nspace_fn_t nspace_del;
+
+} pmix_dstore_base_module_t;
+
+END_C_DECLS
+
+#endif /* PMIX_DSTORE_H */
diff --git a/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.c b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.c
new file mode 100644
index 00000000000..119ce797fc8
--- /dev/null
+++ b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.c
@@ -0,0 +1,2314 @@
+/*
+ * Copyright (c) 2015-2016 Mellanox Technologies, Inc.
+ * All rights reserved.
+ * Copyright (c) 2016 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "src/include/pmix_globals.h" + +#include "src/class/pmix_value_array.h" +#include "src/buffer_ops/buffer_ops.h" +#include "src/buffer_ops/types.h" +#include "src/util/pmix_environ.h" +#include "src/util/hash.h" +#include "src/util/error.h" +#include "src/sm/pmix_sm.h" + +#include "pmix_dstore.h" +#include "pmix_esh.h" + +static int _esh_init(pmix_info_t info[], size_t ninfo); +static int _esh_finalize(void); +static int _esh_store(const char *nspace, int rank, pmix_kval_t *kv); +static int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs); +static int _esh_patch_env(const char *nspace, char ***env); +static int _esh_nspace_add(const char *nspace, pmix_info_t info[], size_t ninfo); +static int _esh_nspace_del(const char *nspace); + +pmix_dstore_base_module_t pmix_dstore_esh_module = { + "esh", + _esh_init, + _esh_finalize, + _esh_store, + _esh_fetch, + _esh_patch_env, + _esh_nspace_add, + _esh_nspace_del +}; + +#define ESH_REGION_EXTENSION "EXTENSION_SLOT" +#define ESH_REGION_INVALIDATED "INVALIDATED" +#define ESH_ENV_INITIAL_SEG_SIZE "INITIAL_SEG_SIZE" +#define ESH_ENV_NS_META_SEG_SIZE "NS_META_SEG_SIZE" +#define ESH_ENV_NS_DATA_SEG_SIZE "NS_DATA_SEG_SIZE" +#define ESH_ENV_LINEAR "SM_USE_LINEAR_SEARCH" + +#define EXT_SLOT_SIZE(key) (strlen(key) + 1 + 2*sizeof(size_t)) /* in ext slot new offset will be stored in case if new data were added for the same process during next commit */ +#define KVAL_SIZE(key, size) (strlen(key) + 1 + sizeof(size_t) + size) + +#define _ESH_LOCK(lockfd, operation) \ +__extension__ ({ \ + pmix_status_t ret = PMIX_SUCCESS; \ + int i; \ + struct flock fl = {0}; \ + fl.l_type = operation; \ + fl.l_whence = SEEK_SET; \ + for(i = 0; i < 10; i++) { \ + if( 0 > fcntl(lockfd, F_SETLKW, &fl) ) { \ + switch( 
errno ){ \ + case EINTR: \ + continue; \ + case ENOENT: \ + case EINVAL: \ + ret = PMIX_ERR_NOT_FOUND; \ + break; \ + case EBADF: \ + ret = PMIX_ERR_BAD_PARAM; \ + break; \ + case EDEADLK: \ + case EFAULT: \ + case ENOLCK: \ + ret = PMIX_ERR_RESOURCE_BUSY; \ + break; \ + default: \ + ret = PMIX_ERROR; \ + break; \ + } \ + } \ + break; \ + } \ + if (ret) { \ + pmix_output(0, "%s %d:%s lock failed: %s", \ + __FILE__, __LINE__, __func__, strerror(errno)); \ + } \ + ret; \ +}) + +#define _ESH_WRLOCK(lockfd) _ESH_LOCK(lockfd, F_WRLCK) +#define _ESH_RDLOCK(lockfd) _ESH_LOCK(lockfd, F_RDLCK) +#define _ESH_UNLOCK(lockfd) _ESH_LOCK(lockfd, F_UNLCK) + +#define ESH_INIT_SESSION_TBL_SIZE 2 +#define ESH_INIT_NS_MAP_TBL_SIZE 2 + +static int _store_data_for_rank(ns_track_elem_t *ns_info, int rank, pmix_buffer_t *buf); +static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id); +static seg_desc_t *_attach_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id); +static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info); +static int _put_ns_info_to_initial_segment(const ns_map_data_t *ns_map, pmix_sm_seg_t *metaseg, pmix_sm_seg_t *dataseg); +static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_map); +static ns_track_elem_t *_get_track_elem_for_namespace(ns_map_data_t *ns_map); +static rank_meta_info *_get_rank_meta_info(int rank, seg_desc_t *segdesc); +static uint8_t *_get_data_region_by_offset(seg_desc_t *segdesc, size_t offset); +static void _update_initial_segment_info(const ns_map_data_t *ns_map); +static void _set_constants_from_env(void); +static void _delete_sm_desc(seg_desc_t *desc); +static int _pmix_getpagesize(void); +static inline uint32_t _get_univ_size(const char *nspace); + +static inline ns_map_data_t * _esh_session_map_search_server(const char *nspace); +static inline ns_map_data_t * _esh_session_map_search_client(const char *nspace); +static inline ns_map_data_t * 
_esh_session_map(const char *nspace, size_t tbl_idx); +static inline void _esh_session_map_clean(ns_map_t *m); +static inline int _esh_jobuid_tbl_search(uid_t jobuid, size_t *tbl_idx); +static inline int _esh_session_tbl_add(size_t *tbl_idx); +static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, int setjobuid); +static inline void _esh_session_release(session_t *s); +static inline void _esh_ns_track_cleanup(void); +static inline void _esh_sessions_cleanup(void); +static inline void _esh_ns_map_cleanup(void); +static inline int _esh_dir_del(const char *dirname); + +static char *_base_path = NULL; +static size_t _initial_segment_size = 0; +static size_t _max_ns_num; +static size_t _meta_segment_size = 0; +static size_t _max_meta_elems; +static size_t _data_segment_size = 0; +static uid_t _jobuid; +static char _setjobuid = 0; + +static pmix_value_array_t *_session_array = NULL; +static pmix_value_array_t *_ns_map_array = NULL; +static pmix_value_array_t *_ns_track_array = NULL; + +ns_map_data_t * (*_esh_session_map_search)(const char *nspace) = NULL; + +#define _ESH_SESSION_path(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].nspace_path) +#define _ESH_SESSION_lockfile(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].lockfile) +#define _ESH_SESSION_jobuid(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].jobuid) +#define _ESH_SESSION_lockfd(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].lockfd) +#define _ESH_SESSION_sm_seg_first(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].sm_seg_first) +#define _ESH_SESSION_sm_seg_last(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].sm_seg_last) +#define _ESH_SESSION_ns_info(tbl_idx) (PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t)[tbl_idx].ns_info) + +/* If _direct_mode is set, it means that we use linear search + * along the array of rank meta info objects 
inside a meta segment + * to find the requested rank. Otherwise, we do a fast lookup + * based on rank and directly compute offset. + * This mode is called direct because it's effectively used in + * sparse communication patterns when direct modex is usually used. + */ +static int _direct_mode = 0; + +static void ncon(ns_track_elem_t *p) { + memset(&p->ns_map, 0, sizeof(p->ns_map)); + p->meta_seg = NULL; + p->data_seg = NULL; + p->num_meta_seg = 0; + p->num_data_seg = 0; +} + +static void ndes(ns_track_elem_t *p) { + _delete_sm_desc(p->meta_seg); + _delete_sm_desc(p->data_seg); + memset(&p->ns_map, 0, sizeof(p->ns_map)); +} + +PMIX_CLASS_INSTANCE(ns_track_elem_t, + pmix_value_array_t, + ncon, ndes); + +static inline int _is_server(void) +{ + return (pmix_globals.server); +} + +static inline void _esh_session_map_clean(ns_map_t *m) { + memset(m, 0, sizeof(*m)); + m->data.track_idx = -1; +} + +static inline const char *_unique_id(void) +{ + static const char *str = NULL; + if (!str) { + /* see: pmix_server.c initialize_server_base() + * to get format of uri + */ + if (_is_server()) { + static char buf[100]; + snprintf(buf, sizeof(buf) - 1, "pmix-%d", getpid()); + str = buf; + } else { + str = getenv("PMIX_SERVER_URI"); + if (str) { + str = strrchr(str, '/'); + } + str = (str ? str + 1 : "$$$"); + } + } + return str; +} + +static inline int _esh_dir_del(const char *path) +{ + DIR *dir; + struct dirent *d_ptr; + struct stat st; + pmix_status_t rc = PMIX_SUCCESS; + + char name[PMIX_PATH_MAX]; + + dir = opendir(path); + if (NULL == dir) { + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + + while (NULL != (d_ptr = readdir(dir))) { + snprintf(name, PMIX_PATH_MAX, "%s/%s", path, d_ptr->d_name); + if ( 0 > lstat(name, &st) ){ + /* No fatal error here - just log this event + * we will hit the error later at rmdir. Keep trying ... 
+ */ + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); + continue; + } + + if(S_ISDIR(st.st_mode)) { + if(strcmp(d_ptr->d_name, ".") && strcmp(d_ptr->d_name, "..")) { + rc = _esh_dir_del(name); + if( PMIX_SUCCESS != rc ){ + /* No fatal error here - just log this event + * we will hit the error later at rmdir. Keep trying ... + */ + PMIX_ERROR_LOG(rc); + } + } + } + else { + if( 0 > unlink(name) ){ + /* No fatal error here - just log this event + * we will hit the error later at rmdir. Keep trying ... + */ + PMIX_ERROR_LOG(PMIX_ERR_NO_PERMISSIONS); + } + } + } + closedir(dir); + + /* remove the top dir */ + if( 0 > rmdir(path) ){ + rc = PMIX_ERR_NO_PERMISSIONS; + PMIX_ERROR_LOG(rc); + } + return rc; +} + +static inline int _esh_tbls_init(void) +{ + pmix_status_t rc = PMIX_SUCCESS; + size_t idx; + + /* initial settings */ + _ns_track_array = NULL; + _session_array = NULL; + _ns_map_array = NULL; + + /* Setup namespace tracking array */ + if (NULL == (_ns_track_array = PMIX_NEW(pmix_value_array_t))) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (PMIX_SUCCESS != (rc = pmix_value_array_init(_ns_track_array, sizeof(ns_track_elem_t)))){ + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + /* Setup sessions table */ + if (NULL == (_session_array = PMIX_NEW(pmix_value_array_t))){ + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (PMIX_SUCCESS != (rc = pmix_value_array_init(_session_array, sizeof(session_t)))) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(_session_array, ESH_INIT_SESSION_TBL_SIZE))) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + for (idx = 0; idx < ESH_INIT_SESSION_TBL_SIZE; idx++) { + memset(pmix_value_array_get_item(_session_array, idx), 0, sizeof(session_t)); + } + + /* Setup namespace map array */ + if (NULL == (_ns_map_array = PMIX_NEW(pmix_value_array_t))) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (PMIX_SUCCESS 
!= (rc = pmix_value_array_init(_ns_map_array, sizeof(ns_map_t)))) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(_ns_map_array, ESH_INIT_NS_MAP_TBL_SIZE))) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + for (idx = 0; idx < ESH_INIT_NS_MAP_TBL_SIZE; idx++) { + _esh_session_map_clean(pmix_value_array_get_item(_ns_map_array, idx)); + } + + return PMIX_SUCCESS; +err_exit: + if (NULL != _ns_track_array) { + PMIX_RELEASE(_ns_track_array); + } + if (NULL != _session_array) { + PMIX_RELEASE(_session_array); + } + if (NULL != _ns_map_array) { + PMIX_RELEASE(_ns_map_array); + } + return rc; +} + +static inline void _esh_ns_map_cleanup(void) +{ + size_t idx; + size_t size; + ns_map_t *ns_map; + + if (NULL == _ns_map_array) { + return; + } + + size = pmix_value_array_get_size(_ns_map_array); + ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + + for (idx = 0; idx < size; idx++) { + if(ns_map[idx].in_use) + _esh_session_map_clean(&ns_map[idx]); + } + + PMIX_RELEASE(_ns_map_array); + _ns_map_array = NULL; +} + +static inline void _esh_sessions_cleanup(void) +{ + size_t idx; + size_t size; + session_t *s_tbl; + + if (NULL == _session_array) { + return; + } + + size = pmix_value_array_get_size(_session_array); + s_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); + + for (idx = 0; idx < size; idx++) { + if(s_tbl[idx].in_use) + _esh_session_release(&s_tbl[idx]); + } + + PMIX_RELEASE(_session_array); + _session_array = NULL; +} + +static inline void _esh_ns_track_cleanup(void) +{ + if (NULL == _ns_track_array) { + return; + } + + PMIX_RELEASE(_ns_track_array); + _ns_track_array = NULL; +} + +static inline ns_map_data_t * _esh_session_map(const char *nspace, size_t tbl_idx) +{ + size_t map_idx; + size_t size = pmix_value_array_get_size(_ns_map_array);; + ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t);; + ns_map_t *new_map = NULL; + + if (NULL == nspace) { + 
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
+        return NULL;
+    }
+
+    for(map_idx = 0; map_idx < size; map_idx++) {
+        if (!ns_map[map_idx].in_use) {
+            ns_map[map_idx].in_use = true;
+            strncpy(ns_map[map_idx].data.name, nspace, sizeof(ns_map[map_idx].data.name)-1);
+            ns_map[map_idx].data.tbl_idx = tbl_idx;
+            return &ns_map[map_idx].data;
+        }
+    }
+
+    if (NULL == (new_map = pmix_value_array_get_item(_ns_map_array, map_idx))) {
+        PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE);
+        return NULL;
+    }
+
+    _esh_session_map_clean(new_map);
+    new_map->in_use = true;
+    new_map->data.tbl_idx = tbl_idx;
+    strncpy(new_map->data.name, nspace, sizeof(new_map->data.name)-1);
+
+    return &new_map->data;
+}
+
+/* Find the in-use session whose jobuid matches. On success its table
+ * index is stored in *tbl_idx; otherwise PMIX_ERR_NOT_FOUND is returned
+ * and *tbl_idx is left untouched. */
+static inline int _esh_jobuid_tbl_search(uid_t jobuid, size_t *tbl_idx)
+{
+    size_t idx, size;
+    session_t *session_tbl = NULL;
+
+    size = pmix_value_array_get_size(_session_array);
+    session_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t);
+
+    for(idx = 0; idx < size; idx++) {
+        if (session_tbl[idx].in_use && session_tbl[idx].jobuid == jobuid) {
+            *tbl_idx = idx;
+            return PMIX_SUCCESS;
+        }
+    }
+
+    return PMIX_ERR_NOT_FOUND;
+}
+
+/* Reserve a session-table slot and return its index through *tbl_idx.
+ * A free (not in_use) entry is reused when one exists; otherwise the
+ * element one past the current end is requested from the value array.
+ * Returns PMIX_SUCCESS, or PMIX_ERR_OUT_OF_RESOURCE if no new element
+ * could be obtained. */
+static inline int _esh_session_tbl_add(size_t *tbl_idx)
+{
+    size_t idx;
+    size_t size = pmix_value_array_get_size(_session_array);
+    session_t *s_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t);
+    session_t *new_session;
+    pmix_status_t rc = PMIX_SUCCESS;
+
+    for(idx = 0; idx < size; idx ++) {
+        if (0 == s_tbl[idx].in_use) {
+            s_tbl[idx].in_use = 1;
+            *tbl_idx = idx;
+            return PMIX_SUCCESS;
+        }
+    }
+
+    /* idx == size here, i.e. outside the range s_tbl was fetched for.
+     * Obtaining that element may relocate the array storage, so the new
+     * slot is only ever touched through the pointer returned for it. */
+    if (NULL == (new_session = pmix_value_array_get_item(_session_array, idx))) {
+        rc = PMIX_ERR_OUT_OF_RESOURCE;
+        PMIX_ERROR_LOG(rc);
+        return rc;
+    }
+    /* start from a clean record, as _esh_tbls_init() does for the
+     * initial entries, so no field is left holding stale bytes */
+    memset(new_session, 0, sizeof(*new_session));
+    new_session->in_use = 1;
+    *tbl_idx = idx;
+
+    return PMIX_SUCCESS;
+}
+
+static inline ns_map_data_t * _esh_session_map_search_server(const char *nspace)
+{
+    size_t idx, size = pmix_value_array_get_size(_ns_map_array);
+    ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t);
+    if (NULL == 
nspace) { + return NULL; + } + + for (idx = 0; idx < size; idx++) { + if (ns_map[idx].in_use && + (0 == strcmp(ns_map[idx].data.name, nspace))) { + return &ns_map[idx].data; + } + } + return NULL; +} + /* Client-side namespace lookup: return the data of the in-use map entry whose + * name equals nspace, or NULL when nspace is NULL. + * Unlike the server variant a miss is not an error: the result of + * _esh_session_map(nspace, 0) is returned instead. */ +static inline ns_map_data_t * _esh_session_map_search_client(const char *nspace) +{ + size_t idx, size = pmix_value_array_get_size(_ns_map_array); + ns_map_t *ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + + if (NULL == nspace) { + return NULL; + } + + for (idx = 0; idx < size; idx++) { + if (ns_map[idx].in_use && + (0 == strcmp(ns_map[idx].data.name, nspace))) { + return &ns_map[idx].data; + } + } + return _esh_session_map(nspace, 0); +} + /* Initialize session slot idx: record jobuid, duplicate _base_path as the session + * path and build the lock file name from it. + * A server creates the directory, the lock file and the first INITIAL_SEGMENT; + * a client opens the lock file read-only and attaches to that segment. + * Returns PMIX_SUCCESS or an error status. */ +static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, int setjobuid) +{ + struct stat st = {0}; + seg_desc_t *seg = NULL; + session_t *s = &(PMIX_VALUE_ARRAY_GET_ITEM(_session_array, session_t, idx)); + pmix_status_t rc = PMIX_SUCCESS; + + if (NULL == s) { + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + + s->jobuid = jobuid; + s->nspace_path = strdup(_base_path); + + /* create a lock file to prevent clients from reading while server is writing to the shared memory. + * This situation is quite often, especially in case of direct modex when clients might ask for data + * simultaneously.*/ + if(0 > asprintf(&s->lockfile, "%s/dstore_sm.lock", s->nspace_path)) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s _lockfile_name: %s", __FILE__, __LINE__, __func__, s->lockfile)); + + if ( _is_server() ) { + if (stat(s->nspace_path, &st) == -1){ + if (0 != mkdir(s->nspace_path, 0770)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + s->lockfd = open(s->lockfile, O_CREAT | O_RDWR | O_EXCL, 0600); + + /* if previous launch was crashed, the lockfile might not be deleted and unlocked, + * so we delete it and create a new one. 
*/ + if (s->lockfd < 0) { + unlink(s->lockfile); + s->lockfd = open(s->lockfile, O_CREAT | O_RDWR, 0600); + if (s->lockfd < 0) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + if (setjobuid > 0){ + if (0 > chown(s->nspace_path, (uid_t) jobuid, (gid_t) -1)){ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + if (0 > chown(s->lockfile, (uid_t) jobuid, (gid_t) -1)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + if (0 > chmod(s->lockfile, S_IRUSR | S_IWGRP | S_IRGRP)) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + seg = _create_new_segment(INITIAL_SEGMENT, m, 0); + if( NULL == seg ){ + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + return rc; + } + } + else { + s->lockfd = open(s->lockfile, O_RDONLY); + if (-1 == s->lockfd) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + seg = _attach_new_segment(INITIAL_SEGMENT, m, 0); + if( NULL == seg ){ + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + return rc; + } + } + + s->sm_seg_first = seg; + s->sm_seg_last = s->sm_seg_first; + return PMIX_SUCCESS; +} + /* Tear down a session that is in use (no-op otherwise): free its chain of segment + * descriptors, close the lock fd and free the lock file / path strings. + * On the server the lock file is unlinked and the session directory deleted too. + * The record is zeroed at the end, which also clears in_use. */ +static inline void _esh_session_release(session_t *s) +{ + if (!s->in_use) { + return; + } + + _delete_sm_desc(s->sm_seg_first); + close(s->lockfd); + + if (NULL != s->lockfile) { + if(_is_server()) { + unlink(s->lockfile); + } + free(s->lockfile); + } + if (NULL != s->nspace_path) { + if(_is_server()) { + _esh_dir_del(s->nspace_path); + } + free(s->nspace_path); + } + memset ((char *) s, 0, sizeof(*s)); +} + /* Module init. + * Server: choose the base directory (PMIX_DSTPATH, then PMIX_SERVER_TMPDIR, then + * TMPDIR / TEMP / TMP, then /tmp), create <dir>/pmix_dstor_<pid> and return. + * Client: take the path from the PMIX_DSTORE_ESH_BASE_PATH environment variable, + * then claim a session slot, map its own namespace and initialize the session. */ +int _esh_init(pmix_info_t info[], size_t ninfo) +{ + pmix_status_t rc; + size_t n; + char *dstor_tmpdir = NULL; + size_t tbl_idx; + struct stat st = {0}; + ns_map_data_t *ns_map = NULL; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); + + _jobuid = getuid(); + _setjobuid = 0; + + if (PMIX_SUCCESS != (rc = _esh_tbls_init())) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + rc = pmix_sm_init(); + if (PMIX_SUCCESS != rc) { + 
PMIX_ERROR_LOG(rc); + goto err_exit; + } + + _set_constants_from_env(); + + if (NULL != _base_path) { + free(_base_path); + _base_path = NULL; + } + + /* find the temp dir */ + if (_is_server()) { + /* scan incoming info for directives */ + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (0 == strcmp(PMIX_USERID, info[n].key)) { + _jobuid = info[n].value.data.uint32; + _setjobuid = 1; + continue; + } + if (0 == strcmp(PMIX_DSTPATH, info[n].key)) { + /* PMIX_DSTPATH is the way for RM to customize the + * place where shared memory files are placed. + * We need this for the following reasons: + * - disk usage: files can be relatively large and the system may + * have a small common temp directory. + * - performance: system may have a fast IO device (i.e. burst buffer) + * for the local usage. + * + * PMIX_DSTPATH has higher priority than PMIX_SERVER_TMPDIR + */ + if( PMIX_STRING != info[n].value.type ){ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + dstor_tmpdir = (char*)info[n].value.data.string; + continue; + } + if (0 == strcmp(PMIX_SERVER_TMPDIR, info[n].key)) { + if( PMIX_STRING != info[n].value.type ){ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (NULL == dstor_tmpdir) { + dstor_tmpdir = (char*)info[n].value.data.string; + } + continue; + } + } + } + + if (NULL == dstor_tmpdir) { + if (NULL == (dstor_tmpdir = getenv("TMPDIR"))) { + if (NULL == (dstor_tmpdir = getenv("TEMP"))) { + if (NULL == (dstor_tmpdir = getenv("TMP"))) { + dstor_tmpdir = "/tmp"; + } + } + } + } + + rc = asprintf(&_base_path, "%s/pmix_dstor_%d", dstor_tmpdir, getpid()); + if ((0 > rc) || (NULL == _base_path)) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + if (0 > stat(_base_path, &st)){ + if (0 > mkdir(_base_path, 0770)) { + rc = PMIX_ERR_NO_PERMISSIONS; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + } + if (_setjobuid > 0) { + if (chown(_base_path, (uid_t) _jobuid, (gid_t) -1) < 0){ + rc = 
PMIX_ERR_NO_PERMISSIONS; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + } + _esh_session_map_search = _esh_session_map_search_server; + return PMIX_SUCCESS; + } + /* for clients */ + else { + if (NULL == (dstor_tmpdir = getenv(PMIX_DSTORE_ESH_BASE_PATH))){ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (NULL == (_base_path = strdup(dstor_tmpdir))) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + _esh_session_map_search = _esh_session_map_search_client; + } + + rc = _esh_session_tbl_add(&tbl_idx); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + ns_map = _esh_session_map(pmix_globals.myid.nspace, tbl_idx); + if (NULL == ns_map) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + if (PMIX_SUCCESS != (rc =_esh_session_init(tbl_idx, ns_map, _jobuid, _setjobuid))) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + return PMIX_SUCCESS; +err_exit: + return rc; +} + /* Module teardown: clean up the sessions, the ns map and the ns tracker, finalize + * the sm layer and free _base_path. + * On the server the _base_path directory is deleted first if lstat() still finds it. + * Returns the _esh_dir_del() status when deletion is attempted, else PMIX_SUCCESS. */ +int _esh_finalize(void) +{ + struct stat st = {0}; + pmix_status_t rc = PMIX_SUCCESS; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); + + _esh_sessions_cleanup(); + _esh_ns_map_cleanup(); + _esh_ns_track_cleanup(); + + pmix_sm_finalize(); + + if (NULL != _base_path){ + if(_is_server()) { + if (lstat(_base_path, &st) >= 0){ + if (PMIX_SUCCESS != (rc = _esh_dir_del(_base_path))) { + PMIX_ERROR_LOG(rc); + } + } + } + free(_base_path); + _base_path = NULL; + } + + return rc; +} + /* Store kv (a byte object taken from kv->value->data.bo) for nspace:rank in the + * shared-memory segments of the namespace, holding the session's write lock. + * Returns PMIX_ERROR when kv is NULL or the namespace is not mapped. */ +int _esh_store(const char *nspace, int rank, pmix_kval_t *kv) +{ + pmix_status_t rc = PMIX_SUCCESS, tmp_rc; + ns_track_elem_t *elem; + pmix_buffer_t pbkt, xfer; + ns_seg_info_t ns_info; + ns_map_data_t *ns_map = NULL; + + if (NULL == kv) { + return PMIX_ERROR; + } + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for %s:%d", + __FILE__, __LINE__, __func__, nspace, rank)); + + if (NULL == (ns_map = _esh_session_map_search(nspace))) { + rc = PMIX_ERROR; + 
PMIX_ERROR_LOG(rc); + return rc; + } + + /* set exclusive lock */ + if (PMIX_SUCCESS != (rc = _ESH_WRLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + PMIX_ERROR_LOG(rc); + return rc; + } + + /* First of all, we go through local track list (list of ns_track_elem_t structures) + * and look for an element for the target namespace. + * If it is there, then shared memory segments for it are created, so we take it. + * Otherwise, create a new element, fill its fields, create corresponding meta + * and data segments for this namespace, add it to the local track list, + * and put this info (ns_seg_info_t) to the initial segment. If initial segment + * if full, then extend it by creating a new one and mark previous one as full. + * All this stuff is done inside _get_track_elem_for_namespace function. + */ + + elem = _get_track_elem_for_namespace(ns_map); + if (NULL == elem) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + /* If a new element was just created, we need to create corresponding meta and + * data segments and update corresponding element's fields. */ + if (NULL == elem->meta_seg || NULL == elem->data_seg) { + memset(&ns_info.ns_map, 0, sizeof(ns_info.ns_map)); + strncpy(ns_info.ns_map.name, ns_map->name, sizeof(ns_info.ns_map.name)-1); + ns_info.ns_map.tbl_idx = ns_map->tbl_idx; + ns_info.num_meta_seg = 1; + ns_info.num_data_seg = 1; + rc = _update_ns_elem(elem, &ns_info); + if (PMIX_SUCCESS != rc || NULL == elem->meta_seg || NULL == elem->data_seg) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + /* zero created shared memory segments for this namespace */ + memset(elem->meta_seg->seg_info.seg_base_addr, 0, _meta_segment_size); + memset(elem->data_seg->seg_info.seg_base_addr, 0, _data_segment_size); + + /* put ns's shared segments info to the global meta segment. 
*/ + rc = _put_ns_info_to_initial_segment(ns_map, &elem->meta_seg->seg_info, &elem->data_seg->seg_info); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + } + + /* Now we know info about meta segment for this namespace. If meta segment + * is not empty, then we look for data for the target rank. If they present, replace it. */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + PMIX_CONSTRUCT(&xfer, pmix_buffer_t); + PMIX_LOAD_BUFFER(&xfer, kv->value->data.bo.bytes, kv->value->data.bo.size); + pmix_buffer_t *pxfer = &xfer; + pmix_bfrop.pack(&pbkt, &pxfer, 1, PMIX_BUFFER); + xfer.base_ptr = NULL; + xfer.bytes_used = 0; + + rc = _store_data_for_rank(elem, rank, &pbkt); + PMIX_DESTRUCT(&xfer); + PMIX_DESTRUCT(&pbkt); + + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + /* unset lock */ + if (PMIX_SUCCESS != (rc = _ESH_UNLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + PMIX_ERROR_LOG(rc); + } + return rc; + +err_exit: + /* unset lock */ + if (PMIX_SUCCESS != (tmp_rc = _ESH_UNLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + PMIX_ERROR_LOG(tmp_rc); + } + return rc; +} + +/* + * See return codes description for the corresponding function + * in pmix_dstore.h + */ +int _esh_fetch(const char *nspace, int rank, const char *key, pmix_value_t **kvs) +{ + ns_seg_info_t *ns_info = NULL; + pmix_status_t rc = PMIX_ERROR, lock_rc; + ns_track_elem_t *elem; + rank_meta_info *rinfo = NULL; + size_t kval_cnt; + seg_desc_t *meta_seg, *data_seg; + uint8_t *addr; + pmix_buffer_t buffer; + pmix_value_t val; + uint32_t nprocs; + int cur_rank; + ns_map_data_t *ns_map = NULL; + bool all_ranks_found = true; + bool key_found = false; + + if (NULL == key) { + PMIX_OUTPUT_VERBOSE((7, pmix_globals.debug_output, + "dstore: Does not support passed parameters")); + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for %s:%d look for key %s", + __FILE__, __LINE__, __func__, nspace, 
rank, key)); + + if (NULL == (ns_map = _esh_session_map_search(nspace))) { + /* This call is issued from the the client. + * client must have the session, otherwise the error is fatal. + */ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + + if (kvs) { + *kvs = NULL; + } + + if (PMIX_RANK_UNDEF == rank) { + nprocs = _get_univ_size(ns_map->name); + cur_rank = -1; + } else { + nprocs = 1; + cur_rank = rank; + } + + /* grab shared lock */ + if (PMIX_SUCCESS != (lock_rc = _ESH_RDLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + /* Something wrong with the lock. The error is fatal */ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(lock_rc); + return lock_rc; + } + + /* First of all, we go through all initial segments and look at their field. + * If it's 1, then generate name of next initial segment incrementing id by one and attach to it. + * We need this step to synchronize initial shared segments with our local track list. + * Then we look for the target namespace in all initial segments. + * If it is found, we get numbers of meta & data segments and + * compare these numbers with the number of trackable meta & data + * segments for this namespace in the local track list. + * If the first number exceeds the last, or the local track list + * doesn't track current namespace yet, then we update it (attach + * to additional segments). + */ + + /* first update local information about initial segments. they can be extended, so then we need to attach to new segments. */ + _update_initial_segment_info(ns_map); + + ns_info = _get_ns_info_from_initial_segment(ns_map); + if (NULL == ns_info) { + /* no data for this namespace is found in the shared memory. */ + PMIX_OUTPUT_VERBOSE((7, pmix_globals.debug_output, + "%s:%d:%s: no data for ns %s is found in the shared memory.", + __FILE__, __LINE__, __func__, ns_map->name)); + rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; + goto done; + } + + /* get ns_track_elem_t object for the target namespace from the local track list. 
*/ + elem = _get_track_elem_for_namespace(ns_map); + if (NULL == elem) { + /* Shouldn't happen! */ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + goto done; + } + + /* need to update tracker: + * attach to shared memory regions for this namespace and store its info locally + * to operate with address and detach/unlink afterwards. */ + rc = _update_ns_elem(elem, ns_info); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto done; + } + + /* Now we have the data from meta segment for this namespace. */ + meta_seg = elem->meta_seg; + data_seg = elem->data_seg; + + while (nprocs--) { + if (PMIX_RANK_UNDEF == rank) { + cur_rank++; + } + /* Get the rank meta info in the shared meta segment. */ + rinfo = _get_rank_meta_info(cur_rank, meta_seg); + if (NULL == rinfo) { + PMIX_OUTPUT_VERBOSE((7, pmix_globals.debug_output, + "%s:%d:%s: no data for this rank is found in the shared memory. rank %d", + __FILE__, __LINE__, __func__, cur_rank)); + all_ranks_found = false; + continue; + } + addr = _get_data_region_by_offset(data_seg, rinfo->offset); + if (NULL == addr) { + /* This means that meta-info is broken - error is fatal */ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + goto done; + } + kval_cnt = rinfo->count; + + rc = PMIX_SUCCESS; + while (0 < kval_cnt) { + /* data is stored in the following format: + * key_val_pair { + * char key[PMIX_MAX_KEYLEN+1]; + * size_t size; + * byte_t byte[size]; // should be loaded to pmix_buffer_t and unpacked. + * }; + * segment_format { + * key_val_pair kv_array[n]; + * EXTENSION slot; + * } + * EXTENSION slot which has key = EXTENSION_SLOT and a size_t value for offset + * to next data address for this process. 
+ */ + if (0 == strncmp((const char *)addr, ESH_REGION_INVALIDATED, strlen(ESH_REGION_INVALIDATED)+1)) { + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %s:%d, skip %s region", + __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_INVALIDATED)); + /*skip it */ + size_t size; + memcpy(&size, addr + strlen(ESH_REGION_INVALIDATED) + 1, sizeof(size_t)); + /* go to next item, updating address */ + addr += KVAL_SIZE(ESH_REGION_INVALIDATED, size); + } else if (0 == strncmp((const char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1)) { + size_t offset; + memcpy(&offset, addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), sizeof(size_t)); + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %s:%d, reached %s with %lu value", + __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_EXTENSION, offset)); + if (0 < offset) { + /* go to next item, updating address */ + addr = _get_data_region_by_offset(data_seg, offset); + if (NULL == addr) { + /* This shouldn't happen - error is fatal */ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + goto done; + } + } else { + /* no more data for this rank */ + PMIX_OUTPUT_VERBOSE((7, pmix_globals.debug_output, + "%s:%d:%s: no more data for this rank is found in the shared memory. rank %d key %s not found", + __FILE__, __LINE__, __func__, cur_rank, key)); + break; + } + } else if (0 == strncmp((const char *)addr, key, strlen(key)+1)) { + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %s:%d, found target key %s", + __FILE__, __LINE__, __func__, nspace, cur_rank, key)); + /* target key is found, get value */ + size_t size; + memcpy(&size, addr + strlen(key) + 1, sizeof(size_t)); + addr += strlen(key) + 1 + sizeof(size_t); + PMIX_CONSTRUCT(&buffer, pmix_buffer_t); + PMIX_LOAD_BUFFER(&buffer, addr, size); + int cnt = 1; + /* unpack value for this key from the buffer. 
*/ + PMIX_VALUE_CONSTRUCT(&val); + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buffer, &val, &cnt, PMIX_VALUE))) { + PMIX_ERROR_LOG(rc); + goto done; + } + if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, &val, PMIX_VALUE))) { + PMIX_ERROR_LOG(rc); + goto done; + } + PMIX_VALUE_DESTRUCT(&val); + buffer.base_ptr = NULL; + buffer.bytes_used = 0; + PMIX_DESTRUCT(&buffer); + key_found = true; + goto done; + } else { + char ckey[PMIX_MAX_KEYLEN+1] = {0}; + strncpy(ckey, (const char *)addr, strlen((const char *)addr)+1); + size_t size; + memcpy(&size, addr + strlen(ckey) + 1, sizeof(size_t)); + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %s:%d, skip key %s look for key %s", __FILE__, __LINE__, __func__, nspace, cur_rank, ckey, key)); + /* go to next item, updating address */ + addr += KVAL_SIZE(ckey, size); + kval_cnt--; + } + } + } + +done: + /* unset lock */ + if (PMIX_SUCCESS != (lock_rc = _ESH_UNLOCK(_ESH_SESSION_lockfd(ns_map->tbl_idx)))) { + PMIX_ERROR_LOG(lock_rc); + } + + if( rc != PMIX_SUCCESS ){ + return rc; + } + + if( key_found ){ + /* the key is found - nothing to do */ + return PMIX_SUCCESS; + } + + if( !all_ranks_found ){ + /* Not all ranks was found - need to request + * all of them and search again + */ + rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; + return rc; + } + rc = PMIX_ERR_NOT_FOUND; + return rc; +} + +static int _esh_patch_env(const char *nspace, char ***env) +{ + pmix_status_t rc = PMIX_SUCCESS; + ns_map_data_t *ns_map = NULL; + + if (NULL == _esh_session_map_search) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + + if (NULL == (ns_map = _esh_session_map_search(nspace))) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + + if ((NULL == _base_path) || (strlen(_base_path) == 0)){ + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + + if(PMIX_SUCCESS != (rc = pmix_setenv(PMIX_DSTORE_ESH_BASE_PATH, + _ESH_SESSION_path(ns_map->tbl_idx), true, env))){ + 
PMIX_ERROR_LOG(rc); + } + return rc; +} + /* Register nspace with the store. + * A PMIX_USERID entry in info overrides the default job uid (_jobuid). + * If a session for that uid already exists, the namespace is mapped onto it; + * otherwise a new session slot is claimed, mapped and initialized. + * Note: any _esh_session_init() failure is reported as PMIX_ERROR. */ +static int _esh_nspace_add(const char *nspace, pmix_info_t info[], size_t ninfo) +{ + pmix_status_t rc; + size_t tbl_idx; + uid_t jobuid = _jobuid; + char setjobuid = _setjobuid; + size_t n; + ns_map_data_t *ns_map = NULL; + + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (0 == strcmp(PMIX_USERID, info[n].key)) { + jobuid = info[n].value.data.uint32; + setjobuid = 1; + continue; + } + } + } + + if (PMIX_SUCCESS != _esh_jobuid_tbl_search(jobuid, &tbl_idx)) { + + rc = _esh_session_tbl_add(&tbl_idx); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + ns_map = _esh_session_map(nspace, tbl_idx); + if (NULL == ns_map) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + + if (PMIX_SUCCESS != (rc =_esh_session_init(tbl_idx, ns_map, jobuid, setjobuid))) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + else { + ns_map = _esh_session_map(nspace, tbl_idx); + if (NULL == ns_map) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + } + + return PMIX_SUCCESS; +} + /* Remove nspace: clean its map entry, destroy its tracker element (if any) and + * release the session when no other in-use map entry refers to the same session + * index. Returns PMIX_ERR_NOT_FOUND when the namespace is not mapped. */ +static int _esh_nspace_del(const char *nspace) +{ + pmix_status_t rc = PMIX_SUCCESS; + size_t map_idx, size; + int in_use = 0; + ns_map_data_t *ns_map_data = NULL; + ns_map_t *ns_map; + session_t *session_tbl = NULL; + ns_track_elem_t *trk = NULL; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s delete nspace `%s`", __FILE__, __LINE__, __func__, nspace)); + + if (NULL == (ns_map_data = _esh_session_map_search(nspace))) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + + size = pmix_value_array_get_size(_ns_map_array); + ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + + for (map_idx = 0; map_idx < size; map_idx++){ + if (ns_map[map_idx].in_use && + (ns_map[map_idx].data.tbl_idx == ns_map_data->tbl_idx)) { + if (0 == strcmp(ns_map[map_idx].data.name, nspace)) { + _esh_session_map_clean(&ns_map[map_idx]); + continue; + } + in_use++; + break; + } + } + + 
if(ns_map_data->track_idx >= 0) { + trk = pmix_value_array_get_item(_ns_track_array, ns_map_data->track_idx); + if((ns_map_data->track_idx + 1) > (int)pmix_value_array_get_size(_ns_track_array)) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + goto exit; + } + PMIX_DESTRUCT(trk); + } + + /* A lot of nspaces may be using same session info + * session record can only be deleted once all references are gone */ + if (!in_use) { + session_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s delete session for jobuid: %d", __FILE__, __LINE__, __func__, session_tbl[ns_map_data->tbl_idx].jobuid)); + _esh_session_release(&session_tbl[ns_map_data->tbl_idx]); + } +exit: + return rc; +} + +static void _set_constants_from_env() +{ + char *str; + int page_size = _pmix_getpagesize(); + + if( NULL != (str = getenv(ESH_ENV_INITIAL_SEG_SIZE)) ) { + _initial_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > _initial_segment_size) { + _initial_segment_size = (size_t)page_size; + } + } + if (0 == _initial_segment_size) { + _initial_segment_size = INITIAL_SEG_SIZE; + } + if( NULL != (str = getenv(ESH_ENV_NS_META_SEG_SIZE)) ) { + _meta_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > _meta_segment_size) { + _meta_segment_size = (size_t)page_size; + } + } + if (0 == _meta_segment_size) { + _meta_segment_size = NS_META_SEG_SIZE; + } + if( NULL != (str = getenv(ESH_ENV_NS_DATA_SEG_SIZE)) ) { + _data_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > _data_segment_size) { + _data_segment_size = (size_t)page_size; + } + } + if (0 == _data_segment_size) { + _data_segment_size = NS_DATA_SEG_SIZE; + } + if (NULL != (str = getenv(ESH_ENV_LINEAR))) { + if (1 == strtoul(str, NULL, 10)) { + _direct_mode = 1; + } + } + + _max_ns_num = (_initial_segment_size - sizeof(size_t) * 2) / sizeof(ns_seg_info_t); + _max_meta_elems = (_meta_segment_size - sizeof(size_t)) / 
sizeof(rank_meta_info); + +} + /* Free a chain of segment descriptors. Every segment is detached; it is unlinked + * first when this process is the one recorded in seg_cpid. */ +static void _delete_sm_desc(seg_desc_t *desc) +{ + seg_desc_t *tmp; + + /* free all global segments */ + while (NULL != desc) { + tmp = desc->next; + /* detach & unlink from current desc */ + if (desc->seg_info.seg_cpid == getpid()) { + pmix_sm_segment_unlink(&desc->seg_info); + } + pmix_sm_segment_detach(&desc->seg_info); + free(desc); + desc = tmp; + } +} + /* Page size from sysconf() when a page-size selector macro is defined, else 65536. */ +static int _pmix_getpagesize(void) +{ +#if defined(_SC_PAGESIZE ) + return sysconf(_SC_PAGESIZE); +#elif defined(_SC_PAGE_SIZE) + return sysconf(_SC_PAGE_SIZE); +#else + return 65536; /* safer to overestimate than under */ +#endif +} + /* Create and zero a new shared-memory segment of the given type. The file name is + * built from the session path of ns_map->tbl_idx, the namespace name (meta/data + * segments only) and id. When _setjobuid is set the file is chown'ed to _jobuid + * and chmod'ed to S_IRUSR | S_IRGRP | S_IWGRP. + * Returns a malloc'ed descriptor, or NULL on failure. */ +static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id) +{ + pmix_status_t rc; + char file_name[PMIX_PATH_MAX]; + size_t size; + seg_desc_t *new_seg = NULL; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: segment type %d, nspace %s, id %u", + __FILE__, __LINE__, __func__, type, ns_map->name, id)); + + switch (type) { + case INITIAL_SEGMENT: + size = _initial_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", + _ESH_SESSION_path(ns_map->tbl_idx), id); + break; + case NS_META_SEGMENT: + size = _meta_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", + _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); + break; + case NS_DATA_SEGMENT: + size = _data_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", + _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); + break; + default: + PMIX_ERROR_LOG(PMIX_ERROR); + return NULL; + } + new_seg = (seg_desc_t*)malloc(sizeof(seg_desc_t)); + if (new_seg) { + new_seg->id = id; + new_seg->next = NULL; + new_seg->type = type; + rc = pmix_sm_segment_create(&new_seg->seg_info, file_name, size); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + memset(new_seg->seg_info.seg_base_addr, 0, size); + + if (_setjobuid > 0){ + rc = PMIX_ERR_NO_PERMISSIONS; + 
if (0 > chown(file_name, (uid_t) _jobuid, (gid_t) -1)){ + PMIX_ERROR_LOG(rc); + goto err_exit; + } + /* set the mode as required */ + if (0 > chmod(file_name, S_IRUSR | S_IRGRP | S_IWGRP )) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + } + } + return new_seg; + +err_exit: + if( NULL != new_seg ){ + free(new_seg); + } + return NULL; +} + /* Attach read-only (PMIX_SM_RONLY) to an existing shared-memory segment. + * The segment name is rebuilt from the session path of ns_map->tbl_idx, the + * namespace name (meta/data segments only) and id; the expected size is taken + * from the per-type size globals. + * Returns a malloc'ed descriptor, or NULL on allocation failure, unknown + * segment type, or attach failure (nothing is leaked in those cases). */ +static seg_desc_t *_attach_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id) +{ + pmix_status_t rc; + seg_desc_t *new_seg = NULL; + new_seg = (seg_desc_t*)malloc(sizeof(seg_desc_t)); + if (NULL == new_seg) { + /* malloc can fail: never dereference a NULL descriptor */ + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + return NULL; + } + new_seg->id = id; + new_seg->next = NULL; + new_seg->type = type; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: segment type %d, nspace %s, id %u", + __FILE__, __LINE__, __func__, type, ns_map->name, id)); + + switch (type) { + case INITIAL_SEGMENT: + new_seg->seg_info.seg_size = _initial_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", + _ESH_SESSION_path(ns_map->tbl_idx), id); + break; + case NS_META_SEGMENT: + new_seg->seg_info.seg_size = _meta_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", + _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); + break; + case NS_DATA_SEGMENT: + new_seg->seg_info.seg_size = _data_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", + _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); + break; + default: + PMIX_ERROR_LOG(PMIX_ERROR); + /* release the descriptor allocated above before bailing out */ + free(new_seg); + return NULL; + } + rc = pmix_sm_segment_attach(&new_seg->seg_info, PMIX_SM_RONLY); + if (PMIX_SUCCESS != rc) { + free(new_seg); + new_seg = NULL; + PMIX_ERROR_LOG(rc); + } + return new_seg; +} + +/* This function synchronizes the content of initial shared segment and the local track list. 
*/ +static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) +{ + seg_desc_t *seg, *tmp = NULL; + size_t i, offs; + ns_map_data_t *ns_map = NULL; + pmix_status_t rc; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); + + if (NULL == (ns_map = _esh_session_map_search(info->ns_map.name))) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + + tmp = ns_elem->meta_seg; + if (NULL != tmp) { + while(NULL != tmp->next) { + tmp = tmp->next; + } + } + + /* synchronize number of meta segments for the target namespace. */ + for (i = ns_elem->num_meta_seg; i < info->num_meta_seg; i++) { + if (_is_server()) { + seg = _create_new_segment(NS_META_SEGMENT, &info->ns_map, i); + if (NULL == seg) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + return rc; + } + } else { + seg = _attach_new_segment(NS_META_SEGMENT, &info->ns_map, i); + if (NULL == seg) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + } + + if (NULL == tmp) { + ns_elem->meta_seg = seg; + } else { + tmp->next = seg; + } + tmp = seg; + ns_elem->num_meta_seg++; + } + + tmp = ns_elem->data_seg; + if (NULL != tmp) { + while(NULL != tmp->next) { + tmp = tmp->next; + } + } + /* synchronize number of data segments for the target namespace. 
*/ + for (i = ns_elem->num_data_seg; i < info->num_data_seg; i++) { + if (_is_server()) { + seg = _create_new_segment(NS_DATA_SEGMENT, &info->ns_map, i); + if (NULL == seg) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + return rc; + } + offs = sizeof(size_t);//shift on offset field itself + memcpy(seg->seg_info.seg_base_addr, &offs, sizeof(size_t)); + } else { + seg = _attach_new_segment(NS_DATA_SEGMENT, &info->ns_map, i); + if (NULL == seg) { + rc = PMIX_ERR_NOT_FOUND; + PMIX_ERROR_LOG(rc); + return rc; + } + } + + if (NULL == tmp) { + ns_elem->data_seg = seg; + } else { + tmp->next = seg; + } + tmp = seg; + ns_elem->num_data_seg++; + } + + return PMIX_SUCCESS; +} + +static seg_desc_t *extend_segment(seg_desc_t *segdesc, const ns_map_data_t *ns_map) +{ + seg_desc_t *tmp, *seg; + + PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); + /* find last segment */ + tmp = segdesc; + while (NULL != tmp->next) { + tmp = tmp->next; + } + /* create another segment, the old one is full. 
*/ + seg = _create_new_segment(segdesc->type, ns_map, tmp->id + 1); + tmp->next = seg; + + return seg; +} + /* Append an ns_seg_info_t record (one meta and one data segment) for ns_map to the + * last initial segment of its session and bump that segment's record count. + * When the last segment already holds _max_ns_num records, a new one is created, + * the old one gets 1 written to its second size_t word ("full"), and the record + * goes into the new, zeroed segment. + * Returns PMIX_SUCCESS, or PMIX_ERROR when the segment cannot be extended. + * NOTE(review): metaseg and dataseg are not used in this function. */ +static int _put_ns_info_to_initial_segment(const ns_map_data_t *ns_map, pmix_sm_seg_t *metaseg, pmix_sm_seg_t *dataseg) +{ + ns_seg_info_t elem; + size_t num_elems; + num_elems = *((size_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr)); + seg_desc_t *last_seg = _ESH_SESSION_sm_seg_last(ns_map->tbl_idx); + pmix_status_t rc; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); + + if (_max_ns_num == num_elems) { + num_elems = 0; + if (NULL == (last_seg = extend_segment(last_seg, ns_map))) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; + } + /* mark previous segment as full */ + size_t full = 1; + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr + sizeof(size_t)), &full, sizeof(size_t)); + _ESH_SESSION_sm_seg_last(ns_map->tbl_idx) = last_seg; + memset(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr, 0, _initial_segment_size); + } + memset(&elem.ns_map, 0, sizeof(elem.ns_map)); + strncpy(elem.ns_map.name, ns_map->name, sizeof(elem.ns_map.name)-1); + elem.ns_map.tbl_idx = ns_map->tbl_idx; + elem.num_meta_seg = 1; + elem.num_data_seg = 1; + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr) + sizeof(size_t) * 2 + num_elems * sizeof(ns_seg_info_t), + &elem, sizeof(ns_seg_info_t)); + num_elems++; + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr), &num_elems, sizeof(size_t)); + return PMIX_SUCCESS; +} + +/* clients should sync local info with information from initial segment regularly */ +static void _update_initial_segment_info(const ns_map_data_t *ns_map) +{ + seg_desc_t *tmp; + tmp = _ESH_SESSION_sm_seg_first(ns_map->tbl_idx); + + PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); + + /* go through all global segments 
*/ + do { + /* check if current segment was marked as full but no more next segment is in the chain */ + if (NULL == tmp->next && 1 == *((size_t*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t)))) { + tmp->next = _attach_new_segment(INITIAL_SEGMENT, ns_map, tmp->id+1); + } + tmp = tmp->next; + } + while (NULL != tmp); +} + +/* This function is used by clients to get ns data from the initial segments so it + * can be added to the tracker list. + * It walks the session's chain of initial segments; each one starts with a size_t + * record count and a second size_t word, followed by the ns_seg_info_t records. + * Returns a pointer to the record whose name equals ns_map->name (the pointer + * refers into the mapped segment), or NULL when no segment holds such a record. */ +static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_map) +{ + pmix_status_t rc; + size_t i; + seg_desc_t *tmp; + ns_seg_info_t *elem, *cur_elem; + elem = NULL; + size_t num_elems; + + PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); + + tmp = _ESH_SESSION_sm_seg_first(ns_map->tbl_idx); + + rc = 1; + /* go through all global segments */ + do { + num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); + for (i = 0; i < num_elems; i++) { + cur_elem = (ns_seg_info_t*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) * 2 + i * sizeof(ns_seg_info_t)); + if (0 == (rc = strncmp(cur_elem->ns_map.name, ns_map->name, strlen(ns_map->name)+1))) { + break; + } + } + if (0 == rc) { + elem = cur_elem; + break; + } + tmp = tmp->next; + } + while (NULL != tmp); + return elem; +} + /* Return the tracker element for ns_map. When track_idx >= 0 the existing element + * is returned (NULL if that index lies beyond the array size). Otherwise a new + * element is constructed at the end of _ns_track_array, named after the namespace, + * and its index is saved in ns_map->track_idx. */ +static ns_track_elem_t *_get_track_elem_for_namespace(ns_map_data_t *ns_map) +{ + ns_track_elem_t *new_elem = NULL; + size_t size = pmix_value_array_get_size(_ns_track_array); + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: nspace %s", + __FILE__, __LINE__, __func__, ns_map->name)); + + /* check if this namespace is already being tracked to avoid duplicating data. */ + if (ns_map->track_idx >= 0) { + if ((ns_map->track_idx + 1) > (int)size) { + return NULL; + } + /* data for this namespace should be already stored in shared memory region. */ + /* so go and just put new data. 
*/ + return pmix_value_array_get_item(_ns_track_array, ns_map->track_idx); + } + + /* create shared memory regions for this namespace and store its info locally + * to operate with address and detach/unlink afterwards. */ + if (NULL == (new_elem = pmix_value_array_get_item(_ns_track_array, size))) { + return NULL; + } + PMIX_CONSTRUCT(new_elem, ns_track_elem_t); + strncpy(new_elem->ns_map.name, ns_map->name, sizeof(new_elem->ns_map.name)-1); + /* save latest track idx to info of nspace */ + ns_map->track_idx = size; + + return new_elem; +} + +static rank_meta_info *_get_rank_meta_info(int rank, seg_desc_t *segdesc) +{ + size_t i; + rank_meta_info *elem = NULL; + seg_desc_t *tmp = segdesc; + size_t num_elems, rel_offset; + int id; + rank_meta_info *cur_elem; + size_t rcount = rank == PMIX_RANK_WILDCARD ? 0 : rank + 1; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); + + if (1 == _direct_mode) { + /* do linear search to find the requested rank inside all meta segments + * for this namespace. */ + /* go through all existing meta segments for this namespace */ + do { + num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); + for (i = 0; i < num_elems; i++) { + cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) + i * sizeof(rank_meta_info)); + if (rcount == cur_elem->rank) { + elem = cur_elem; + break; + } + } + tmp = tmp->next; + } + while (NULL != tmp && NULL == elem); + } else { + /* directly compute index of meta segment (id) and relative offset (rel_offset) + * inside this segment for fast lookup a rank_meta_info object for the requested rank. */ + id = rcount/_max_meta_elems; + rel_offset = (rcount%_max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); + /* go through all existing meta segments for this namespace. + * Stop at id number if it exists. 
*/ + while (NULL != tmp->next && 0 != id) { + tmp = tmp->next; + id--; + } + if (0 == id) { + /* the segment is found, looking for data for the target rank. */ + elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + rel_offset); + if ( 0 == elem->offset) { + /* offset can never be 0, it means that there is no data for this rank yet. */ + elem = NULL; + } + } + } + return elem; +} + +static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) +{ + /* it's claimed that there is still no meta info for this rank stored */ + seg_desc_t *tmp; + size_t num_elems, rel_offset; + int id, count; + rank_meta_info *cur_elem; + size_t rcount; + + if (!ns_info || !rinfo) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + + rcount = rinfo->rank == PMIX_RANK_WILDCARD ? 0 : rinfo->rank + 1; + + PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + "%s:%d:%s: nspace %s, add rank %lu offset %lu count %lu meta info", + __FILE__, __LINE__, __func__, + ns_info->ns_map.name, rinfo->rank, rinfo->offset, rinfo->count)); + + tmp = ns_info->meta_seg; + if (1 == _direct_mode) { + /* get the last meta segment to put new rank_meta_info at the end. 
*/ + while (NULL != tmp->next) { + tmp = tmp->next; + } + num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); + if (_max_meta_elems <= num_elems) { + PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + "%s:%d:%s: extend meta segment for nspace %s", + __FILE__, __LINE__, __func__, ns_info->ns_map.name)); + /* extend meta segment, so create a new one */ + tmp = extend_segment(tmp, &ns_info->ns_map); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + ns_info->num_meta_seg++; + memset(tmp->seg_info.seg_base_addr, 0, sizeof(rank_meta_info)); + /* update number of meta segments for namespace in initial_segment */ + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + if (NULL == elem) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + if (ns_info->num_meta_seg != elem->num_meta_seg) { + elem->num_meta_seg = ns_info->num_meta_seg; + } + num_elems = 0; + } + cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) + num_elems * sizeof(rank_meta_info)); + memcpy(cur_elem, rinfo, sizeof(rank_meta_info)); + num_elems++; + memcpy(tmp->seg_info.seg_base_addr, &num_elems, sizeof(size_t)); + } else { + /* directly compute index of meta segment (id) and relative offset (rel_offset) + * inside this segment for fast lookup a rank_meta_info object for the requested rank. */ + + id = rcount/_max_meta_elems; + rel_offset = (rcount % _max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); + count = id; + /* go through all existing meta segments for this namespace. + * Stop at id number if it exists. */ + while (NULL != tmp->next && 0 != count) { + tmp = tmp->next; + count--; + } + /* if there is no segment with this id, then create all missing segments till the id number. 
*/ + if ((int)ns_info->num_meta_seg < (id+1)) { + while ((int)ns_info->num_meta_seg != (id+1)) { + /* extend meta segment, so create a new one */ + tmp = extend_segment(tmp, &ns_info->ns_map); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + memset(tmp->seg_info.seg_base_addr, 0, sizeof(rank_meta_info)); + ns_info->num_meta_seg++; + } + /* update number of meta segments for namespace in initial_segment */ + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + if (NULL == elem) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + if (ns_info->num_meta_seg != elem->num_meta_seg) { + elem->num_meta_seg = ns_info->num_meta_seg; + } + } + /* store rank_meta_info object by rel_offset. */ + cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + rel_offset); + memcpy(cur_elem, rinfo, sizeof(rank_meta_info)); + } + return PMIX_SUCCESS; +} + +static uint8_t *_get_data_region_by_offset(seg_desc_t *segdesc, size_t offset) +{ + seg_desc_t *tmp = segdesc; + size_t rel_offset = offset; + uint8_t *dataaddr = NULL; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); + + /* go through all existing data segments for this namespace */ + do { + if (rel_offset >= _data_segment_size) { + rel_offset -= _data_segment_size; + } else { + dataaddr = tmp->seg_info.seg_base_addr + rel_offset; + } + tmp = tmp->next; + } while (NULL != tmp && NULL == dataaddr); + + return dataaddr; +} + +static size_t get_free_offset(seg_desc_t *data_seg) +{ + size_t offset; + seg_desc_t *tmp; + int id = 0; + tmp = data_seg; + /* first find the last data segment */ + while (NULL != tmp->next) { + tmp = tmp->next; + id++; + } + offset = *((size_t*)(tmp->seg_info.seg_base_addr)); + if (0 == offset) { + /* this is the first created data segment, the first 8 bytes are used to place the free offset value itself */ + offset = sizeof(size_t); + } + return (id * _data_segment_size + offset); +} + 
+static int put_empty_ext_slot(seg_desc_t *dataseg) +{ + size_t global_offset, rel_offset, data_ended, sz, val; + uint8_t *addr; + global_offset = get_free_offset(dataseg); + rel_offset = global_offset % _data_segment_size; + if (rel_offset + EXT_SLOT_SIZE(ESH_REGION_EXTENSION) > _data_segment_size) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + addr = _get_data_region_by_offset(dataseg, global_offset); + strncpy((char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1); + val = 0; + sz = sizeof(size_t); + memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1, &sz, sz); + memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), &val, sz); + + /* update offset at the beginning of current segment */ + data_ended = rel_offset + EXT_SLOT_SIZE(ESH_REGION_EXTENSION); + addr = (uint8_t*)(addr - rel_offset); + memcpy(addr, &data_ended, sizeof(size_t)); + return PMIX_SUCCESS; +} + +static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, char *key, void *buffer, size_t size) +{ + size_t offset; + seg_desc_t *tmp; + int id = 0; + size_t global_offset, data_ended; + uint8_t *addr; + size_t sz; + + PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + "%s:%d:%s: key %s", + __FILE__, __LINE__, __func__, key)); + + tmp = dataseg; + while (NULL != tmp->next) { + tmp = tmp->next; + id++; + } + global_offset = get_free_offset(dataseg); + offset = global_offset % _data_segment_size; + + /* We should provide additional space at the end of segment to place EXTENSION_SLOT to have an ability to enlarge data for this rank.*/ + if (sizeof(size_t) + KVAL_SIZE(key, size) + EXT_SLOT_SIZE(key) > _data_segment_size) { + /* this is an error case: segment is so small that cannot place evem a single key-value pair. + * warn a user about it and fail. */ + offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. 
*/ + pmix_output(0, "PLEASE set NS_DATA_SEG_SIZE to value which is larger when %lu.", + sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + size + EXT_SLOT_SIZE(key)); + return offset; + } + if (offset + KVAL_SIZE(key, size) + EXT_SLOT_SIZE(key) > _data_segment_size) { + id++; + /* create a new data segment. */ + tmp = extend_segment(tmp, &ns_info->ns_map); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERROR); + offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. */ + return offset; + } + ns_info->num_data_seg++; + /* update_ns_info_in_initial_segment */ + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + if (NULL == elem) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + elem->num_data_seg++; + + offset = sizeof(size_t); + } + global_offset = offset + id * _data_segment_size; + addr = (uint8_t*)(tmp->seg_info.seg_base_addr)+offset; + strncpy((char *)addr, key, strlen(key)+1); + sz = size; + memcpy(addr + strlen(key) + 1, &sz, sizeof(size_t)); + memcpy(addr + strlen(key) + 1 + sizeof(size_t), buffer, size); + + /* update offset at the beginning of current segment */ + data_ended = offset + KVAL_SIZE(key, size); + addr = (uint8_t*)(tmp->seg_info.seg_base_addr); + memcpy(addr, &data_ended, sizeof(size_t)); + PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + "%s:%d:%s: key %s, rel start offset %lu, rel end offset %lu, abs shift %lu size %lu", + __FILE__, __LINE__, __func__, key, offset, data_ended, id * _data_segment_size, size)); + return global_offset; +} + +static int pmix_sm_store(ns_track_elem_t *ns_info, int rank, pmix_kval_t *kval, rank_meta_info **rinfo, int data_exist) +{ + size_t offset, size, kval_cnt; + pmix_buffer_t *buffer; + pmix_status_t rc; + seg_desc_t *datadesc; + uint8_t *addr; + + PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, + "%s:%d:%s: for rank %d, replace flag %d", + __FILE__, __LINE__, __func__, rank, data_exist)); + + datadesc = ns_info->data_seg; + /* pack 
value to the buffer */ + buffer = PMIX_NEW(pmix_buffer_t); + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(buffer, kval->value, 1, PMIX_VALUE))) { + PMIX_RELEASE(buffer); + PMIX_ERROR_LOG(rc); + return rc; + } + size = buffer->bytes_used; + + if (0 == data_exist) { + /* there is no data blob for this rank yet, so add it. */ + size_t free_offset; + free_offset = get_free_offset(datadesc); + offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer->base_ptr, size); + if (0 == offset) { + /* this is an error */ + PMIX_RELEASE(buffer); + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + /* if it's the first time when we put data for this rank, then *rinfo == NULL, + * and even if segment was extended, and data was put into the next segment, + * we don't need to extension slot at the end of previous segment. + * If we try, we might overwrite other segments memory, + * because previous segment is already full. */ + if (free_offset != offset && NULL != *rinfo) { + /* here we compare previous free offset with the offset where we just put data. + * It should be equal in the normal case. 
It it's not true, then it means that + * segment was extended, and we put data to the next segment, so we now need to + * put extension slot at the end of previous segment with a "reference" to a new_offset */ + size_t sz = sizeof(size_t); + addr = _get_data_region_by_offset(datadesc, free_offset); + strncpy((char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1); + memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1, &sz, sizeof(size_t)); + memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), &offset, sizeof(size_t)); + } + if (NULL == *rinfo) { + *rinfo = (rank_meta_info*)malloc(sizeof(rank_meta_info)); + (*rinfo)->rank = rank; + (*rinfo)->offset = offset; + (*rinfo)->count = 0; + } + (*rinfo)->count++; + } else if (NULL != *rinfo) { + /* there is data blob for this rank */ + addr = _get_data_region_by_offset(datadesc, (*rinfo)->offset); + if (NULL == addr) { + PMIX_RELEASE(buffer); + PMIX_ERROR_LOG(PMIX_ERROR); + return rc; + } + /* go through previous data region and find key matches. + * If one is found, then mark this kval as invalidated. + * Then put a new empty offset to the next extension slot, + * and add new kval by this offset. + * no need to update meta info, it's still the same. */ + kval_cnt = (*rinfo)->count; + int add_to_the_end = 1; + while (0 < kval_cnt) { + /* data is stored in the following format: + * key[PMIX_MAX_KEYLEN+1] + * size_t size + * byte buffer containing pmix_value, should be loaded to pmix_buffer_t and unpacked. + * next kval pair + * ..... + * extension slot which has key = EXTENSION_SLOT and a size_t value for offset to next data address for this process. 
+ */ + if (0 == strncmp((const char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1)) { + memcpy(&offset, addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), sizeof(size_t)); + if (0 < offset) { + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %d, replace flag %d %s is filled with %lu value", + __FILE__, __LINE__, __func__, rank, data_exist, ESH_REGION_EXTENSION, offset)); + /* go to next item, updating address */ + addr = _get_data_region_by_offset(datadesc, offset); + if (NULL == addr) { + PMIX_RELEASE(buffer); + PMIX_ERROR_LOG(PMIX_ERROR); + return rc; + } + } else { + /* should not be, we should be out of cycle when this happens */ + } + } else if (0 == strncmp((const char *)addr, kval->key, strlen(kval->key)+1)) { + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %d, replace flag %d found target key %s", + __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); + /* target key is found, compare value sizes */ + size_t cur_size; + memcpy(&cur_size, addr + strlen(kval->key) + 1, sizeof(size_t)); + if (cur_size != size) { + //if (1) { /* if we want to test replacing values for existing keys. */ + /* invalidate current value and store another one at the end of data region. */ + strncpy((char *)addr, ESH_REGION_INVALIDATED, strlen(ESH_REGION_INVALIDATED)+1); + /* decrementing count, it will be incremented back when we add a new value for this key at the end of region. */ + (*rinfo)->count--; + kval_cnt--; + /* go to next item, updating address */ + addr += KVAL_SIZE(ESH_REGION_INVALIDATED, cur_size); + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %d, replace flag %d mark key %s regions as invalidated. 
put new data at the end.", + __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); + } else { + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %d, replace flag %d replace data for key %s type %d in place", + __FILE__, __LINE__, __func__, rank, data_exist, kval->key, kval->value->type)); + /* replace old data with new one. */ + addr += strlen(kval->key) + 1; + memcpy(addr, &size, sizeof(size_t)); + addr += sizeof(size_t); + memset(addr, 0, cur_size); + memcpy(addr, buffer->base_ptr, size); + addr += cur_size; + add_to_the_end = 0; + break; + } + } else { + char ckey[PMIX_MAX_KEYLEN+1] = {0}; + strncpy(ckey, (const char *)addr, strlen(addr)+1); + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %d, replace flag %d skip %s key, look for %s key", + __FILE__, __LINE__, __func__, rank, data_exist, ckey, kval->key)); + /* Skip it: key is "INVALIDATED" or key is valid but different from target one. */ + if (0 != strncmp(ESH_REGION_INVALIDATED, ckey, strlen(ckey)+1)) { + /* count only valid items */ + kval_cnt--; + } + size_t size; + memcpy(&size, addr + strlen(ckey) + 1, sizeof(size_t)); + /* go to next item, updating address */ + addr += KVAL_SIZE(ckey, size); + } + } + if (1 == add_to_the_end) { + /* if we get here, it means that we want to add a new item for the target rank, or + * we mark existing item with the same key as "invalidated" and want to add new item + * for the same key. 
*/ + (*rinfo)->count++; + size_t free_offset; + free_offset = get_free_offset(datadesc); + /* add to the end */ + offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer->base_ptr, size); + if (0 == offset) { + PMIX_RELEASE(buffer); + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + /* we just reached the end of data for the target rank, and there can be two cases: + * (1) - we are in the middle of data segment; data for this rank is separated from + * data for different ranks, and that's why next element is EXTENSION_SLOT. + * We put new data to the end of data region and just update EXTENSION_SLOT value by new offset. + */ + if (0 == strncmp((const char *)addr, ESH_REGION_EXTENSION, strlen(ESH_REGION_EXTENSION)+1)) { + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %d, replace flag %d %s should be filled with offset %lu value", + __FILE__, __LINE__, __func__, rank, data_exist, ESH_REGION_EXTENSION, offset)); + memcpy(addr + strlen(ESH_REGION_EXTENSION) + 1 + sizeof(size_t), &offset, sizeof(size_t)); + } else { + /* (2) - we point to the first free offset, no more data is stored further in this segment. + * There is no EXTENSION_SLOT by this addr since we continue pushing data for the same rank, + * and there is no need to split it. + * But it's possible that we reached the end of current data region and just jumped to the new region + * to put new data, in that case free_offset != offset and we must put EXTENSION_SLOT by the current addr + * forcibly and store new offset in its value. 
*/ + if (free_offset != offset) { + /* segment was extended, need to put extension slot by free_offset indicating new_offset */ + size_t sz = sizeof(size_t); + size_t length = strlen(ESH_REGION_EXTENSION); + strncpy((char *)addr, ESH_REGION_EXTENSION, length + 1); + memcpy(addr + length + 1, &sz, sz); + memcpy(addr + length + 1 + sizeof(size_t), &offset, sz); + } + } + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %d, replace flag %d item not found ext slot empty, put key %s to the end", + __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); + } + } + buffer->base_ptr = NULL; + buffer->bytes_used = 0; + PMIX_RELEASE(buffer); + return rc; +} + +static int _store_data_for_rank(ns_track_elem_t *ns_info, int rank, pmix_buffer_t *buf) +{ + pmix_status_t rc; + int32_t cnt; + + pmix_buffer_t *bptr; + pmix_kval_t *kp; + seg_desc_t *metadesc, *datadesc; + + rank_meta_info *rinfo = NULL; + size_t num_elems, free_offset, new_free_offset; + int data_exist; + + PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + "%s:%d:%s: for rank %d", __FILE__, __LINE__, __func__, rank)); + + metadesc = ns_info->meta_seg; + datadesc = ns_info->data_seg; + + if (NULL == datadesc || NULL == metadesc) { + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + + num_elems = *((size_t*)(metadesc->seg_info.seg_base_addr)); + data_exist = 0; + /* when we don't use linear search (_direct_mode ==0 ) we don't use num_elems field, + * so anyway try to get rank_meta_info first. */ + if (0 < num_elems || 0 == _direct_mode) { + /* go through all elements in meta segment and look for target rank. */ + rinfo = _get_rank_meta_info(rank, metadesc); + if (NULL != rinfo) { + data_exist = 1; + } + } + /* incoming buffer may contain several inner buffers for different scopes, + * so unpack these buffers, and then unpack kvals from each modex buffer, + * storing them in the shared memory dstore. 
+ */ + cnt = 1; + free_offset = get_free_offset(datadesc); + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) { + cnt = 1; + kp = PMIX_NEW(pmix_kval_t); + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, kp, &cnt, PMIX_KVAL))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: unpacked key %s", kp->key); + if (PMIX_SUCCESS != (rc = pmix_sm_store(ns_info, rank, kp, &rinfo, data_exist))) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain + cnt = 1; + kp = PMIX_NEW(pmix_kval_t); + } + cnt = 1; + PMIX_RELEASE(kp); + PMIX_RELEASE(bptr); // free's the data region + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + break; + } + } + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + /* TODO: should we error-exit here? */ + } else { + rc = PMIX_SUCCESS; + } + + /* Check if new data was put at the end of data segment. + * It's possible that old data just was replaced with new one, + * in that case we don't reserve space for EXTENSION_SLOT, it's + * already reserved. + * */ + new_free_offset = get_free_offset(datadesc); + if (new_free_offset != free_offset) { + /* Reserve space for EXTENSION_SLOT at the end of data blob. + * We need it to split data for one rank from data for different + * ranks and to allow extending data further. + * We also put EXTENSION_SLOT at the end of each data segment, and + * its value points to the beginning of next data segment. 
+ * */ + rc = put_empty_ext_slot(ns_info->data_seg); + if (PMIX_SUCCESS != rc) { + if (NULL != rinfo) { + free(rinfo); + } + PMIX_ERROR_LOG(rc); + return rc; + } + } + + /* if this is the first data posted for this rank, then + * update meta info for it */ + if (0 == data_exist) { + set_rank_meta_info(ns_info, rinfo); + if (NULL != rinfo) { + free(rinfo); + } + } + + return rc; +} + +static inline uint32_t _get_univ_size(const char *nspace) +{ + pmix_value_t *val = NULL; + uint32_t nprocs = 0; + pmix_nspace_t *ns, *nptr; + + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(nspace, ns->nspace)) { + nptr = ns; + break; + } + } + + if (nptr && (PMIX_SUCCESS == pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, PMIX_UNIV_SIZE, &val))) { + if (val->type == PMIX_UINT32) { + nprocs = val->data.uint32; + } + PMIX_VALUE_RELEASE(val); + } + + return nprocs; +} diff --git a/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.h b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.h new file mode 100644 index 00000000000..47ad97103c0 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/dstore/pmix_esh.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_DSTORE_ESH_H +#define PMIX_DSTORE_ESH_H + +#include +#include "src/class/pmix_value_array.h" +#include "pmix_dstore.h" +#include "src/sm/pmix_sm.h" + +BEGIN_C_DECLS + +#define INITIAL_SEG_SIZE 4096 +#define NS_META_SEG_SIZE (1<<22) +#define NS_DATA_SEG_SIZE (1<<22) + +#define PMIX_DSTORE_ESH_BASE_PATH "PMIX_DSTORE_ESH_BASE_PATH" + +/* this structs are used to store information about + * shared segments addresses locally at each process, + * so they are common for different types of segments + * and don't have a specific content (namespace's info, + * rank's meta info, ranks's data). 
*/ + +typedef enum { + INITIAL_SEGMENT, + NS_META_SEGMENT, + NS_DATA_SEGMENT +} segment_type; + +typedef struct seg_desc_t seg_desc_t; +struct seg_desc_t { + segment_type type; + pmix_sm_seg_t seg_info; + uint32_t id; + seg_desc_t *next; +}; + +typedef struct ns_map_data_s ns_map_data_t; +typedef struct session_s session_t; +typedef struct ns_map_s ns_map_t; + +struct session_s { + int in_use; + uid_t jobuid; + char *nspace_path; + char *lockfile; + int lockfd; + seg_desc_t *sm_seg_first; + seg_desc_t *sm_seg_last; +}; + +struct ns_map_data_s { + char name[PMIX_MAX_NSLEN+1]; + size_t tbl_idx; + int track_idx; +}; + +struct ns_map_s { + int in_use; + ns_map_data_t data; +}; + +/* initial segment format: + * size_t num_elems; + * size_t full; //indicate to client that it needs to attach to the next segment + * ns_seg_info_t ns_seg_info[max_ns_num]; + */ + +typedef struct { + ns_map_data_t ns_map; + size_t num_meta_seg;/* read by clients to attach to this number of segments. */ + size_t num_data_seg; +} ns_seg_info_t; + +/* meta segment format: + * size_t num_elems; + * rank_meta_info meta_info[max_meta_elems]; + */ + +typedef struct { + size_t rank; + size_t offset; + size_t count; +} rank_meta_info; + +typedef struct { + pmix_value_array_t super; + ns_map_data_t ns_map; + size_t num_meta_seg; + size_t num_data_seg; + seg_desc_t *meta_seg; + seg_desc_t *data_seg; +} ns_track_elem_t; + +extern pmix_dstore_base_module_t pmix_dstore_esh_module; + +END_C_DECLS + +#endif /* PMIX_DSTORE_ESH_H */ diff --git a/opal/mca/pmix/pmix112/pmix/src/include/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/include/Makefile.am index 37552037246..d68c5d7cccd 100644 --- a/opal/mca/pmix/pmix112/pmix/src/include/Makefile.am +++ b/opal/mca/pmix/pmix112/pmix/src/include/Makefile.am @@ -23,7 +23,8 @@ # src/Makefile.am headers += \ - src/include/pmix_globals.h + src/include/pmix_globals.h \ + src/include/pmix_jobdata.h sources += \ src/include/pmix_globals.c diff --git 
a/opal/mca/pmix/pmix112/pmix/src/include/pmix_config.h b/opal/mca/pmix/pmix112/pmix/src/include/pmix_config.h index e614a332a3d..b612a55f7ee 100644 --- a/opal/mca/pmix/pmix112/pmix/src/include/pmix_config.h +++ b/opal/mca/pmix/pmix112/pmix/src/include/pmix_config.h @@ -13,6 +13,5 @@ #define PMIX_INCLUDE_CONFIG_H #include -#include #endif /* PMIX_INCLUDE_CONFIG_H */ diff --git a/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.c index db040c9aef0..c00cd152e2d 100644 --- a/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.c @@ -190,3 +190,18 @@ static void scdes(pmix_shift_caddy_t *p) PMIX_CLASS_INSTANCE(pmix_shift_caddy_t, pmix_object_t, scon, scdes); + +static void jdcon(pmix_job_data_caddy_t *p) +{ + p->nsptr = NULL; + p->job_data = NULL; + p->dstore_fn = NULL; + p->hstore_fn = NULL; +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + p->bufs = NULL; +#endif +} + +PMIX_CLASS_INSTANCE(pmix_job_data_caddy_t, + pmix_object_t, + jdcon, NULL); diff --git a/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.h index 3f17a8fce6d..6fc26f7c4f9 100644 --- a/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix112/pmix/src/include/pmix_globals.h @@ -35,6 +35,7 @@ #include "src/buffer_ops/types.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" +#include "src/class/pmix_value_array.h" BEGIN_C_DECLS @@ -271,6 +272,24 @@ PMIX_CLASS_DECLARATION(pmix_server_trkr_t); } pmix_shift_caddy_t; PMIX_CLASS_DECLARATION(pmix_shift_caddy_t); +typedef int (*pmix_store_dstor_cbfunc_t)(const char *nsname, + int rank, pmix_kval_t *kv); +typedef int (*pmix_store_hash_cbfunc_t)(pmix_hash_table_t *table, + int rank, pmix_kval_t *kv); + +typedef struct { + pmix_object_t super; + pmix_nspace_t *nsptr; + pmix_buffer_t *job_data; + pmix_store_dstor_cbfunc_t dstore_fn; 
+ pmix_store_hash_cbfunc_t hstore_fn; +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + /* array of buffers per rank */ + pmix_value_array_t *bufs; +#endif +} pmix_job_data_caddy_t; +PMIX_CLASS_DECLARATION(pmix_job_data_caddy_t); + #define PMIX_THREADSHIFT(r, c) \ do { \ (r)->active = true; \ diff --git a/opal/mca/pmix/pmix112/pmix/src/include/pmix_jobdata.h b/opal/mca/pmix/pmix112/pmix/src/include/pmix_jobdata.h new file mode 100644 index 00000000000..f8a61a656ff --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/include/pmix_jobdata.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_JOBDATA_H +#define PMIX_JOBDATA_H + +#include + +#include "src/buffer_ops/buffer_ops.h" +#include "src/class/pmix_hash_table.h" + +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +pmix_status_t pmix_job_data_dstore_store(const char *nspace, pmix_buffer_t *bptr); +#endif +pmix_status_t pmix_job_data_htable_store(const char *nspace, pmix_buffer_t *bptr); + +#endif // PMIX_JOBDATA_H diff --git a/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sasl.c b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sasl.c index 04a858d8adf..e2915187181 100644 --- a/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sasl.c +++ b/opal/mca/pmix/pmix112/pmix/src/sec/pmix_sasl.c @@ -1,98 +1,98 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2015 Artem Y. Polyakov . - * All rights reserved. - * Copyright (c) 2016 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - -#include -#include -#include - -#include "src/include/pmix_globals.h" - -#ifdef HAVE_STRING_H -#include -#endif -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_SYS_UN_H -#include -#endif -#ifdef HAVE_SYS_UIO_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#include PMIX_EVENT_HEADER -#include - -#include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" -#include "src/util/argv.h" -#include "src/util/error.h" -#include "src/util/output.h" -#include "src/util/pmix_environ.h" -#include "src/util/progress_threads.h" -#include "src/usock/usock.h" - -#include "pmix_sasl.h" - -static int sasl_init(void); -static void sasl_finalize(void); -static int client_handshake(int sd); -static int server_handshake(pmix_peer_t *peer); - -pmix_sec_base_module_t pmix_sasl_module = { - "sasl", - sasl_init, - sasl_finalize, - NULL, - client_handshake, - NULL, - server_handshake -}; - - -static int sasl_init(void) -{ - pmix_output_verbose(2, pmix_globals.debug_output, - "sec: sasl init"); - - return PMIX_ERR_NOT_SUPPORTED; -} - -static void sasl_finalize(void) -{ - pmix_output_verbose(2, pmix_globals.debug_output, - "sec: sasl finalize"); -} - -static int client_handshake(int sd) -{ - return PMIX_ERR_NOT_SUPPORTED; -} - - -static int server_handshake(pmix_peer_t *peer) -{ - return PMIX_ERR_NOT_IMPLEMENTED; -} +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2015 Artem Y. Polyakov . + * All rights reserved. + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#include +#include + +#include "src/include/pmix_globals.h" + +#ifdef HAVE_STRING_H +#include +#endif +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_SYS_UN_H +#include +#endif +#ifdef HAVE_SYS_UIO_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#include PMIX_EVENT_HEADER +#include + +#include "src/class/pmix_list.h" +#include "src/buffer_ops/buffer_ops.h" +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/output.h" +#include "src/util/pmix_environ.h" +#include "src/util/progress_threads.h" +#include "src/usock/usock.h" + +#include "pmix_sasl.h" + +static int sasl_init(void); +static void sasl_finalize(void); +static int client_handshake(int sd); +static int server_handshake(pmix_peer_t *peer); + +pmix_sec_base_module_t pmix_sasl_module = { + "sasl", + sasl_init, + sasl_finalize, + NULL, + client_handshake, + NULL, + server_handshake +}; + + +static int sasl_init(void) +{ + pmix_output_verbose(2, pmix_globals.debug_output, + "sec: sasl init"); + + return PMIX_ERR_NOT_SUPPORTED; +} + +static void sasl_finalize(void) +{ + pmix_output_verbose(2, pmix_globals.debug_output, + "sec: sasl finalize"); +} + +static int client_handshake(int sd) +{ + return PMIX_ERR_NOT_SUPPORTED; +} + + +static int server_handshake(pmix_peer_t *peer) +{ + return PMIX_ERR_NOT_IMPLEMENTED; +} diff --git a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c index a81df696e2d..e2a3c198120 100644 --- a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c @@ -24,6 +24,7 @@ #include #include #include "src/include/pmix_globals.h" +#include "src/include/pmix_jobdata.h" #ifdef HAVE_STRING_H #include @@ -55,6 +56,9 @@ #include "src/util/progress_threads.h" #include "src/usock/usock.h" 
#include "src/sec/pmix_sec.h" +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +#include "src/dstore/pmix_dstore.h" +#endif /* PMIX_ENABLE_DSTORE */ #include "pmix_server_ops.h" @@ -70,6 +74,7 @@ static char *mytmpdir = NULL; // local functions for connection support static void server_message_handler(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata); +static inline int _my_client(const char *nspace, int rank); typedef struct { pmix_object_t super; @@ -285,6 +290,12 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, return rc; } +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) { + return rc; + } +#endif /* PMIX_ENABLE_DSTORE */ + /* and the usock system */ pmix_usock_init(NULL); @@ -400,6 +411,10 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) pmix_usock_finalize(); +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + pmix_dstore_finalize(); +#endif /* PMIX_ENABLE_DSTORE */ + /* cleanup the rendezvous file */ unlink(myaddress.sun_path); @@ -422,9 +437,14 @@ static void _register_nspace(int sd, short args, void *cbdata) pmix_info_t *iptr; pmix_value_t val; char *msg; +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + pmix_buffer_t *jobdata = PMIX_NEW(pmix_buffer_t); + char *nspace = NULL; + int32_t cnt; +#endif pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:server _register_nspace"); + "pmix:server _register_nspace %s", cd->proc.nspace); /* see if we already have this nspace */ nptr = NULL; @@ -560,6 +580,27 @@ static void _register_nspace(int sd, short args, void *cbdata) } /* do not destruct the kv object - no memory leak will result */ +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS != (rc = pmix_dstore_nspace_add(cd->proc.nspace, cd->info, cd->ninfo))) { + PMIX_ERROR_LOG(rc); + goto release; + } + pmix_bfrop.copy_payload(jobdata, 
&nptr->server->job_info); + pmix_bfrop.copy_payload(jobdata, &pmix_server_globals.gdata); + + /* unpack the nspace - we don't really need it, but have to + * unpack it to maintain sequence */ + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(jobdata, &nspace, &cnt, PMIX_STRING))) { + PMIX_ERROR_LOG(rc); + goto release; + } + if (PMIX_SUCCESS != (rc = pmix_job_data_dstore_store(cd->proc.nspace, jobdata))) { + PMIX_ERROR_LOG(rc); + goto release; + } +#endif + release: if (NULL != nodes) { pmix_argv_free(nodes); @@ -570,6 +611,14 @@ static void _register_nspace(int sd, short args, void *cbdata) if (NULL != cd->opcbfunc) { cd->opcbfunc(rc, cd->cbdata); } +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (NULL != nspace) { + free(nspace); + } + if (NULL != jobdata) { + PMIX_RELEASE(jobdata); + } +#endif PMIX_RELEASE(cd); } @@ -603,6 +652,7 @@ static void _deregister_nspace(int sd, short args, void *cbdata) pmix_nspace_t *nptr; int i; pmix_peer_t *peer; + pmix_status_t rc = PMIX_SUCCESS; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _deregister_nspace %s", @@ -633,6 +683,15 @@ static void _deregister_nspace(int sd, short args, void *cbdata) } } +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (0 > (rc = pmix_dstore_nspace_del(cd->proc.nspace))) { + PMIX_ERROR_LOG(rc); + } +#endif + + if (NULL != cd->opcbfunc) { + cd->opcbfunc(rc, cd->cbdata); + } PMIX_RELEASE(cd); } @@ -944,6 +1003,7 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc) PMIX_EXPORT pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char ***env) { char rankstr[128]; + pmix_status_t rc = PMIX_SUCCESS; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server setup_fork for nspace %s rank %d", @@ -959,7 +1019,15 @@ PMIX_EXPORT pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char * /* pass our active security mode */ pmix_setenv("PMIX_SECURITY_MODE", security_mode, true, env); - return PMIX_SUCCESS; 
+#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + /* pass dstore path to files */ + if (PMIX_SUCCESS != (rc = pmix_dstore_patch_env(proc->nspace, env))) { + PMIX_ERROR_LOG(rc); + return rc; + } +#endif + + return rc; } /*************************************************************************************************** @@ -1989,15 +2057,24 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) /* there may be multiple blobs for this rank, each from a different scope */ cnt = 1; while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, &bpscope, &cnt, PMIX_BUFFER))) { + /* don't store blobs to the sm dstore from local clients */ + if (_my_client(nptr->nspace, rank)) { + continue; + } pmix_kval_t *kp = PMIX_NEW(pmix_kval_t); kp->key = strdup("modex"); PMIX_VALUE_CREATE(kp->value, 1); kp->value->type = PMIX_BYTE_OBJECT; PMIX_UNLOAD_BUFFER(bpscope, kp->value->data.bo.bytes, kp->value->data.bo.size); /* store it in the appropriate hash */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->server->remote, rank, kp))) { + if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->server->remote, rank, kp))) { + PMIX_ERROR_LOG(rc); + } +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS != (rc = pmix_dstore_store(nptr->nspace, rank, kp))) { PMIX_ERROR_LOG(rc); } +#endif /* PMIX_ENABLE_DSTORE */ PMIX_RELEASE(kp); // maintain acctg } // while bpscope if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { @@ -2320,8 +2397,16 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_REQ_CMD == cmd) { reply = PMIX_NEW(pmix_buffer_t); +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + char *msg = peer->info->nptr->nspace; + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &msg, 1, PMIX_STRING))) { + PMIX_ERROR_LOG(rc); + return rc; + } +#else pmix_bfrop.copy_payload(reply, &(peer->info->nptr->server->job_info)); pmix_bfrop.copy_payload(reply, &(pmix_server_globals.gdata)); +#endif PMIX_SERVER_QUEUE_REPLY(peer, 
tag, reply); return PMIX_SUCCESS; } @@ -2472,3 +2557,21 @@ static void server_message_handler(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr PMIX_SERVER_QUEUE_REPLY(peer, hdr->tag, reply); } } + +static inline int _my_client(const char *nspace, int rank) +{ + pmix_peer_t *peer; + int i; + int local = 0; + + for (i = 0; i < pmix_server_globals.clients.size; i++) { + if (NULL != (peer = (pmix_peer_t *)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) { + if (0 == strcmp(peer->info->nptr->nspace, nspace) && peer->info->rank == rank) { + local = 1; + break; + } + } + } + + return local; +} diff --git a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_get.c index 515d5d0dd4c..8b83242dba3 100644 --- a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_get.c @@ -55,6 +55,10 @@ #include "src/usock/usock.h" #include "src/sec/pmix_sec.h" +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +#include "src/dstore/pmix_dstore.h" +#endif /* PMIX_ENABLE_DSTORE */ + #include "pmix_server_ops.h" extern pmix_server_module_t pmix_host_server; @@ -361,7 +365,7 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, int rank, size_t sz; int cur_rank; int found = 0; - pmix_buffer_t xfer, pbkt, *xptr; + pmix_buffer_t pbkt; void *last; pmix_hash_table_t *hts[3]; pmix_hash_table_t **htptr; @@ -410,6 +414,21 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, int rank, } while (PMIX_SUCCESS == rc) { if (NULL != val) { +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + pmix_kval_t *kv; + + /* setup to xfer the data */ + kv = PMIX_NEW(pmix_kval_t); + kv->key = strdup("modex"); + kv->value = (pmix_value_t *)malloc(sizeof(pmix_value_t)); + rc = pmix_value_xfer(kv->value, val); + if (PMIX_SUCCESS != (rc = pmix_dstore_store(nptr->nspace, cur_rank, kv))) { + PMIX_ERROR_LOG(rc); + } + PMIX_RELEASE(kv); +#else + pmix_buffer_t xfer, 
*xptr; + pmix_bfrop.pack(&pbkt, &cur_rank, 1, PMIX_INT); /* the client is expecting this to arrive as a byte object * containing a buffer, so package it accordingly */ @@ -420,6 +439,7 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, int rank, xfer.base_ptr = NULL; // protect the passed data xfer.bytes_used = 0; PMIX_DESTRUCT(&xfer); +#endif /* PMIX_ENABLE_DSTORE */ PMIX_VALUE_RELEASE(val); found++; } diff --git a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c index 9d7db586ce5..10ff4d704f9 100644 --- a/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c @@ -57,6 +57,11 @@ #include "pmix_server_ops.h" +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) +#include "src/dstore/pmix_dstore.h" +#endif /* PMIX_ENABLE_DSTORE */ + + pmix_server_module_t pmix_host_server = {0}; pmix_status_t pmix_server_abort(pmix_peer_t *peer, pmix_buffer_t *buf, @@ -169,13 +174,32 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) PMIX_ERROR_LOG(rc); return rc; } - /* see if we already have info for this proc */ - if (PMIX_SUCCESS == pmix_hash_fetch(ht, info->rank, "modex", &val) && NULL != val) { - /* create the new data storage */ + + /* create the new data storage */ + kp = PMIX_NEW(pmix_kval_t); + kp->key = strdup("modex"); + PMIX_VALUE_CREATE(kp->value, 1); + kp->value->type = PMIX_BYTE_OBJECT; + +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + /* The local buffer must go directly the dstore */ + if( PMIX_LOCAL == scope ){ + /* need to deposit this in the dstore now */ + PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); + if (PMIX_SUCCESS != (rc = pmix_dstore_store(nptr->nspace, info->rank, kp))) { + PMIX_ERROR_LOG(rc); + } + PMIX_RELEASE(kp); + kp = PMIX_NEW(pmix_kval_t); kp->key = strdup("modex"); PMIX_VALUE_CREATE(kp->value, 1); kp->value->type = PMIX_BYTE_OBJECT; 
+ } +#endif /* PMIX_ENABLE_DSTORE */ + + /* see if we already have info for this proc */ + if (PMIX_SUCCESS == pmix_hash_fetch(ht, info->rank, "modex", &val) && NULL != val) { /* get space for the new new data blob */ kp->value->data.bo.bytes = (char*)malloc(b2->bytes_used + val->data.bo.size); memcpy(kp->value->data.bo.bytes, val->data.bo.bytes, val->data.bo.size); @@ -183,25 +207,18 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) kp->value->data.bo.size = val->data.bo.size + b2->bytes_used; /* release the storage */ PMIX_VALUE_FREE(val, 1); - /* store it in the appropriate hash */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) { - PMIX_ERROR_LOG(rc); - } - PMIX_RELEASE(kp); // maintain acctg } else { - /* create a new kval to hold this data */ - kp = PMIX_NEW(pmix_kval_t); - kp->key = strdup("modex"); - PMIX_VALUE_CREATE(kp->value, 1); - kp->value->type = PMIX_BYTE_OBJECT; PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); - PMIX_RELEASE(b2); - /* store it in the appropriate hash */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) { - PMIX_ERROR_LOG(rc); - } - PMIX_RELEASE(kp); // maintain acctg } + + /* store it in the appropriate hash */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) { + PMIX_ERROR_LOG(rc); + } + /* maintain the accounting */ + PMIX_RELEASE(kp); + PMIX_RELEASE(b2); + cnt = 1; } if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { diff --git a/opal/mca/pmix/pmix112/pmix/src/sm/Makefile.am b/opal/mca/pmix/pmix112/pmix/src/sm/Makefile.am new file mode 100644 index 00000000000..476011224e1 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/sm/Makefile.am @@ -0,0 +1,17 @@ +# +# Copyright (c) 2016 Mellanox Technologies, Inc. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + src/sm/pmix_sm.h \ + src/sm/pmix_mmap.h + +sources += \ + src/sm/pmix_sm.c \ + src/sm/pmix_mmap.c diff --git a/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.c b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.c new file mode 100644 index 00000000000..a3c89afc885 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.c @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#include +#include +#include + +#include +#include "src/include/pmix_globals.h" +#include "src/util/output.h" + +#include "pmix_sm.h" +#include "pmix_mmap.h" + +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +# define MAP_ANONYMOUS MAP_ANON +#endif /* MAP_ANONYMOUS and MAP_ANON */ + + +static int _mmap_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size); +static int _mmap_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode); +static int _mmap_segment_detach(pmix_sm_seg_t *sm_seg); +static int _mmap_segment_unlink(pmix_sm_seg_t *sm_seg); + +pmix_sm_base_module_t pmix_sm_mmap_module = { + "mmap", + _mmap_segment_create, + _mmap_segment_attach, + _mmap_segment_detach, + _mmap_segment_unlink +}; + + +int _mmap_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size) +{ + int rc = PMIX_SUCCESS; + void *seg_addr = MAP_FAILED; + pid_t my_pid = getpid(); + + _segment_ds_reset(sm_seg); + /* enough space is available, so create the segment */ + if (-1 == (sm_seg->seg_id = open(file_name, O_CREAT | O_RDWR, 0600))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "sys call open(2) fail\n"); + rc = PMIX_ERROR; + goto out; + } + /* size backing file - note the use of real_size here */ + if (0 != ftruncate(sm_seg->seg_id, size)) { + pmix_output_verbose(2, 
pmix_globals.debug_output, + "sys call ftruncate(2) fail\n"); + rc = PMIX_ERROR; + goto out; + } + if (MAP_FAILED == (seg_addr = mmap(NULL, size, + PROT_READ | PROT_WRITE, MAP_SHARED, + sm_seg->seg_id, 0))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "sys call mmap(2) fail\n"); + rc = PMIX_ERROR; + goto out; + } + sm_seg->seg_cpid = my_pid; + sm_seg->seg_size = size; + sm_seg->seg_base_addr = (unsigned char *)seg_addr; + (void)strncpy(sm_seg->seg_name, file_name, PMIX_PATH_MAX - 1); + +out: + if (-1 != sm_seg->seg_id) { + if (0 != close(sm_seg->seg_id)) { + pmix_output_verbose(2, pmix_globals.debug_output, + "sys call close(2) fail\n"); + rc = PMIX_ERROR; + } + } + /* an error occured, so invalidate the shmem object and munmap if needed */ + if (PMIX_SUCCESS != rc) { + if (MAP_FAILED != seg_addr) { + munmap((void *)seg_addr, size); + } + _segment_ds_reset(sm_seg); + } + return rc; +} + +int _mmap_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode) +{ + mode_t mode = O_RDWR; + int mmap_prot = PROT_READ | PROT_WRITE; + + if (sm_mode == PMIX_SM_RONLY) { + mode = O_RDONLY; + mmap_prot = PROT_READ; + } + + if (-1 == (sm_seg->seg_id = open(sm_seg->seg_name, mode))) { + return PMIX_ERROR; + } + if (MAP_FAILED == (sm_seg->seg_base_addr = (unsigned char *) + mmap(NULL, sm_seg->seg_size, + mmap_prot, MAP_SHARED, + sm_seg->seg_id, 0))) { + /* mmap failed, so close the file and return NULL - no error check + * here because we are already in an error path... + */ + pmix_output_verbose(2, pmix_globals.debug_output, + "sys call mmap(2) fail\n"); + close(sm_seg->seg_id); + return PMIX_ERROR; + } + /* all is well */ + /* if close fails here, that's okay. just let the user know and + * continue. if we got this far, open and mmap were successful... 
+ */ + if (0 != close(sm_seg->seg_id)) { + pmix_output_verbose(2, pmix_globals.debug_output, + "sys call close(2) fail\n"); + } + sm_seg->seg_cpid = 0;/* FIXME */ + return PMIX_SUCCESS; +} + +int _mmap_segment_detach(pmix_sm_seg_t *sm_seg) +{ + int rc = PMIX_SUCCESS; + + if (0 != munmap((void *)sm_seg->seg_base_addr, sm_seg->seg_size)) { + pmix_output_verbose(2, pmix_globals.debug_output, + "sys call munmap(2) fail\n"); + rc = PMIX_ERROR; + } + /* reset the contents of the pmix_sm_seg_t associated with this + * shared memory segment. + */ + _segment_ds_reset(sm_seg); + return rc; +} + +int _mmap_segment_unlink(pmix_sm_seg_t *sm_seg) +{ + if (-1 == unlink(sm_seg->seg_name)) { + pmix_output_verbose(2, pmix_globals.debug_output, + "sys call unlink(2) fail\n"); + return PMIX_ERROR; + } + + sm_seg->seg_id = PMIX_SHMEM_DS_ID_INVALID; + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.h b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.h new file mode 100644 index 00000000000..3c1af183fa5 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_mmap.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SM_MMAP_H +#define PMIX_SM_MMAP_H + +#include +#include "pmix_sm.h" + +BEGIN_C_DECLS + +extern pmix_sm_base_module_t pmix_sm_mmap_module; + +END_C_DECLS + +#endif /* PMIX_SM_MMAP_H */ diff --git a/opal/mca/pmix/pmix112/pmix/src/sm/pmix_sm.c b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_sm.c new file mode 100644 index 00000000000..6c6abcb04d8 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_sm.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include "src/include/pmix_globals.h" +#include "pmix_sm.h" +#include "pmix_mmap.h" + +/* + * Array of all possible SMs + */ + +/**** ENSURE THE FOLLOWING VALUE IS AT LEAST AS + **** LARGE AS THE TOTAL NUMBER OF SUPPORTED SPCs + **** IN THE ARRAY BELOW + */ + +static pmix_sm_base_module_t *all[] = { + &pmix_sm_mmap_module, + + /* Always end the array with a NULL */ + NULL +}; + +pmix_sm_base_module_t pmix_sm = {0}; + +int pmix_sm_init(void) +{ + pmix_sm = *all[0]; + return PMIX_SUCCESS; +} + +void pmix_sm_finalize(void) +{ + return ; +} + +int pmix_sm_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size) +{ + if (!pmix_sm.segment_create) { + return PMIX_ERR_NOT_SUPPORTED; + } + + return pmix_sm.segment_create(sm_seg, file_name, size); +} + +int pmix_sm_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode) +{ + if (!pmix_sm.segment_attach) { + return PMIX_ERR_NOT_SUPPORTED; + } + + return pmix_sm.segment_attach(sm_seg, sm_mode); +} + +int pmix_sm_segment_detach(pmix_sm_seg_t *sm_seg) +{ + if (!pmix_sm.segment_detach) { + return PMIX_ERR_NOT_SUPPORTED; + } + + return pmix_sm.segment_detach(sm_seg); +} + +int pmix_sm_segment_unlink(pmix_sm_seg_t *sm_seg) +{ + if (!pmix_sm.segment_unlink) { + return PMIX_ERR_NOT_SUPPORTED; + } + + return pmix_sm.segment_unlink(sm_seg); +} diff --git a/opal/mca/pmix/pmix112/pmix/src/sm/pmix_sm.h b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_sm.h new file mode 100644 index 00000000000..4efed6a2e16 --- /dev/null +++ b/opal/mca/pmix/pmix112/pmix/src/sm/pmix_sm.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SM_H +#define PMIX_SM_H + +#include +#include + +BEGIN_C_DECLS + +#if !defined(MAP_FAILED) +# define MAP_FAILED ((char*)-1) +#endif /* MAP_FAILED */ + +#define PMIX_SHMEM_DS_ID_INVALID -1 + +typedef enum { + PMIX_SM_RONLY, + PMIX_SM_RW +} pmix_sm_access_mode_t; + +typedef struct pmix_sm_seg_t { + /* pid of the shared memory segment creator */ + pid_t seg_cpid; + /* ds id */ + int seg_id; + /* size of shared memory segment */ + size_t seg_size; + /* base address of shared memory segment */ + unsigned char *seg_base_addr; + char seg_name[PMIX_PATH_MAX]; +} pmix_sm_seg_t; + +int pmix_sm_init(void); +void pmix_sm_finalize(void); +int pmix_sm_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size); +int pmix_sm_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode); +int pmix_sm_segment_detach(pmix_sm_seg_t *sm_seg); +int pmix_sm_segment_unlink(pmix_sm_seg_t *sm_seg); + +static inline void _segment_ds_reset(pmix_sm_seg_t *sm_seg) +{ + sm_seg->seg_cpid = 0; + sm_seg->seg_id = PMIX_SHMEM_DS_ID_INVALID; + sm_seg->seg_size = 0; + memset(sm_seg->seg_name, '\0', PMIX_PATH_MAX); + sm_seg->seg_base_addr = (unsigned char *)MAP_FAILED; +} + + +/** +* create a new shared memory segment and initialize members in structure +* pointed to by sm_seg. +* +* @param sm_seg pointer to pmix_sm_seg_t structure +* +* @param file_name unique string identifier that must be a valid, +* writable path (IN). +* +* @param size size of the shared memory segment. +* +* @return PMIX_SUCCESS on success. +*/ +typedef int (*pmix_sm_base_module_segment_create_fn_t)(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size); + +/** +* attach to an existing shared memory segment initialized by segment_create. +* +* @param sm_seg pointer to initialized pmix_sm_seg_t typedef'd +* structure (IN/OUT). +* +* @return base address of shared memory segment on success. 
returns +* NULL otherwise. +*/ +typedef int (*pmix_sm_base_module_segment_attach_fn_t)(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode); + +/** +* detach from an existing shared memory segment. +* +* @param sm_seg pointer to initialized pmix_sm_seg_t typedef'd structure +* (IN/OUT). +* +* @return PMIX_SUCCESS on success. +*/ +typedef int (*pmix_sm_base_module_segment_detach_fn_t)(pmix_sm_seg_t *sm_seg); + +/** +* unlink an existing shared memory segment. +* +* @param sm_seg pointer to initialized pmix_sm_seg_t typedef'd structure +* (IN/OUT). +* +* @return PMIX_SUCCESS on success. +*/ +typedef int (*pmix_sm_base_module_unlink_fn_t)(pmix_sm_seg_t *sm_seg); + + +/** +* structure for sm modules +*/ +typedef struct { + const char *name; + pmix_sm_base_module_segment_create_fn_t segment_create; + pmix_sm_base_module_segment_attach_fn_t segment_attach; + pmix_sm_base_module_segment_detach_fn_t segment_detach; + pmix_sm_base_module_unlink_fn_t segment_unlink; +} pmix_sm_base_module_t; + + +END_C_DECLS + +#endif /* PMIX_SM_H */ diff --git a/opal/mca/pmix/pmix112/pmix/src/usock/usock_sendrecv.c b/opal/mca/pmix/pmix112/pmix/src/usock/usock_sendrecv.c index 0f784ce1a54..18d7a0749f0 100644 --- a/opal/mca/pmix/pmix112/pmix/src/usock/usock_sendrecv.c +++ b/opal/mca/pmix/pmix112/pmix/src/usock/usock_sendrecv.c @@ -53,7 +53,6 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) pmix_server_trkr_t *trk; pmix_rank_info_t *rinfo, *rnext; pmix_trkr_caddy_t *tcd; - pmix_regevents_info_t *reginfoptr, *regnext; /* stop all events */ if (peer->recv_ev_active) { @@ -108,15 +107,7 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) /* do some cleanup as the client has left us */ pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); - /* remove all registered event handlers so libevent doesn't complain */ - PMIX_LIST_FOREACH_SAFE(reginfoptr, regnext, &pmix_server_globals.client_eventregs, pmix_regevents_info_t) { - if 
(reginfoptr->peer == peer) { - pmix_list_remove_item(&pmix_server_globals.client_eventregs, ®infoptr->super); - PMIX_RELEASE(reginfoptr); - break; - } - } - PMIX_RELEASE(peer); + PMIX_RELEASE(peer); } else { /* if I am a client, there is only * one connection we can have */ diff --git a/opal/mca/pmix/pmix112/pmix/src/util/error.c b/opal/mca/pmix/pmix112/pmix/src/util/error.c index 4da7542c34c..510eb38d2c2 100644 --- a/opal/mca/pmix/pmix112/pmix/src/util/error.c +++ b/opal/mca/pmix/pmix112/pmix/src/util/error.c @@ -154,144 +154,28 @@ void pmix_errhandler_invoke(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo) { - /* We need to parse thru each registered handler and determine - * which one to call for the specific error */ - int i, idflt; - size_t j; - bool fired = false; - pmix_error_reg_info_t *errreg, *errdflt=NULL; - pmix_info_t *iptr; - - PMIX_INFO_CREATE(iptr, ninfo+1); - (void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN); - iptr[0].value.type = PMIX_INT; - if (NULL != info) { - for (j=0; j < ninfo; j++) { - PMIX_INFO_LOAD(&iptr[j+1], info[j].key, &info[j].value.data, info[j].value.type); - } - } - - for (i = 0; i < pmix_globals.errregs.size; i++) { - if (NULL == (errreg = (pmix_error_reg_info_t*) pmix_pointer_array_get_item(&pmix_globals.errregs, i))) { - continue; - } - if (NULL == errreg->info || 0 == errreg->ninfo) { - // this is a general err handler - we will call it if there is no better match - errdflt = errreg; - idflt = i; - continue; - } - iptr[0].value.data.integer = i; - /* match error name key first */ - for (j = 0; j < errreg->ninfo; j++) { - if ((0 == strcmp(errreg->info[j].key, PMIX_ERROR_NAME)) && - (status == errreg->info[j].value.data.int32)) { - iptr[0].value.data.integer = i; - errreg->errhandler(status, procs, nprocs, iptr, ninfo+1); - fired = true; - break; - } - } - } - - /* if nothing fired and we found a general err handler, then fire it */ - if (!fired && NULL != errdflt) { - 
iptr[0].value.data.integer = idflt; - errdflt->errhandler(status, procs, nprocs, iptr, ninfo+1); - } - /* cleanup */ - PMIX_INFO_FREE(iptr, ninfo+1); + return; } pmix_status_t pmix_lookup_errhandler(pmix_notification_fn_t err, int *index) { - int i; - pmix_error_reg_info_t *errreg; - pmix_status_t rc = PMIX_ERR_NOT_FOUND; - - for (i = 0; i < pmix_pointer_array_get_size(&pmix_globals.errregs) ; i++) { - errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i); - if ((NULL != errreg) && (err == errreg->errhandler)) { - *index = i; - rc = PMIX_SUCCESS; - break; - } - } - return rc; + return PMIX_ERR_NOT_FOUND; } pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err, pmix_info_t *info, int ninfo, int *index) { - int i; - pmix_status_t rc = PMIX_SUCCESS; - pmix_error_reg_info_t *errreg; - - errreg = PMIX_NEW(pmix_error_reg_info_t); - errreg->errhandler = err; - errreg->ninfo = ninfo; - if (NULL != info && 0 < ninfo) { - PMIX_INFO_CREATE(errreg->info, ninfo); - for (i=0; i < ninfo; i++) { - (void)strncpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN); - pmix_value_xfer(&errreg->info[i].value, &info[i].value); - } - } - *index = pmix_pointer_array_add(&pmix_globals.errregs, errreg); - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_add_errhandler index =%d", *index); - if (*index < 0) { - PMIX_RELEASE(errreg); - rc = PMIX_ERROR; - } - return rc; + return PMIX_ERR_NOT_SUPPORTED; } pmix_status_t pmix_remove_errhandler(int errhandler_ref) { - int rc = PMIX_SUCCESS; - pmix_error_reg_info_t *errreg; - - errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, - errhandler_ref); - if (NULL != errreg) { - PMIX_RELEASE(errreg); - pmix_pointer_array_set_item(&pmix_globals.errregs, errhandler_ref, NULL); - } else { - rc = PMIX_ERR_NOT_FOUND; - } - return rc; + return PMIX_ERR_NOT_SUPPORTED; } void pmix_get_errorgroup(pmix_status_t status, char *pmix_error_group) { - switch(status) { - case 
PMIX_ERR_UNREACH: - case PMIX_ERR_COMM_FAILURE: - case PMIX_ERR_SERVER_NOT_AVAIL: - case PMIX_ERR_TIMEOUT: - case PMIX_ERR_PACK_FAILURE: - case PMIX_ERR_UNPACK_FAILURE: - (void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_COMM, PMIX_MAX_KEYLEN); - break; - case PMIX_ERR_OUT_OF_RESOURCE: - case PMIX_ERR_RESOURCE_BUSY: - case PMIX_ERR_NOMEM: - (void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_RESOURCE, PMIX_MAX_KEYLEN); - break; - case PMIX_ERR_PROC_MIGRATE: - case PMIX_ERR_PROC_CHECKPOINT: - case PMIX_ERR_PROC_RESTART: - (void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_MIGRATE, PMIX_MAX_KEYLEN); - break; - case PMIX_ERR_PROC_ABORTING: - case PMIX_ERR_PROC_REQUESTED_ABORT: - case PMIX_ERR_PROC_ABORTED: - (void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_ABORT, PMIX_MAX_KEYLEN); - break; - default: - (void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_GENERAL, PMIX_MAX_KEYLEN); - } + return; } diff --git a/opal/mca/pmix/pmix112/pmix/src/util/progress_threads.h b/opal/mca/pmix/pmix112/pmix/src/util/progress_threads.h index 3ead0348387..c7fec60fbcf 100644 --- a/opal/mca/pmix/pmix112/pmix/src/util/progress_threads.h +++ b/opal/mca/pmix/pmix112/pmix/src/util/progress_threads.h @@ -1,34 +1,34 @@ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2016 Mellanox Technologies, Inc. - * All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef PROGRESS_THREADS_H -#define PROGRESS_THREADS_H - -#include - -#include - -#include PMIX_EVENT_HEADER - -/* start a progress thread, assigning it the provided name for - * tracking purposes. If create_block is true, then this function - * will also create a pipe so that libevent has something to block - * against, thus keeping the thread from free-running - */ -PMIX_DECLSPEC pmix_event_base_t* pmix_start_progress_thread(void); - -/* stop the progress thread of the provided name. 
This function will - * also cleanup the blocking pipes and release the event base if - * the cleanup param is true */ -PMIX_DECLSPEC void pmix_stop_progress_thread(pmix_event_base_t *ev_base); - -#endif +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PROGRESS_THREADS_H +#define PROGRESS_THREADS_H + +#include + +#include + +#include PMIX_EVENT_HEADER + +/* start a progress thread, assigning it the provided name for + * tracking purposes. If create_block is true, then this function + * will also create a pipe so that libevent has something to block + * against, thus keeping the thread from free-running + */ +PMIX_DECLSPEC pmix_event_base_t* pmix_start_progress_thread(void); + +/* stop the progress thread of the provided name. This function will + * also cleanup the blocking pipes and release the event base if + * the cleanup param is true */ +PMIX_DECLSPEC void pmix_stop_progress_thread(pmix_event_base_t *ev_base); + +#endif diff --git a/opal/mca/pmix/pmix112/pmix/test/pmix_test.c b/opal/mca/pmix/pmix112/pmix/test/pmix_test.c index 1e6c468cea6..768adbc2f94 100644 --- a/opal/mca/pmix/pmix112/pmix/test/pmix_test.c +++ b/opal/mca/pmix/pmix112/pmix/test/pmix_test.c @@ -35,6 +35,9 @@ #include "server_callbacks.h" #include "utils.h" +#include "src/include/pmix_globals.h" + +bool spawn_wait = false; int main(int argc, char **argv) { @@ -174,6 +177,10 @@ int main(int argc, char **argv) test_fail = 1; } + if (0 != params.test_spawn) { + PMIX_WAIT_FOR_COMPLETION(spawn_wait); + } + pmix_argv_free(client_argv); pmix_argv_free(client_env); diff --git a/opal/mca/pmix/pmix112/pmix/test/server_callbacks.c b/opal/mca/pmix/pmix112/pmix/test/server_callbacks.c index fd897292163..50bc6c625da 100644 --- a/opal/mca/pmix/pmix112/pmix/test/server_callbacks.c +++ 
b/opal/mca/pmix/pmix112/pmix/test/server_callbacks.c @@ -13,9 +13,12 @@ * */ +#include #include "server_callbacks.h" #include "src/util/argv.h" +extern int spawn_wait; + pmix_server_module_t mymodule = { connected, finalized, @@ -260,13 +263,27 @@ typedef struct { void *cbdata; } release_cbdata; -static void release_cb(pmix_status_t status, void *cbdata) + +static void * _release_cb(void *arg) { - release_cbdata *cb = (release_cbdata*)cbdata; + release_cbdata *cb = (release_cbdata*)arg; if (NULL != cb->cbfunc) { cb->cbfunc(cb->status, "foobar", cb->cbdata); } free(cb); + spawn_wait = false; + pthread_exit(NULL); +} + +static void release_cb(pmix_status_t status, void *cbdata) +{ + pthread_t thread; + + if (0 > pthread_create(&thread, NULL, _release_cb, cbdata)) { + spawn_wait = false; + return; + } + pthread_detach(thread); } pmix_status_t spawn_fn(const pmix_proc_t *proc, @@ -275,9 +292,12 @@ pmix_status_t spawn_fn(const pmix_proc_t *proc, pmix_spawn_cbfunc_t cbfunc, void *cbdata) { release_cbdata *cb = malloc(sizeof(release_cbdata)); + cb->status = PMIX_SUCCESS; cb->cbfunc = cbfunc; cb->cbdata = cbdata; + + spawn_wait = true; PMIx_server_register_nspace("foobar", napps, NULL, 0, release_cb, (void*)cb); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix112/pmix/test/test_cd.h b/opal/mca/pmix/pmix112/pmix/test/test_cd.h index d4e789102f2..4068cd1e599 100644 --- a/opal/mca/pmix/pmix112/pmix/test/test_cd.h +++ b/opal/mca/pmix/pmix112/pmix/test/test_cd.h @@ -1,17 +1,17 @@ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include - -#include "test_common.h" - -int test_connect_disconnect(char *my_nspace, int my_rank); -int test_cd_common(pmix_proc_t *procs, size_t nprocs, int blocking, int disconnect); +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include +#include + +#include "test_common.h" + +int test_connect_disconnect(char *my_nspace, int my_rank); +int test_cd_common(pmix_proc_t *procs, size_t nprocs, int blocking, int disconnect); diff --git a/opal/mca/pmix/pmix112/pmix/test/test_fence.c b/opal/mca/pmix/pmix112/pmix/test/test_fence.c index 57473451ebd..78a545a651b 100644 --- a/opal/mca/pmix/pmix112/pmix/test/test_fence.c +++ b/opal/mca/pmix/pmix112/pmix/test/test_fence.c @@ -313,7 +313,7 @@ int test_fence(test_params params, char *my_nspace, int my_rank) PMIX_LIST_DESTRUCT(&test_fences); return rc; } - GET(int, fence_num+p->proc.rank, p->proc.nspace, p->proc.rank, fence_num, put_ind++, params.use_same_keys, 0, 0); + GET(int, (int)(fence_num+p->proc.rank), p->proc.nspace, p->proc.rank, fence_num, put_ind++, params.use_same_keys, 0, 0); if (PMIX_SUCCESS != rc) { TEST_ERROR(("%s:%d: PMIx_Get failed (%d) from %s:%d", my_nspace, my_rank, rc, p->proc.nspace, p->proc.rank)); PMIX_PROC_FREE(pcs, npcs);