From 52d7e7e8a61f756017fd28bdc501c67aa3c315ca Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 9 Aug 2019 14:02:49 -0700 Subject: [PATCH] Update to PMIx v2.2.3rc2 Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/NEWS | 18 + opal/mca/pmix/pmix2x/pmix/VERSION | 10 +- .../pmix2x/pmix/config/c_get_alignment.m4 | 8 +- opal/mca/pmix/pmix2x/pmix/config/pmix.m4 | 8 +- .../config/pmix_check_compiler_version.m4 | 5 + .../pmix/pmix2x/pmix/config/pmix_check_icc.m4 | 7 +- .../pmix2x/pmix/config/pmix_check_psm2.m4 | 89 -- opal/mca/pmix/pmix2x/pmix/configure.ac | 3 +- opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec | 2 +- .../pmix2x/pmix/contrib/whitespace-purge.sh | 4 +- opal/mca/pmix/pmix2x/pmix/examples/client.c | 3 + opal/mca/pmix/pmix2x/pmix/examples/dynamic.c | 82 +- opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c | 18 +- .../pmix/src/client/pmix_client_fence.c | 16 +- .../pmix2x/pmix/src/client/pmix_client_get.c | 93 +- .../pmix/pmix2x/pmix/src/common/pmix_query.c | 43 +- .../pmix/src/event/pmix_event_notification.c | 2 +- .../pmix2x/pmix/src/include/pmix_globals.c | 1 + .../pmix2x/pmix/src/include/pmix_globals.h | 12 + .../mca/base/pmix_mca_base_component_find.c | 4 +- .../pmix/src/mca/base/pmix_mca_base_open.c | 4 +- .../pmix2x/pmix/src/mca/bfrops/v20/copy.c | 24 +- .../pmix2x/pmix/src/mca/gds/hash/gds_hash.c | 1179 +++++++++++++++-- .../pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c | 2 +- .../src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c | 2 +- .../src/mca/pif/linux_ipv6/pif_linux_ipv6.c | 13 +- opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h | 2 +- .../mca/pif/solaris_ipv6/pif_solaris_ipv6.c | 2 +- .../pmix2x/pmix/src/mca/pnet/opa/Makefile.am | 59 - .../pmix2x/pmix/src/mca/pnet/opa/configure.m4 | 110 -- .../pmix2x/pmix/src/mca/pnet/opa/pnet_opa.c | 240 ---- .../pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h | 27 - .../src/mca/pnet/opa/pnet_opa_component.c | 84 -- .../pmix/src/mca/pshmem/mmap/pshmem_mmap.c | 4 +- .../pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c | 4 +- .../pmix/src/mca/ptl/tcp/ptl_tcp_component.c | 32 +- .../src/mca/ptl/usock/ptl_usock_component.c | 2 +- .../pmix2x/pmix/src/runtime/pmix_finalize.c | 6 +- .../pmix/pmix2x/pmix/src/runtime/pmix_init.c | 6 +- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 33 +- .../pmix2x/pmix/src/server/pmix_server_get.c | 81 +- .../pmix2x/pmix/src/server/pmix_server_ops.c | 3 +- .../mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c | 8 +- opal/mca/pmix/pmix2x/pmix/src/util/name_fns.c | 44 +- opal/mca/pmix/pmix2x/pmix/src/util/output.c | 6 +- opal/mca/pmix/pmix2x/pmix/src/util/pif.h | 6 +- .../pmix/pmix2x/pmix/src/util/pmix_environ.c | 78 +- opal/mca/pmix/pmix2x/pmix/test/cli_stages.c | 2 +- opal/mca/pmix/pmix2x/pmix/test/pmi_client.c | 59 +- .../pmix/pmix2x/pmix/test/simple/Makefile.am | 2 +- .../pmix2x/pmix/test/simple/quietclient.c | 117 +- .../pmix/pmix2x/pmix/test/simple/simpclient.c | 128 +- .../pmix/pmix2x/pmix/test/simple/simpdie.c | 9 +- .../pmix/pmix2x/pmix/test/simple/simpdmodex.c | 76 +- .../pmix/pmix2x/pmix/test/simple/simpdyn.c | 35 +- .../mca/pmix/pmix2x/pmix/test/simple/simpft.c | 11 +- .../pmix/pmix2x/pmix/test/simple/simpjctrl.c | 13 +- .../pmix/pmix2x/pmix/test/simple/simplegacy.c | 248 +--- .../pmix/pmix2x/pmix/test/simple/simppub.c | 11 +- .../pmix/pmix2x/pmix/test/simple/simptest.c | 236 ++-- .../pmix2x/pmix/test/simple/simptimeout.c | 8 +- opal/mca/pmix/pmix2x/pmix/test/test_common.h | 6 +- opal/mca/pmix/pmix2x/pmix/test/test_error.c | 6 +- 63 files changed, 2023 insertions(+), 1433 deletions(-) delete mode 100644 opal/mca/pmix/pmix2x/pmix/config/pmix_check_psm2.m4 delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/Makefile.am delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/configure.m4 delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa_component.c diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index ad547b6a4a0..a2574826921 100644 --- a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -53,6 +53,24 @@ current release as well as the "stable" bug fix release branch. - PR #1311: Work around memory bug in older gcc compilers - PR #1329: Add -fPIC to static builds - PR #1334: Cache only -W CFLAG entries to fix 32-bit builds +- PR #1341: Do not use '==' in m4 test statements +- PR #1342: Fix if_linux_ipv6_open interface filter +- PR #1344: Remove unnecessary libtool init for c++ +- PR #1346: Fix incorrect pointer casts/deref +- PR #1347/#1348: Fix use of gethostname +- PR #1353/#1357: util/environ: use setenv() if available +- PR #1354: Plug a misc memory leak in the pmix_query_caddy_t destructor +- PR #1356: Fix another pointer cast/deref in test suite +- PR #1358: Implement support for class-based info arrays +- PR #1359: Plug misc minor memory leaks +- PR #1369: Fix legacy support for PMI-1 +- PR #1370: Cleanup handling of data requests for different nspaces +- PR #1193: Resolve get of proc-specific job-level info from another nspace +- PR #1377: Skip fastpath/dstore for NULL keys +- PR #1379: Change IF_NAMESIZE to PMIX_IF_NAMESIZE and set to safe size +- PR #1385: Check for EINVAL return from posix_fallocate +- PR #1389: Plug misc memory leaks in configure + 2.2.2 -- 24 Jan 2019 diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index fbb55b172c3..64da98814ea 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -24,14 +24,14 @@ release=3 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek=rc1 +greek=rc2 # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git8aac6645 +repo_rev=git9a14c877 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -45,7 +45,7 @@ tarball_version= # The date when this release was created -date="Jun 26, 2019" +date="Aug 09, 2019" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -76,8 +76,8 @@ date="Jun 26, 2019" # Version numbers are described in the Libtool current:revision:age # format. -libpmix_so_version=3:22:1 -libpmi_so_version=1:0:0 +libpmix_so_version=3:23:1 +libpmi_so_version=1:1:0 libpmi2_so_version=1:0:0 # "Common" components install standalone libraries that are run-time diff --git a/opal/mca/pmix/pmix2x/pmix/config/c_get_alignment.m4 b/opal/mca/pmix/pmix2x/pmix/config/c_get_alignment.m4 index db379100994..6596c0ae88d 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/c_get_alignment.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/c_get_alignment.m4 @@ -11,9 +11,9 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -44,7 +44,9 @@ AC_DEFUN([PMIX_C_GET_ALIGNMENT],[ FILE *f=fopen("conftestval", "w"); if (!f) exit(1); diff = ((char *)&p->x) - ((char *)&p->c); + free(p); fprintf(f, "%d\n", (diff >= 0) ? diff : -diff); + fclose(f); ]])], [AS_TR_SH([pmix_cv_c_align_$1])=`cat conftestval`], [AC_MSG_WARN([*** Problem running configure test!]) AC_MSG_WARN([*** See config.log for details.]) diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index ec9109c1aad..db1b06ecbf5 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -191,7 +191,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ [Link the output PMIx library to this extra lib (used in embedded mode)])) AC_MSG_CHECKING([for extra lib]) AS_IF([test ! -z "$with_pmix_extra_lib"], - [AS_IF([test "$with_pmix_extra_lib" == "yes" || test "$with_pmix_extra_lib" == "no"], + [AS_IF([test "$with_pmix_extra_lib" = "yes" || test "$with_pmix_extra_lib" = "no"], [AC_MSG_RESULT([ERROR]) AC_MSG_WARN([Invalid value for --with-extra-pmix-lib:]) AC_MSG_WARN([ $with_pmix_extra_lib]) @@ -209,7 +209,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ [Link any embedded components/tools that require it to the provided libtool lib (used in embedded mode)])) AC_MSG_CHECKING([for extra ltlib]) AS_IF([test ! -z "$with_pmix_extra_ltlib"], - [AS_IF([test "$with_pmix_extra_ltlib" == "yes" || test "$with_pmix_extra_ltlib" == "no"], + [AS_IF([test "$with_pmix_extra_ltlib" = "yes" || test "$with_pmix_extra_ltlib" = "no"], [AC_MSG_RESULT([ERROR]) AC_MSG_WARN([Invalid value for --with-pmix-extra-ltlib:]) AC_MSG_WARN([ $with_pmix_extra_ltlib]) @@ -663,7 +663,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # -lrt might be needed for clock_gettime PMIX_SEARCH_LIBS_CORE([clock_gettime], [rt]) - AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp]) + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp setpgid ptsname openpty setenv]) # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get # confused. On others, it's in the standard library, but stubbed with @@ -1162,7 +1162,7 @@ AC_MSG_CHECKING([if want to support dlopen of non-global namespaces]) AC_ARG_ENABLE([nonglobal-dlopen], AC_HELP_STRING([--enable-nonglobal-dlopen], [enable non-global dlopen (default: enabled)])) -if test "$enable_nonglobal_dlopen" == "no"; then +if test "$enable_nonglobal_dlopen" = "no"; then AC_MSG_RESULT([no]) pmix_need_libpmix=0 else diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_check_compiler_version.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_check_compiler_version.m4 index f918d7b590b..da822b04810 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_check_compiler_version.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_check_compiler_version.m4 @@ -2,6 +2,8 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl dnl $COPYRIGHT$ dnl @@ -43,6 +45,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%d", PLATFORM_COMPILER_$1); + fclose(f); return 0; } ], [ @@ -75,6 +78,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%s", PLATFORM_COMPILER_$1); + fclose(f); return 0; } ], [ @@ -110,6 +114,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%s", _STRINGIFY(PLATFORM_COMPILER_$1)); + fclose(f); return 0; } ], [ diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_check_icc.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_check_icc.m4 index e8a06b25148..05ce9431bd3 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_check_icc.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_check_icc.m4 @@ -10,9 +10,9 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. -dnl Copyright (c) 2016 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2016-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -45,6 +45,7 @@ int main () func (4711, "Help %d [%s]\n", 10, "ten"); f=fopen ("conftestval", "w"); if (!f) exit (1); + fclose(f); return 0; } diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_check_psm2.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_check_psm2.m4 deleted file mode 100644 index b2c291fae49..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_check_psm2.m4 +++ /dev/null @@ -1,89 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006 QLogic Corp. All rights reserved. -# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# Copyright (c) 2016 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# PMIX_CHECK_PSM2(prefix, [action-if-found], [action-if-not-found]) -# -------------------------------------------------------- -# check if PSM2 support can be found. sets prefix_{CPPFLAGS, -# LDFLAGS, LIBS} as needed and runs action-if-found if there is -# support, otherwise executes action-if-not-found -AC_DEFUN([PMIX_CHECK_PSM2],[ - if test -z "$pmix_check_psm2_happy" ; then - AC_ARG_WITH([psm2], - [AC_HELP_STRING([--with-psm2(=DIR)], - [Build PSM2 (Intel PSM2) support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) - PMIX_CHECK_WITHDIR([psm2], [$with_psm2], [include/psm2.h]) - AC_ARG_WITH([psm2-libdir], - [AC_HELP_STRING([--with-psm2-libdir=DIR], - [Search for PSM (Intel PSM2) libraries in DIR])]) - PMIX_CHECK_WITHDIR([psm2-libdir], [$with_psm2_libdir], [libpsm2.*]) - - pmix_check_psm2_$1_save_CPPFLAGS="$CPPFLAGS" - pmix_check_psm2_$1_save_LDFLAGS="$LDFLAGS" - pmix_check_psm2_$1_save_LIBS="$LIBS" - - AS_IF([test "$with_psm2" != "no"], - [AS_IF([test ! -z "$with_psm2" && test "$with_psm2" != "yes"], - [pmix_check_psm2_dir="$with_psm2"]) - AS_IF([test ! -z "$with_psm2_libdir" && test "$with_psm2_libdir" != "yes"], - [pmix_check_psm2_libdir="$with_psm2_libdir"]) - - PMIX_CHECK_PACKAGE([pmix_check_psm2], - [psm2.h], - [psm2], - [psm2_mq_irecv2], - [], - [$pmix_check_psm2_dir], - [$pmix_check_psm2_libdir], - [pmix_check_psm2_happy="yes"], - [pmix_check_psm2_happy="no"])], - [pmix_check_psm2_happy="no"]) - - CPPFLAGS="$pmix_check_psm2_$1_save_CPPFLAGS" - LDFLAGS="$pmix_check_psm2_$1_save_LDFLAGS" - LIBS="$pmix_check_psm2_$1_save_LIBS" - - AS_IF([test "$pmix_check_psm2_happy" = "yes" && test "$enable_progress_threads" = "yes"], - [AC_MSG_WARN([PSM2 driver does not currently support progress threads. Disabling MTL.]) - pmix_check_psm2_happy="no"]) - - AS_IF([test "$pmix_check_psm2_happy" = "yes"], - [AC_CHECK_HEADERS( - glob.h, - [], - [AC_MSG_WARN([glob.h not found. Can not build component.]) - pmix_check_psm2_happy="no"])]) - - fi - - AS_IF([test "$pmix_check_psm2_happy" = "yes"], - [$1_LDFLAGS="[$]$1_LDFLAGS $pmix_check_psm2_LDFLAGS" - $1_CPPFLAGS="[$]$1_CPPFLAGS $pmix_check_psm2_CPPFLAGS" - $1_LIBS="[$]$1_LIBS $pmix_check_psm2_LIBS" - $2], - [AS_IF([test ! -z "$with_psm2" && test "$with_psm2" != "no"], - [AC_MSG_ERROR([PSM2 support requested but not found. Aborting])]) - $3]) -]) diff --git a/opal/mca/pmix/pmix2x/pmix/configure.ac b/opal/mca/pmix/pmix2x/pmix/configure.ac index ccb2a4aae95..f525f06b996 100644 --- a/opal/mca/pmix/pmix2x/pmix/configure.ac +++ b/opal/mca/pmix/pmix2x/pmix/configure.ac @@ -171,7 +171,7 @@ LT_PREREQ([2.2.6]) pmix_enable_shared="$enable_shared" pmix_enable_static="$enable_static" -AS_IF([test ! -z "$enable_static" && test "$enable_static" == "yes"], +AS_IF([test ! -z "$enable_static" && test "$enable_static" = "yes"], [CFLAGS="$CFLAGS -fPIC"]) AM_ENABLE_SHARED @@ -201,7 +201,6 @@ AS_IF([test "$pmix_debug" = "1"], LT_INIT() LT_LANG([C]) -LT_LANG([C++]) ############################################################################ # Setup the core diff --git a/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec b/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec index 0652e442570..56860482340 100644 --- a/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec +++ b/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec @@ -192,7 +192,7 @@ Summary: An extended/exascale implementation of PMI Name: %{?_name:%{_name}}%{!?_name:pmix} -Version: 2.2.3rc1 +Version: 2.2.3rc2 Release: 1%{?dist} License: BSD Group: Development/Libraries diff --git a/opal/mca/pmix/pmix2x/pmix/contrib/whitespace-purge.sh b/opal/mca/pmix/pmix2x/pmix/contrib/whitespace-purge.sh index 9c9d8fe909e..905796bc1ef 100755 --- a/opal/mca/pmix/pmix2x/pmix/contrib/whitespace-purge.sh +++ b/opal/mca/pmix/pmix2x/pmix/contrib/whitespace-purge.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved # Copyright (c) 2015 Cisco Systems, Inc. @@ -18,7 +18,7 @@ for file in $(git ls-files) ; do # skip sym links, pdfs, etc. If any other file types should be # skipped add the check here. type=$(file -b --mime-type -h $file) - if test ${type::4} == "text" ; then + if test ${type::4} = "text" ; then # Eliminate whitespace at the end of lines perl -pi -e 's/\s*$/\n/' $file fi diff --git a/opal/mca/pmix/pmix2x/pmix/examples/client.c b/opal/mca/pmix/pmix2x/pmix/examples/client.c index 519ef649d62..49e471fb258 100644 --- a/opal/mca/pmix/pmix2x/pmix/examples/client.c +++ b/opal/mca/pmix/pmix2x/pmix/examples/client.c @@ -254,6 +254,9 @@ int main(int argc, char **argv) fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } + if (0 == myproc.rank) { + sleep(2); + } /* call fence to synchronize with our peers - instruct * the fence operation to collect and return all "put" diff --git a/opal/mca/pmix/pmix2x/pmix/examples/dynamic.c b/opal/mca/pmix/pmix2x/pmix/examples/dynamic.c index d7c0d3701ff..6b929420b7e 100644 --- a/opal/mca/pmix/pmix2x/pmix/examples/dynamic.c +++ b/opal/mca/pmix/pmix2x/pmix/examples/dynamic.c @@ -49,9 +49,7 @@ int main(int argc, char **argv) char nsp2[PMIX_MAX_NSLEN+1]; pmix_app_t *app; char hostname[1024], dir[1024]; - pmix_proc_t *peers; - size_t npeers, ntmp=0; - char *nodelist; + size_t ntmp=0; if (0 > gethostname(hostname, sizeof(hostname))) { exit(1); @@ -71,14 +69,14 @@ int main(int argc, char **argv) (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + /* get our job size */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); + fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); @@ -103,13 +101,6 @@ int main(int argc, char **argv) app->env = (char**)malloc(2 * sizeof(char*)); app->env[0] = strdup("PMIX_ENV_VALUE=3"); app->env[1] = NULL; - PMIX_INFO_CREATE(app->info, 2); - (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); - app->info[0].value.type = PMIX_INT8; - app->info[0].value.data.int8 = 12; - (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); - app->info[1].value.type = PMIX_DOUBLE; - app->info[1].value.data.dval = 12.34; fprintf(stderr, "Client ns %s rank %d: calling PMIx_Spawn\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { @@ -122,65 +113,28 @@ int main(int argc, char **argv) val = NULL; (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val)) || NULL == val) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe %s size %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); - } - - /* just cycle the connect/disconnect functions */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Connect(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Connect failed: %d\n", myproc.nspace, myproc.rank, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Connect succeeded\n", - myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Disconnect(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Disonnect failed: %d\n", myproc.nspace, myproc.rank, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Disconnect succeeded\n", myproc.nspace, myproc.rank); + fprintf(stderr, "Client %s:%d job %s size %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); - /* finally, test the resolve functions */ - if (0 == myproc.rank) { - if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, NULL, &peers, &npeers))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d\n", myproc.nspace, myproc.rank, nsp2, rc); - goto done; - } - if ((nprocs+ntmp) != npeers) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d\n", myproc.nspace, myproc.rank, (int)(nprocs+ntmp), (int)npeers); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers\n", myproc.nspace, myproc.rank, (int)npeers); - if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(nsp2, &nodelist))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes failed for nspace %s: %d\n", myproc.nspace, myproc.rank, nsp2, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); - } else { - if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, myproc.nspace, &peers, &npeers))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d\n", myproc.nspace, myproc.rank, myproc.nspace, rc); - goto done; - } - if (nprocs != npeers) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d\n", myproc.nspace, myproc.rank, nprocs, (int)npeers); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers\n", myproc.nspace, myproc.rank, (int)npeers); - if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(myproc.nspace, &nodelist))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes failed: %d\n", myproc.nspace, myproc.rank, rc); + /* get a proc-specific value */ + val = NULL; + (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); + proc.rank = 1; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_RANK, NULL, 0, &val)) || + NULL == val) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get local rank failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes %s\n", myproc.nspace, myproc.rank, nodelist); + ntmp = (int)val->data.uint16; + PMIX_VALUE_RELEASE(val); + fprintf(stderr, "Client %s:%d job %s local rank %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } - PMIX_PROC_FREE(peers, npeers); - free(nodelist); done: /* call fence to sync */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c index ac7ce80d5df..56774e4fb24 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -85,7 +85,7 @@ PMIX_EXPORT int PMI_Init(int *spawned) /* getting internal key requires special rank value */ memcpy(&proc, &myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_UNDEF; + proc.rank = PMIX_RANK_WILDCARD; /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup @@ -394,8 +394,6 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; - pmix_proc_t proc = myproc; - proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -414,11 +412,11 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); - rc = PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val); + rc = PMIx_Get(&myproc, PMIX_APPNUM, info, 1, &val); if (PMIX_SUCCESS == rc) { rc = convert_int(appnum, val); PMIX_VALUE_RELEASE(val); - } else if( PMIX_ERR_NOT_FOUND == rc ){ + } else { /* this is optional value, set to 0 */ *appnum = 0; rc = PMIX_SUCCESS; @@ -445,7 +443,7 @@ PMIX_EXPORT int PMI_Publish_name(const char service_name[], const char port[]) } /* pass the service/port */ - pmix_strncpy(info.key, service_name, PMIX_MAX_KEYLEN); + pmix_strncpy(info.key, service_name, PMIX_MAX_KEYLEN); info.value.type = PMIX_STRING; info.value.data.string = (char*) port; @@ -497,7 +495,7 @@ PMIX_EXPORT int PMI_Lookup_name(const char service_name[], char port[]) PMIX_PDATA_CONSTRUCT(&pdata); /* pass the service */ - pmix_strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN); + pmix_strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN); /* PMI-1 doesn't want the nspace back */ if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) { @@ -514,7 +512,7 @@ PMIX_EXPORT int PMI_Lookup_name(const char service_name[], char port[]) * potential we could overrun it. As this feature * isn't widely supported in PMI-1, try being * conservative */ - pmix_strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN); + pmix_strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN); PMIX_PDATA_DESTRUCT(&pdata); return PMIX_SUCCESS; diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c index 342305ea206..adac9bbf4d3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -73,7 +73,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, PMIX_ACQUIRE_THREAD(&pmix_global_lock); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: executing fence"); if (pmix_globals.init_cntr <= 0) { @@ -106,7 +106,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, rc = cb->status; PMIX_RELEASE(cb); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence released"); return rc; @@ -125,7 +125,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs PMIX_ACQUIRE_THREAD(&pmix_global_lock); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence_nb called"); if (pmix_globals.init_cntr <= 0) { @@ -185,7 +185,7 @@ static pmix_status_t unpack_return(pmix_buffer_t *data) pmix_status_t ret; int32_t cnt; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "client:unpack fence called"); /* unpack the status code */ @@ -196,7 +196,7 @@ static pmix_status_t unpack_return(pmix_buffer_t *data) PMIX_ERROR_LOG(rc); return rc; } - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "client:unpack fence received status %d", ret); return ret; } @@ -255,7 +255,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence_nb callback recvd"); if (NULL == cb) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index ac3c950bb87..48cee715afe 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -57,6 +57,7 @@ #include "src/util/compress.h" #include "src/util/error.h" #include "src/util/hash.h" +#include "src/util/name_fns.h" #include "src/util/output.h" #include "src/mca/gds/gds.h" #include "src/mca/ptl/ptl.h" @@ -99,14 +100,15 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_client_globals.get_output, - "pmix:client get for %s:%d key %s", - (NULL == proc) ? "NULL" : proc->nspace, - (NULL == proc) ? PMIX_RANK_UNDEF : proc->rank, + "pmix:client get for %s key %s", + (NULL == proc) ? "NULL" : PMIX_NAME_PRINT(proc), (NULL == key) ? "NULL" : key); /* try to get data directly, without threadshift */ - if (PMIX_SUCCESS == (rc = _getfn_fastpath(proc, key, info, ninfo, val))) { - goto done; + if (PMIX_RANK_UNDEF != proc->rank && NULL != key) { + if (PMIX_SUCCESS == (rc = _getfn_fastpath(proc, key, info, ninfo, val))) { + goto done; + } } /* create a callback object as we need to pass it to the @@ -329,9 +331,14 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, } if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); goto done; } - PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_client_globals.myserver, buf); + if (PMIX_RANK_UNDEF == proc.rank) { + PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_globals.mypeer, buf); + } else { + PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_client_globals.myserver, buf); + } if (PMIX_SUCCESS != rc) { goto done; } @@ -350,7 +357,11 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, /* fetch the data from server peer module - since it is passing * it back to the user, we need a copy of it */ cb->copy = true; - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + if (PMIX_RANK_UNDEF == proc.rank) { + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + } else { + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + } if (PMIX_SUCCESS == rc) { if (1 != pmix_list_get_size(&cb->kvs)) { rc = PMIX_ERR_INVALID_VAL; @@ -496,9 +507,15 @@ static pmix_status_t _getfn_fastpath(const pmix_proc_t *proc, const pmix_key_t k /* scan the incoming directives */ if (NULL != info) { for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_DATA_SCOPE, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_DATA_SCOPE)) { cb->scope = info[n].value.data.scope; - break; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_OPTIONAL) || + PMIX_CHECK_KEY(&info[n], PMIX_IMMEDIATE)) { + continue; + } else { + /* we cannot handle any other directives via this path */ + PMIX_RELEASE(cb); + return PMIX_ERR_NOT_SUPPORTED; } } } @@ -508,16 +525,16 @@ static pmix_status_t _getfn_fastpath(const pmix_proc_t *proc, const pmix_key_t k cb->info = (pmix_info_t*)info; cb->ninfo = ninfo; - PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_globals.mypeer); + PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_client_globals.myserver); if (PMIX_SUCCESS == rc) { - PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); if (PMIX_SUCCESS == rc) { goto done; } } - PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_client_globals.myserver); + PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_globals.mypeer); if (PMIX_SUCCESS == rc) { - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); if (PMIX_SUCCESS == rc) { goto done; } @@ -551,23 +568,23 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* cb was passed to us from another thread - acquire it */ PMIX_ACQUIRE_OBJECT(cb); - pmix_output_verbose(2, pmix_client_globals.get_output, - "pmix: getnbfn value for proc %s:%u key %s", - cb->pname.nspace, cb->pname.rank, - (NULL == cb->key) ? "NULL" : cb->key); - /* set the proc object identifier */ pmix_strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); proc.rank = cb->pname.rank; + pmix_output_verbose(2, pmix_client_globals.get_output, + "pmix: getnbfn value for proc %s key %s", + PMIX_NAME_PRINT(&proc), + (NULL == cb->key) ? "NULL" : cb->key); + /* scan the incoming directives */ if (NULL != cb->info) { for (n=0; n < cb->ninfo; n++) { - if (0 == strncmp(cb->info[n].key, PMIX_OPTIONAL, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&cb->info[n], PMIX_OPTIONAL)) { optional = PMIX_INFO_TRUE(&cb->info[n]); - } else if (0 == strncmp(cb->info[n].key, PMIX_IMMEDIATE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_IMMEDIATE)) { immediate = PMIX_INFO_TRUE(&cb->info[n]); - } else if (0 == strncmp(cb->info[n].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_TIMEOUT)) { /* set a timer to kick us out if we don't * have an answer within their window */ if (0 < cb->info[n].value.data.integer) { @@ -578,8 +595,16 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_event_evtimer_add(&cb->ev, &tv); cb->timer_running = true; } - } else if (0 == strncmp(cb->info[n].key, PMIX_DATA_SCOPE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_DATA_SCOPE)) { cb->scope = cb->info[n].value.data.scope; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_SESSION_INFO)) { + cb->level = PMIX_LEVEL_SESSION; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_JOB_INFO)) { + cb->level = PMIX_LEVEL_JOB; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_APP_INFO)) { + cb->level = PMIX_LEVEL_APP; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_NODE_INFO)) { + cb->level = PMIX_LEVEL_NODE; } } } @@ -604,7 +629,13 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* fetch the data from my server's module - since we are passing * it back to the user, we need a copy of it */ cb->copy = true; - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + /* if the peer and server GDS component are the same, then no + * point in trying it again */ + if (0 != strcmp(pmix_globals.mypeer->nptr->compat.gds->name, pmix_client_globals.myserver->nptr->compat.gds->name)) { + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + } else { + rc = PMIX_ERR_TAKE_NEXT_OPTION; + } if (PMIX_SUCCESS != rc) { pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client job-level data NOT found"); @@ -653,7 +684,17 @@ static void _getnbfn(int fd, short flags, void *cbdata) "pmix:client job-level data NOT found"); rc = process_values(&val, cb); goto respond; + } else if (PMIX_RANK_UNDEF == proc.rank) { + /* the data would have to be stored on our own peer, so + * we need to go request it */ + goto request; } else { + /* if the peer and server GDS component are the same, then no + * point in trying it again */ + if (0 == strcmp(pmix_globals.mypeer->nptr->compat.gds->name, pmix_client_globals.myserver->nptr->compat.gds->name)) { + val = NULL; + goto request; + } cb->proc = &proc; cb->copy = true; PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); @@ -722,9 +763,9 @@ static void _getnbfn(int fd, short flags, void *cbdata) } pmix_output_verbose(2, pmix_client_globals.get_output, - "%s:%d REQUESTING DATA FROM SERVER FOR %s:%d KEY %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, - cb->pname.nspace, cb->pname.rank, cb->key); + "%s REQUESTING DATA FROM SERVER FOR %s KEY %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(cb->proc), cb->key); /* track the callback object */ pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c index f5223b83b5d..ba1e73a1d24 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c @@ -144,6 +144,7 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque pmix_list_t results; pmix_kval_t *kv, *kvnxt; pmix_proc_t proc; + bool rank_given; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -181,37 +182,41 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_PROCID)) { PMIX_LOAD_NSPACE(proc.nspace, queries[n].qualifiers[p].value.data.proc->nspace); proc.rank = queries[n].qualifiers[p].value.data.proc->rank; + rank_given = true; } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_NSPACE)) { PMIX_LOAD_NSPACE(proc.nspace, queries[n].qualifiers[p].value.data.string); } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_RANK)) { proc.rank = queries[n].qualifiers[p].value.data.rank; - } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_HOSTNAME)) { - if (0 != strcmp(queries[n].qualifiers[p].value.data.string, pmix_globals.hostname)) { - /* asking about a different host, so ask for the info */ - PMIX_LIST_DESTRUCT(&results); - goto query; - } + rank_given = true; } } /* we get here if a refresh isn't required - first try a local * "get" on the data to see if we already have it */ PMIX_CONSTRUCT(&cb, pmix_cb_t); cb.copy = false; - /* set the proc */ - if (PMIX_RANK_INVALID == proc.rank && - 0 == strlen(proc.nspace)) { - /* use our id */ - cb.proc = &pmix_globals.myid; + /* if they are querying about node or app values not directly + * associated with a proc (i.e., they didn't specify the proc), + * then we obtain those by leaving the proc info as undefined */ + if (!rank_given) { + proc.rank = PMIX_RANK_UNDEF; + cb.proc = &proc; } else { - if (0 == strlen(proc.nspace)) { - /* use our nspace */ - PMIX_LOAD_NSPACE(cb.proc->nspace, pmix_globals.myid.nspace); - } - if (PMIX_RANK_INVALID == proc.rank) { - /* user the wildcard rank */ - proc.rank = PMIX_RANK_WILDCARD; + /* set the proc */ + if (PMIX_RANK_INVALID == proc.rank && + 0 == strlen(proc.nspace)) { + /* use our id */ + cb.proc = &pmix_globals.myid; + } else { + if (0 == strlen(proc.nspace)) { + /* use our nspace */ + PMIX_LOAD_NSPACE(cb.proc->nspace, pmix_globals.myid.nspace); + } + if (PMIX_RANK_INVALID == proc.rank) { + /* user the wildcard rank */ + proc.rank = PMIX_RANK_WILDCARD; + } + cb.proc = &proc; } - cb.proc = &proc; } for (p=0; NULL != queries[n].keys[p]; p++) { cb.key = queries[n].keys[p]; diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 574607ec4b0..c667489394c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -883,7 +883,7 @@ static void _notify_client_event(int sd, short args, void *cbdata) } else { /* look up the nspace for this proc */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (PMIX_CHECK_NSPACE(tmp->nspace, cd->targets[n].nspace)) { nptr = tmp; break; diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c index dc945e7a0c0..e2245901676 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c @@ -285,6 +285,7 @@ static void cbcon(pmix_cb_t *p) PMIX_CONSTRUCT(&p->kvs, pmix_list_t); p->copy = false; p->timer_running = false; + p->level = PMIX_LEVEL_UNDEF; } static void cbdes(pmix_cb_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index d0048d69478..a4e94850cf5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -115,6 +115,16 @@ typedef enum { PMIX_COLLECT_MAX } pmix_collect_t; +/* define a set of flags indicating the level + * of information being stored/requested */ +typedef enum { + PMIX_LEVEL_UNDEF, + PMIX_LEVEL_SESSION, + PMIX_LEVEL_JOB, + PMIX_LEVEL_APP, + PMIX_LEVEL_NODE +} pmix_level_t; + /**** PEER STRUCTURES ****/ /* clients can only talk to their server, and servers are @@ -367,6 +377,7 @@ typedef struct { pmix_list_t kvs; bool copy; bool timer_running; + pmix_level_t level; } pmix_cb_t; PMIX_CLASS_DECLARATION(pmix_cb_t); @@ -454,6 +465,7 @@ typedef struct { * interface so that other parts of the process can * look them up */ pmix_gds_base_module_t *mygds; + pmix_list_t nspaces; } pmix_globals_t; /* provide access to a function to cleanup epilogs */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_find.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_find.c index c9cd23740ed..7d96e21c36a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_find.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_find.c @@ -339,8 +339,8 @@ static int component_find_check (pmix_mca_base_framework_t *framework, char **re } if (!found) { - char h[MAXHOSTNAMELEN]; - gethostname(h, sizeof(h)); + char h[PMIX_MAXHOSTNAMELEN] = {0}; + gethostname(h, sizeof(h)-1); pmix_show_help("help-pmix-mca-base.txt", "find-available:not-valid", true, h, framework->framework_name, requested_component_names[i]); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_open.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_open.c index dd9b2cd08ee..fbb55dcb355 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_open.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_open.c @@ -68,7 +68,7 @@ int pmix_mca_base_open(void) { char *value; pmix_output_stream_t lds; - char hostname[64]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; int var_id; int rc; @@ -155,7 +155,7 @@ int pmix_mca_base_open(void) } else { set_defaults(&lds); } - gethostname(hostname, 64); + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); rc = asprintf(&lds.lds_prefix, "[%s:%05d] ", hostname, getpid()); if (0 > rc) { return PMIX_ERR_OUT_OF_RESOURCE; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/v20/copy.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/v20/copy.c index a5c945d96bc..fbdbae3efff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/v20/copy.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/v20/copy.c @@ -431,10 +431,15 @@ pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src) memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t)); break; case PMIX_PROC: - memcpy(&p->data.proc, &src->data.proc, sizeof(pmix_proc_t)); + /* create the storage */ + p->data.proc = (pmix_proc_t*)malloc(sizeof(pmix_proc_t)); + if (NULL == p->data.proc) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.proc, src->data.proc, sizeof(pmix_proc_t)); break; case PMIX_PROC_RANK: - memcpy(&p->data.proc, &src->data.rank, sizeof(pmix_rank_t)); + memcpy(&p->data.rank, &src->data.rank, sizeof(pmix_rank_t)); break; case PMIX_BYTE_OBJECT: case PMIX_COMPRESSED_STRING: @@ -653,7 +658,12 @@ pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src) p1 = (pmix_info_t*)p->data.darray->array; s1 = (pmix_info_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { - PMIX_INFO_LOAD(&p1[n], s1[n].key, &s1[n].value.data.flag, s1[n].value.type); + PMIX_LOAD_KEY(p1[n].key, s1[n].key); + rc = pmix_value_xfer(&p1[n].value, &s1[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_INFO_FREE(p1, src->data.darray->size); + return rc; + } } break; case PMIX_PDATA: @@ -664,7 +674,13 @@ pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src) pd = (pmix_pdata_t*)p->data.darray->array; sd = (pmix_pdata_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { - PMIX_PDATA_LOAD(&pd[n], &sd[n].proc, sd[n].key, &sd[n].value.data.flag, sd[n].value.type); + memcpy(&pd[n].proc, &sd[n].proc, sizeof(pmix_proc_t)); + PMIX_LOAD_KEY(pd[n].key, sd[n].key); + rc = pmix_value_xfer(&pd[n].value, &sd[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_INFO_FREE(pd, src->data.darray->size); + return rc; + } } break; case PMIX_BUFFER: diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.c index 4e092fc5a68..652e4d08381 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.c @@ -41,6 +41,7 @@ #include "src/util/error.h" #include "src/util/hash.h" #include "src/util/output.h" +#include "src/util/name_fns.h" #include "src/util/pmix_environ.h" #include "src/mca/preg/preg.h" @@ -115,6 +116,24 @@ pmix_gds_base_module_t pmix_hash_module = { .accept_kvs_resp = accept_kvs_resp }; +/* Define a bitmask to track what information may not have + * been provided but is computable from other info */ +#define PMIX_HASH_PROC_DATA 0x00000001 +#define PMIX_HASH_JOB_SIZE 0x00000002 +#define PMIX_HASH_MAX_PROCS 0x00000004 +#define PMIX_HASH_NUM_NODES 0x00000008 +#define PMIX_HASH_PROC_MAP 0x00000010 +#define PMIX_HASH_NODE_MAP 0x00000020 + +/**********************************************/ +/* struct definitions */ +typedef struct { + pmix_list_item_t super; + uint32_t session; + pmix_list_t sessioninfo; + pmix_list_t nodeinfo; +} pmix_session_t; + typedef struct { pmix_list_item_t super; char *ns; @@ -123,12 +142,49 @@ typedef struct { pmix_hash_table_t remote; pmix_hash_table_t local; bool gdata_added; -} pmix_hash_trkr_t; + pmix_list_t jobinfo; + pmix_list_t apps; + pmix_list_t nodeinfo; + pmix_session_t *session; +} pmix_job_t; + +typedef struct { + pmix_list_item_t super; + uint32_t appnum; + pmix_list_t appinfo; + pmix_list_t nodeinfo; + pmix_job_t *job; +} pmix_apptrkr_t; -static void htcon(pmix_hash_trkr_t *p) +typedef struct { + pmix_list_item_t super; + uint32_t nodeid; + char *hostname; + pmix_list_t info; +} pmix_nodeinfo_t; + +/**********************************************/ +/* class instantiations */ +static void scon(pmix_session_t *s) +{ + s->session = UINT32_MAX; + PMIX_CONSTRUCT(&s->sessioninfo, pmix_list_t); + PMIX_CONSTRUCT(&s->nodeinfo, pmix_list_t); +} +static void sdes(pmix_session_t *s) +{ + PMIX_LIST_DESTRUCT(&s->sessioninfo); + PMIX_LIST_DESTRUCT(&s->nodeinfo); +} +static PMIX_CLASS_INSTANCE(pmix_session_t, + pmix_list_item_t, + scon, sdes); + +static void htcon(pmix_job_t *p) { p->ns = NULL; p->nptr = NULL; + PMIX_CONSTRUCT(&p->jobinfo, pmix_list_t); PMIX_CONSTRUCT(&p->internal, pmix_hash_table_t); pmix_hash_table_init(&p->internal, 256); PMIX_CONSTRUCT(&p->remote, pmix_hash_table_t); @@ -136,8 +192,11 @@ static void htcon(pmix_hash_trkr_t *p) PMIX_CONSTRUCT(&p->local, pmix_hash_table_t); pmix_hash_table_init(&p->local, 256); p->gdata_added = false; + PMIX_CONSTRUCT(&p->apps, pmix_list_t); + PMIX_CONSTRUCT(&p->nodeinfo, pmix_list_t); + p->session = NULL; } -static void htdes(pmix_hash_trkr_t *p) +static void htdes(pmix_job_t *p) { if (NULL != p->ns) { free(p->ns); @@ -145,25 +204,411 @@ static void htdes(pmix_hash_trkr_t *p) if (NULL != p->nptr) { PMIX_RELEASE(p->nptr); } + PMIX_LIST_DESTRUCT(&p->jobinfo); pmix_hash_remove_data(&p->internal, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->internal); pmix_hash_remove_data(&p->remote, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->remote); pmix_hash_remove_data(&p->local, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->local); + PMIX_LIST_DESTRUCT(&p->apps); + PMIX_LIST_DESTRUCT(&p->nodeinfo); + if (NULL != p->session) { + PMIX_RELEASE(p->session); + } } -static PMIX_CLASS_INSTANCE(pmix_hash_trkr_t, +static PMIX_CLASS_INSTANCE(pmix_job_t, pmix_list_item_t, htcon, htdes); -static pmix_list_t myhashes; +static void apcon(pmix_apptrkr_t *p) +{ + p->appnum = 0; + PMIX_CONSTRUCT(&p->appinfo, pmix_list_t); + PMIX_CONSTRUCT(&p->nodeinfo, pmix_list_t); + p->job = NULL; +} +static void apdes(pmix_apptrkr_t *p) +{ + PMIX_LIST_DESTRUCT(&p->appinfo); + PMIX_LIST_DESTRUCT(&p->nodeinfo); + if (NULL != p->job) { + PMIX_RELEASE(p->job); + } +} +static PMIX_CLASS_INSTANCE(pmix_apptrkr_t, + pmix_list_item_t, + apcon, apdes); + +static void ndinfocon(pmix_nodeinfo_t *p) +{ + p->nodeid = 0; + p->hostname = NULL; + PMIX_CONSTRUCT(&p->info, pmix_list_t); +} +static void ndinfodes(pmix_nodeinfo_t *p) +{ + if (NULL != p->hostname) { + free(p->hostname); + } + PMIX_LIST_DESTRUCT(&p->info); +} +static PMIX_CLASS_INSTANCE(pmix_nodeinfo_t, + pmix_list_item_t, + ndinfocon, ndinfodes); + +/**********************************************/ + +/* process a node array - contains an array of + * node-level info for a single node. Either the + * nodeid, hostname, or both must be included + * in the array to identify the node */ +static pmix_status_t process_node_array(pmix_info_t *info, + pmix_list_t *tgt) +{ + size_t size, j; + pmix_info_t *iptr; + pmix_status_t rc = PMIX_SUCCESS; + pmix_kval_t *kp2, *k1, *knext; + pmix_list_t cache; + pmix_nodeinfo_t *nd = NULL, *ndptr; + bool update; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING NODE ARRAY"); + + /* array of node-level info for a specific node */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + + /* setup arrays */ + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + + /* cache the values while searching for the nodeid + * and/or hostname */ + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODEID)) { + if (NULL == nd) { + nd = PMIX_NEW(pmix_nodeinfo_t); + } + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, nd->nodeid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(nd); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_HOSTNAME)) { + if (NULL == nd) { + nd = PMIX_NEW(pmix_nodeinfo_t); + } + nd->hostname = strdup(iptr[j].value.data.string); + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + if (NULL != nd) { + PMIX_RELEASE(nd); + } + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + pmix_list_append(&cache, &kp2->super); + } + } + + if (NULL == nd) { + /* they forgot to pass us the ident for the node */ + PMIX_LIST_DESTRUCT(&cache); + return PMIX_ERR_BAD_PARAM; + } + + /* see if we already have this node on the + * provided list */ + update = false; + PMIX_LIST_FOREACH(ndptr, tgt, pmix_nodeinfo_t) { + if (ndptr->nodeid == nd->nodeid || + (NULL != ndptr->hostname && NULL != nd->hostname && 0 == strcmp(ndptr->hostname, nd->hostname))) { + /* we assume that the data is updating the current + * values */ + if (NULL == ndptr->hostname && NULL != nd->hostname) { + ndptr->hostname = strdup(nd->hostname); + } + PMIX_RELEASE(nd); + nd = ndptr; + update = true; + break; + } + } + + /* transfer the cached items to the nodeinfo list */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + /* if this is an update, we have to ensure each data + * item only appears once on the list */ + if (update) { + PMIX_LIST_FOREACH_SAFE(k1, knext, &nd->info, pmix_kval_t) { + if (PMIX_CHECK_KEY(k1, kp2->key)) { + pmix_list_remove_item(&nd->info, &k1->super); + PMIX_RELEASE(k1); + break; + } + } + } + pmix_list_append(&nd->info, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + PMIX_LIST_DESTRUCT(&cache); + + pmix_list_append(tgt, &nd->super); + return PMIX_SUCCESS; +} + +/* process an app array - contains an array of + * app-level info for a single app. If the + * appnum is not included in the array, then + * it is assumed that only app is in the job. + * This assumption is checked and generates + * an error if violated */ +static pmix_status_t process_app_array(pmix_info_t *info, + pmix_job_t *trk) +{ + pmix_list_t cache, ncache; + size_t size, j; + pmix_info_t *iptr; + pmix_status_t rc = PMIX_SUCCESS; + uint32_t appnum; + pmix_apptrkr_t *app = NULL, *apptr; + pmix_kval_t *kp2, *k1, *knext; + pmix_nodeinfo_t *nd; + bool update; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING APP ARRAY"); + + /* apps have to belong to a job */ + if (NULL == trk) { + return PMIX_ERR_BAD_PARAM; + } + + /* array of app-level info */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + + /* setup arrays and lists */ + PMIX_CONSTRUCT(&cache, pmix_list_t); + PMIX_CONSTRUCT(&ncache, pmix_list_t); + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_APPNUM)) { + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, appnum, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto release; + } + if (NULL != app) { + /* this is an error - there can be only one app + * described in this array */ + PMIX_RELEASE(app); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + return PMIX_ERR_BAD_PARAM; + } + app = PMIX_NEW(pmix_apptrkr_t); + app->appnum = appnum; + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &ncache))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + pmix_list_append(&cache, &kp2->super); + } + } + if (NULL == app) { + /* per the standard, they don't have to provide us with + * an appnum so long as only one app is in the job */ + if (0 == pmix_list_get_size(&trk->apps)) { + app = PMIX_NEW(pmix_apptrkr_t); + } else { + /* this is not allowed to happen - they are required + * to provide us with an app number per the standard */ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto release; + } + } + /* see if we already have this app on the + * provided list */ + update = false; + PMIX_LIST_FOREACH(apptr, &trk->apps, pmix_apptrkr_t) { + if (apptr->appnum == app->appnum) { + /* we assume that the data is updating the current + * values */ + PMIX_RELEASE(app); + app = apptr; + update = true; + break; + } + } + + /* point the app at its job */ + if (NULL == app->job) { + PMIX_RETAIN(trk); + app->job = trk; + } + + /* transfer the app-level data across */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + /* if this is an update, we have to ensure each data + * item only appears once on the list */ + if (update) { + PMIX_LIST_FOREACH_SAFE(k1, knext, &app->appinfo, pmix_kval_t) { + if (PMIX_CHECK_KEY(k1, kp2->key)) { + pmix_list_remove_item(&app->appinfo, &k1->super); + PMIX_RELEASE(k1); + break; + } + } + } + pmix_list_append(&app->appinfo, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + /* transfer the associated node-level data across */ + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + while (NULL != nd) { + pmix_list_append(&app->nodeinfo, &nd->super); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + } + + release: + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + + return rc; +} + +/* process a job array */ +static pmix_status_t process_job_array(pmix_info_t *info, + pmix_job_t *trk, + uint32_t *flags, + char ***procs, + char ***nodes) +{ + pmix_list_t cache; + size_t j, size; + pmix_info_t *iptr; + pmix_kval_t *kp2; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING JOB ARRAY"); + + /* array of job-level info */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_APP_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_app_array(&iptr[j], trk))) { + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &trk->nodeinfo))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_PROC_MAP)) { + /* not allowed to get this more than once */ + if (*flags & PMIX_HASH_PROC_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + /* parse the regex to get the argv array containing proc ranks on each node */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(iptr[j].value.data.string, procs))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* mark that we got the map */ + *flags |= PMIX_HASH_PROC_MAP; + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_MAP)) { + /* not allowed to get this more than once */ + if (*flags & PMIX_HASH_NODE_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + /* store the node map itself since that is + * what v3 uses */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_MAP); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = strdup(iptr[j].value.data.string); + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->internal, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + + /* parse the regex to get the argv array of node names */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_nodes(iptr[j].value.data.string, nodes))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* mark that we got the map */ + *flags |= PMIX_HASH_NODE_MAP; + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(kp2); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + pmix_list_append(&trk->jobinfo, &kp2->super); + } + } + return PMIX_SUCCESS; +} + +static pmix_list_t mysessions, myjobs; static pmix_status_t hash_init(pmix_info_t info[], size_t ninfo) { pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: hash init"); - PMIX_CONSTRUCT(&myhashes, pmix_list_t); + PMIX_CONSTRUCT(&mysessions, pmix_list_t); + PMIX_CONSTRUCT(&myjobs, pmix_list_t); return PMIX_SUCCESS; } @@ -172,7 +617,8 @@ static void hash_finalize(void) pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: hash finalize"); - PMIX_LIST_DESTRUCT(&myhashes); + PMIX_LIST_DESTRUCT(&mysessions); + PMIX_LIST_DESTRUCT(&myjobs); } static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, @@ -201,13 +647,6 @@ static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, return PMIX_SUCCESS; } -/* Define a bitmask to track what information may not have - * been provided but is computable from other info */ -#define PMIX_HASH_PROC_DATA 0x00000001 -#define PMIX_HASH_JOB_SIZE 0x00000002 -#define PMIX_HASH_MAX_PROCS 0x00000004 -#define PMIX_HASH_NUM_NODES 0x00000008 - static pmix_status_t store_map(pmix_hash_table_t *ht, char **nodes, char **ppn, uint32_t flags) @@ -496,16 +935,20 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, pmix_info_t info[], size_t ninfo) { pmix_namespace_t *nptr = (pmix_namespace_t*)ns; - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; + pmix_session_t *s = NULL, *sptr; pmix_hash_table_t *ht; pmix_kval_t *kp2, *kvptr; pmix_info_t *iptr; char **nodes=NULL, **procs=NULL; uint8_t *tmp; + uint32_t sid=UINT32_MAX; pmix_rank_t rank; pmix_status_t rc=PMIX_SUCCESS; size_t n, j, size, len; uint32_t flags = 0; + pmix_list_t cache, ncache; + pmix_nodeinfo_t *nd; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:cache_job_info for nspace %s", @@ -514,7 +957,7 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(nptr->nspace, t->ns)) { trk = t; break; @@ -522,14 +965,14 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, } if (NULL == trk) { /* create a tracker as we will likely need it */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); if (NULL == trk) { return PMIX_ERR_NOMEM; } PMIX_RETAIN(nptr); trk->nptr = nptr; trk->ns = strdup(nptr->nspace); - pmix_list_append(&myhashes, &trk->super); + pmix_list_append(&myjobs, &trk->super); } /* if there isn't any data, then be content with just @@ -541,7 +984,141 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, /* cache the job info on the internal hash table for this nspace */ ht = &trk->internal; for (n=0; n < ninfo; n++) { - if (0 == strcmp(info[n].key, PMIX_NODE_MAP)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_SESSION_ID)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto release; + } + /* see if we have this session */ + s = NULL; + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + s = sptr; + break; + } + } + if (NULL == s) { + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + /* point the job at it */ + if (NULL == trk->session) { + PMIX_RETAIN(s); + trk->session = s; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SESSION_INFO_ARRAY)) { + /* array of session-level info */ + if (PMIX_DATA_ARRAY != info[n].value.type) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + rc = PMIX_ERR_TYPE_MISMATCH; + goto release; + } + size = info[n].value.data.darray->size; + iptr = (pmix_info_t*)info[n].value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + PMIX_CONSTRUCT(&ncache, pmix_list_t); + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_SESSION_ID)) { + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + /* setup a session object */ + if (NULL != s) { + /* does this match the one we were previously given? */ + if (sid != s->session) { + /* no - see if we already have this session */ + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + s = sptr; + break; + } + } + if (sid != s->session) { + /* wasn't found, so create one */ + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + } + } else { + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &ncache))) { + PMIX_ERROR_LOG(rc); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + goto release; + } + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + goto release; + } + pmix_list_append(&cache, &kp2->super); + } + } + if (NULL == s) { + /* this is not allowed to happen - they are required + * to provide us with a session ID per the standard */ + PMIX_LIST_DESTRUCT(&cache); + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto release; + } + /* point the job at it */ + if (NULL == trk->session) { + PMIX_RETAIN(s); + trk->session = s; + } + /* transfer the data across */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + pmix_list_append(&s->sessioninfo, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + PMIX_LIST_DESTRUCT(&cache); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + while (NULL != nd) { + pmix_list_append(&s->nodeinfo, &nd->super); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + } + PMIX_LIST_DESTRUCT(&ncache); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_JOB_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_job_array(&info[n], trk, &flags, &procs, &nodes))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_APP_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_app_array(&info[n], trk))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&info[n], &trk->nodeinfo))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NODE_MAP)) { + /* not allowed to get this more than once */ + if (flags & PMIX_HASH_NODE_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } /* store the node map itself since that is * what v3 uses */ kp2 = PMIX_NEW(pmix_kval_t); @@ -561,12 +1138,21 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, PMIX_ERROR_LOG(rc); goto release; } - } else if (0 == strcmp(info[n].key, PMIX_PROC_MAP)) { + /* mark that we got the map */ + flags |= PMIX_HASH_NODE_MAP; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_PROC_MAP)) { + /* not allowed to get this more than once */ + if (flags & PMIX_HASH_PROC_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } /* parse the regex to get the argv array containing proc ranks on each node */ if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(info[n].value.data.string, &procs))) { PMIX_ERROR_LOG(rc); goto release; } + /* mark that we got the map */ + flags |= PMIX_HASH_PROC_MAP; } else if (0 == strcmp(info[n].key, PMIX_PROC_DATA)) { flags |= PMIX_HASH_PROC_DATA; /* an array of data pertaining to a specific proc */ @@ -723,18 +1309,18 @@ static pmix_status_t register_info(pmix_peer_t *peer, pmix_namespace_t *ns, pmix_buffer_t *reply) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_hash_table_t *ht; pmix_value_t *val, blob; pmix_status_t rc = PMIX_SUCCESS; pmix_info_t *info; size_t ninfo, n; - pmix_kval_t kv; + pmix_kval_t kv, *kvptr; pmix_buffer_t buf; pmix_rank_t rank; trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(ns->nspace, t->ns)) { trk = t; break; @@ -773,6 +1359,12 @@ static pmix_status_t register_info(pmix_peer_t *peer, PMIX_VALUE_RELEASE(val); } + /* add all values in the jobinfo list */ + PMIX_LIST_FOREACH(kvptr, &trk->jobinfo, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, peer, reply, kvptr, 1, PMIX_KVAL); + } + + /* get the proc-level data for each proc in the job */ for (rank=0; rank < ns->nprocs; rank++) { val = NULL; rc = pmix_hash_fetch(ht, rank, NULL, &val); @@ -820,7 +1412,7 @@ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, pmix_namespace_t *ns = peer->nptr; char *msg; pmix_status_t rc; - pmix_hash_trkr_t *trk, *t2; + pmix_job_t *trk, *t2; if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { @@ -857,7 +1449,7 @@ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, /* setup a tracker for this nspace as we will likely * need it again */ trk = NULL; - PMIX_LIST_FOREACH(t2, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t2, &myjobs, pmix_job_t) { if (ns == t2->nptr) { trk = t2; if (NULL == trk->ns) { @@ -867,11 +1459,11 @@ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, } } if (NULL == trk) { - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(ns->nspace); PMIX_RETAIN(ns); trk->nptr = ns; - pmix_list_append(&myhashes, &trk->super); + pmix_list_append(&myjobs, &trk->super); } /* the job info for the specified nspace has @@ -914,10 +1506,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, pmix_byte_object_t *bo; pmix_buffer_t buf2; int rank; - pmix_hash_trkr_t *htptr; + pmix_job_t *htptr; pmix_hash_table_t *ht; char **nodelist = NULL; pmix_info_t *info, *iptr; + pmix_namespace_t *ns, *nptr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%u] pmix:gds:hash store job info for nspace %s", @@ -937,9 +1530,27 @@ static pmix_status_t hash_store_job_info(const char *nspace, return rc; } + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(ns->nspace, nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + return rc; + } + nptr->nspace = strdup(nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + /* see if we already have a hash table for this nspace */ ht = NULL; - PMIX_LIST_FOREACH(htptr, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(htptr, &myjobs, pmix_job_t) { if (0 == strcmp(htptr->ns, nspace)) { ht = &htptr->internal; break; @@ -947,9 +1558,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, } if (NULL == ht) { /* nope - create one */ - htptr = PMIX_NEW(pmix_hash_trkr_t); + htptr = PMIX_NEW(pmix_job_t); htptr->ns = strdup(nspace); - pmix_list_append(&myhashes, &htptr->super); + PMIX_RETAIN(nptr); + htptr->nptr = nptr; + pmix_list_append(&myjobs, &htptr->super); ht = &htptr->internal; } @@ -961,7 +1574,7 @@ static pmix_status_t hash_store_job_info(const char *nspace, pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%u] pmix:gds:hash store job info working key %s", pmix_globals.myid.nspace, pmix_globals.myid.rank, kptr->key); - if (0 == strcmp(kptr->key, PMIX_PROC_BLOB)) { + if (PMIX_CHECK_KEY(kptr, PMIX_PROC_BLOB)) { bo = &(kptr->value->data.bo); PMIX_CONSTRUCT(&buf2, pmix_buffer_t); PMIX_LOAD_BUFFER(pmix_client_globals.myserver, &buf2, bo->bytes, bo->size); @@ -1012,7 +1625,7 @@ static pmix_status_t hash_store_job_info(const char *nspace, /* cleanup */ PMIX_DESTRUCT(&buf2); // releases the original kptr data PMIX_RELEASE(kp2); - } else if (0 == strcmp(kptr->key, PMIX_MAP_BLOB)) { + } else if (PMIX_CHECK_KEY(kptr, PMIX_MAP_BLOB)) { /* transfer the byte object for unpacking */ bo = &(kptr->value->data.bo); PMIX_CONSTRUCT(&buf2, pmix_buffer_t); @@ -1183,6 +1796,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, PMIX_RELEASE(kptr); return rc; } + /* if this is the job size, then store it in + * the nptr tracker */ + if (0 == nptr->nprocs && PMIX_CHECK_KEY(kptr, PMIX_JOB_SIZE)) { + nptr->nprocs = kptr->value->data.uint32; + } } PMIX_RELEASE(kptr); kptr = PMIX_NEW(pmix_kval_t); @@ -1205,14 +1823,15 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, pmix_scope_t scope, pmix_kval_t *kv) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_status_t rc; pmix_kval_t *kp; + pmix_namespace_t *ns, *nptr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, - "[%s:%d] gds:hash:hash_store for proc [%s:%d] key %s type %s scope %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, - proc->nspace, proc->rank, kv->key, + "%s gds:hash:hash_store for proc %s key %s type %s scope %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(proc), kv->key, PMIx_Data_type_string(kv->value->type), PMIx_Scope_string(scope)); if (NULL == kv->key) { @@ -1221,7 +1840,7 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1229,9 +1848,29 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, } if (NULL == trk) { /* create one */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(proc->nspace); - pmix_list_append(&myhashes, &trk->super); + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(ns->nspace, proc->nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + PMIX_RELEASE(trk); + return rc; + } + nptr->nspace = strdup(proc->nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + PMIX_RETAIN(nptr); + trk->nptr = nptr; + pmix_list_append(&myjobs, &trk->super); } /* see if the proc is me */ @@ -1264,6 +1903,11 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, } } + /* if the number of procs for the nspace object is new, then update it */ + if (0 == trk->nptr->nprocs && PMIX_CHECK_KEY(kv, PMIX_JOB_SIZE)) { + trk->nptr->nprocs = kv->value->data.uint32; + } + /* store it in the corresponding hash table */ if (PMIX_INTERNAL == scope) { if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->internal, proc->rank, kv))) { @@ -1332,12 +1976,13 @@ static pmix_status_t _hash_store_modex(void * cbdata, pmix_byte_object_t *bo) { pmix_namespace_t *ns = (pmix_namespace_t*)nspace; - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_status_t rc = PMIX_SUCCESS; int32_t cnt; pmix_buffer_t pbkt; pmix_proc_t proc; pmix_kval_t *kv; + pmix_namespace_t *nptr, *nptr2; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:store_modex for nspace %s", @@ -1346,7 +1991,7 @@ static pmix_status_t _hash_store_modex(void * cbdata, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(ns->nspace, t->ns)) { trk = t; break; @@ -1354,9 +1999,29 @@ static pmix_status_t _hash_store_modex(void * cbdata, } if (NULL == trk) { /* create one */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(ns->nspace); - pmix_list_append(&myhashes, &trk->super); + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(nptr2, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(nptr2->nspace, ns->nspace)) { + nptr = nptr2; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + PMIX_RELEASE(trk); + return rc; + } + nptr->nspace = strdup(ns->nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + PMIX_RETAIN(nptr); + trk->nptr = nptr; + pmix_list_append(&myjobs, &trk->super); } /* this is data returned via the PMIx_Fence call when @@ -1386,14 +2051,20 @@ static pmix_status_t _hash_store_modex(void * cbdata, kv = PMIX_NEW(pmix_kval_t); PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, kv, &cnt, PMIX_KVAL); while (PMIX_SUCCESS == rc) { - /* store this in the hash table */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc.rank, kv))) { - PMIX_ERROR_LOG(rc); - bo->bytes = pbkt.base_ptr; - bo->size = pbkt.bytes_used; // restore the incoming data - pbkt.base_ptr = NULL; - PMIX_DESTRUCT(&pbkt); - return rc; + if (PMIX_RANK_UNDEF == proc.rank) { + /* if the rank is undefined, then we store it on the + * remote table of rank=0 as we know that rank must + * always exist */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, 0, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else { + /* store this in the hash table */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc.rank, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } } PMIX_RELEASE(kv); // maintain accounting as the hash increments the ref count /* continue along */ @@ -1415,25 +2086,257 @@ static pmix_status_t _hash_store_modex(void * cbdata, } +static pmix_status_t dohash(pmix_hash_table_t *ht, + const char *key, + pmix_rank_t rank, + bool skip_genvals, + pmix_list_t *kvs) +{ + pmix_status_t rc; + pmix_value_t *val; + pmix_kval_t *kv, *k2; + pmix_info_t *info; + size_t n, ninfo; + bool found; + + rc = pmix_hash_fetch(ht, rank, key, &val); + if (PMIX_SUCCESS == rc) { + /* if the key was NULL, then all found keys will be + * returned as a pmix_data_array_t in the value */ + if (NULL == key) { + if (NULL == val->data.darray || + PMIX_INFO != val->data.darray->type || + 0 == val->data.darray->size) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); + PMIX_RELEASE(val); + return PMIX_ERR_NOT_FOUND; + } + info = (pmix_info_t*)val->data.darray->array; + ninfo = val->data.darray->size; + for (n=0; n < ninfo; n++) { + /* if the rank is UNDEF, then we don't want + * anything that starts with "pmix" */ + if (skip_genvals && + 0 == strncmp(info[n].key, "pmix", 4)) { + continue; + } + /* see if we already have this on the list */ + found = false; + PMIX_LIST_FOREACH(k2, kvs, pmix_kval_t) { + if (PMIX_CHECK_KEY(&info[n], k2->key)) { + found = true; + break; + } + } + if (found) { + continue; + } + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(info[n].key); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + PMIX_VALUE_RELEASE(val); + PMIX_RELEASE(kv); + return PMIX_ERR_NOMEM; + } + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + kv->value, &info[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_VALUE_RELEASE(val); + PMIX_RELEASE(kv); + return rc; + } + pmix_list_append(kvs, &kv->super); + } + PMIX_VALUE_RELEASE(val); + } else { + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(key); + kv->value = val; + pmix_list_append(kvs, &kv->super); + } + } + return rc; +} + +static pmix_status_t fetch_nodeinfo(const char *key, pmix_list_t *tgt, + pmix_info_t *info, size_t ninfo, + pmix_list_t *kvs) +{ + size_t n; + pmix_status_t rc; + uint32_t nid=0; + char *hostname = NULL; + bool found = false; + pmix_nodeinfo_t *nd, *ndptr; + pmix_kval_t *kv, *kp2; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "FETCHING NODE INFO"); + + /* scan for the nodeID or hostname to identify + * which node they are asking about */ + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&info[n], PMIX_NODEID)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, nid, uint32_t); + if (PMIX_SUCCESS != rc) { + return rc; + } + found = true; + break; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_HOSTNAME)) { + hostname = info[n].value.data.string; + found = true; + break; + } + } + if (!found) { + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + } + + /* scan the list of nodes to find the matching entry */ + nd = NULL; + PMIX_LIST_FOREACH(ndptr, tgt, pmix_nodeinfo_t) { + if (NULL != hostname && 0 == strcmp(ndptr->hostname, hostname)) { + nd = ndptr; + break; + } + if (NULL == hostname && nid == ndptr->nodeid) { + nd = ndptr; + break; + } + } + if (NULL == nd) { + return PMIX_ERR_NOT_FOUND; + } + /* scan the info list of this node to generate the results */ + rc = PMIX_ERR_NOT_FOUND; + PMIX_LIST_FOREACH(kv, &nd->info, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kv, key)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(kv->key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + pmix_list_append(kvs, &kp2->super); + rc = PMIX_SUCCESS; + if (NULL != key) { + break; + } + } + } + + return rc; +} + +static pmix_status_t fetch_appinfo(const char *key, pmix_list_t *tgt, + pmix_info_t *info, size_t ninfo, + pmix_list_t *kvs) +{ + size_t n; + pmix_status_t rc; + uint32_t appnum; + bool found = false; + pmix_apptrkr_t *app, *apptr; + pmix_kval_t *kv, *kp2; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "FETCHING APP INFO"); + + /* scan for the appnum to identify + * which app they are asking about */ + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&info[n], PMIX_APPNUM)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, appnum, uint32_t); + if (PMIX_SUCCESS != rc) { + return rc; + } + found = true; + break; + } + } + if (!found) { + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + } + + /* scan the list of apps to find the matching entry */ + app = NULL; + PMIX_LIST_FOREACH(apptr, tgt, pmix_apptrkr_t) { + if (appnum == apptr->appnum) { + app = apptr; + break; + } + } + if (NULL == app) { + return PMIX_ERR_NOT_FOUND; + } + + /* see if they wanted to know something about a node that + * is associated with this app */ + rc = fetch_nodeinfo(key, &app->nodeinfo, info, ninfo, kvs); + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + + /* scan the info list of this app to generate the results */ + rc = PMIX_ERR_NOT_FOUND; + PMIX_LIST_FOREACH(kv, &app->appinfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kv, key)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(kv->key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + pmix_list_append(kvs, &kp2->super); + rc = PMIX_SUCCESS; + if (NULL != key) { + break; + } + } + } + + return rc; +} + static pmix_status_t hash_fetch(const pmix_proc_t *proc, pmix_scope_t scope, bool copy, const char *key, pmix_info_t qualifiers[], size_t nqual, pmix_list_t *kvs) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_status_t rc; pmix_value_t *val; - pmix_kval_t *kv; + pmix_kval_t *kv, *kvptr; pmix_info_t *info; size_t n, ninfo; pmix_hash_table_t *ht; + pmix_session_t *sptr; + uint32_t sid; + pmix_rank_t rnk; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, - "[%s:%u] pmix:gds:hash fetch %s for proc %s:%u on scope %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, + "[%s] pmix:gds:hash fetch %s for proc %s on scope %s", + PMIX_NAME_PRINT(&pmix_globals.myid), (NULL == key) ? "NULL" : key, - proc->nspace, proc->rank, PMIx_Scope_string(scope)); + PMIX_NAME_PRINT(proc), PMIx_Scope_string(scope)); /* if the rank is wildcard and the key is NULL, then * they are asking for a complete copy of the job-level @@ -1442,7 +2345,7 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, /* see if we have a tracker for this nspace - we will * if we already cached the job info for it */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1497,9 +2400,58 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, return PMIX_SUCCESS; } + /* if the nspace and rank are undefined, then they are asking + * for session-level information. */ + if (0 == strlen(proc->nspace) && PMIX_RANK_UNDEF == proc->rank) { + /* they must have included something identifying the info + * class they are querying */ + for (n=0; n < nqual; n++) { + if (PMIX_CHECK_KEY(&qualifiers[n], PMIX_SESSION_ID)) { + /* they want session-level info - see if we have + * that session */ + PMIX_VALUE_GET_NUMBER(rc, &qualifiers[n].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + /* didn't provide a correct value */ + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + /* see if they want info for a specific node */ + rc = fetch_nodeinfo(key, &sptr->nodeinfo, qualifiers, nqual, kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + /* check the session info */ + PMIX_LIST_FOREACH(kvptr, &sptr->sessioninfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kvptr, key)) { + kv = PMIX_NEW(pmix_kval_t); + kv->key = strdup(kvptr->key); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kv->value, kvptr->value); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(kv); + return rc; + } + pmix_list_append(kvs, &kv->super); + if (NULL != key) { + /* we are done */ + return PMIX_SUCCESS; + } + } + } + } + } + /* if we get here, then the session wasn't found */ + return PMIX_ERR_NOT_FOUND; + } + } + } + /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1509,6 +2461,24 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, return PMIX_ERR_INVALID_NAMESPACE; } + /* if the rank isn't specified, check to see if they + * are looking for app-level or node-level info for + * this job */ + if (PMIX_RANK_UNDEF == proc->rank) { + /* see if they want info for a specific node */ + rc = fetch_nodeinfo(key, &trk->nodeinfo, qualifiers, nqual, kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + /* see if they want info for a specific app */ + rc = fetch_appinfo(key, &trk->apps, qualifiers, nqual, kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + } + /* fetch from the corresponding hash table - note that * we always provide a copy as we don't support * shared memory */ @@ -1528,59 +2498,56 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, } doover: - rc = pmix_hash_fetch(ht, proc->rank, key, &val); - if (PMIX_SUCCESS == rc) { - /* if the key was NULL, then all found keys will be - * returned as a pmix_data_array_t in the value */ - if (NULL == key) { - if (NULL == val->data.darray || - PMIX_INFO != val->data.darray->type || - 0 == val->data.darray->size) { - PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); - return PMIX_ERR_NOT_FOUND; + /* if rank=PMIX_RANK_UNDEF, then we need to search all + * known ranks for this nspace as any one of them could + * be the source */ + if (PMIX_RANK_UNDEF == proc->rank) { + for (rnk=0; rnk < trk->nptr->nprocs; rnk++) { + rc = dohash(ht, key, rnk, true, kvs); + if (PMIX_ERR_NOMEM == rc) { + return rc; } - info = (pmix_info_t*)val->data.darray->array; - ninfo = val->data.darray->size; - for (n=0; n < ninfo; n++) { + if (PMIX_SUCCESS == rc && NULL != key) { + return rc; + } + } + /* also need to check any job-level info */ + PMIX_LIST_FOREACH(kvptr, &trk->jobinfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kvptr, key)) { kv = PMIX_NEW(pmix_kval_t); - if (NULL == kv) { - PMIX_VALUE_RELEASE(val); - return PMIX_ERR_NOMEM; - } - kv->key = strdup(info[n].key); + kv->key = strdup(kvptr->key); kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - if (NULL == kv->value) { - PMIX_VALUE_RELEASE(val); - PMIX_RELEASE(kv); - return PMIX_ERR_NOMEM; - } - PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, - kv->value, &info[n].value); + PMIX_VALUE_XFER(rc, kv->value, kvptr->value); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_VALUE_RELEASE(val); PMIX_RELEASE(kv); return rc; } pmix_list_append(kvs, &kv->super); + if (NULL != key) { + break; + } } - PMIX_VALUE_RELEASE(val); - if (PMIX_GLOBAL == scope && ht == &trk->local) { + } + if (NULL == key) { + /* and need to add all job info just in case that was + * passed via a different GDS component */ + dohash(&trk->internal, NULL, PMIX_RANK_WILDCARD, false, kvs); + } + } else { + rc = dohash(ht, key, proc->rank, false, kvs); + } + if (PMIX_SUCCESS == rc) { + if (PMIX_GLOBAL == scope) { + if (ht == &trk->local) { /* need to do this again for the remote data */ ht = &trk->remote; goto doover; + } else if (ht == &trk->internal) { + /* check local */ + ht = &trk->local; + goto doover; } - return PMIX_SUCCESS; } - /* just return the value */ - kv = PMIX_NEW(pmix_kval_t); - if (NULL == kv) { - PMIX_VALUE_RELEASE(val); - return PMIX_ERR_NOMEM; - } - kv->key = strdup(key); - kv->value = val; - pmix_list_append(kvs, &kv->super); } else { if (PMIX_GLOBAL == scope || PMIX_SCOPE_UNDEF == scope) { @@ -1595,6 +2562,9 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, } } } + if (0 == pmix_list_get_size(kvs)) { + rc = PMIX_ERR_NOT_FOUND; + } return rc; } @@ -1615,13 +2585,13 @@ static pmix_status_t nspace_add(const char *nspace, static pmix_status_t nspace_del(const char *nspace) { - pmix_hash_trkr_t *t; + pmix_job_t *t; /* find the hash table for this nspace */ - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(nspace, t->ns)) { /* release it */ - pmix_list_remove_item(&myhashes, &t->super); + pmix_list_remove_item(&myjobs, &t->super); PMIX_RELEASE(t); break; } @@ -1685,6 +2655,12 @@ static pmix_status_t accept_kvs_resp(pmix_buffer_t *buf) PMIX_ERROR_LOG(rc); return rc; } + /* if the rank is UNDEF, then we store this on our own + * rank tables */ + if (PMIX_RANK_UNDEF == proct.rank) { + proct.rank = pmix_globals.myid.rank; + } + cnt = 1; kv = PMIX_NEW(pmix_kval_t); PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, @@ -1694,7 +2670,6 @@ static pmix_status_t accept_kvs_resp(pmix_buffer_t *buf) * the kval contains shmem connection info, then the * component will know what to do about it (or else * we selected the wrong component for this peer!) */ - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, &proct, PMIX_INTERNAL, kv); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c index b5b49e035b2..1d48b462770 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c @@ -158,7 +158,7 @@ static int if_bsdx_open(void) /* fill values into the pmix_pif_t */ memcpy(&a4, &(sin_addr->sin_addr), sizeof(struct in_addr)); - pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE-1); + pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list) + 1; ((struct sockaddr_in*) &intf->if_addr)->sin_addr = a4; ((struct sockaddr_in*) &intf->if_addr)->sin_family = AF_INET; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c index 699984369d1..ff30d73500b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c @@ -183,7 +183,7 @@ static int if_bsdx_ipv6_open(void) return PMIX_ERR_OUT_OF_RESOURCE; } intf->af_family = AF_INET6; - pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE-1); + pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list) + 1; ((struct sockaddr_in6*) &intf->if_addr)->sin6_addr = a6; ((struct sockaddr_in6*) &intf->if_addr)->sin6_family = AF_INET6; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c index ec5261fbda4..53bec6fb04a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c @@ -82,12 +82,15 @@ static int if_linux_ipv6_open(void) if ((f = fopen("/proc/net/if_inet6", "r"))) { /* IF_NAMESIZE is normally 16 on Linux, but the next scanf allows up to 21 bytes */ - char ifname[21]; + char ifname[PMIX_IF_NAMESIZE]; unsigned int idx, pfxlen, scope, dadstat; struct in6_addr a6; int iter; uint32_t flag; - unsigned int addrbyte[16]; + unsigned int addrbyte[PMIX_IF_NAMESIZE]; + + memset(addrbyte, 0, PMIX_IF_NAMESIZE*sizeof(unsigned int)); + memset(ifname, 0, PMIX_IF_NAMESIZE*sizeof(char)); while (fscanf(f, "%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x %x %x %x %x %20s\n", &addrbyte[0], &addrbyte[1], &addrbyte[2], &addrbyte[3], @@ -104,8 +107,8 @@ static int if_linux_ipv6_open(void) addrbyte[8], addrbyte[9], addrbyte[10], addrbyte[11], addrbyte[12], addrbyte[13], addrbyte[14], addrbyte[15], scope); - /* we don't want any other scope less than link-local */ - if (scope < 0x20) { + /* Only interested in global (0x00) scope */ + if (scope != 0x00) { pmix_output_verbose(1, pmix_pif_base_framework.framework_output, "skipping interface %2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x scope %x\n", addrbyte[0], addrbyte[1], addrbyte[2], addrbyte[3], @@ -129,7 +132,7 @@ static int if_linux_ipv6_open(void) } /* now construct the pmix_pif_t */ - pmix_strncpy(intf->if_name, ifname, IF_NAMESIZE-1); + pmix_strncpy(intf->if_name, ifname, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list)+1; intf->if_kernel_index = (uint16_t) idx; ((struct sockaddr_in6*) &intf->if_addr)->sin6_addr = a6; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h index 1582122122f..9d23fdf1ff5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h @@ -73,7 +73,7 @@ BEGIN_C_DECLS typedef struct pmix_pif_t { pmix_list_item_t super; - char if_name[IF_NAMESIZE+1]; + char if_name[PMIX_IF_NAMESIZE+1]; int if_index; uint16_t if_kernel_index; uint16_t af_family; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c index 6872b5c0c2d..c9895cb6617 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c @@ -190,7 +190,7 @@ static int if_solaris_ipv6_open(void) } intf->af_family = AF_INET6; - pmix_strncpy (intf->if_name, lifreq->lifr_name, IF_NAMESIZE-1); + pmix_strncpy (intf->if_name, lifreq->lifr_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list)+1; memcpy(&intf->if_addr, my_addr, sizeof (*my_addr)); intf->if_mask = 64; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/Makefile.am deleted file mode 100644 index fe01cde836e..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/Makefile.am +++ /dev/null @@ -1,59 +0,0 @@ -# -*- makefile -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. -# Copyright (c) 2017 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(pnet_opa_CPPFLAGS) - -headers = pnet_opa.h -sources = \ - pnet_opa_component.c \ - pnet_opa.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_pmix_pnet_opa_DSO -lib = -lib_sources = -component = mca_pnet_opa.la -component_sources = $(headers) $(sources) -else -lib = libmca_pnet_opa.la -lib_sources = $(headers) $(sources) -component = -component_sources = -endif - -mcacomponentdir = $(pmixlibdir) -mcacomponent_LTLIBRARIES = $(component) -mca_pnet_opa_la_SOURCES = $(component_sources) -mca_pnet_opa_la_LIBADD = $(pnet_opa_LIBS) -mca_pnet_opa_la_LDFLAGS = -module -avoid-version $(pnet_opa_LDFLAGS) -if NEED_LIBPMIX -mca_pnet_opa_la_LIBADD += $(top_builddir)/src/libpmix.la -endif - -noinst_LTLIBRARIES = $(lib) -libmca_pnet_opa_la_SOURCES = $(lib_sources) -libmca_pnet_opa_la_LIBADD = $(pnet_opa_LIBS) -libmca_pnet_opa_la_LDFLAGS = -module -avoid-version $(pnet_opa_LDFLAGS) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/configure.m4 deleted file mode 100644 index 2273fabe47e..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/configure.m4 +++ /dev/null @@ -1,110 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Sandia National Laboratories. All rights reserved. -# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_pnet_opa_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_pmix_pnet_opa_CONFIG],[ - AC_CONFIG_FILES([src/mca/pnet/opa/Makefile]) - - PMIX_CHECK_PSM2([pnet_opa], - [pnet_opa_happy="yes"], - [pnet_opa_happy="no"]) - - AC_ARG_WITH([opamgt], - [AC_HELP_STRING([--with-opamgt(=DIR)], - [Build OmniPath Fabric Management support (optionally adding DIR/include, DIR/include/opamgt, DIR/lib, and DIR/lib64 to the search path for headers and libraries])], [], [with_opamgt=no]) - - AC_ARG_WITH([opamgt-libdir], - [AC_HELP_STRING([--with-opamgt-libdir=DIR], - [Search for OmniPath Fabric Management libraries in DIR])]) - - pmix_check_opamgt_save_CPPFLAGS="$CPPFLAGS" - pmix_check_opamgt_save_LDFLAGS="$LDFLAGS" - pmix_check_opamgt_save_LIBS="$LIBS" - - pmix_check_opamgt_libdir= - pmix_check_opamgt_dir= - - AC_MSG_CHECKING([if opamgt requested]) - AS_IF([test "$with_opamgt" == "no"], - [AC_MSG_RESULT([no]) - pmix_check_opamgt_happy=no], - [AC_MSG_RESULT([yes]) - PMIX_CHECK_WITHDIR([opamgt-libdir], [$with_opamgt_libdir], [libopamgt.*]) - AS_IF([test ! -z "$with_opamgt" && test "$with_opamgt" != "yes"], - [pmix_check_opamgt_dir="$with_opamgt" - AS_IF([test ! -d "$pmix_check_opamgt_dir" || test ! -f "$pmix_check_opamgt_dir/opamgt.h"], - [$pmix_check_opamgt_dir=$pmix_check_opamgt_dir/include - AS_IF([test ! -d "$pmix_check_opamgt_dir" || test ! -f "$pmix_check_opamgt_dir/opamgt.h"], - [$pmix_check_opamgt_dir=$pmix_check_opamgt_dir/opamgt - AS_IF([test ! -d "$pmix_check_opamgt_dir" || test ! -f "$pmix_check_opamgt_dir/opamgt.h"], - [AC_MSG_WARN([OmniPath Fabric Management support requested, but]) - AC_MSG_WARN([required header file opamgt.h not found. Locations tested:]) - AC_MSG_WARN([ $with_opamgt]) - AC_MSG_WARN([ $with_opamgt/include]) - AC_MSG_WARN([ $with_opamgt/include/opamgt]) - AC_MSG_ERROR([Cannot continue])])])])], - [pmix_check_opamgt_dir="/usr/include/opamgt"]) - - AS_IF([test ! -z "$with_opamgt_libdir" && test "$with_opamgt_libdir" != "yes"], - [pmix_check_opamgt_libdir="$with_opamgt_libdir"]) - - # no easy way to check this, so let's ensure that the - # full opamgt install was done, including the iba support - AS_IF([test ! -d "$pmix_check_opamgt_dir/iba" || test ! -f "$pmix_check_opamgt_dir/iba/vpi.h"], - [pmix_check_opamgt_happy="no"], - [PMIX_CHECK_PACKAGE([pnet_opamgt], - [opamgt.h], - [opamgt], - [omgt_query_sa], - [], - [$pmix_check_opamgt_dir], - [$pmix_check_opamgt_libdir], - [pmix_check_opamgt_happy="yes" - pnet_opa_CFLAGS="$pnet_opa_CFLAGS $pnet_opamgt_CFLAGS" - pnet_opa_CPPFLAGS="$pnet_opa_CPPFLAGS $pnet_opamgt_CPPFLAGS" - pnet_opa_LDFLAGS="$pnet_opa_LDFLAGS $pnet_opamgt_LDFLAGS" - pnet_opa_LIBS="$pnet_opa_LIBS $pnet_opamgt_LIBS"], - [pmix_check_opamgt_happy="no"])]) - ]) - - AS_IF([test "$pmix_check_opamgt_happy" = "yes"], - [pmix_want_opamgt=1], - [pmix_want_opamgt=0]) - AC_DEFINE_UNQUOTED([PMIX_WANT_OPAMGT], [$pmix_want_opamgt], - [Whether or not to include OmniPath Fabric Manager support]) - - CPPFLAGS="$pmix_check_opamgt_save_CPPFLAGS" - LDFLAGS="$pmix_check_opamgt_save_LDFLAGS" - LIBS="$pmix_check_opamgt_save_LIBS" - - AS_IF([test "$pnet_opa_happy" = "yes"], - [$1], - [$2]) - - # substitute in the things needed to build psm2 - AC_SUBST([pnet_opa_CFLAGS]) - AC_SUBST([pnet_opa_CPPFLAGS]) - AC_SUBST([pnet_opa_LDFLAGS]) - AC_SUBST([pnet_opa_LIBS]) -])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.c deleted file mode 100644 index 0684d3b86d1..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif -#include - -#include - -#include "src/mca/base/pmix_mca_base_var.h" -#include "src/include/pmix_socket_errno.h" -#include "src/include/pmix_globals.h" -#include "src/class/pmix_list.h" -#include "src/util/alfg.h" -#include "src/util/argv.h" -#include "src/util/error.h" -#include "src/util/output.h" -#include "src/util/pmix_environ.h" - -#include "src/mca/pnet/pnet.h" -#include "pnet_opa.h" - -static pmix_status_t opa_init(void); -static void opa_finalize(void); -static pmix_status_t setup_app(char *nspace, pmix_list_t *ilist); -static pmix_status_t setup_local_network(char *nspace, - pmix_info_t info[], - size_t ninfo); -static pmix_status_t setup_fork(const pmix_proc_t *peer, char ***env); -static void child_finalized(pmix_peer_t *peer); -static void local_app_finalized(char *nspace); - -pmix_pnet_module_t pmix_opa_module = { - .init = opa_init, - .finalize = opa_finalize, - .setup_app = setup_app, - .setup_local_network = setup_local_network, - .setup_fork = setup_fork, - .child_finalized = child_finalized, - .local_app_finalized = local_app_finalized -}; - -static pmix_status_t opa_init(void) -{ - pmix_output_verbose(2, pmix_globals.debug_output, - "pnet: opa init"); - return PMIX_SUCCESS; -} - -static void opa_finalize(void) -{ - pmix_output_verbose(2, pmix_globals.debug_output, - "pnet: opa finalize"); -} - -/* some network transports require a little bit of information to - * "pre-condition" them - i.e., to setup their individual transport - * connections so they can generate their endpoint addresses. This - * function provides a means for doing so. The resulting info is placed - * into the app_context's env array so it will automatically be pushed - * into the environment of every MPI process when launched. - */ - -static inline void transports_use_rand(uint64_t* unique_key) { - pmix_rng_buff_t rng; - pmix_srand(&rng,(unsigned int)time(NULL)); - unique_key[0] = pmix_rand(&rng); - unique_key[1] = pmix_rand(&rng); -} - -static char* transports_print(uint64_t *unique_key) -{ - unsigned int *int_ptr; - size_t i, j, string_key_len, written_len; - char *string_key = NULL, *format = NULL; - - /* string is two 64 bit numbers printed in hex with a dash between - * and zero padding. - */ - string_key_len = (sizeof(uint64_t) * 2) * 2 + strlen("-") + 1; - string_key = (char*) malloc(string_key_len); - if (NULL == string_key) { - return NULL; - } - - string_key[0] = '\0'; - written_len = 0; - - /* get a format string based on the length of an unsigned int. We - * want to have zero padding for sizeof(unsigned int) * 2 - * characters -- when printing as a hex number, each byte is - * represented by 2 hex characters. Format will contain something - * that looks like %08lx, where the number 8 might be a different - * number if the system has a different sized long (8 would be for - * sizeof(int) == 4)). - */ - if (0 > asprintf(&format, "%%0%dx", (int)(sizeof(unsigned int)) * 2)) { - return NULL; - } - - /* print the first number */ - int_ptr = (unsigned int*) &unique_key[0]; - for (i = 0 ; i < sizeof(uint64_t) / sizeof(unsigned int) ; ++i) { - if (0 == int_ptr[i]) { - /* inject some energy */ - for (j=0; j < sizeof(unsigned int); j++) { - int_ptr[i] |= j << j; - } - } - snprintf(string_key + written_len, - string_key_len - written_len, - format, int_ptr[i]); - written_len = strlen(string_key); - } - - /* print the middle dash */ - snprintf(string_key + written_len, string_key_len - written_len, "-"); - written_len = strlen(string_key); - - /* print the second number */ - int_ptr = (unsigned int*) &unique_key[1]; - for (i = 0 ; i < sizeof(uint64_t) / sizeof(unsigned int) ; ++i) { - if (0 == int_ptr[i]) { - /* inject some energy */ - for (j=0; j < sizeof(unsigned int); j++) { - int_ptr[i] |= j << j; - } - } - snprintf(string_key + written_len, - string_key_len - written_len, - format, int_ptr[i]); - written_len = strlen(string_key); - } - free(format); - - return string_key; -} - -static pmix_status_t setup_app(char *nspace, pmix_list_t *ilist) -{ - uint64_t unique_key[2]; - char *string_key, *cs_env; - int fd_rand; - size_t bytes_read; - pmix_kval_t *kv; - - /* put the number here - or else create an appropriate string. this just needs to - * eventually be a string variable - */ - if(-1 == (fd_rand = open("/dev/urandom", O_RDONLY))) { - transports_use_rand(unique_key); - } else { - bytes_read = read(fd_rand, (char *) unique_key, 16); - if(bytes_read != 16) { - transports_use_rand(unique_key); - } - close(fd_rand); - } - - if (NULL == (string_key = transports_print(unique_key))) { - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - return PMIX_ERR_OUT_OF_RESOURCE; - } - - if (PMIX_SUCCESS != pmix_mca_base_var_env_name("pmix_precondition_transports", &cs_env)) { - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - free(string_key); - return PMIX_ERR_OUT_OF_RESOURCE; - } - - kv = PMIX_NEW(pmix_kval_t); - if (NULL == kv) { - free(string_key); - free(cs_env); - return PMIX_ERR_OUT_OF_RESOURCE; - } - kv->key = strdup(PMIX_SET_ENVAR); - kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - if (NULL == kv->value) { - free(string_key); - free(cs_env); - PMIX_RELEASE(kv); - return PMIX_ERR_OUT_OF_RESOURCE; - } - kv->value->type = PMIX_STRING; - if (0 > asprintf(&kv->value->data.string, "%s=%s", cs_env, string_key)) { - free(string_key); - free(cs_env); - PMIX_RELEASE(kv); - return PMIX_ERR_OUT_OF_RESOURCE; - } - pmix_list_append(ilist, &kv->super); - free(cs_env); - free(string_key); - - return PMIX_SUCCESS; -} - -static pmix_status_t setup_local_network(char *nspace, - pmix_info_t info[], - size_t ninfo) -{ - return PMIX_SUCCESS; -} - -static pmix_status_t setup_fork(const pmix_proc_t *peer, char ***env) -{ - return PMIX_SUCCESS; -} - -static void child_finalized(pmix_peer_t *peer) -{ - -} - -static void local_app_finalized(char *nspace) -{ - -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h deleted file mode 100644 index 0ca3ba5e39b..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef PMIX_PNET_OPA_H -#define PMIX_PNET_OPA_H - -#include - - -#include "src/mca/pnet/pnet.h" - -BEGIN_C_DECLS - -/* the component must be visible data for the linker to find it */ -PMIX_EXPORT extern pmix_pnet_base_component_t mca_pnet_opa_component; -extern pmix_pnet_module_t pmix_opa_module; - -END_C_DECLS - -#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa_component.c deleted file mode 100644 index 92dec15ca6b..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa_component.c +++ /dev/null @@ -1,84 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. - */ - -#include -#include "pmix_common.h" - - -#include "src/mca/pnet/pnet.h" -#include "pnet_opa.h" - -static pmix_status_t component_open(void); -static pmix_status_t component_close(void); -static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ -pmix_pnet_base_component_t mca_pnet_opa_component = { - .base = { - PMIX_PNET_BASE_VERSION_1_0_0, - - /* Component name and version */ - .pmix_mca_component_name = "opa", - PMIX_MCA_BASE_MAKE_VERSION(component, - PMIX_MAJOR_VERSION, - PMIX_MINOR_VERSION, - PMIX_RELEASE_VERSION), - - /* Component open and close functions */ - .pmix_mca_open_component = component_open, - .pmix_mca_close_component = component_close, - .pmix_mca_query_component = component_query, - }, - .data = { - /* The component is checkpoint ready */ - PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT - } -}; - - -static int component_open(void) -{ - return PMIX_SUCCESS; -} - - -static int component_query(pmix_mca_base_module_t **module, int *priority) -{ - *priority = 10; - *module = (pmix_mca_base_module_t *)&pmix_opa_module; - return PMIX_SUCCESS; -} - - -static int component_close(void) -{ - return PMIX_SUCCESS; -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c index 587ce5ab2cc..6529c1fa4a8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c @@ -82,9 +82,9 @@ static int _mmap_segment_create(pmix_pshmem_seg_t *sm_seg, const char *file_name if (ENOSPC == rc) { rc = PMIX_ERR_OUT_OF_RESOURCE; goto out; - } else if ((ENOTSUP != rc) + } else if (EINVAL != rc && ENOTSUP != rc #ifdef EOPNOTSUPP - && (EOPNOTSUPP != rc) + && EOPNOTSUPP != rc #endif ){ rc = PMIX_ERROR; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c index 04e9eaf87a0..4b9d75057f7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -131,7 +131,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, char *p, *p2, *server_nspace = NULL, *rendfile = NULL; int sd, rc; size_t n; - char myhost[PMIX_MAXHOSTNAMELEN]; + char myhost[PMIX_MAXHOSTNAMELEN] = {0}; bool system_level = false; bool system_level_only = false; pid_t pid = 0, mypid; @@ -305,7 +305,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, /* mark that we are using the V2 protocol */ pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2; - gethostname(myhost, sizeof(myhost)); + gethostname(myhost, sizeof(myhost)-1); /* if we were given a URI via MCA param, then look no further */ if (NULL != suri) { if (NULL != server_nspace) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index 9379a4fd652..d3f32749831 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -244,6 +244,7 @@ static int component_register(void) } static char *urifile = NULL; +static bool created_rendezvous_file = false; static pmix_status_t component_open(void) { @@ -301,7 +302,9 @@ pmix_status_t component_close(void) free(mca_ptl_tcp_component.pid_filename); } if (NULL != mca_ptl_tcp_component.rendezvous_filename) { - unlink(mca_ptl_tcp_component.rendezvous_filename); + if (created_rendezvous_file) { + unlink(mca_ptl_tcp_component.rendezvous_filename); + } free(mca_ptl_tcp_component.rendezvous_filename); } if (NULL != urifile) { @@ -357,8 +360,8 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, bool session_tool = false; bool system_tool = false; pmix_socklen_t addrlen; - char *prefix, myhost[PMIX_MAXHOSTNAMELEN]; - char myconnhost[PMIX_MAXHOSTNAMELEN]; + char *prefix, myhost[PMIX_MAXHOSTNAMELEN] = {0}; + char myconnhost[PMIX_MAXHOSTNAMELEN] = {0}; int myport; pmix_kval_t *urikv; @@ -618,17 +621,17 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, goto sockerror; } - gethostname(myhost, sizeof(myhost)); + gethostname(myhost, sizeof(myhost)-1); if (AF_INET == mca_ptl_tcp_component.connection.ss_family) { prefix = "tcp4://"; myport = ntohs(((struct sockaddr_in*) &mca_ptl_tcp_component.connection)->sin_port); inet_ntop(AF_INET, &((struct sockaddr_in*) &mca_ptl_tcp_component.connection)->sin_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else if (AF_INET6 == mca_ptl_tcp_component.connection.ss_family) { prefix = "tcp6://"; myport = ntohs(((struct sockaddr_in6*) &mca_ptl_tcp_component.connection)->sin6_port); inet_ntop(AF_INET6, &((struct sockaddr_in6*) &mca_ptl_tcp_component.connection)->sin6_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else { goto sockerror; } @@ -708,6 +711,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, mca_ptl_tcp_component.rendezvous_filename = NULL; goto sockerror; } + created_rendezvous_file = true; } /* if we are going to support tools, then drop contact file(s) */ @@ -873,7 +877,7 @@ static char **split_and_resolve(char **orig_str, char *name) { int i, ret, save, if_index; char **argv, *str, *tmp; - char if_name[IF_NAMESIZE]; + char if_name[PMIX_IF_NAMESIZE]; struct sockaddr_storage argv_inaddr, if_inaddr; uint32_t argv_prefix; @@ -1322,7 +1326,7 @@ static void connection_handler(int sd, short args, void *cbdata) * of local clients. So let's start by searching for * the nspace object */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; @@ -1460,7 +1464,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we know this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; @@ -1759,7 +1763,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) if (5 != pnd->flag) { PMIX_RETAIN(nptr); nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); info = PMIX_NEW(pmix_rank_info_t); info->pname.nspace = strdup(nptr->nspace); info->pname.rank = cd->proc.rank; @@ -1787,7 +1791,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) peer->nptr->compat.psec = pmix_psec_base_assign_module(pnd->psec); if (NULL == peer->nptr->compat.psec) { PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1802,7 +1806,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) PMIX_INFO_DESTRUCT(&ginfo); if (NULL == peer->nptr->compat.gds) { PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1839,7 +1843,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) "validation of tool credentials failed: %s", PMIx_Error_string(rc)); PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1852,7 +1856,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); PMIX_RELEASE(cd); PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object /* probably cannot send an error reply if we are out of memory */ return; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c index 6b9fb03b5d1..f65d49ffde7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -560,7 +560,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we know this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c index ebd653e72d3..4613fe2d768 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c @@ -13,8 +13,8 @@ * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2016-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -111,6 +111,8 @@ void pmix_rte_finalize(void) } } PMIX_DESTRUCT(&pmix_globals.notifications); + free(pmix_globals.hostname); + PMIX_LIST_DESTRUCT(&pmix_globals.nspaces); /* now safe to release the event base */ if (!pmix_globals.external_evbase) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c index 35bdf5277e5..9fe7bfeeb2a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c @@ -97,7 +97,7 @@ int pmix_rte_init(pmix_proc_type_t type, int ret, debug_level; char *error = NULL, *evar; size_t n; - char hostname[PMIX_MAXHOSTNAMELEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; if( ++pmix_initialized != 1 ) { if( pmix_initialized < 1 ) { @@ -158,7 +158,7 @@ int pmix_rte_init(pmix_proc_type_t type, } /* setup the globals structure */ - gethostname(hostname, PMIX_MAXHOSTNAMELEN); + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); pmix_globals.hostname = strdup(hostname); memset(&pmix_globals.myid.nspace, 0, PMIX_MAX_NSLEN+1); pmix_globals.myid.rank = PMIX_RANK_INVALID; @@ -171,6 +171,8 @@ int pmix_rte_init(pmix_proc_type_t type, ret = pmix_hotel_init(&pmix_globals.notifications, pmix_globals.max_events, pmix_globals.evbase, pmix_globals.event_eviction_time, _notification_eviction_cbfunc); + PMIX_CONSTRUCT(&pmix_globals.nspaces, pmix_list_t); + if (PMIX_SUCCESS != ret) { error = "notification hotel init"; goto return_error; diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 6fb30026291..a0d3645fcf0 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -96,7 +96,6 @@ pmix_status_t pmix_server_initialize(void) PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.events, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); - PMIX_CONSTRUCT(&pmix_server_globals.nspaces, pmix_list_t); pmix_output_verbose(2, pmix_server_globals.base_output, "pmix:server init called"); @@ -348,7 +347,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, pmix_globals.mypeer->nptr = PMIX_NEW(pmix_namespace_t); /* ensure our own nspace is first on the list */ PMIX_RETAIN(pmix_globals.mypeer->nptr); - pmix_list_prepend(&pmix_server_globals.nspaces, &pmix_globals.mypeer->nptr->super); + pmix_list_prepend(&pmix_globals.nspaces, &pmix_globals.mypeer->nptr->super); } pmix_globals.mypeer->nptr->nspace = strdup(pmix_globals.myid.nspace); rinfo->pname.nspace = strdup(pmix_globals.mypeer->nptr->nspace); @@ -446,13 +445,12 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); PMIX_LIST_DESTRUCT(&pmix_server_globals.gdata); PMIX_LIST_DESTRUCT(&pmix_server_globals.events); - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { /* ensure that we do the specified cleanup - if this is an * abnormal termination, then the nspace object may not be * at zero refcount */ pmix_execute_epilog(&ns->epilog); } - PMIX_LIST_DESTRUCT(&pmix_server_globals.nspaces); if (NULL != security_mode) { free(security_mode); @@ -516,7 +514,7 @@ static void _register_nspace(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { nptr = tmp; break; @@ -529,7 +527,7 @@ static void _register_nspace(int sd, short args, void *cbdata) goto release; } nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } nptr->nlocalprocs = cd->nlocalprocs; @@ -719,12 +717,12 @@ static void _deregister_nspace(int sd, short args, void *cbdata) pmix_server_purge_events(NULL, &cd->proc); /* release this nspace */ - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (PMIX_CHECK_NSPACE(tmp->nspace, cd->proc.nspace)) { /* perform any nspace-level epilog */ pmix_execute_epilog(&tmp->epilog); /* remove and release it */ - pmix_list_remove_item(&pmix_server_globals.nspaces, &tmp->super); + pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super); PMIX_RELEASE(tmp); break; } @@ -952,7 +950,7 @@ static void _register_client(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, cd->proc.nspace)) { nptr = ns; break; @@ -965,7 +963,7 @@ static void _register_client(int sd, short args, void *cbdata) goto cleanup; } nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } /* setup a peer object for this client - since the host server * only deals with the original processes and not any clones, @@ -1006,7 +1004,7 @@ static void _register_client(int sd, short args, void *cbdata) * if the nspaces are all defined */ if (all_def) { /* so far, they have all been defined - check this one */ - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 < ns->nlocalprocs && 0 == strcmp(trk->pcs[i].nspace, ns->nspace)) { all_def = ns->all_registered; @@ -1121,7 +1119,7 @@ static void _deregister_client(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { nptr = tmp; break; @@ -1323,15 +1321,15 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_ACQUIRE_OBJECT(cd); pmix_output_verbose(2, pmix_server_globals.base_output, - "DMODX LOOKING FOR %s:%d", - cd->proc.nspace, cd->proc.rank); + "DMODX LOOKING FOR %s", + PMIX_NAME_PRINT(&cd->proc)); /* this should be one of my clients, but a race condition * could cause this request to arrive prior to us having * been informed of it - so first check to see if we know * about this nspace yet */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, cd->proc.nspace)) { nptr = ns; break; @@ -1457,8 +1455,9 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, } pmix_output_verbose(2, pmix_server_globals.base_output, - "pmix:server dmodex request%s:%d", - proc->nspace, proc->rank); + "%s pmix:server dmodex request for proc %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(proc)); cd = PMIX_NEW(pmix_setup_caddy_t); pmix_strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c index d798b27bf95..647f76adc35 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -51,6 +51,7 @@ #include "src/mca/gds/gds.h" #include "src/util/argv.h" #include "src/util/error.h" +#include "src/util/name_fns.h" #include "src/util/output.h" #include "src/util/pmix_environ.h" @@ -126,6 +127,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_dmdx_request_t *req; bool local; bool localonly = false; + bool diffnspace = false; struct timeval tv = {0, 0}; pmix_buffer_t pbkt, pkt; pmix_byte_object_t bo; @@ -133,10 +135,10 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_proc_t proc; char *data; size_t sz, n; - pmix_peer_t *peer; pmix_output_verbose(2, pmix_server_globals.get_output, - "recvd GET"); + "%s:%u recvd GET", + pmix_globals.myid.nspace, pmix_globals.myid.rank); /* setup */ memset(nspace, 0, sizeof(nspace)); @@ -191,13 +193,19 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, /* find the nspace object for this client */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(nspace, ns->nspace)) { nptr = ns; break; } } + /* check if the nspace of the requestor is different from + * the nspace of the target process */ + if (!PMIX_CHECK_NSPACE(nspace, cd->peer->info->pname.nspace)) { + diffnspace = true; + } + pmix_output_verbose(2, pmix_server_globals.get_output, "%s:%d EXECUTE GET FOR %s:%d ON BEHALF OF %s:%d", pmix_globals.myid.nspace, @@ -294,10 +302,10 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, return PMIX_SUCCESS; } - /* this nspace is known, so we can process the request. - * if the rank is wildcard, then they are asking for the - * job-level info for this nspace - provide it */ - if (PMIX_RANK_WILDCARD == rank) { + /* the target nspace is known, so we can process the request. + * if the rank is wildcard, or the nspace is different, then + * they are asking for the job-level info for this nspace - provide it */ + if (PMIX_RANK_WILDCARD == rank || diffnspace) { /* see if we have the job-level info - we won't have it * if we have no local procs and haven't already asked * for it, so there is no guarantee we have it */ @@ -309,21 +317,32 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, * can retrieve the info from that GDS. Otherwise, * we need to retrieve it from our own */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - peer = pmix_globals.mypeer; /* this data is for a local client, so give the gds the * option of returning a complete copy of the data, * or returning a pointer to local storage */ cb.proc = &proc; cb.scope = PMIX_SCOPE_UNDEF; cb.copy = false; - PMIX_GDS_FETCH_KV(rc, peer, &cb); + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); if (PMIX_SUCCESS != rc) { PMIX_DESTRUCT(&cb); return rc; } + /* if the requested rank is not WILDCARD, then retrieve the + * job-specific data for that rank - a scope of UNDEF + * will direct the GDS to provide it. Anything found will + * simply be added to the cb.kvs list */ + if (PMIX_RANK_WILDCARD != rank) { + proc.rank = rank; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&cb); + return rc; + } + } PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, peer, &proc, &cb.kvs, &pkt, cd); + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&cb); @@ -333,7 +352,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, PMIX_DESTRUCT(&pkt); /* pack it into the payload */ PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); free(bo.bytes); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); @@ -604,6 +623,7 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, char *data = NULL; size_t sz = 0; pmix_scope_t scope = PMIX_SCOPE_UNDEF; + bool diffnspace = false; pmix_output_verbose(2, pmix_server_globals.get_output, "%s:%d SATISFY REQUEST CALLED", @@ -617,10 +637,18 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); pmix_strncpy(proc.nspace, nptr->nspace, PMIX_MAX_NSLEN); - /* if we have local clients of this nspace, then we use - * the corresponding GDS to retrieve the data. Otherwise, - * the data will have been stored under our GDS */ - if (0 < nptr->nlocalprocs) { + if (!PMIX_CHECK_NSPACE(nptr->nspace, cd->peer->info->pname.nspace)) { + diffnspace = true; + } + + /* if rank is PMIX_RANK_UNDEF, then it was stored in our GDS */ + if (PMIX_RANK_UNDEF == rank) { + scope = PMIX_GLOBAL; // we have to search everywhere + peer = pmix_globals.mypeer; + } else if (0 < nptr->nlocalprocs) { + /* if we have local clients of this nspace, then we use + * the corresponding GDS to retrieve the data. Otherwise, + * the data will have been stored under our GDS */ if (local) { *local = true; } @@ -660,8 +688,7 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, /* if they are asking about a rank from an nspace different * from their own, or they gave a rank of "wildcard", then * include a copy of the job-level info */ - if (PMIX_RANK_WILDCARD == rank || - 0 != strncmp(nptr->nspace, cd->peer->info->pname.nspace, PMIX_MAX_NSLEN)) { + if (PMIX_RANK_WILDCARD == rank || diffnspace) { proc.rank = PMIX_RANK_WILDCARD; PMIX_CONSTRUCT(&cb, pmix_cb_t); /* this data is requested by a local client, so give the gds the option @@ -674,7 +701,7 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, if (PMIX_SUCCESS == rc) { PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); if (rc != PMIX_SUCCESS) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&pkt); @@ -720,7 +747,8 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, /* retrieve the data for the specific rank they are asking about */ if (PMIX_RANK_WILDCARD != rank) { - if (!PMIX_PROC_IS_SERVER(peer) && !peer->commit_cnt) { + if (!PMIX_PROC_IS_SERVER(peer) && 0 == peer->commit_cnt) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); /* this condition works only for local requests, server does * count commits for local ranks, and check this count when * local request. @@ -743,7 +771,11 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, found = true; PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + if (PMIX_RANK_UNDEF == rank || diffnspace) { + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); + } else { + PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + } if (rc != PMIX_SUCCESS) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&pkt); @@ -789,6 +821,7 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, } PMIX_DESTRUCT(&cb); } + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); PMIX_DESTRUCT(&pbkt); @@ -896,7 +929,7 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) /* find the nspace object for the proc whose data is being received */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(caddy->lcd->proc.nspace, ns->nspace)) { nptr = ns; break; @@ -910,7 +943,7 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) nptr = PMIX_NEW(pmix_namespace_t); nptr->nspace = strdup(caddy->lcd->proc.nspace); /* add to the list */ - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } /* if the request was successfully satisfied, then store the data. diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 2c82c501b9f..a97201ae80e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -423,7 +423,7 @@ static pmix_server_trkr_t* new_tracker(char *id, pmix_proc_t *procs, } /* is this nspace known to us? */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(procs[i].nspace, ns->nspace)) { nptr = ns; break; @@ -2433,7 +2433,6 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } cd->cbdata = cbdata; - /* unpack the number of targets */ cnt = 1; PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ntargets, &cnt, PMIX_SIZE); if (PMIX_SUCCESS != rc) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c index 7da60671c57..4db8e9452c7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c @@ -215,7 +215,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, { pmix_kval_t *kptr; pmix_status_t rc; - char hostname[PMIX_MAX_NSLEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; char *evar, *nspace = NULL; pmix_rank_t rank = PMIX_RANK_UNDEF; bool gdsfound, do_not_connect = false; @@ -848,7 +848,11 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, * and load it from there */ /* hostname */ - gethostname(hostname, PMIX_MAX_NSLEN); + if (NULL != pmix_globals.hostname) { + pmix_strncpy(hostname, pmix_globals.hostname, PMIX_MAXHOSTNAMELEN); + } else { + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); + } kptr = PMIX_NEW(pmix_kval_t); kptr->key = strdup(PMIX_HOSTNAME); PMIX_VALUE_CREATE(kptr->value, 1); diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/name_fns.c b/opal/mca/pmix/pmix2x/pmix/src/util/name_fns.c index ca00f4f8c82..96b46ea9d58 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/name_fns.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/name_fns.c @@ -98,6 +98,7 @@ char* pmix_util_print_name_args(const pmix_proc_t *name) { pmix_print_args_buffers_t *ptr; char *rank; + int index; /* get the next buffer */ ptr = get_print_name_buffer(); @@ -105,29 +106,36 @@ char* pmix_util_print_name_args(const pmix_proc_t *name) PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); return pmix_print_args_null; } - /* cycle around the ring */ - if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } /* protect against NULL names */ if (NULL == name) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]"); - return ptr->buffers[ptr->cntr-1]; + index = ptr->cntr; + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]"); + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } + return ptr->buffers[index]; } rank = pmix_util_print_rank(name->rank); - snprintf(ptr->buffers[ptr->cntr++], + index = ptr->cntr; + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, - "[%s,%s]", name->nspace, rank); + "[%s:%s]", name->nspace, rank); + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } - return ptr->buffers[ptr->cntr-1]; + return ptr->buffers[index]; } char* pmix_util_print_rank(const pmix_rank_t vpid) { pmix_print_args_buffers_t *ptr; + int index; ptr = get_print_name_buffer(); @@ -136,19 +144,19 @@ char* pmix_util_print_rank(const pmix_rank_t vpid) return pmix_print_args_null; } - /* cycle around the ring */ - if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } - + index = ptr->cntr; if (PMIX_RANK_UNDEF == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "UNDEF"); + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "UNDEF"); } else if (PMIX_RANK_WILDCARD == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "WILDCARD"); + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "WILDCARD"); } else { - snprintf(ptr->buffers[ptr->cntr++], + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "%ld", (long)vpid); } - return ptr->buffers[ptr->cntr-1]; + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } + return ptr->buffers[index]; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/output.c b/opal/mca/pmix/pmix2x/pmix/src/util/output.c index 5441cf9c12c..dca7e180fe8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/output.c @@ -125,7 +125,7 @@ PMIX_CLASS_INSTANCE(pmix_output_stream_t, pmix_object_t, construct, destruct); bool pmix_output_init(void) { int i; - char hostname[PMIX_MAXHOSTNAMELEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; char *str; if (initialized) { @@ -176,7 +176,7 @@ bool pmix_output_init(void) } else { verbose.lds_want_stderr = true; } - gethostname(hostname, sizeof(hostname)); + gethostname(hostname, sizeof(hostname)-1); hostname[sizeof(hostname)-1] = '\0'; if (0 > asprintf(&verbose.lds_prefix, "[%s:%05d] ", hostname, getpid())) { return PMIX_ERR_NOMEM; @@ -256,7 +256,7 @@ bool pmix_output_switch(int output_id, bool enable) void pmix_output_reopen_all(void) { char *str; - char hostname[PMIX_MAXHOSTNAMELEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; str = getenv("PMIX_OUTPUT_STDERR_FD"); if (NULL != str) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/pif.h b/opal/mca/pmix/pmix2x/pmix/src/util/pif.h index fb9f1b79a24..57ed1bfd749 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/pif.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/pif.h @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,9 +38,7 @@ #include #endif -#ifndef IF_NAMESIZE -#define IF_NAMESIZE 32 -#endif +#define PMIX_IF_NAMESIZE 256 BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.c b/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.c index 1e1cfaaa880..2662a86bff7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.c @@ -12,8 +12,10 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,10 +32,12 @@ #include #include "src/util/printf.h" +#include "src/util/error.h" #include "src/util/argv.h" #include "src/util/pmix_environ.h" #define PMIX_DEFAULT_TMPDIR "/tmp" +#define PMIX_MAX_ENVAR_LENGTH 100000 /* * Merge two environ-like char arrays, ensuring that there are no @@ -74,7 +78,7 @@ char **pmix_environ_merge(char **minor, char **major) pmix_setenv(minor[i], NULL, false, &ret); } else { - /* strdup minor[i] in case it's a constat string */ + /* strdup minor[i] in case it's a constant string */ name = strdup(minor[i]); value = name + (value - minor[i]); @@ -99,9 +103,60 @@ char **pmix_environ_merge(char **minor, char **major) int i; char *newvalue, *compare; size_t len; + bool valid; - /* Make the new value */ + /* Check the bozo case */ + if( NULL == env ) { + return PMIX_ERR_BAD_PARAM; + } + if (NULL != value) { + /* check the string for unacceptable length - i.e., ensure + * it is NULL-terminated */ + valid = false; + for (i=0; i < PMIX_MAX_ENVAR_LENGTH; i++) { + if ('\0' == value[i]) { + valid = true; + break; + } + } + if (!valid) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + } + + /* If this is the "environ" array, use putenv or setenv */ + if (*env == environ) { + /* THIS IS POTENTIALLY A MEMORY LEAK! But I am doing it + because so that we don't violate the law of least + astonishmet for PMIX developers (i.e., those that don't + check the return code of pmix_setenv() and notice that we + returned an error if you passed in the real environ) */ +#if defined (HAVE_SETENV) + if (NULL == value) { + /* this is actually an unsetenv request */ + unsetenv(name); + } else { + setenv(name, value, overwrite); + } +#else + /* Make the new value */ + if (NULL == value) { + i = asprintf(&newvalue, "%s=", name); + } else { + i = asprintf(&newvalue, "%s=%s", name, value); + } + if (NULL == newvalue || 0 > i) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + putenv(newvalue); + /* cannot free it as putenv doesn't copy the value */ +#endif + return PMIX_SUCCESS; + } + + /* Make the new value */ if (NULL == value) { i = asprintf(&newvalue, "%s=", name); } else { @@ -111,28 +166,13 @@ char **pmix_environ_merge(char **minor, char **major) return PMIX_ERR_OUT_OF_RESOURCE; } - /* Check the bozo case */ - - if( NULL == env ) { - return PMIX_ERR_BAD_PARAM; - } else if (NULL == *env) { + if (NULL == *env) { i = 0; pmix_argv_append(&i, env, newvalue); free(newvalue); return PMIX_SUCCESS; } - /* If this is the "environ" array, use putenv */ - if( *env == environ ) { - /* THIS IS POTENTIALLY A MEMORY LEAK! But I am doing it - because so that we don't violate the law of least - astonishmet for PMIX developers (i.e., those that don't - check the return code of pmix_setenv() and notice that we - returned an error if you passed in the real environ) */ - putenv(newvalue); - return PMIX_SUCCESS; - } - /* Make something easy to compare to */ i = asprintf(&compare, "%s=", name); diff --git a/opal/mca/pmix/pmix2x/pmix/test/cli_stages.c b/opal/mca/pmix/pmix2x/pmix/test/cli_stages.c index 593bc319ae1..78919f6e3ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/cli_stages.c +++ b/opal/mca/pmix/pmix2x/pmix/test/cli_stages.c @@ -105,7 +105,7 @@ void cli_disconnect(cli_info_t *cli) } if( NULL == cli->ev ){ - TEST_ERROR(("Bad ev = NULL of rank = %d ", cli->sd, cli_rank(cli))); + TEST_ERROR(("Bad ev = NULL of rank = %d ", cli_rank(cli))); test_abort = true; } else { TEST_VERBOSE(("remove event of rank %d from event queue", cli_rank(cli))); diff --git a/opal/mca/pmix/pmix2x/pmix/test/pmi_client.c b/opal/mca/pmix/pmix2x/pmix/test/pmi_client.c index ad21f6db3cc..819429b4fd7 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/pmi_client.c +++ b/opal/mca/pmix/pmix2x/pmix/test/pmi_client.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -24,6 +24,9 @@ static int _legacy = 0; /* Verbose level 0-silent, 1-fatal, 2-error, 3+ debug*/ static int _verbose = 1; +static int spawned, size, rank=-1, appnum; +static char jobid[255]; + static void log_fatal(const char *format, ...) { va_list arglist; @@ -36,7 +39,7 @@ static void log_fatal(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "FATAL: %s", output); + fprintf(stderr, "%d:FATAL: %s", rank, output); free(output); } va_end(arglist); @@ -54,7 +57,7 @@ static void log_error(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "ERROR: %s", output); + fprintf(stderr, "%d:ERROR: %s", rank, output); free(output); } va_end(arglist); @@ -72,7 +75,7 @@ static void log_info(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "INFO: %s", output); + fprintf(stderr, "%d:INFO: %s", rank, output); free(output); } va_end(arglist); @@ -81,7 +84,7 @@ static void log_info(const char *format, ...) #define log_assert(e, msg) \ do { \ if (!(e)) { \ - log_fatal("%s at %s:%d\n", msg, __func__, __LINE__); \ + log_fatal("%d:%s at %s:%d\n", rank, msg, __func__, __LINE__); \ rc = -1; \ } \ } while (0) @@ -99,10 +102,6 @@ static int test_item5(void); static int test_item6(void); static int test_item7(void); -static int spawned, size, rank, appnum; -static char jobid[255]; - - int main(int argc, char **argv) { int ret = 0; @@ -372,21 +371,24 @@ static int test_item6(void) { int rc = 0; char val[100]; - const char *tkey = __func__; + char *tkey; const char *tval = __FILE__; + asprintf(&tkey, "%d:%s", rank, __func__); if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { log_fatal("PMI_KVS_Put %d\n", rc); + free(tkey); return rc; } if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { log_fatal("PMI_KVS_Get %d\n", rc); + free(tkey); return rc; } log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); - + free(tkey); log_assert(!strcmp(tval, val), "value does not meet expectation"); return rc; @@ -398,16 +400,16 @@ static int test_item7(void) char tkey[100]; char tval[100]; char val[100]; - int i = 0; + int i = 0, j; + +log_info("TEST7\n"); for (i = 0; i < size; i++) { - sprintf(tkey, "KEY-%d", i); + sprintf(tkey, "%d:KEY-%d", rank, i); sprintf(tval, "VALUE-%d", i); - if (i == rank) { - if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { - log_fatal("PMI_KVS_Put [%s=%s] %d\n", tkey, tval, rc); - return rc; - } + if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { + log_fatal("PMI_KVS_Put [%s=%s] %d\n", tkey, tval, rc); + return rc; } } @@ -416,22 +418,27 @@ static int test_item7(void) return rc; } + + log_info("BARRIER\n"); if (PMI_SUCCESS != (rc = PMI_Barrier())) { log_fatal("PMI_Barrier %d\n", rc); return rc; } for (i = 0; i < size; i++) { - sprintf(tkey, "KEY-%d", i); - sprintf(tval, "VALUE-%d", i); - if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { - log_fatal("PMI_KVS_Get [%s=?] %d\n", tkey, rc); - return rc; - } + for (j=0; j < size; j++) { + sprintf(tkey, "%d:KEY-%d", i, j); + sprintf(tval, "VALUE-%d", j); + log_info("Get key %s\n", tkey); + if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { + log_fatal("PMI_KVS_Get [%s=?] %d\n", tkey, rc); + return rc; + } - log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); + log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); - log_assert(!strcmp(tval, val), "value does not meet expectation"); + log_assert(!strcmp(tval, val), "value does not meet expectation"); + } } return rc; diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am b/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am index 731fb5d7d86..70e9d6696bf 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am @@ -85,7 +85,7 @@ simplegacy_SOURCES = \ simplegacy.c simplegacy_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simplegacy_LDADD = \ - $(top_builddir)/src/libpmix.la + $(top_builddir)/src/libpmi.la simptimeout_SOURCES = \ simptimeout.c diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/quietclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/quietclient.c index 674490b855f..0a578c78204 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/quietclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/quietclient.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -115,12 +115,15 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t nprocs, n; + uint32_t nprocs, n, k, nlocal; int cnt, j; volatile bool active; pmix_info_t *iptr; size_t ninfo; pmix_status_t code; + char **peers; + bool all_local, local; + pmix_rank_t *locals = NULL; /* init us and declare we are a test programming model */ PMIX_INFO_CREATE(iptr, 2); @@ -164,11 +167,11 @@ int main(int argc, char **argv) usleep(10); } - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } @@ -185,6 +188,27 @@ int main(int argc, char **argv) goto done; } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); + for (cnt=0; NULL != peers[cnt]; cnt++) { + locals[cnt] = strtoul(peers[cnt], NULL, 10); + } + } + pmix_argv_free(peers); + for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; @@ -225,42 +249,67 @@ int main(int argc, char **argv) for (j=0; j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; - (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + if (local) { + (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", + myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); + continue; + } + if (NULL == val) { + pmix_output(0, "Client ns %s rank %d: NULL value returned", + myproc.nspace, myproc.rank); + break; + } + if (PMIX_UINT64 != val->type) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + if (1234 != val->data.uint64) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } PMIX_VALUE_RELEASE(val); free(tmp); - continue; - } - PMIX_VALUE_RELEASE(val); - free(tmp); - if (n != myproc.rank) { + /* now check that we don't get data for a remote proc - note that we + * always can get our own remote data as we published it */ + if (proc.rank != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with a PMIX_REMOTE scope */ + pmix_output(0, "ERROR: Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, myproc.rank, j, tmp); + } + PMIX_VALUE_RELEASE(val); + free(tmp); + } + } else { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - /* this data should _not_ be found as we are on the same node - * and the data was "put" with a PMIX_REMOTE scope */ - continue; + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed for remote proc", + myproc.nspace, myproc.rank, j, tmp); + } + if (NULL != val) { + PMIX_VALUE_RELEASE(val); } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", - myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); free(tmp); } } diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c index 710ae4b7b60..eabedd8872b 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c @@ -15,6 +15,8 @@ * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -113,7 +115,7 @@ static void model_registration_callback(pmix_status_t status, size_t evhandler_ref, void *cbdata) { - volatile int *active = (volatile int*)cbdata; + volatile bool *active = (volatile bool*)cbdata; fprintf(stderr, "simpclient EVENT HANDLER REGISTRATION RETURN STATUS %d, ref=%lu\n", status, (unsigned long)evhandler_ref); @@ -127,13 +129,16 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t nprocs, n; + uint32_t nprocs, n, k, nlocal; int cnt, j; bool doabort = false; volatile bool active; pmix_info_t info, *iptr; size_t ninfo; pmix_status_t code; + char **peers; + bool all_local, local; + pmix_rank_t *locals = NULL; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { @@ -194,17 +199,17 @@ int main(int argc, char **argv) } - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); @@ -216,6 +221,27 @@ int main(int argc, char **argv) goto done; } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); + for (cnt=0; NULL != peers[cnt]; cnt++) { + locals[cnt] = strtoul(peers[cnt], NULL, 10); + } + } + pmix_argv_free(peers); + for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; @@ -256,43 +282,71 @@ int main(int argc, char **argv) for (j=0; j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; - (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + if (local) { + (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", + myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); + continue; + } + if (NULL == val) { + pmix_output(0, "Client ns %s rank %d: NULL value returned", + myproc.nspace, myproc.rank); + break; + } + if (PMIX_UINT64 != val->type) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + if (1234 != val->data.uint64) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); - continue; - } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); - if (n != myproc.rank) { + /* now check that we don't get data for a remote proc - note that we + * always can get our own remote data as we published it */ + if (proc.rank != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with a PMIX_REMOTE scope */ + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); + } else { + pmix_output(0, "ERROR: Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, myproc.rank, j, tmp); + } + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + free(tmp); + } + } else { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - /* this data should _not_ be found as we are on the same node - * and the data was "put" with a PMIX_REMOTE scope */ + if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - continue; + } else { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed for remote proc", + myproc.nspace, myproc.rank, j, tmp); } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", - myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } @@ -330,7 +384,7 @@ int main(int argc, char **argv) /* log something */ PMIX_INFO_CONSTRUCT(&info); - PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg", PMIX_STRING); + PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg\n", PMIX_STRING); active = true; rc = PMIx_Log_nb(&info, 1, NULL, 0, opcbfunc, (void*)&active); if (PMIX_SUCCESS != rc) { diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c index 7e0e98fca5c..cb0ae490227 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c @@ -97,16 +97,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); completed = false; /* register our errhandler */ diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c index 56f71815bce..2a7e067d148 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c @@ -98,9 +98,12 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t n, num_gets; + uint32_t n, num_gets, k, nlocal; bool active; bool dofence = true; + bool local, all_local; + char **peers; + pmix_rank_t *locals; if (NULL != getenv("PMIX_SIMPDMODEX_ASYNC")) { dofence = false; @@ -113,16 +116,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); @@ -174,24 +178,60 @@ int main(int argc, char **argv) } } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); + for (n=0; NULL != peers[n]; n++) { + locals[n] = strtoul(peers[n], NULL, 10); + } + } + pmix_argv_free(peers); + /* get the committed data - ask for someone who doesn't exist as well */ num_gets = 0; for (n=0; n < nprocs; n++) { - (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n); - proc.rank = n; - if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, - NULL, 0, valcbfunc, tmp))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); - goto done; + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - ++num_gets; - (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n); - if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, - NULL, 0, valcbfunc, tmp))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); - goto done; + if (local) { + (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n); + proc.rank = n; + if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, + NULL, 0, valcbfunc, tmp))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); + goto done; + } + ++num_gets; + } else { + (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n); + if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, + NULL, 0, valcbfunc, tmp))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); + goto done; + } + ++num_gets; } - ++num_gets; } if (dofence) { diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c index 3c8ff689283..ef5286dd6e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c @@ -62,16 +62,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); @@ -85,19 +86,12 @@ int main(int argc, char **argv) /* rank=0 calls spawn */ if (0 == myproc.rank) { PMIX_APP_CREATE(app, 1); - app->cmd = strdup("gumby"); + app->cmd = strdup("./simpclient"); app->maxprocs = 2; - pmix_argv_append_nosize(&app->argv, "gumby"); + pmix_argv_append_nosize(&app->argv, "simpclient"); pmix_argv_append_nosize(&app->argv, "-n"); pmix_argv_append_nosize(&app->argv, "2"); pmix_setenv("PMIX_ENV_VALUE", "3", true, &app->env); - PMIX_INFO_CREATE(app->info, 2); - (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); - app->info[0].value.type = PMIX_INT8; - app->info[0].value.data.int8 = 12; - (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); - app->info[1].value.type = PMIX_DOUBLE; - app->info[1].value.data.dval = 12.34; pmix_output(0, "Client ns %s rank %d: calling PMIx_Spawn", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { @@ -106,25 +100,18 @@ int main(int argc, char **argv) } PMIX_APP_FREE(app, 1); - /* check to see if we got the expected info back */ - if (0 != strncmp(nsp2, "DYNSPACE", PMIX_MAX_NSLEN)) { - pmix_output(0, "Client ns %s rank %d: PMIx_Spawn returned incorrect nspace: %s", myproc.nspace, myproc.rank, nsp2); - goto done; - } else { - pmix_output(0, "Client ns %s rank %d: PMIx_Spawn succeeded returning nspace: %s", myproc.nspace, myproc.rank, nsp2); - } - /* get their universe size */ + /* get their job size */ (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; val = NULL; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val)) || NULL == val) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + pmix_output(0, "Client ns %s rank %d: PMIx_Get job %s size failed: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); + pmix_output(0, "Client %s:%d job %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } /* just cycle the connect/disconnect functions */ diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c index 57a6bfc8c6b..a6acf5f89ca 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -83,16 +83,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); completed = false; /* register our errhandler */ diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpjctrl.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpjctrl.c index c9ac506520a..037f7eae383 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpjctrl.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpjctrl.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -133,14 +133,17 @@ int main(int argc, char **argv) (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + /* get our job size */ + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); + fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs); /* inform the RM that we are preemptible, and that our checkpoint methods are * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */ diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simplegacy.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simplegacy.c index 1b15366cd4a..98f40a15dd4 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simplegacy.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simplegacy.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -24,230 +24,90 @@ */ #include -#include +#include #include #include #include #include -#include "src/class/pmix_object.h" -#include "src/util/output.h" -#include "src/util/printf.h" - #define MAXCNT 3 -static volatile bool completed = false; -static pmix_proc_t myproc; - -static void notification_fn(size_t evhdlr_registration_id, - pmix_status_t status, - const pmix_proc_t *source, - pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, - void *cbdata) -{ - pmix_output(0, "Client %s:%d NOTIFIED with status %s", myproc.nspace, myproc.rank, PMIx_Error_string(status)); - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } - completed = true; -} - -static void errhandler_reg_callbk(pmix_status_t status, - size_t errhandler_ref, - void *cbdata) -{ - volatile bool *active = (volatile bool*)cbdata; - - pmix_output(0, "Client: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu", - status, (unsigned long)errhandler_ref); - *active = false; -} - -/* this is an event notification function that we explicitly request - * be called when the PMIX_MODEL_DECLARED notification is issued. - * We could catch it in the general event notification function and test - * the status to see if the status matched, but it often is simpler - * to declare a use-specific notification callback point. In this case, - * we are asking to know whenever a model is declared as a means - * of testing server self-notification */ -static void model_callback(size_t evhdlr_registration_id, - pmix_status_t status, - const pmix_proc_t *source, - pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, - void *cbdata) -{ - size_t n; - - /* just let us know it was received */ - fprintf(stderr, "%s:%d Model event handler called with status %d(%s)\n", - myproc.nspace, myproc.rank, status, PMIx_Error_string(status)); - for (n=0; n < ninfo; n++) { - if (PMIX_STRING == info[n].value.type) { - fprintf(stderr, "%s:%d\t%s:\t%s\n", - myproc.nspace, myproc.rank, - info[n].key, info[n].value.data.string); - } - } - - /* we must NOT tell the event handler state machine that we - * are the last step as that will prevent it from notifying - * anyone else that might be listening for declarations */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } -} - -/* event handler registration is done asynchronously */ -static void model_registration_callback(pmix_status_t status, - size_t evhandler_ref, - void *cbdata) -{ - volatile int *active = (volatile int*)cbdata; - - fprintf(stderr, "simpclient EVENT HANDLER REGISTRATION RETURN STATUS %d, ref=%lu\n", - status, (unsigned long)evhandler_ref); - *active = false; -} - int main(int argc, char **argv) { - int rc; - pmix_value_t value; - pmix_value_t *val = &value; + int rc, j, n; char *tmp; - pmix_proc_t proc; - uint32_t nprocs, n; - int cnt, j; - volatile bool active; - pmix_info_t info, *iptr; - size_t ninfo; - pmix_status_t code; - - /* init us and declare we are a test programming model */ - PMIX_INFO_CREATE(iptr, 2); - PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); - PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + int spawned; + int rank; + int nprocs; + char value[1024]; + + fprintf(stderr, "Client calling init\n"); + if (PMI_SUCCESS != (rc = PMI_Init(&spawned))) { + fprintf(stderr, "Client PMI_Init failed: %d\n", rc); exit(rc); } - PMIX_INFO_FREE(iptr, 2); - pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); + fprintf(stderr, "Client Running\n"); /* test something */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + if (PMI_SUCCESS != (rc = PMI_Get_rank(&rank))) { + fprintf(stderr, "Client PMI_Get_rank failed: %d\n", rc); exit(rc); } - nprocs = val->data .uint32; - PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); - - /* register a handler specifically for when models declare */ - active = true; - ninfo = 1; - PMIX_INFO_CREATE(iptr, ninfo); - PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); - code = PMIX_MODEL_DECLARED; - PMIx_Register_event_handler(&code, 1, iptr, ninfo, - model_callback, model_registration_callback, (void*)&active); - while (active) { - usleep(10); - } - PMIX_INFO_FREE(iptr, ninfo); - - /* register our errhandler */ - active = true; - PMIx_Register_event_handler(NULL, 0, NULL, 0, - notification_fn, errhandler_reg_callbk, (void*)&active); - while (active) { - usleep(10); + if (PMI_SUCCESS != (rc = PMI_Get_universe_size(&nprocs))) { + fprintf(stderr, "Client %d: PMI_Get_universe_size failed: %d\n", rank, rc); + exit(rc); } + fprintf(stderr, "Client %d job size %d\n", rank, nprocs); - memset(&info, 0, sizeof(pmix_info_t)); - (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); - info.value.type = PMIX_UNDEF; - info.value.data.flag = 1; - - for (cnt=0; cnt < MAXCNT; cnt++) { - pmix_output(0, "EXECUTING LOOP %d", cnt); - for (j=0; j < 10; j++) { - (void)asprintf(&tmp, "%s-%d-gasnet-%d-%d", myproc.nspace, myproc.rank, cnt, j); - value.type = PMIX_UINT64; - value.data.uint64 = 1234; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, tmp, &value))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Put failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); - goto done; - } - free(tmp); - } - - if (PMIX_SUCCESS != (rc = PMIx_Commit())) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Commit failed: %s", - myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); + for (j=0; j < 10; j++) { + (void)asprintf(&tmp, "%d-gasnet-0-%d", rank, j); + if (PMI_SUCCESS != (rc = PMI_KVS_Put("foobar", tmp, "myvalue"))) { + fprintf(stderr, "Client %d: j %d PMI_KVS_Put failed: %d\n", + rank, j, rc); goto done; } + free(tmp); + } - /* call fence to ensure the data is received */ - if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, &info, 1))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Fence failed: %s", - myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); + if (PMIX_SUCCESS != (rc = PMI_KVS_Commit("foobar"))) { + fprintf(stderr, "Client %d: PMI_KVS_Commit failed: %d\n", rank, rc); goto done; - } + } - /* check the returned data */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - for (j=0; j < 10; j++) { - for (n=0; n < nprocs; n++) { - (void)asprintf(&tmp, "%s-%d-gasnet-%d-%d", myproc.nspace, n, cnt, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; - } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; - } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); + fprintf(stderr, "Client rank %d: CALLING PMI_Barrier\n", rank); + + /* call fence to ensure the data is received */ + if (PMI_SUCCESS != (rc = PMI_Barrier())) { + fprintf(stderr, "Client %d: PMI_Barrier failed: %d\n", rank, rc); + goto done; + } + + /* check the returned data */ + for (j=0; j < 10; j++) { + for (n=0; n < nprocs; n++) { + (void)asprintf(&tmp, "%d-gasnet-0-%d", n, j); + fprintf(stderr, "Client %d: Calling get\n", rank); + if (PMI_SUCCESS != (rc = PMI_KVS_Get("foobar", tmp, value, 1024))) { + fprintf(stderr, "Client %d: PMI_Get failed: %d\n", rank, rc); + continue; } + if (0 == strcmp(value, "myvalue")) { + fprintf(stderr, "Client %d: PMI_Get returned correct value\n", rank); + } else { + fprintf(stderr, "Client %d: PMI_Get returned incorrect value\n", rank); + } + free(tmp); } } done: /* finalize us */ - pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + fprintf(stderr, "Client rank %d: Finalizing\n", rank); + if (PMI_SUCCESS != (rc = PMI_Finalize())) { + fprintf(stderr, "Client rank %d: finalize failed %d\n", rank, rc); } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); + fprintf(stderr, "Client %d:PMI_Finalize successfully completed\n", rank); } fflush(stderr); return(rc); diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c index 12d6c68735e..2ccf9b258f4 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -54,16 +54,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c index b6a08d5dbfe..c3d341724e2 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c @@ -38,7 +38,6 @@ #include #include #include -#include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" #include "src/util/pmix_environ.h" @@ -195,6 +194,7 @@ static pmix_event_t handler; static pmix_list_t children; static bool istimeouttest = false; static mylock_t globallock; +static bool arrays = false; static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, myxfer_t *x); @@ -221,7 +221,6 @@ static void opcbfunc(pmix_status_t status, void *cbdata) DEBUG_WAKEUP_THREAD(&x->lock); } - /* this is an event notification function that we explicitly request * be called when the PMIX_MODEL_DECLARED notification is issued. * We could catch it in the general event notification function and test @@ -342,12 +341,22 @@ int main(int argc, char **argv) fprintf(stderr, " -e foo Name of the client executable to run (default: simpclient\n"); fprintf(stderr, " -x Test cross-version support\n"); fprintf(stderr, " -u Enable legacy usock support\n"); + fprintf(stderr, " -arrays Use the job session array to pass registration info\n"); exit(0); + } else if (0 == strcmp("-arrays", argv[n]) || + 0 == strcmp("--arrays", argv[n])) { + arrays = true; } } if (NULL == executable) { executable = strdup("./simpclient"); } + /* check for executable existence and permissions */ + if (0 != access(executable, X_OK)) { + fprintf(stderr, "Executable %s not found or missing executable permissions\n", executable); + exit(1); + } + if (cross_version && nprocs < 2) { fprintf(stderr, "Cross-version testing requires at least two clients\n"); exit(1); @@ -372,6 +381,7 @@ int main(int argc, char **argv) /* setup the server library and tell it to support tool connections */ ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_SERVER_TOOL_SUPPORT, NULL, PMIX_BOOL); if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, ninfo))) { @@ -414,9 +424,9 @@ int main(int argc, char **argv) /* setup to see sigchld on the forked tests */ PMIX_CONSTRUCT(&children, pmix_list_t); - event_assign(&handler, pmix_globals.evbase, SIGCHLD, - EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); - event_add(&handler, NULL); + pmix_event_assign(&handler, pmix_globals.evbase, SIGCHLD, + EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); + pmix_event_add(&handler, NULL); /* we have a single namespace for all clients */ atmp = NULL; @@ -541,24 +551,6 @@ int main(int argc, char **argv) DEBUG_DESTRUCT_LOCK(&globallock); PMIX_INFO_FREE(info, ninfo); -#if 0 - fprintf(stderr, "TEST NONDEFAULT NOTIFICATION\n"); - /* verify that notifications don't recirculate */ - ninfo = 1; - PMIX_INFO_CREATE(info, ninfo); - /* mark that it is not to go to any default handlers */ - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); - PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE, - &pmix_globals.myid, PMIX_RANGE_LOCAL, - info, ninfo, NULL, NULL); - PMIX_INFO_FREE(info, ninfo); - /* wait a little in case we get notified */ - for (ninfo=0; ninfo < 100000; ninfo++) { - struct timespec t = {0, 100}; - nanosleep(&t, NULL); - } -#endif - /* deregister the event handlers */ PMIx_Deregister_event_handler(0, NULL, NULL); @@ -587,42 +579,150 @@ static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, myxfer_t *x) { char *regex, *ppn; - char hostname[PMIX_MAXHOSTNAMELEN]; + int n, m, k; + pmix_data_array_t *array; + pmix_info_t *info, *iptr; - gethostname(hostname, sizeof(hostname)); - x->ninfo = 7; + if (arrays) { + x->ninfo = 15 + nprocs; + } else { + x->ninfo = 16 + nprocs; + } PMIX_INFO_CREATE(x->info, x->ninfo); - (void)strncpy(x->info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); - x->info[0].value.type = PMIX_UINT32; - x->info[0].value.data.uint32 = nprocs; - - (void)strncpy(x->info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); - x->info[1].value.type = PMIX_UINT32; - x->info[1].value.data.uint32 = 0; - - (void)strncpy(x->info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); - x->info[2].value.type = PMIX_UINT32; - x->info[2].value.data.uint32 = nprocs; - - (void)strncpy(x->info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); - x->info[3].value.type = PMIX_STRING; - x->info[3].value.data.string = strdup(ranks); - - PMIx_generate_regex(hostname, ®ex); - (void)strncpy(x->info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); - x->info[4].value.type = PMIX_STRING; - x->info[4].value.data.string = regex; - - PMIx_generate_ppn(ranks, &ppn); - (void)strncpy(x->info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); - x->info[5].value.type = PMIX_STRING; - x->info[5].value.data.string = ppn; - - (void)strncpy(x->info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); - x->info[6].value.type = PMIX_UINT32; - x->info[6].value.data.uint32 = nprocs; + n = 0; + + PMIx_generate_regex("test000,test001,test002", ®ex); + PMIx_generate_ppn("0;1;2", &ppn); + + if (arrays) { + (void)strncpy(x->info[n].key, PMIX_JOB_INFO_ARRAY, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(x->info[n].value.data.darray, 2, PMIX_INFO); + iptr = (pmix_info_t*)x->info[n].value.data.darray->array; + (void)strncpy(iptr[0].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + iptr[0].value.type = PMIX_STRING; + iptr[0].value.data.string = regex; + (void)strncpy(iptr[1].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + iptr[1].value.type = PMIX_STRING; + iptr[1].value.data.string = ppn; + ++n; + } else { + (void)strncpy(x->info[n].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = regex; + ++n; + + /* if we have some empty nodes, then fill their spots */ + (void)strncpy(x->info[n].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = ppn; + ++n; + } + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = strdup(ranks); + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOBID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = strdup("1234"); + ++n; + + (void)strncpy(x->info[n].key, PMIX_NPROC_OFFSET, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODE_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NUM_NODES, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_MAX_PROCS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_NUM_APPS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCALLDR, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_PROC_RANK; + x->info[n].value.data.uint32 = 0; + ++n; + + /* add the proc-specific data */ + for (m=0; m < nprocs; m++) { + (void)strncpy(x->info[n].key, PMIX_PROC_DATA, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(array, 5, PMIX_INFO); + x->info[n].value.data.darray = array; + info = (pmix_info_t*)array->array; + k = 0; + (void)strncpy(info[k].key, PMIX_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_GLOBAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_LOCAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODE_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT32; + info[k].value.data.uint32 = 0; + ++k; + /* move to next proc */ + ++n; + } PMIx_server_register_nspace(nspace, nprocs, x->info, x->ninfo, cbfunc, x); } @@ -650,8 +750,6 @@ static void errhandler_reg_callbk (pmix_status_t status, { mylock_t *lock = (mylock_t*)cbdata; - pmix_output(0, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu", - status, (unsigned long)errhandler_ref); lock->status = status; DEBUG_WAKEUP_THREAD(lock); } @@ -664,8 +762,6 @@ static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: FINALIZED %s:%d WAKEUP %d", - proc->nspace, proc->rank, wakeup); return PMIX_OPERATION_SUCCEEDED; } @@ -741,7 +837,6 @@ static pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs, { pmix_shift_caddy_t *scd; - pmix_output(0, "SERVER: FENCENB"); scd = PMIX_NEW(pmix_shift_caddy_t); scd->status = PMIX_SUCCESS; scd->data = data; @@ -759,8 +854,6 @@ static pmix_status_t dmodex_fn(const pmix_proc_t *proc, { pmix_shift_caddy_t *scd; - pmix_output(0, "SERVER: DMODEX"); - /* if this is a timeout test, then do nothing */ if (istimeouttest) { return PMIX_SUCCESS; @@ -783,8 +876,6 @@ static pmix_status_t publish_fn(const pmix_proc_t *proc, pmix_locdat_t *p; size_t n; - pmix_output(0, "SERVER: PUBLISH"); - for (n=0; n < ninfo; n++) { p = PMIX_NEW(pmix_locdat_t); (void)strncpy(p->pdata.proc.nspace, proc->nspace, PMIX_MAX_NSLEN); @@ -825,8 +916,6 @@ static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, pmix_status_t ret = PMIX_ERR_NOT_FOUND; lkobj_t *lk; - pmix_output(0, "SERVER: LOOKUP"); - PMIX_CONSTRUCT(&results, pmix_list_t); for (n=0; NULL != keys[n]; n++) { @@ -876,8 +965,6 @@ static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, pmix_locdat_t *p, *p2; size_t n; - pmix_output(0, "SERVER: UNPUBLISH"); - for (n=0; NULL != keys[n]; n++) { PMIX_LIST_FOREACH_SAFE(p, p2, &pubdata, pmix_locdat_t) { if (0 == strncmp(keys[n], p->pdata.key, PMIX_MAX_KEYLEN)) { @@ -909,8 +996,6 @@ static pmix_status_t spawn_fn(const pmix_proc_t *proc, pmix_proc_t *pptr; bool spawned; - pmix_output(0, "SERVER: SPAWN"); - /* check the job info for parent and spawned keys */ for (n=0; n < ninfo; n++) { if (0 == strncmp(job_info[n].key, PMIX_PARENT_ID, PMIX_MAX_KEYLEN)) { @@ -944,8 +1029,6 @@ static pmix_status_t connect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: CONNECT"); - /* in practice, we would pass this request to the local * resource manager for handling */ @@ -959,8 +1042,6 @@ static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: DISCONNECT"); - return PMIX_OPERATION_SUCCEEDED; } @@ -983,7 +1064,6 @@ static pmix_status_t notify_event(pmix_status_t code, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: NOTIFY EVENT"); return PMIX_OPERATION_SUCCEEDED; } @@ -1012,8 +1092,6 @@ static pmix_status_t query_fn(pmix_proc_t *proct, pmix_info_t *info; query_data_t qd; - pmix_output(0, "SERVER: QUERY"); - if (NULL == cbfunc) { return PMIX_ERROR; } @@ -1041,8 +1119,6 @@ static void tool_connect_fn(pmix_info_t *info, size_t ninfo, { pmix_proc_t proc; - pmix_output(0, "SERVER: TOOL CONNECT"); - /* just pass back an arbitrary nspace */ (void)strncpy(proc.nspace, "TOOL", PMIX_MAX_NSLEN); proc.rank = 0; @@ -1070,8 +1146,6 @@ static void log_fn(const pmix_proc_t *client, { mylog_t *lg = (mylog_t *)malloc(sizeof(mylog_t)); - pmix_output(0, "SERVER: LOG"); - lg->cbfunc = cbfunc; lg->cbdata = cbdata; PMIX_THREADSHIFT(lg, foobar); diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptimeout.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptimeout.c index a62ab99f1ae..10835d68abf 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptimeout.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptimeout.c @@ -103,17 +103,17 @@ int main(int argc, char **argv) usleep(10); } - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* if we are rank=0, then do a fence with timeout */ if (0 == myproc.rank) { diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_common.h b/opal/mca/pmix/pmix2x/pmix/test/test_common.h index 8057e478d71..4b2d88391eb 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_common.h +++ b/opal/mca/pmix/pmix2x/pmix/test/test_common.h @@ -284,9 +284,9 @@ typedef struct { } \ if (PMIX_SUCCESS == rc) { \ if( PMIX_SUCCESS != cbdata.status ){ \ - if( !( (cbdata.status == PMIX_ERR_NOT_FOUND || cbdata.status == PMIX_ERR_PROC_ENTRY_NOT_FOUND) && ok_notfnd ) ){ \ - TEST_ERROR(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ - my_nspace, my_rank, rc, my_nspace, r)); \ + if( !( (cbdata.status == PMIX_ERR_NOT_FOUND || cbdata.status == PMIX_ERR_PROC_ENTRY_NOT_FOUND) && ok_notfnd ) ){ \ + TEST_ERROR(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ + my_nspace, my_rank, rc, my_nspace, r, key)); \ } \ rc = PMIX_ERROR; \ } else if (NULL == val) { \ diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_error.c b/opal/mca/pmix/pmix2x/pmix/test/test_error.c index f5217f0657c..24a63da4917 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_error.c +++ b/opal/mca/pmix/pmix2x/pmix/test/test_error.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,7 +38,7 @@ static void timeout_errhandler(size_t evhdlr_registration_id, void *cbdata) { TEST_ERROR(("timeout errhandler called for error status = %d ninfo = %d", - status, ninfo)); + status, (int)ninfo)); if (NULL != cbfunc) { cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); } @@ -58,7 +58,7 @@ static void errhandler_reg_callbk1 (pmix_status_t status, size_t *ref = (size_t*) cbdata; *ref = errhandler_ref; TEST_VERBOSE(("PMIX client ERRHANDLER REGISTRATION CALLED WITH STATUS %d, ref=%lu", - status, *ref, (unsigned long)errhandler_ref)); + status, (unsigned long)errhandler_ref)); }