diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index 81c76765e3f..8759fd6a2be 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -165,8 +165,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, sizeof(ompi_proc_t *)); for (i=0 ; igrp_proc_count ; i++) { if (NULL == (proc_list[i] = ompi_group_peer_lookup(group,i))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; + OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); + rc = OMPI_ERR_NOT_FOUND; free(proc_list); goto exit; } @@ -672,10 +672,10 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], for (i = 0; i < count; ++i) { app = OBJ_NEW(opal_pmix_app_t); if (NULL == app) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); OPAL_LIST_DESTRUCT(&apps); opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_OUT_OF_RESOURCE; } /* add the app to the job data */ opal_list_append(&apps, &app->super); @@ -900,9 +900,9 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], ompi_info_get (array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); if ( flag ) { if (0 == strcmp(stdin_target, "all")) { - ui32 = ORTE_VPID_WILDCARD; + ui32 = OPAL_VPID_WILDCARD; } else if (0 == strcmp(stdin_target, "none")) { - ui32 = ORTE_VPID_INVALID; + ui32 = OPAL_VPID_INVALID; } else { ui32 = strtoul(stdin_target, NULL, 10); } @@ -918,7 +918,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], */ if ( !have_wdir ) { if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) { - ORTE_ERROR_LOG(rc); + OMPI_ERROR_LOG(rc); OPAL_LIST_DESTRUCT(&apps); opal_progress_event_users_decrement(); return rc; diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index 6eac3757e2b..e3e1cd2528e 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -96,7 +96,8 @@ enum { OPAL_ERR_PROC_MIGRATE = (OPAL_ERR_BASE - 65), OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE 
- 66), OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67), - OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68) + OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68), + OPAL_ERR_MODEL_DECLARED = (OPAL_ERR_BASE - 69) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index 4c499ff5d1d..5ca6241ce77 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -14,7 +14,7 @@ #include "opal_config.h" #include "opal/types.h" - +#include "opal/threads/threads.h" #include "opal/mca/mca.h" #include "opal/mca/base/mca_base_framework.h" @@ -55,13 +55,133 @@ OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info, OPAL_DECLSPEC void opal_pmix_base_set_evbase(opal_event_base_t *evbase); +#define opal_pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) +typedef pthread_cond_t opal_pmix_condition_t; +#define opal_pmix_condition_broadcast(a) pthread_cond_broadcast(a) +#define opal_pmix_condition_signal(a) pthread_cond_signal(a) +#define OPAL_PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + +typedef struct { + opal_mutex_t mutex; + opal_pmix_condition_t cond; + volatile bool active; +} opal_pmix_lock_t; + + typedef struct { opal_event_base_t *evbase; int timeout; + int initialized; + opal_pmix_lock_t lock; } opal_pmix_base_t; extern opal_pmix_base_t opal_pmix_base; +#define OPAL_PMIX_CONSTRUCT_LOCK(l) \ + do { \ + OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + } while(0) + +#define OPAL_PMIX_DESTRUCT_LOCK(l) \ + do { \ + OBJ_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + 
opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = true; \ + } while(0) +#else +#define OPAL_PMIX_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + (lck)->active = true; \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + OPAL_ACQUIRE_OBJECT(&lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define OPAL_PMIX_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + OPAL_ACQUIRE_OBJECT(lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_RELEASE_THREAD(lck) \ + do { \ + if (opal_debug_threads) { \ + opal_output(0, "Releasing thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = false; \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define OPAL_PMIX_RELEASE_THREAD(lck) \ + do { \ + assert(0 != opal_mutex_trylock(&(lck)->mutex)); \ + (lck)->active = false; \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#define OPAL_PMIX_WAKEUP_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) + END_C_DECLS #endif diff --git a/opal/mca/pmix/base/pmix_base_fns.c 
b/opal/mca/pmix/base/pmix_base_fns.c index 7dd6752d531..97be9c381d0 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -92,39 +92,6 @@ int opal_pmix_base_notify_event(int status, return OPAL_SUCCESS; } -struct lookup_caddy_t { - volatile bool active; - int status; - opal_pmix_pdata_t *pdat; -}; - -/******** DATA EXCHANGE ********/ -static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata) -{ - struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; - cd->status = status; - if (OPAL_SUCCESS == status && NULL != data) { - opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data); - if (NULL != p) { - cd->pdat->proc = p->proc; - if (p->value.type == cd->pdat->value.type) { - if (NULL != cd->pdat->value.key) { - free(cd->pdat->value.key); - } - (void)opal_value_xfer(&cd->pdat->value, &p->value); - } - } - } - cd->active = false; -} - -static void opcbfunc(int status, void *cbdata) -{ - struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; - cd->status = status; - cd->active = false; -} - int opal_pmix_base_exchange(opal_value_t *indat, opal_pmix_pdata_t *outdat, int timeout) @@ -133,8 +100,6 @@ int opal_pmix_base_exchange(opal_value_t *indat, opal_list_t ilist, mlist; opal_value_t *info; opal_pmix_pdata_t *pdat; - struct lookup_caddy_t caddy; - char **keys; /* protect the incoming value */ opal_dss.copy((void**)&info, indat, OPAL_VALUE); @@ -148,31 +113,10 @@ int opal_pmix_base_exchange(opal_value_t *indat, opal_list_append(&ilist, &info->super); /* publish it with "session" scope */ - if (NULL == opal_pmix.publish_nb) { - rc = opal_pmix.publish(&ilist); - OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - return rc; - } - } else { - caddy.status = -1; - caddy.active = true; - caddy.pdat = NULL; - rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&ilist); - return rc; - } - while (caddy.active) 
{ - usleep(10); - } - OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != caddy.status) { - OPAL_ERROR_LOG(caddy.status); - return caddy.status; - } + rc = opal_pmix.publish(&ilist); + OPAL_LIST_DESTRUCT(&ilist); + if (OPAL_SUCCESS != rc) { + return rc; } /* lookup the other side's info - if a non-blocking form @@ -206,45 +150,20 @@ int opal_pmix_base_exchange(opal_value_t *indat, /* if a non-blocking version of lookup isn't * available, then use the blocking version */ - if (NULL == opal_pmix.lookup_nb) { - OBJ_CONSTRUCT(&ilist, opal_list_t); - opal_list_append(&ilist, &pdat->super); - rc = opal_pmix.lookup(&ilist, &mlist); - OPAL_LIST_DESTRUCT(&mlist); + OBJ_CONSTRUCT(&ilist, opal_list_t); + opal_list_append(&ilist, &pdat->super); + rc = opal_pmix.lookup(&ilist, &mlist); + OPAL_LIST_DESTRUCT(&mlist); + if (OPAL_SUCCESS != rc) { OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - return rc; - } - } else { - caddy.status = -1; - caddy.active = true; - caddy.pdat = pdat; - keys = NULL; - opal_argv_append_nosize(&keys, pdat->value.key); - rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&mlist); - opal_argv_free(keys); - return rc; - } - while (caddy.active) { - usleep(10); - } - opal_argv_free(keys); - OPAL_LIST_DESTRUCT(&mlist); - if (OPAL_SUCCESS != caddy.status) { - OPAL_ERROR_LOG(caddy.status); - return caddy.status; - } + return rc; } /* pass back the result */ outdat->proc = pdat->proc; free(outdat->value.key); rc = opal_value_xfer(&outdat->value, &pdat->value); - OBJ_RELEASE(pdat); + OPAL_LIST_DESTRUCT(&ilist); return rc; } diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index f767391249c..eaec152edc9 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -13,6 +13,7 @@ #include "opal/constants.h" #include "opal/mca/mca.h" +#include "opal/threads/thread_usage.h" 
#include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" @@ -35,7 +36,16 @@ opal_pmix_base_module_t opal_pmix = { 0 }; bool opal_pmix_collect_all_data = true; int opal_pmix_verbose_output = -1; bool opal_pmix_base_async_modex = false; -opal_pmix_base_t opal_pmix_base = {0}; +opal_pmix_base_t opal_pmix_base = { + .evbase = NULL, + .timeout = 0, + .initialized = 0, + .lock = { + .mutex = OPAL_MUTEX_STATIC_INIT, + .cond = OPAL_PMIX_CONDITION_STATIC_INIT, + .active = false + } +}; static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) { diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 756128b0698..00f32923f6e 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -34,7 +34,7 @@ static char cray_pmi_version[128]; -static int cray_init(void); +static int cray_init(opal_list_t *ilist); static int cray_fini(void); static int cray_initialized(void); static int cray_abort(int flat, const char *msg, @@ -282,7 +282,7 @@ static void cray_get_more_info(void) return; } -static int cray_init(void) +static int cray_init(opal_list_t *ilist) { int i, spawned, size, rank, appnum, my_node; int rc, ret = OPAL_ERROR; diff --git a/opal/mca/pmix/ext1x/configure.m4 b/opal/mca/pmix/ext1x/configure.m4 index 922652d62d3..4b87d41ffaa 100644 --- a/opal/mca/pmix/ext1x/configure.m4 +++ b/opal/mca/pmix/ext1x/configure.m4 @@ -13,7 +13,7 @@ # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2015-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2014-2015 Mellanox Technologies, Inc. 
@@ -31,23 +31,22 @@ AC_DEFUN([MCA_opal_pmix_ext1x_CONFIG],[ AC_CONFIG_FILES([opal/mca/pmix/ext1x/Makefile]) AS_IF([test "$opal_external_pmix_happy" = "yes"], - [AS_IF([test "$opal_event_external_support" != "yes"], - [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) - AC_MSG_WARN([LIBRARY. THIS LIBRARY MUST POINT TO THE SAME ONE USED]) - AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) - AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) - AS_IF([test "$opal_hwloc_external_support" != "yes"], - [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL HWLOC]) - AC_MSG_WARN([LIBRARY THIS LIBRARY MUST POINT TO THE SAME ONE USED ]) - AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) - AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) - - # check for the 1.x version ( >= 1.1.4 ?) + [ # check for the 1.x version ( >= 1.1.4 ?) AC_MSG_CHECKING([if external component is version 1.x]) AS_IF([test "$opal_external_pmix_version" = "11" || test "$opal_external_pmix_version" = "12" || test "$opal_external_pmix_version" = "1x"], [AC_MSG_RESULT([yes]) + AS_IF([test "$opal_event_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) + AC_MSG_WARN([LIBRARY. 
THIS LIBRARY MUST POINT TO THE SAME ONE USED]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + AS_IF([test "$opal_hwloc_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL HWLOC]) + AC_MSG_WARN([LIBRARY THIS LIBRARY MUST POINT TO THE SAME ONE USED ]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) opal_pmix_external_1x_happy=yes], [AC_MSG_RESULT([no]) opal_pmix_external_1x_happy=no]) diff --git a/opal/mca/pmix/ext1x/pmix1x.h b/opal/mca/pmix/ext1x/pmix1x.h index 28a6a9966c9..3bcaa9c4938 100644 --- a/opal/mca/pmix/ext1x/pmix1x.h +++ b/opal/mca/pmix/ext1x/pmix1x.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2016-2017 Research Organization for Information Science @@ -90,7 +90,7 @@ OBJ_CLASS_DECLARATION(pmix1_opalcaddy_t); /**** CLIENT FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix1_client_init(void); +OPAL_MODULE_DECLSPEC int pmix1_client_init(opal_list_t *ilist); OPAL_MODULE_DECLSPEC int pmix1_client_finalize(void); OPAL_MODULE_DECLSPEC int pmix1_initialized(void); OPAL_MODULE_DECLSPEC int pmix1_abort(int flag, const char *msg, diff --git a/opal/mca/pmix/ext1x/pmix1x_client.c b/opal/mca/pmix/ext1x/pmix1x_client.c index 8f8bb830405..26ef030dbb6 100644 --- a/opal/mca/pmix/ext1x/pmix1x_client.c +++ b/opal/mca/pmix/ext1x/pmix1x_client.c @@ -100,7 +100,7 @@ static void errreg_cbfunc (pmix_status_t status, status, errhandler_ref); } -int pmix1_client_init(void) +int pmix1_client_init(opal_list_t *ilist) { opal_process_name_t pname; pmix_status_t rc; diff --git a/opal/mca/pmix/ext1x/pmix1x_server_south.c b/opal/mca/pmix/ext1x/pmix1x_server_south.c index 2117c58a672..1f1eb923476 100644 --- a/opal/mca/pmix/ext1x/pmix1x_server_south.c +++ b/opal/mca/pmix/ext1x/pmix1x_server_south.c @@ -1,10 +1,10 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -115,6 +115,13 @@ static void errreg_cbfunc(pmix_status_t status, status, errhandler_ref); } +static void op2cbfunc(pmix_status_t status, void *cbdata) +{ + volatile bool *active = (volatile bool*)cbdata; + if (active) + *active = false; +} + int pmix1_server_init(opal_pmix_server_module_t *module, opal_list_t *info) { @@ -123,6 +130,8 @@ int pmix1_server_init(opal_pmix_server_module_t *module, opal_value_t *kv; pmix_info_t *pinfo; size_t sz, n; + opal_pmix1_jobid_trkr_t *job; + volatile bool active; if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); @@ -144,6 +153,13 @@ int pmix1_server_init(opal_pmix_server_module_t *module, pinfo = NULL; } + /* insert this into our list of jobids - it will be the + * first, and so we'll check it first */ + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)opal_snprintf_jobid(job->nspace, PMIX_MAX_NSLEN, OPAL_PROC_MY_NAME.jobid); + job->jobid = OPAL_PROC_MY_NAME.jobid; + opal_list_append(&mca_pmix_ext1x_component.jobids, &job->super); + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { PMIX_INFO_FREE(pinfo, sz); return pmix1_convert_rc(rc); @@ -155,6 +171,13 @@ int pmix1_server_init(opal_pmix_server_module_t *module, /* register the errhandler */ PMIx_Register_errhandler(NULL, 0, myerr, errreg_cbfunc, NULL); + + /* as we might want to use some client-side functions, be sure + * to register our own nspace */ + active = true; + PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active); + PMIX_WAIT_FOR_COMPLETION(active); + return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/ext2x/configure.m4 b/opal/mca/pmix/ext2x/configure.m4 index a320eb65db5..171f735f3b7 100644 --- a/opal/mca/pmix/ext2x/configure.m4 +++ b/opal/mca/pmix/ext2x/configure.m4 @@ -12,10 +12,12 @@ # All rights reserved. # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. 
-# Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved. -# Copyright (c) 2015-2016 Research Organization for Information Science +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2014-2015 Mellanox Technologies, Inc. +# All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -28,33 +30,34 @@ AC_DEFUN([MCA_opal_pmix_ext2x_CONFIG],[ AC_CONFIG_FILES([opal/mca/pmix/ext2x/Makefile]) - # check to see - # if we are linking to an external v2.x library. If not, then - # do not use this component. - AC_MSG_CHECKING([if external v2.x component is to be used]) AS_IF([test "$opal_external_pmix_happy" = "yes"], - [AS_IF([test "$opal_external_pmix_version" = "2x"], - [AC_MSG_RESULT([yes - using an external v2.x library]) - opal_pmix_ext2x_happy=1 - # Build flags for our Makefile.am - opal_pmix_ext2x_CPPFLAGS=$opal_external_pmix_CPPFLAGS - opal_pmix_ext2x_LDFLAGS=$opal_external_pmix_LDFLAGS - opal_pmix_ext2x_LIBS=$opal_external_pmix_LIBS - # setup wrapper flags + [ # check for the 2.x version + AC_MSG_CHECKING([if external component is version 2.x]) + AS_IF([test "$opal_external_pmix_version" = "2x"], + [AC_MSG_RESULT([yes]) + AS_IF([test "$opal_event_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) + AC_MSG_WARN([LIBRARY. 
THIS LIBRARY MUST POINT TO THE SAME ONE USED]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + opal_pmix_external_2x_happy=yes], + [AC_MSG_RESULT([no]) + opal_pmix_external_2x_happy=no]) + + AS_IF([test "$opal_pmix_external_2x_happy" = "yes"], + [$1 + # need to set the wrapper flags for static builds pmix_ext2x_WRAPPER_EXTRA_LDFLAGS=$opal_external_pmix_LDFLAGS pmix_ext2x_WRAPPER_EXTRA_LIBS=$opal_external_pmix_LIBS], - [AC_MSG_RESULT([no - disqualifying this component]) - opal_pmix_ext2x_happy=0])], - [AC_MSG_RESULT([no - disqualifying this component]) - opal_pmix_ext2x_happy=0]) + [$2])], + [$2]) - AC_SUBST([opal_pmix_ext2x_LIBS]) - AC_SUBST([opal_pmix_ext2x_CPPFLAGS]) - AC_SUBST([opal_pmix_ext2x_LDFLAGS]) - AC_SUBST([opal_pmix_ext2x_DEPENDENCIES]) + opal_pmix_ext2x_CPPFLAGS=$opal_external_pmix_CPPFLAGS + opal_pmix_ext2x_LDFLAGS=$opal_external_pmix_LDFLAGS + opal_pmix_ext2x_LIBS=$opal_external_pmix_LIBS - AS_IF([test $opal_pmix_ext2x_happy -eq 1], - [$1], - [$2]) + AC_SUBST([opal_pmix_ext2x_CPPFLAGS]) + AC_SUBST([opal_pmix_ext2x_LDFLAGS]) + AC_SUBST([opal_pmix_ext2x_LIBS]) ])dnl diff --git a/opal/mca/pmix/ext2x/pmix2x.c b/opal/mca/pmix/ext2x/pmix2x.c index 253276fca6e..ab650a7ee1d 100644 --- a/opal/mca/pmix/ext2x/pmix2x.c +++ b/opal/mca/pmix/ext2x/pmix2x.c @@ -1,11 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -47,8 +50,8 @@ /* These are functions used by both client and server to * access common functions in the embedded PMIx library */ -static const char *pmix2x_get_nspace(opal_jobid_t jobid); -static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace); +static const char *ext2x_get_nspace(opal_jobid_t jobid); +static void ext2x_register_jobid(opal_jobid_t jobid, const char *nspace); static void register_handler(opal_list_t *event_codes, opal_list_t *info, opal_pmix_notification_fn_t evhandler, @@ -62,332 +65,194 @@ static int notify_event(int status, opal_pmix_data_range_t range, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -static void pmix2x_query(opal_list_t *queries, +static void ext2x_query(opal_list_t *queries, opal_pmix_info_cbfunc_t cbfunc, void *cbdata); -static void pmix2x_log(opal_list_t *info, +static void ext2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); const opal_pmix_base_module_t opal_pmix_ext2x_module = { /* client APIs */ - .init = pmix2x_client_init, - .finalize = pmix2x_client_finalize, - .initialized = pmix2x_initialized, - .abort = pmix2x_abort, - .commit = pmix2x_commit, - .fence = pmix2x_fence, - .fence_nb = pmix2x_fencenb, - .put = pmix2x_put, - .get = pmix2x_get, - .get_nb = pmix2x_getnb, - .publish = pmix2x_publish, - .publish_nb = pmix2x_publishnb, - .lookup = pmix2x_lookup, - .lookup_nb = pmix2x_lookupnb, - .unpublish = pmix2x_unpublish, - .unpublish_nb = pmix2x_unpublishnb, - .spawn = pmix2x_spawn, - .spawn_nb = pmix2x_spawnnb, - .connect = pmix2x_connect, - .connect_nb = pmix2x_connectnb, - .disconnect = pmix2x_disconnect, - .disconnect_nb = pmix2x_disconnectnb, - 
.resolve_peers = pmix2x_resolve_peers, - .resolve_nodes = pmix2x_resolve_nodes, - .query = pmix2x_query, - .log = pmix2x_log, + .init = ext2x_client_init, + .finalize = ext2x_client_finalize, + .initialized = ext2x_initialized, + .abort = ext2x_abort, + .commit = ext2x_commit, + .fence = ext2x_fence, + .fence_nb = ext2x_fencenb, + .put = ext2x_put, + .get = ext2x_get, + .get_nb = ext2x_getnb, + .publish = ext2x_publish, + .publish_nb = ext2x_publishnb, + .lookup = ext2x_lookup, + .lookup_nb = ext2x_lookupnb, + .unpublish = ext2x_unpublish, + .unpublish_nb = ext2x_unpublishnb, + .spawn = ext2x_spawn, + .spawn_nb = ext2x_spawnnb, + .connect = ext2x_connect, + .connect_nb = ext2x_connectnb, + .disconnect = ext2x_disconnect, + .disconnect_nb = ext2x_disconnectnb, + .resolve_peers = ext2x_resolve_peers, + .resolve_nodes = ext2x_resolve_nodes, + .query = ext2x_query, + .log = ext2x_log, /* server APIs */ - .server_init = pmix2x_server_init, - .server_finalize = pmix2x_server_finalize, - .generate_regex = pmix2x_server_gen_regex, - .generate_ppn = pmix2x_server_gen_ppn, - .server_register_nspace = pmix2x_server_register_nspace, - .server_deregister_nspace = pmix2x_server_deregister_nspace, - .server_register_client = pmix2x_server_register_client, - .server_deregister_client = pmix2x_server_deregister_client, - .server_setup_fork = pmix2x_server_setup_fork, - .server_dmodex_request = pmix2x_server_dmodex, - .server_notify_event = pmix2x_server_notify_event, + .server_init = ext2x_server_init, + .server_finalize = ext2x_server_finalize, + .generate_regex = ext2x_server_gen_regex, + .generate_ppn = ext2x_server_gen_ppn, + .server_register_nspace = ext2x_server_register_nspace, + .server_deregister_nspace = ext2x_server_deregister_nspace, + .server_register_client = ext2x_server_register_client, + .server_deregister_client = ext2x_server_deregister_client, + .server_setup_fork = ext2x_server_setup_fork, + .server_dmodex_request = ext2x_server_dmodex, + .server_notify_event = 
ext2x_server_notify_event, /* utility APIs */ .get_version = PMIx_Get_version, .register_evhandler = register_handler, .deregister_evhandler = deregister_handler, .notify_event = notify_event, - .store_local = pmix2x_store_local, - .get_nspace = pmix2x_get_nspace, - .register_jobid = pmix2x_register_jobid + .store_local = ext2x_store_local, + .get_nspace = ext2x_get_nspace, + .register_jobid = ext2x_register_jobid }; -static const char *pmix2x_get_nspace(opal_jobid_t jobid) +static void opcbfunc(pmix_status_t status, void *cbdata) { - opal_pmix2x_jobid_trkr_t *jptr; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(op); + + if (NULL != op->opcbfunc) { + op->opcbfunc(ext2x_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + + +static const char *ext2x_get_nspace(opal_jobid_t jobid) +{ + opal_ext2x_jobid_trkr_t *jptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return jptr->nspace; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return NULL; } -static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) +static void ext2x_register_jobid(opal_jobid_t jobid, const char *nspace) { - opal_pmix2x_jobid_trkr_t *jptr; + opal_ext2x_jobid_trkr_t *jptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); /* if we don't already have it, add this to our jobid tracker */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } } - jptr = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + jptr = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); 
jptr->jobid = jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &jptr->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } -static void completion_handler(int status, void *cbdata) +static void event_hdlr_complete(pmix_status_t status, void *cbdata) { - opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)cbdata; - if (NULL != chain->info) { - OPAL_LIST_RELEASE(chain->info); - } + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + + OBJ_RELEASE(op); } -static void progress_local_event_hdlr(int status, - opal_list_t *results, - opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata, - void *notification_cbdata) +static void return_local_event_hdlr(int status, opal_list_t *results, + opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata, + void *notification_cbdata) { - opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)notification_cbdata; + ext2x_threadshift_t *cd = (ext2x_threadshift_t*)notification_cbdata; + ext2x_opcaddy_t *op; + opal_value_t *kv; + pmix_status_t pstatus; size_t n; - opal_list_item_t *nxt; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - - /* if the caller indicates that the chain is completed, then stop here */ - if (OPAL_ERR_HANDLERS_COMPLETE == status) { - goto complete; - } - - /* if any results were provided, then add them here */ - if (NULL != results) { - while (NULL != (nxt = opal_list_remove_first(results))) { - opal_list_append(results, nxt); - } - } - /* see if we need to continue, starting with the single code events */ - if (NULL != chain->sing) { - /* the last handler was for a single code - see if there are - * any others that match this event */ - while (opal_list_get_end(&mca_pmix_ext2x_component.single_events) != (nxt = opal_list_get_next(&chain->sing->super))) { - sing = (opal_pmix2x_single_event_t*)nxt; - if (sing->code == chain->status) { - OBJ_RETAIN(chain); - chain->sing = sing; - opal_output_verbose(2, 
opal_pmix_base_framework.framework_output, - "%s PROGRESS CALLING SINGLE EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - sing->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - goto complete; - } - } - /* if we get here, then there are no more single code - * events that match */ - chain->sing = NULL; - /* pickup the beginning of the multi-code event list */ - if (0 < opal_list_get_size(&mca_pmix_ext2x_component.multi_events)) { - chain->multi = (opal_pmix2x_multi_event_t*)opal_list_get_begin(&mca_pmix_ext2x_component.multi_events); - } - } - - /* see if we need to continue with the multi code events */ - if (NULL != chain->multi) { - while (opal_list_get_end(&mca_pmix_ext2x_component.multi_events) != (nxt = opal_list_get_next(&chain->multi->super))) { - multi = (opal_pmix2x_multi_event_t*)nxt; - for (n=0; n < multi->ncodes; n++) { - if (multi->codes[n] == chain->status) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - OBJ_RETAIN(chain); - chain->multi = multi; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PROGRESS CALLING MULTI EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - multi->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - goto complete; + OPAL_ACQUIRE_OBJECT(cd); + if (NULL != cd->pmixcbfunc) { + op = OBJ_NEW(ext2x_opcaddy_t); + + if (NULL != results) { + /* convert the list of results to an array of info */ + op->ninfo = opal_list_get_size(results); + if (0 < op->ninfo) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, results, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, kv); + ++n; } } } - /* if we get here, then there are no more multi-mode - * events that match */ - chain->multi = NULL; - /* pickup the beginning of the default event list 
*/ - if (0 < opal_list_get_size(&mca_pmix_ext2x_component.default_events)) { - chain->def = (opal_pmix2x_default_event_t*)opal_list_get_begin(&mca_pmix_ext2x_component.default_events); - } - } - - /* if they didn't want it to go to a default handler, then we are done */ - if (chain->nondefault) { - goto complete; + /* convert the status */ + pstatus = ext2x_convert_opalrc(status); + /* call the library's callback function */ + cd->pmixcbfunc(pstatus, op->info, op->ninfo, event_hdlr_complete, op, cd->cbdata); } - if (NULL != chain->def) { - if (opal_list_get_end(&mca_pmix_ext2x_component.default_events) != (nxt = opal_list_get_next(&chain->def->super))) { - def = (opal_pmix2x_default_event_t*)nxt; - OBJ_RETAIN(chain); - chain->def = def; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PROGRESS CALLING DEFAULT EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - def->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - } + /* release the threadshift object */ + if (NULL != cd->info) { + OPAL_LIST_RELEASE(cd->info); } + OBJ_RELEASE(cd); - complete: - /* we still have to call their final callback */ - if (NULL != chain->final_cbfunc) { - chain->final_cbfunc(OPAL_SUCCESS, chain->final_cbdata); - } - /* maintain acctng */ - OBJ_RELEASE(chain); - /* let the caller know that we are done with their callback */ + /* release the caller */ if (NULL != cbfunc) { cbfunc(OPAL_SUCCESS, thiscbdata); } } -static void _event_hdlr(int sd, short args, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - size_t n; - opal_pmix2x_event_chain_t *chain; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR RECEIVED NOTIFICATION OF STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), cd->status); - - chain = OBJ_NEW(opal_pmix2x_event_chain_t); 
- /* point it at our final callback */ - chain->final_cbfunc = completion_handler; - chain->final_cbdata = chain; - - /* carry across provided info */ - chain->status = cd->status; - chain->source = cd->pname; - chain->info = cd->info; - chain->nondefault = cd->nondefault; - - /* cycle thru the single-event registrations first */ - OPAL_LIST_FOREACH(sing, &mca_pmix_ext2x_component.single_events, opal_pmix2x_single_event_t) { - if (sing->code == chain->status) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain->sing = sing; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING SINGLE EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - sing->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - return; - } - } - - /* if we didn't find any match in the single-event registrations, - * then cycle thru the multi-event registrations next */ - OPAL_LIST_FOREACH(multi, &mca_pmix_ext2x_component.multi_events, opal_pmix2x_multi_event_t) { - for (n=0; n < multi->ncodes; n++) { - if (multi->codes[n] == chain->status) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain->multi = multi; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING MULTI EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - multi->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - return; - } - } - } - - /* if they didn't want it to go to a default handler, then we are done */ - if (chain->nondefault) { - /* if we get here, then we need to cache this event in case they - * register for it later - we cannot lose individual events */ - opal_list_append(&mca_pmix_ext2x_component.cache, &chain->super); - return; - } - - /* we are done with the threadshift caddy */ - OBJ_RELEASE(cd); - - /* 
finally, pass it to any default handlers */ - if (0 < opal_list_get_size(&mca_pmix_ext2x_component.default_events)) { - def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_ext2x_component.default_events); - chain->def = def; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING DEFAULT EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - def->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - return; - } - - /* we still have to call their final callback */ - if (NULL != chain->final_cbfunc) { - chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata); - } - - OBJ_RELEASE(chain); - - return; -} - /* this function will be called by the PMIx client library * whenever it receives notification of an event. The * notification can come from an ORTE daemon (when launched * by mpirun), directly from a RM (when direct launched), or * from another process (via the local daemon). * The call will occur in the PMIx event base */ -void pmix2x_event_hdlr(size_t evhdlr_registration_id, +void ext2x_event_hdlr(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, pmix_info_t results[], size_t nresults, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { - pmix2x_threadshift_t *cd; + ext2x_threadshift_t *cd; int rc; opal_value_t *iptr; size_t n; - - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + opal_ext2x_event_t *event; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s RECEIVED NOTIFICATION OF STATUS %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); - cd = OBJ_NEW(pmix2x_threadshift_t); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + cd = OBJ_NEW(ext2x_threadshift_t); + cd->id = evhdlr_registration_id; + cd->pmixcbfunc = cbfunc; + cd->cbdata = cbdata; /* convert the incoming 
status */ - cd->status = pmix2x_convert_rc(status); + cd->status = ext2x_convert_rc(status); opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s CONVERTED STATUS %d TO STATUS %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status, cd->status); @@ -400,21 +265,19 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&cd->pname.jobid, source->nspace))) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(cd); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } - cd->pname.vpid = pmix2x_convert_rank(source->rank); + cd->pname.vpid = ext2x_convert_rank(source->rank); } /* convert the array of info */ if (NULL != info) { cd->info = OBJ_NEW(opal_list_t); for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - cd->nondefault = true; - } iptr = OBJ_NEW(opal_value_t); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(iptr); continue; @@ -422,20 +285,53 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, opal_list_append(cd->info, &iptr->super); } } - /* now push it into the local thread */ - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _event_hdlr, cd); - event_active(&cd->ev, EV_WRITE, 1); - - /* we don't need any of the data they provided, - * so let them go - also tell them that we will handle - * everything from this point forward */ + + /* convert the array of prior results */ + if (NULL != results) { + for (n=0; n < nresults; n++) { + iptr = OBJ_NEW(opal_value_t); + iptr->key = strdup(results[n].key); + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &results[n].value))) { + OPAL_ERROR_LOG(rc); + OBJ_RELEASE(iptr); + continue; + } + opal_list_append(&cd->results, &iptr->super); + } + } + + /* cycle thru the registrations */ + OPAL_LIST_FOREACH(event, 
&mca_pmix_ext2x_component.events, opal_ext2x_event_t) { + if (evhdlr_registration_id == event->index) { + /* found it - invoke the handler, pointing its + * callback function to our callback function */ + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s _EVENT_HDLR CALLING EVHDLR", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL != event->handler) { + OBJ_RETAIN(event); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + event->handler(cd->status, &cd->pname, + cd->info, &cd->results, + return_local_event_hdlr, cd); + OBJ_RELEASE(event); + return; + } + } + } + + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* if we didn't find a match, we still have to call their final callback */ if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); } + OPAL_LIST_RELEASE(cd->info); + OBJ_RELEASE(cd); + return; } -opal_vpid_t pmix2x_convert_rank(int rank) +opal_vpid_t ext2x_convert_rank(pmix_rank_t rank) { switch(rank) { case PMIX_RANK_UNDEF: @@ -447,7 +343,7 @@ opal_vpid_t pmix2x_convert_rank(int rank) } } -pmix_rank_t pmix2x_convert_opalrank(opal_vpid_t vpid) +pmix_rank_t ext2x_convert_opalrank(opal_vpid_t vpid) { switch(vpid) { case OPAL_VPID_WILDCARD: @@ -459,7 +355,7 @@ pmix_rank_t pmix2x_convert_opalrank(opal_vpid_t vpid) } } -pmix_status_t pmix2x_convert_opalrc(int rc) +pmix_status_t ext2x_convert_opalrc(int rc) { switch (rc) { case OPAL_ERR_DEBUGGER_RELEASE: @@ -531,16 +427,19 @@ pmix_status_t pmix2x_convert_opalrc(int rc) case OPAL_ERR_PARTIAL_SUCCESS: return PMIX_QUERY_PARTIAL_SUCCESS; + case OPAL_ERR_MODEL_DECLARED: + return PMIX_MODEL_DECLARED; + case OPAL_ERROR: return PMIX_ERROR; case OPAL_SUCCESS: return PMIX_SUCCESS; default: - return PMIX_ERROR; + return rc; } } -int pmix2x_convert_rc(pmix_status_t rc) +int ext2x_convert_rc(pmix_status_t rc) { switch (rc) { case PMIX_ERR_DEBUGGER_RELEASE: @@ -615,16 +514,26 @@ int pmix2x_convert_rc(pmix_status_t 
rc) case PMIX_QUERY_PARTIAL_SUCCESS: return OPAL_ERR_PARTIAL_SUCCESS; + case PMIX_MONITOR_HEARTBEAT_ALERT: + return OPAL_ERR_HEARTBEAT_ALERT; + + case PMIX_MONITOR_FILE_ALERT: + return OPAL_ERR_FILE_ALERT; + + case PMIX_MODEL_DECLARED: + return OPAL_ERR_MODEL_DECLARED; + + case PMIX_ERROR: return OPAL_ERROR; case PMIX_SUCCESS: return OPAL_SUCCESS; default: - return OPAL_ERROR; + return rc; } } -opal_pmix_scope_t pmix2x_convert_scope(pmix_scope_t scope) +opal_pmix_scope_t ext2x_convert_scope(pmix_scope_t scope) { switch(scope) { case PMIX_SCOPE_UNDEF: @@ -640,7 +549,7 @@ opal_pmix_scope_t pmix2x_convert_scope(pmix_scope_t scope) } } -pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t scope) { +pmix_scope_t ext2x_convert_opalscope(opal_pmix_scope_t scope) { switch(scope) { case OPAL_PMIX_LOCAL: return PMIX_LOCAL; @@ -653,7 +562,7 @@ pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t scope) { } } -pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range) { +pmix_data_range_t ext2x_convert_opalrange(opal_pmix_data_range_t range) { switch(range) { case OPAL_PMIX_RANGE_UNDEF: return PMIX_RANGE_UNDEF; @@ -672,7 +581,7 @@ pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range) { } } -opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range) { +opal_pmix_data_range_t ext2x_convert_range(pmix_data_range_t range) { switch(range) { case PMIX_RANGE_UNDEF: return OPAL_PMIX_RANGE_UNDEF; @@ -691,7 +600,7 @@ opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range) { } } -opal_pmix_persistence_t pmix2x_convert_persist(pmix_persistence_t persist) +opal_pmix_persistence_t ext2x_convert_persist(pmix_persistence_t persist) { switch(persist) { case PMIX_PERSIST_INDEF: @@ -709,7 +618,7 @@ opal_pmix_persistence_t pmix2x_convert_persist(pmix_persistence_t persist) } } -pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t persist) +pmix_persistence_t ext2x_convert_opalpersist(opal_pmix_persistence_t persist) { 
switch(persist) { case OPAL_PMIX_PERSIST_INDEF: @@ -727,14 +636,32 @@ pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t persist) } } +char* ext2x_convert_jobid(opal_jobid_t jobid) +{ + opal_ext2x_jobid_trkr_t *jptr; + + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return jptr->nspace; + } + } + return NULL; +} + /**** RHC: NEED TO ADD SUPPORT FOR NEW PMIX DATA TYPES, INCLUDING **** CONVERSION OF PROC STATES ****/ -void pmix2x_value_load(pmix_value_t *v, +void ext2x_value_load(pmix_value_t *v, opal_value_t *kv) { - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_jobid_trkr_t *job; bool found; + opal_list_t *list; + opal_value_t *val; + pmix_info_t *info; + size_t n; switch(kv->type) { case OPAL_UNDEF: @@ -826,7 +753,7 @@ void pmix2x_value_load(pmix_value_t *v, break; case OPAL_VPID: v->type = PMIX_PROC_RANK; - v->data.rank = pmix2x_convert_opalrank(kv->data.name.vpid); + v->data.rank = ext2x_convert_opalrank(kv->data.name.vpid); break; case OPAL_NAME: v->type = PMIX_PROC; @@ -834,7 +761,7 @@ void pmix2x_value_load(pmix_value_t *v, PMIX_PROC_CREATE(v->data.proc, 1); /* see if this job is in our list of known nspaces */ found = false; - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (job->jobid == kv->data.name.jobid) { (void)strncpy(v->data.proc->nspace, job->nspace, PMIX_MAX_NSLEN); found = true; @@ -844,7 +771,7 @@ void pmix2x_value_load(pmix_value_t *v, if (!found) { (void)opal_snprintf_jobid(v->data.proc->nspace, PMIX_MAX_NSLEN, kv->data.name.vpid); } - v->data.proc->rank = pmix2x_convert_opalrank(kv->data.name.vpid); + v->data.proc->rank = ext2x_convert_opalrank(kv->data.name.vpid); break; case OPAL_BYTE_OBJECT: v->type = PMIX_BYTE_OBJECT; @@ -859,15 +786,15 @@ void 
pmix2x_value_load(pmix_value_t *v, break; case OPAL_PERSIST: v->type = PMIX_PERSIST; - v->data.persist = pmix2x_convert_opalpersist(kv->data.uint8); + v->data.persist = ext2x_convert_opalpersist((opal_pmix_persistence_t)kv->data.uint8); break; case OPAL_SCOPE: v->type = PMIX_SCOPE; - v->data.scope = pmix2x_convert_opalscope(kv->data.uint8); + v->data.scope = ext2x_convert_opalscope((opal_pmix_scope_t)kv->data.uint8); break; case OPAL_DATA_RANGE: v->type = PMIX_DATA_RANGE; - v->data.range = pmix2x_convert_opalrange(kv->data.uint8); + v->data.range = ext2x_convert_opalrange((opal_pmix_data_range_t)kv->data.uint8); break; case OPAL_PROC_STATE: v->type = PMIX_PROC_STATE; @@ -876,8 +803,22 @@ void pmix2x_value_load(pmix_value_t *v, memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t)); break; case OPAL_PTR: - v->type = PMIX_POINTER; - v->data.ptr = kv->data.ptr; + /* if someone returned a pointer, it must be to a list of + * opal_value_t's that we need to convert to a pmix_data_array + * of pmix_info_t structures */ + list = (opal_list_t*)kv->data.ptr; + v->type = PMIX_DATA_ARRAY; + v->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + v->data.darray->type = PMIX_INFO; + v->data.darray->size = opal_list_get_size(list); + PMIX_INFO_CREATE(info, v->data.darray->size); + v->data.darray->array = info; + n=0; + OPAL_LIST_FOREACH(val, list, opal_value_t) { + (void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&info[n].value, val); + ++n; + } break; default: /* silence warnings */ @@ -885,12 +826,15 @@ void pmix2x_value_load(pmix_value_t *v, } } -int pmix2x_value_unload(opal_value_t *kv, +int ext2x_value_unload(opal_value_t *kv, const pmix_value_t *v) { int rc=OPAL_SUCCESS; bool found; - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_jobid_trkr_t *job; + opal_list_t *lt; + opal_value_t *ival; + size_t n; switch(v->type) { case PMIX_UNDEF: @@ -980,13 +924,13 @@ int pmix2x_value_unload(opal_value_t *kv, break; case PMIX_PROC_RANK: 
kv->type = OPAL_VPID; - kv->data.name.vpid = pmix2x_convert_rank(v->data.rank); + kv->data.name.vpid = ext2x_convert_rank(v->data.rank); break; case PMIX_PROC: kv->type = OPAL_NAME; /* see if this job is in our list of known nspaces */ found = false; - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (0 == strncmp(job->nspace, v->data.proc->nspace, PMIX_MAX_NSLEN)) { kv->data.name.jobid = job->jobid; found = true; @@ -995,10 +939,10 @@ int pmix2x_value_unload(opal_value_t *kv, } if (!found) { if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&kv->data.name.jobid, v->data.proc->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } - kv->data.name.vpid = pmix2x_convert_rank(v->data.proc->rank); + kv->data.name.vpid = ext2x_convert_rank(v->data.proc->rank); break; case PMIX_BYTE_OBJECT: kv->type = OPAL_BYTE_OBJECT; @@ -1013,15 +957,15 @@ int pmix2x_value_unload(opal_value_t *kv, break; case PMIX_PERSIST: kv->type = OPAL_PERSIST; - kv->data.uint8 = pmix2x_convert_persist(v->data.persist); + kv->data.uint8 = ext2x_convert_persist(v->data.persist); break; case PMIX_SCOPE: kv->type = OPAL_SCOPE; - kv->data.uint8 = pmix2x_convert_scope(v->data.scope); + kv->data.uint8 = ext2x_convert_scope(v->data.scope); break; case PMIX_DATA_RANGE: kv->type = OPAL_DATA_RANGE; - kv->data.uint8 = pmix2x_convert_range(v->data.range); + kv->data.uint8 = ext2x_convert_range(v->data.range); break; case PMIX_PROC_STATE: kv->type = OPAL_PROC_STATE; @@ -1033,6 +977,31 @@ int pmix2x_value_unload(opal_value_t *kv, kv->type = OPAL_PTR; kv->data.ptr = v->data.ptr; break; + case PMIX_DATA_ARRAY: + if (NULL == v->data.darray || NULL == v->data.darray->array) { + kv->data.ptr = NULL; + break; + } + lt = OBJ_NEW(opal_list_t); + kv->type = OPAL_PTR; + kv->data.ptr = (void*)lt; + for (n=0; n < v->data.darray->size; n++) { + ival = 
OBJ_NEW(opal_value_t); + opal_list_append(lt, &ival->super); + /* handle the various types */ + if (PMIX_INFO == v->data.darray->type) { + pmix_info_t *iptr = (pmix_info_t*)v->data.darray->array; + ival->key = strdup(iptr[n].key); + rc = ext2x_value_unload(ival, &iptr[n].value); + if (OPAL_SUCCESS != rc) { + OPAL_LIST_RELEASE(lt); + kv->type = OPAL_UNDEF; + kv->data.ptr = NULL; + break; + } + } + } + break; default: /* silence warnings */ rc = OPAL_ERROR; @@ -1041,323 +1010,326 @@ int pmix2x_value_unload(opal_value_t *kv, return rc; } -static void _reg_hdlr(int sd, short args, void *cbdata) +static void errreg_cbfunc (pmix_status_t status, + size_t errhandler_ref, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_event_chain_t *chain; - opal_pmix2x_single_event_t *sing = NULL; - opal_pmix2x_multi_event_t *multi = NULL; - opal_pmix2x_default_event_t *def = NULL; - opal_value_t *kv; - int i; - bool prepend = false; - size_t n; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(op); + op->event->index = errhandler_ref; + opal_output_verbose(5, opal_pmix_base_framework.framework_output, + "PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu", + status, (unsigned long)errhandler_ref); + if (NULL != op->evregcbfunc) { + op->evregcbfunc(ext2x_convert_rc(status), errhandler_ref, op->cbdata); + } + OBJ_RELEASE(op); +} - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s REGISTER HANDLER CODES %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == cd->event_codes) ? 
"NULL" : "NON-NULL"); +static void register_handler(opal_list_t *event_codes, + opal_list_t *info, + opal_pmix_notification_fn_t evhandler, + opal_pmix_evhandler_reg_cbfunc_t cbfunc, + void *cbdata) +{ + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + size_t n; + opal_value_t *kv; - if (NULL != cd->info) { - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { - if (0 == strcmp(kv->key, OPAL_PMIX_EVENT_ORDER_PREPEND)) { - prepend = true; - break; - } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, 0, cbdata); } + return; } - if (NULL == cd->event_codes) { - /* this is a default handler */ - def = OBJ_NEW(opal_pmix2x_default_event_t); - def->handler = cd->evhandler; - def->index = mca_pmix_ext2x_component.evindex; - if (prepend) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PREPENDING TO DEFAULT EVENTS", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_list_prepend(&mca_pmix_ext2x_component.default_events, &def->super); - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s APPENDING TO DEFAULT EVENTS", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_list_append(&mca_pmix_ext2x_component.default_events, &def->super); - } - } else if (1 == opal_list_get_size(cd->event_codes)) { - /* single handler */ - sing = OBJ_NEW(opal_pmix2x_single_event_t); - kv = (opal_value_t*)opal_list_get_first(cd->event_codes); - sing->code = kv->data.integer; - sing->index = mca_pmix_ext2x_component.evindex; - sing->handler = cd->evhandler; - if (prepend) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PREPENDING TO SINGLE EVENTS WITH CODE %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code); - opal_list_prepend(&mca_pmix_ext2x_component.single_events, &sing->super); - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s APPENDING 
TO SINGLE EVENTS WITH CODE %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code); - opal_list_append(&mca_pmix_ext2x_component.single_events, &sing->super); - } - } else { - multi = OBJ_NEW(opal_pmix2x_multi_event_t); - multi->ncodes = opal_list_get_size(cd->event_codes); - multi->codes = (int*)malloc(multi->ncodes * sizeof(int)); - i=0; - OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) { - multi->codes[i] = kv->data.integer; - ++i; - } - multi->index = mca_pmix_ext2x_component.evindex; - multi->handler = cd->evhandler; - if (prepend) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PREPENDING TO MULTI EVENTS", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_list_prepend(&mca_pmix_ext2x_component.multi_events, &multi->super); - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s APPENDING TO MULTI EVENTS", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_list_append(&mca_pmix_ext2x_component.multi_events, &multi->super); + op = OBJ_NEW(ext2x_opcaddy_t); + op->evregcbfunc = cbfunc; + op->cbdata = cbdata; + + /* convert the event codes */ + if (NULL != event_codes) { + op->ncodes = opal_list_get_size(event_codes); + op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t)); + n=0; + OPAL_LIST_FOREACH(kv, event_codes, opal_value_t) { + op->pcodes[n] = ext2x_convert_opalrc(kv->data.integer); + ++n; } } - /* release the caller */ - if (NULL != cd->cbfunc) { - cd->cbfunc(OPAL_SUCCESS, mca_pmix_ext2x_component.evindex, cd->cbdata); - } - mca_pmix_ext2x_component.evindex++; - - /* check if any matching notifications have been cached - only nondefault - * events will have been cached*/ - if (NULL == def) { - /* check single code registrations */ - if (NULL != sing) { - OPAL_LIST_FOREACH(chain, &mca_pmix_ext2x_component.cache, opal_pmix2x_event_chain_t) { - if (sing->code == chain->status) { - opal_list_remove_item(&mca_pmix_ext2x_component.cache, &chain->super); - chain->sing = sing; - 
sing->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; - } - } - } else if (NULL != multi) { - /* check for multi code registrations */ - OPAL_LIST_FOREACH(chain, &mca_pmix_ext2x_component.cache, opal_pmix2x_event_chain_t) { - for (n=0; n < multi->ncodes; n++) { - if (multi->codes[n] == chain->status) { - opal_list_remove_item(&mca_pmix_ext2x_component.cache, &chain->super); - chain->multi = multi; - multi->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; - } - } + /* convert the list of info to an array of pmix_info_t */ + if (NULL != info) { + op->ninfo = opal_list_get_size(info); + if (0 < op->ninfo) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, kv); + ++n; } } } - OBJ_RELEASE(cd); - return; -} -static void register_handler(opal_list_t *event_codes, - opal_list_t *info, - opal_pmix_notification_fn_t evhandler, - opal_pmix_evhandler_reg_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + /* register the event */ + op->event = OBJ_NEW(opal_ext2x_event_t); + op->event->handler = evhandler; + opal_list_append(&mca_pmix_ext2x_component.events, &op->event->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - OPAL_PMIX_THREADSHIFT(event_codes, info, evhandler, _reg_hdlr, cbfunc, cbdata); + PMIx_Register_event_handler(op->pcodes, op->ncodes, + op->info, op->ninfo, + ext2x_event_hdlr, errreg_cbfunc, op); return; } -static void _dereg_hdlr(int sd, short args, void *cbdata) +static void deregister_handler(size_t evhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = 
(pmix2x_threadshift_t*)cbdata; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - - /* check the single events first */ - OPAL_LIST_FOREACH(sing, &mca_pmix_ext2x_component.single_events, opal_pmix2x_single_event_t) { - if (cd->handler == sing->index) { - opal_list_remove_item(&mca_pmix_ext2x_component.single_events, &sing->super); - OBJ_RELEASE(sing); - goto release; - } - } - /* check multi events */ - OPAL_LIST_FOREACH(multi, &mca_pmix_ext2x_component.multi_events, opal_pmix2x_multi_event_t) { - if (cd->handler == multi->index) { - opal_list_remove_item(&mca_pmix_ext2x_component.multi_events, &multi->super); - OBJ_RELEASE(multi); - goto release; + ext2x_opcaddy_t *op; + opal_ext2x_event_t *event; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); } + return; } - /* check default events */ - OPAL_LIST_FOREACH(def, &mca_pmix_ext2x_component.default_events, opal_pmix2x_default_event_t) { - if (cd->handler == def->index) { - opal_list_remove_item(&mca_pmix_ext2x_component.default_events, &def->super); - OBJ_RELEASE(def); + + /* look for this event */ + OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_ext2x_event_t) { + if (evhandler == event->index) { + opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); + OBJ_RELEASE(event); break; } } - release: - if (NULL != cd->opcbfunc) { - cd->opcbfunc(OPAL_SUCCESS, cd->cbdata); - } - OBJ_RELEASE(cd); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + op = OBJ_NEW(ext2x_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + /* tell the library to deregister this handler */ + PMIx_Deregister_event_handler(evhandler, opcbfunc, op); + return; } -static void deregister_handler(size_t evhandler, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) +static void 
notify_complete(pmix_status_t status, void *cbdata) { - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - OPAL_PMIX_OP_THREADSHIFT(evhandler, _dereg_hdlr, cbfunc, cbdata); - return; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + if (NULL != op->opcbfunc) { + op->opcbfunc(ext2x_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); } -static void _notify_event(int sd, short args, void *cbdata) +static int notify_event(int status, + const opal_process_name_t *source, + opal_pmix_data_range_t range, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - size_t i; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - opal_pmix2x_event_chain_t *chain; - - /* check the single events first */ - OPAL_LIST_FOREACH(sing, &mca_pmix_ext2x_component.single_events, opal_pmix2x_single_event_t) { - if (cd->status == sing->code) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain = OBJ_NEW(opal_pmix2x_event_chain_t); - chain->status = cd->status; - chain->range = pmix2x_convert_opalrange(cd->range); - chain->source = *(cd->source); - chain->info = cd->info; - chain->final_cbfunc = cd->opcbfunc; - chain->final_cbdata = cd->cbdata; - chain->sing = sing; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "[%s] CALLING SINGLE EVHDLR FOR STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status); - sing->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; + ext2x_opcaddy_t *op; + opal_value_t *kv; + pmix_proc_t p, *pptr; + pmix_status_t pstatus; + size_t n; + pmix_data_range_t prange; + char *nsptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + op = OBJ_NEW(ext2x_opcaddy_t); + + /* convert the status */ + pstatus = ext2x_convert_opalrc(status); + + /* convert the source */ + if (NULL == source) { + pptr = NULL; + } else { + if (NULL == (nsptr = ext2x_convert_jobid(source->jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = ext2x_convert_opalrank(source->vpid); + pptr = &p; } - /* check multi events */ - OPAL_LIST_FOREACH(multi, &mca_pmix_ext2x_component.multi_events, opal_pmix2x_multi_event_t) { - for (i=0; i < multi->ncodes; i++) { - if (cd->status == multi->codes[i]) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain = OBJ_NEW(opal_pmix2x_event_chain_t); - chain->status = cd->status; - chain->range = pmix2x_convert_opalrange(cd->range); - chain->source = *(cd->source); - chain->info = cd->info; - chain->final_cbfunc = cd->opcbfunc; - chain->final_cbdata = cd->cbdata; - chain->multi = multi; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "[%s] CALLING MULTI EVHDLR FOR STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status); - multi->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* convert the range */ + prange = ext2x_convert_opalrange(range); + + /* convert the list of info */ + if (NULL != info) { + op->ninfo = opal_list_get_size(info); + if (0 < op->ninfo) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, kv); + ++n; } } } - /* check default events */ - if (0 < 
opal_list_get_size(&mca_pmix_ext2x_component.default_events)) { - def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_ext2x_component.default_events); - chain = OBJ_NEW(opal_pmix2x_event_chain_t); - chain->status = cd->status; - chain->range = pmix2x_convert_opalrange(cd->range); - chain->source = *(cd->source); - chain->info = cd->info; - chain->final_cbfunc = cd->opcbfunc; - chain->final_cbdata = cd->cbdata; - chain->def = def; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "[%s] CALLING DEFAULT EVHDLR FOR STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status); - def->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; - } - /* if we get here, then there are no registered event handlers */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(OPAL_ERR_NOT_FOUND, cd->cbdata); - } - OBJ_RELEASE(cd); - return; + /* ask the library to notify our clients */ + pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op); + + return ext2x_convert_rc(pstatus); } -static int notify_event(int status, - const opal_process_name_t *source, - opal_pmix_data_range_t range, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) +static void relcbfunc(void *cbdata) { - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify_event, cbfunc, cbdata); - return OPAL_SUCCESS; + opal_list_t *results = (opal_list_t*)cbdata; + if (NULL != results) { + OPAL_LIST_RELEASE(results); + } } -static void pmix2x_query(opal_list_t *queries, - opal_pmix_info_cbfunc_t cbfunc, void *cbdata) +static void infocbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) { - if (NULL != cbfunc) { - 
cbfunc(OPAL_ERR_NOT_SUPPORTED, NULL, cbdata, NULL, NULL); + ext2x_opcaddy_t *cd = (ext2x_opcaddy_t*)cbdata; + int rc = OPAL_SUCCESS; + opal_list_t *results = NULL; + opal_value_t *iptr; + size_t n; + + OPAL_ACQUIRE_OBJECT(cd); + + /* convert the array of pmix_info_t to the list of info */ + if (NULL != info) { + results = OBJ_NEW(opal_list_t); + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(results, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { + OPAL_LIST_RELEASE(results); + results = NULL; + break; + } + } } - return; + + if (NULL != release_fn) { + release_fn(release_cbdata); + } + + /* return the values to the original requestor */ + if (NULL != cd->qcbfunc) { + cd->qcbfunc(rc, results, cd->cbdata, relcbfunc, results); + } + OBJ_RELEASE(cd); } -static void opcbfunc(pmix_status_t status, void *cbdata) +static void ext2x_query(opal_list_t *queries, + opal_pmix_info_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + int rc; + opal_value_t *ival; + size_t n, nqueries, nq; + ext2x_opcaddy_t *cd; + pmix_status_t prc; + opal_pmix_query_t *q; - if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { cbfunc(OPAL_ERR_NOT_INITIALIZED, NULL, cbdata, NULL, NULL); } /* cd is not allocated yet: report here, never via CLEANUP (which releases cd) */ + return; } - OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* create the caddy */ + cd = OBJ_NEW(ext2x_opcaddy_t); + + /* bozo check */ + if (NULL == queries || 0 == (nqueries = opal_list_get_size(queries))) { + rc = OPAL_ERR_BAD_PARAM; + goto CLEANUP; + } + + /* setup the operation */ + cd->qcbfunc = cbfunc; + cd->cbdata = cbdata; + cd->nqueries = nqueries; + + /* convert the list to an array of query objects */ + PMIX_QUERY_CREATE(cd->queries, cd->nqueries); + n=0; + OPAL_LIST_FOREACH(q, queries, 
opal_pmix_query_t) { + cd->queries[n].keys = opal_argv_copy(q->keys); + cd->queries[n].nqual = opal_list_get_size(&q->qualifiers); + if (0 < cd->queries[n].nqual) { + PMIX_INFO_CREATE(cd->queries[n].qualifiers, cd->queries[n].nqual); + nq = 0; + OPAL_LIST_FOREACH(ival, &q->qualifiers, opal_value_t) { + (void)strncpy(cd->queries[n].qualifiers[nq].key, ival->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&cd->queries[n].qualifiers[nq].value, ival); + ++nq; + } + } + ++n; + } + + /* pass it down */ + if (PMIX_SUCCESS != (prc = PMIx_Query_info_nb(cd->queries, cd->nqueries, + infocbfunc, cd))) { + /* do not hang! */ + rc = ext2x_convert_rc(prc); + goto CLEANUP; + } + + return; + + CLEANUP: + if (NULL != cbfunc) { + cbfunc(rc, NULL, cbdata, NULL, NULL); + } + OBJ_RELEASE(cd); + return; } -static void pmix2x_log(opal_list_t *info, +static void ext2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; opal_value_t *ival; size_t n, ninfo; - pmix2x_opcaddy_t *cd; + ext2x_opcaddy_t *cd; pmix_status_t prc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); } /* cd is not allocated yet: report here, never via CLEANUP */ + return; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ - cd = OBJ_NEW(pmix2x_opcaddy_t); + cd = OBJ_NEW(ext2x_opcaddy_t); /* bozo check */ if (NULL == info || 0 == (ninfo = opal_list_get_size(info))) { @@ -1375,7 +1347,7 @@ static void pmix2x_log(opal_list_t *info, n=0; OPAL_LIST_FOREACH(ival, info, opal_value_t) { (void)strncpy(cd->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&cd->info[n].value, ival); + ext2x_value_load(&cd->info[n].value, ival); ++n; } @@ -1383,7 +1355,7 @@ static void pmix2x_log(opal_list_t *info, if (PMIX_SUCCESS != (prc = PMIx_Log_nb(cd->info, cd->ninfo, NULL, 0, opcbfunc, cd))) { /* do not hang! 
*/ - rc = pmix2x_convert_rc(prc); + rc = ext2x_convert_rc(prc); goto CLEANUP; } @@ -1396,95 +1368,104 @@ static void pmix2x_log(opal_list_t *info, OBJ_RELEASE(cd); } -/**** INSTANTIATE INTERNAL CLASSES ****/ -OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, - opal_list_item_t, - NULL, NULL); - -OBJ_CLASS_INSTANCE(opal_pmix2x_single_event_t, - opal_list_item_t, - NULL, NULL); - -static void mtevcon(opal_pmix2x_multi_event_t *p) -{ - p->codes = NULL; - p->ncodes = 0; -} -static void mtevdes(opal_pmix2x_multi_event_t *p) +opal_pmix_alloc_directive_t ext2x_convert_allocdir(pmix_alloc_directive_t dir) { - if (NULL != p->codes) { - free(p->codes); + switch (dir) { + case PMIX_ALLOC_NEW: + return OPAL_PMIX_ALLOC_NEW; + case PMIX_ALLOC_EXTEND: + return OPAL_PMIX_ALLOC_EXTEND; + case PMIX_ALLOC_RELEASE: + return OPAL_PMIX_ALLOC_RELEASE; + case PMIX_ALLOC_REAQUIRE: + return OPAL_PMIX_ALLOC_REAQCUIRE; + default: + return OPAL_PMIX_ALLOC_UNDEF; } } -OBJ_CLASS_INSTANCE(opal_pmix2x_multi_event_t, - opal_list_item_t, - mtevcon, mtevdes); -OBJ_CLASS_INSTANCE(opal_pmix2x_default_event_t, +/**** INSTANTIATE INTERNAL CLASSES ****/ +OBJ_CLASS_INSTANCE(opal_ext2x_jobid_trkr_t, opal_list_item_t, NULL, NULL); -static void chcon(opal_pmix2x_event_chain_t *p) +static void evcon(opal_ext2x_event_t *p) { - p->nondefault = false; - p->info = NULL; - OBJ_CONSTRUCT(&p->results, opal_list_t); - p->sing = NULL; - p->multi = NULL; - p->def = NULL; - p->final_cbfunc = NULL; - p->final_cbdata = NULL; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); + p->handler = NULL; + p->cbdata = NULL; } -static void chdes(opal_pmix2x_event_chain_t *p) +static void evdes(opal_ext2x_event_t *p) { - OPAL_LIST_DESTRUCT(&p->results); - if (NULL != p->info) { - OPAL_LIST_RELEASE(p->info); - } + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); } -OBJ_CLASS_INSTANCE(opal_pmix2x_event_chain_t, +OBJ_CLASS_INSTANCE(opal_ext2x_event_t, opal_list_item_t, - chcon, chdes); + evcon, evdes); -static void opcon(pmix2x_opcaddy_t *p) +static void 
opcon(ext2x_opcaddy_t *p) { memset(&p->p, 0, sizeof(pmix_proc_t)); + p->nspace = NULL; p->procs = NULL; p->nprocs = 0; + p->pdata = NULL; + p->npdata = 0; p->error_procs = NULL; p->nerror_procs = 0; p->info = NULL; p->ninfo = 0; p->apps = NULL; p->sz = 0; - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); + p->codes = NULL; + p->pcodes = NULL; + p->ncodes = 0; + p->queries = NULL; + p->nqueries = 0; + p->event = NULL; p->opcbfunc = NULL; p->mdxcbfunc = NULL; p->valcbfunc = NULL; p->lkcbfunc = NULL; p->spcbfunc = NULL; + p->evregcbfunc = NULL; + p->qcbfunc = NULL; p->cbdata = NULL; } -static void opdes(pmix2x_opcaddy_t *p) +static void opdes(ext2x_opcaddy_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->nspace) { + free(p->nspace); + } if (NULL != p->procs) { PMIX_PROC_FREE(p->procs, p->nprocs); } + if (NULL != p->pdata) { + PMIX_PDATA_FREE(p->pdata, p->npdata); + } if (NULL != p->error_procs) { PMIX_PROC_FREE(p->error_procs, p->nerror_procs); } if (NULL != p->info) { - PMIX_INFO_FREE(p->info, p->sz); + PMIX_INFO_FREE(p->info, p->ninfo); } if (NULL != p->apps) { PMIX_APP_FREE(p->apps, p->sz); } + if (NULL != p->pcodes) { + free(p->pcodes); + } + if (NULL != p->queries) { + PMIX_QUERY_FREE(p->queries, p->nqueries); + } } -OBJ_CLASS_INSTANCE(pmix2x_opcaddy_t, +OBJ_CLASS_INSTANCE(ext2x_opcaddy_t, opal_object_t, opcon, opdes); -static void ocadcon(pmix2x_opalcaddy_t *p) +static void ocadcon(ext2x_opalcaddy_t *p) { OBJ_CONSTRUCT(&p->procs, opal_list_t); OBJ_CONSTRUCT(&p->info, opal_list_t); @@ -1500,28 +1481,55 @@ static void ocadcon(pmix2x_opalcaddy_t *p) p->toolcbfunc = NULL; p->ocbdata = NULL; } -static void ocaddes(pmix2x_opalcaddy_t *p) +static void ocaddes(ext2x_opalcaddy_t *p) { OPAL_LIST_DESTRUCT(&p->procs); OPAL_LIST_DESTRUCT(&p->info); OPAL_LIST_DESTRUCT(&p->apps); } -OBJ_CLASS_INSTANCE(pmix2x_opalcaddy_t, +OBJ_CLASS_INSTANCE(ext2x_opalcaddy_t, opal_object_t, ocadcon, ocaddes); -static void tscon(pmix2x_threadshift_t *p) +static void 
tscon(ext2x_threadshift_t *p) { - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); + p->msg = NULL; + p->strings = NULL; p->source = NULL; p->event_codes = NULL; p->info = NULL; + OBJ_CONSTRUCT(&p->results, opal_list_t); p->evhandler = NULL; p->nondefault = false; p->cbfunc = NULL; p->opcbfunc = NULL; p->cbdata = NULL; } -OBJ_CLASS_INSTANCE(pmix2x_threadshift_t, +static void tsdes(ext2x_threadshift_t *p) +{ + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->strings) { + free(p->strings); + } + OPAL_LIST_DESTRUCT(&p->results); +} +OBJ_CLASS_INSTANCE(ext2x_threadshift_t, opal_object_t, - tscon, NULL); + tscon, tsdes); + +static void dmcon(opal_ext2x_dmx_trkr_t *p) +{ + p->nspace = NULL; + p->cbfunc = NULL; + p->cbdata = NULL; +} +static void dmdes(opal_ext2x_dmx_trkr_t *p) +{ + if (NULL != p->nspace) { + free(p->nspace); + } +} +OBJ_CLASS_INSTANCE(opal_ext2x_dmx_trkr_t, + opal_list_item_t, + dmcon, dmdes); diff --git a/opal/mca/pmix/ext2x/pmix2x.h b/opal/mca/pmix/ext2x/pmix2x.h index c849356d370..78cc4dac9bd 100644 --- a/opal/mca/pmix/ext2x/pmix2x.h +++ b/opal/mca/pmix/ext2x/pmix2x.h @@ -1,9 +1,12 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +31,7 @@ #include "opal/mca/event/event.h" #include "opal/util/proc.h" -#include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/base/base.h" #include "pmix_server.h" #include "pmix_common.h" @@ -39,11 +42,10 @@ typedef struct { opal_list_t jobids; bool native_launch; size_t evindex; - opal_list_t single_events; - opal_list_t multi_events; - opal_list_t default_events; + opal_list_t events; int cache_size; opal_list_t cache; + opal_list_t dmdx; } mca_pmix_ext2x_component_t; OPAL_DECLSPEC extern mca_pmix_ext2x_component_t mca_pmix_ext2x_component; @@ -55,70 +57,59 @@ typedef struct { opal_list_item_t super; opal_jobid_t jobid; char nspace[PMIX_MAX_NSLEN + 1]; -} opal_pmix2x_jobid_trkr_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t); +} opal_ext2x_jobid_trkr_t; +OBJ_CLASS_DECLARATION(opal_ext2x_jobid_trkr_t); typedef struct { opal_list_item_t super; + opal_pmix_lock_t lock; size_t index; - int code; opal_pmix_notification_fn_t handler; -} opal_pmix2x_single_event_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_single_event_t); - -typedef struct { - opal_list_item_t super; - size_t index; - int *codes; - size_t ncodes; - opal_pmix_notification_fn_t handler; -} opal_pmix2x_multi_event_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_multi_event_t); - -typedef struct { - opal_list_item_t super; - size_t index; - opal_pmix_notification_fn_t handler; -} opal_pmix2x_default_event_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_default_event_t); + void *cbdata; +} opal_ext2x_event_t; +OBJ_CLASS_DECLARATION(opal_ext2x_event_t); typedef struct { opal_list_item_t super; - int status; - bool nondefault; - opal_process_name_t source; - pmix_data_range_t range; - opal_list_t *info; - opal_list_t results; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - opal_pmix_op_cbfunc_t final_cbfunc; - void *final_cbdata; -} opal_pmix2x_event_chain_t; 
-OBJ_CLASS_DECLARATION(opal_pmix2x_event_chain_t); + char *nspace; + pmix_modex_cbfunc_t cbfunc; + void *cbdata; +} opal_ext2x_dmx_trkr_t; +OBJ_CLASS_DECLARATION(opal_ext2x_dmx_trkr_t); typedef struct { opal_object_t super; + opal_event_t ev; pmix_status_t status; + char *nspace; pmix_proc_t p; pmix_proc_t *procs; size_t nprocs; + pmix_pdata_t *pdata; + size_t npdata; pmix_proc_t *error_procs; size_t nerror_procs; pmix_info_t *info; size_t ninfo; pmix_app_t *apps; size_t sz; - volatile bool active; + opal_pmix_lock_t lock; + opal_list_t *codes; + pmix_status_t *pcodes; + size_t ncodes; + pmix_query_t *queries; + size_t nqueries; + opal_ext2x_event_t *event; opal_pmix_op_cbfunc_t opcbfunc; opal_pmix_modex_cbfunc_t mdxcbfunc; opal_pmix_value_cbfunc_t valcbfunc; opal_pmix_lookup_cbfunc_t lkcbfunc; opal_pmix_spawn_cbfunc_t spcbfunc; + opal_pmix_evhandler_reg_cbfunc_t evregcbfunc; + opal_pmix_info_cbfunc_t qcbfunc; void *cbdata; -} pmix2x_opcaddy_t; -OBJ_CLASS_DECLARATION(pmix2x_opcaddy_t); +} ext2x_opcaddy_t; +OBJ_CLASS_DECLARATION(ext2x_opcaddy_t); typedef struct { opal_object_t super; @@ -135,13 +126,15 @@ typedef struct { void *cbdata; opal_pmix_release_cbfunc_t odmdxfunc; void *ocbdata; -} pmix2x_opalcaddy_t; -OBJ_CLASS_DECLARATION(pmix2x_opalcaddy_t); +} ext2x_opalcaddy_t; +OBJ_CLASS_DECLARATION(ext2x_opalcaddy_t); typedef struct { opal_object_t super; opal_event_t ev; - volatile bool active; + opal_pmix_lock_t lock; + const char *msg; + char *strings; size_t id; int status; opal_process_name_t pname; @@ -150,174 +143,179 @@ typedef struct { opal_pmix_data_range_t range; bool nondefault; size_t handler; + opal_value_t *val; opal_list_t *event_codes; opal_list_t *info; + opal_list_t results; opal_pmix_notification_fn_t evhandler; opal_pmix_evhandler_reg_cbfunc_t cbfunc; opal_pmix_op_cbfunc_t opcbfunc; + pmix_event_notification_cbfunc_fn_t pmixcbfunc; + opal_pmix_value_cbfunc_t valcbfunc; + opal_pmix_lookup_cbfunc_t lkcbfunc; void *cbdata; -} pmix2x_threadshift_t; 
-OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); - -#define OPAL_PMIX_OPCD_THREADSHIFT(i, s, sr, if, nif, fn, cb, cd) \ - do { \ - pmix2x_opalcaddy_t *_cd; \ - _cd = OBJ_NEW(pmix2x_opalcaddy_t); \ - _cd->id = (i); \ - _cd->status = (s); \ - _cd->source = (sr); \ - _cd->info = (i); \ - _cd->evcbfunc = (cb); \ - _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ - } while(0) +} ext2x_threadshift_t; +OBJ_CLASS_DECLARATION(ext2x_threadshift_t); #define OPAL_PMIX_OP_THREADSHIFT(e, fn, cb, cd) \ do { \ - pmix2x_threadshift_t *_cd; \ - _cd = OBJ_NEW(pmix2x_threadshift_t); \ + ext2x_threadshift_t *_cd; \ + _cd = OBJ_NEW(ext2x_threadshift_t); \ _cd->handler = (e); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) #define OPAL_PMIX_THREADSHIFT(e, i, eh, fn, cb, cd) \ do { \ - pmix2x_threadshift_t *_cd; \ - _cd = OBJ_NEW(pmix2x_threadshift_t); \ + ext2x_threadshift_t *_cd; \ + _cd = OBJ_NEW(ext2x_threadshift_t); \ _cd->event_codes = (e); \ _cd->info = (i); \ _cd->evhandler = (eh); \ _cd->cbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) #define OPAL_PMIX_NOTIFY_THREADSHIFT(s, sr, r, i, fn, cb, cd) \ do { \ - pmix2x_threadshift_t *_cd; \ - _cd = OBJ_NEW(pmix2x_threadshift_t); \ + ext2x_threadshift_t *_cd; \ + _cd = OBJ_NEW(ext2x_threadshift_t); \ _cd->status = (s); \ _cd->source = 
(sr); \ _cd->range = (r); \ _cd->info = (i); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ + } while(0) + +#define OPAL_PMIX2X_THREADSHIFT(p, cb) \ + do { \ + opal_event_assign(&((p)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (cb), (p)); \ + OPAL_POST_OBJECT(p); \ + opal_event_active(&((p)->ev), EV_WRITE, 1); \ } while(0) /**** CLIENT FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_client_init(void); -OPAL_MODULE_DECLSPEC int pmix2x_client_finalize(void); -OPAL_MODULE_DECLSPEC int pmix2x_initialized(void); -OPAL_MODULE_DECLSPEC int pmix2x_abort(int flag, const char *msg, +OPAL_MODULE_DECLSPEC int ext2x_client_init(opal_list_t *ilist); +OPAL_MODULE_DECLSPEC int ext2x_client_finalize(void); +OPAL_MODULE_DECLSPEC int ext2x_initialized(void); +OPAL_MODULE_DECLSPEC int ext2x_abort(int flag, const char *msg, opal_list_t *procs); -OPAL_MODULE_DECLSPEC int pmix2x_commit(void); -OPAL_MODULE_DECLSPEC int pmix2x_fence(opal_list_t *procs, int collect_data); -OPAL_MODULE_DECLSPEC int pmix2x_fencenb(opal_list_t *procs, int collect_data, +OPAL_MODULE_DECLSPEC int ext2x_commit(void); +OPAL_MODULE_DECLSPEC int ext2x_fence(opal_list_t *procs, int collect_data); +OPAL_MODULE_DECLSPEC int ext2x_fencenb(opal_list_t *procs, int collect_data, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_put(opal_pmix_scope_t scope, +OPAL_MODULE_DECLSPEC int ext2x_put(opal_pmix_scope_t scope, opal_value_t *val); -OPAL_MODULE_DECLSPEC int pmix2x_get(const opal_process_name_t *proc, const char *key, +OPAL_MODULE_DECLSPEC int ext2x_get(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_value_t **val); -OPAL_MODULE_DECLSPEC int pmix2x_getnb(const opal_process_name_t *proc, const char 
*key, +OPAL_MODULE_DECLSPEC int ext2x_getnb(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_publish(opal_list_t *info); -OPAL_MODULE_DECLSPEC int pmix2x_publishnb(opal_list_t *info, +OPAL_MODULE_DECLSPEC int ext2x_publish(opal_list_t *info); +OPAL_MODULE_DECLSPEC int ext2x_publishnb(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_lookup(opal_list_t *data, opal_list_t *info); -OPAL_MODULE_DECLSPEC int pmix2x_lookupnb(char **keys, opal_list_t *info, +OPAL_MODULE_DECLSPEC int ext2x_lookup(opal_list_t *data, opal_list_t *info); +OPAL_MODULE_DECLSPEC int ext2x_lookupnb(char **keys, opal_list_t *info, opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_unpublish(char **keys, opal_list_t *info); -OPAL_MODULE_DECLSPEC int pmix2x_unpublishnb(char **keys, opal_list_t *info, +OPAL_MODULE_DECLSPEC int ext2x_unpublish(char **keys, opal_list_t *info); +OPAL_MODULE_DECLSPEC int ext2x_unpublishnb(char **keys, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid); -OPAL_MODULE_DECLSPEC int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, +OPAL_MODULE_DECLSPEC int ext2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid); +OPAL_MODULE_DECLSPEC int ext2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_connect(opal_list_t *procs); -OPAL_MODULE_DECLSPEC int pmix2x_connectnb(opal_list_t *procs, +OPAL_MODULE_DECLSPEC int ext2x_connect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int ext2x_connectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_disconnect(opal_list_t *procs); -OPAL_MODULE_DECLSPEC int pmix2x_disconnectnb(opal_list_t *procs, 
+OPAL_MODULE_DECLSPEC int ext2x_disconnect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int ext2x_disconnectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, +OPAL_MODULE_DECLSPEC int ext2x_resolve_peers(const char *nodename, opal_jobid_t jobid, opal_list_t *procs); -OPAL_MODULE_DECLSPEC int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist); +OPAL_MODULE_DECLSPEC int ext2x_resolve_nodes(opal_jobid_t jobid, char **nodelist); /**** COMMON FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_store_local(const opal_process_name_t *proc, +OPAL_MODULE_DECLSPEC int ext2x_store_local(const opal_process_name_t *proc, opal_value_t *val); /**** SERVER SOUTHBOUND FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_server_init(opal_pmix_server_module_t *module, +OPAL_MODULE_DECLSPEC int ext2x_server_init(opal_pmix_server_module_t *module, opal_list_t *info); -OPAL_MODULE_DECLSPEC int pmix2x_server_finalize(void); -OPAL_MODULE_DECLSPEC int pmix2x_server_gen_regex(const char *input, char **regex); -OPAL_MODULE_DECLSPEC int pmix2x_server_gen_ppn(const char *input, char **ppn); -OPAL_MODULE_DECLSPEC int pmix2x_server_register_nspace(opal_jobid_t jobid, +OPAL_MODULE_DECLSPEC int ext2x_server_finalize(void); +OPAL_MODULE_DECLSPEC int ext2x_server_gen_regex(const char *input, char **regex); +OPAL_MODULE_DECLSPEC int ext2x_server_gen_ppn(const char *input, char **ppn); +OPAL_MODULE_DECLSPEC int ext2x_server_register_nspace(opal_jobid_t jobid, int nlocalprocs, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix2x_server_deregister_nspace(opal_jobid_t jobid, +OPAL_MODULE_DECLSPEC void ext2x_server_deregister_nspace(opal_jobid_t jobid, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_server_register_client(const opal_process_name_t *proc, +OPAL_MODULE_DECLSPEC int ext2x_server_register_client(const 
opal_process_name_t *proc, uid_t uid, gid_t gid, void *server_object, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix2x_server_deregister_client(const opal_process_name_t *proc, +OPAL_MODULE_DECLSPEC void ext2x_server_deregister_client(const opal_process_name_t *proc, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env); -OPAL_MODULE_DECLSPEC int pmix2x_server_dmodex(const opal_process_name_t *proc, +OPAL_MODULE_DECLSPEC int ext2x_server_setup_fork(const opal_process_name_t *proc, char ***env); +OPAL_MODULE_DECLSPEC int ext2x_server_dmodex(const opal_process_name_t *proc, opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_server_notify_event(int status, +OPAL_MODULE_DECLSPEC int ext2x_server_notify_event(int status, const opal_process_name_t *source, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); /**** COMPONENT UTILITY FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC void pmix2x_event_hdlr(size_t evhdlr_registration_id, +OPAL_MODULE_DECLSPEC void ext2x_event_hdlr(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, pmix_info_t results[], size_t nresults, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC pmix_status_t pmix2x_convert_opalrc(int rc); -OPAL_MODULE_DECLSPEC int pmix2x_convert_rc(pmix_status_t rc); +OPAL_MODULE_DECLSPEC pmix_status_t ext2x_convert_opalrc(int rc); +OPAL_MODULE_DECLSPEC int ext2x_convert_rc(pmix_status_t rc); -OPAL_MODULE_DECLSPEC opal_vpid_t pmix2x_convert_rank(int rank); -OPAL_MODULE_DECLSPEC pmix_rank_t pmix2x_convert_opalrank(opal_vpid_t vpid); +OPAL_MODULE_DECLSPEC opal_vpid_t ext2x_convert_rank(pmix_rank_t rank); +OPAL_MODULE_DECLSPEC pmix_rank_t ext2x_convert_opalrank(opal_vpid_t vpid); -OPAL_MODULE_DECLSPEC opal_pmix_scope_t pmix2x_convert_scope(pmix_scope_t scope); -OPAL_MODULE_DECLSPEC 
pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t scope); +OPAL_MODULE_DECLSPEC opal_pmix_scope_t ext2x_convert_scope(pmix_scope_t scope); +OPAL_MODULE_DECLSPEC pmix_scope_t ext2x_convert_opalscope(opal_pmix_scope_t scope); -OPAL_MODULE_DECLSPEC pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range); -OPAL_MODULE_DECLSPEC opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range); +OPAL_MODULE_DECLSPEC pmix_data_range_t ext2x_convert_opalrange(opal_pmix_data_range_t range); +OPAL_MODULE_DECLSPEC opal_pmix_data_range_t ext2x_convert_range(pmix_data_range_t range); -OPAL_MODULE_DECLSPEC opal_pmix_persistence_t pmix2x_convert_persist(pmix_persistence_t scope); -OPAL_MODULE_DECLSPEC pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t scope); +OPAL_MODULE_DECLSPEC opal_pmix_persistence_t ext2x_convert_persist(pmix_persistence_t scope); +OPAL_MODULE_DECLSPEC pmix_persistence_t ext2x_convert_opalpersist(opal_pmix_persistence_t scope); -OPAL_MODULE_DECLSPEC void pmix2x_value_load(pmix_value_t *v, +OPAL_MODULE_DECLSPEC void ext2x_value_load(pmix_value_t *v, opal_value_t *kv); -OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv, +OPAL_MODULE_DECLSPEC int ext2x_value_unload(opal_value_t *kv, const pmix_value_t *v); +OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t ext2x_convert_allocdir(pmix_alloc_directive_t dir); + +OPAL_MODULE_DECLSPEC char* ext2x_convert_jobid(opal_jobid_t jobid); + END_C_DECLS #endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/ext2x/pmix2x_client.c b/opal/mca/pmix/ext2x/pmix2x_client.c index 1589af9ba61..0be3980abfa 100644 --- a/opal/mca/pmix/ext2x/pmix2x_client.c +++ b/opal/mca/pmix/ext2x/pmix2x_client.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. @@ -27,6 +27,7 @@ #endif #include "opal/hash_string.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/proc.h" @@ -36,44 +37,80 @@ static pmix_proc_t my_proc; static char *dbgvalue=NULL; -static size_t errhdler_ref = 0; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - } while (0) - static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { - errhdler_ref = errhandler_ref; + opal_ext2x_event_t *event = (opal_ext2x_event_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(event); + + event->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); + OPAL_POST_OBJECT(event); + OPAL_PMIX_WAKEUP_THREAD(&event->lock); } -int pmix2x_client_init(void) +int ext2x_client_init(opal_list_t *ilist) { opal_process_name_t pname; pmix_status_t rc; int dbg; - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_jobid_trkr_t *job; + opal_ext2x_event_t *event; + pmix_info_t *pinfo; + size_t ninfo, n; + opal_value_t *ival; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client init"); - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } } - rc = PMIx_Init(&my_proc, NULL, 0); + /* convert the incoming list to info structs */ + if (NULL != ilist) { + ninfo = opal_list_get_size(ilist); + if (0 
< ninfo) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&pinfo[n].value, ival); + ++n; + } + } else { + pinfo = NULL; + ninfo = 0; + } + } else { + pinfo = NULL; + ninfo = 0; + } + + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_Init(&my_proc, pinfo, ninfo); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, ninfo); + } if (PMIX_SUCCESS != rc) { - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); + } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + ++opal_pmix_base.initialized; + if (1 < opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; } /* store our jobid and rank */ @@ -89,183 +126,223 @@ int pmix2x_client_init(void) } /* insert this into our list of jobids - it will be the * first, and so we'll check it first */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN); job->jobid = pname.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); - pname.vpid = pmix2x_convert_rank(my_proc.rank); + pname.vpid = ext2x_convert_rank(my_proc.rank); opal_proc_set_name(&pname); + /* release the thread in case the event handler fires when + * registered */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* register the default event handler */ - PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, NULL); + event = OBJ_NEW(opal_ext2x_event_t); + opal_list_append(&mca_pmix_ext2x_component.events, &event->super); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-DEFAULT", PMIX_STRING); + PMIx_Register_event_handler(NULL, 0, pinfo, 1, ext2x_event_hdlr, errreg_cbfunc, event); /* hand over the handler-name directive built just above */ + OPAL_PMIX_WAIT_THREAD(&event->lock); + PMIX_INFO_FREE(pinfo, 1); + return OPAL_SUCCESS; } -int pmix2x_client_finalize(void) 
+static void dereg_cbfunc(pmix_status_t st, void *cbdata) +{ + opal_ext2x_event_t *ev = (opal_ext2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); +} + +int ext2x_client_finalize(void) { pmix_status_t rc; + opal_ext2x_event_t *event, *ev2; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client finalize"); - /* deregister the default event handler */ - PMIx_Deregister_event_handler(errhdler_ref, NULL, NULL); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 == opal_pmix_base.initialized) { /* only the last finalize tears the handlers down, mirroring the first-init-only registration */ + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_ext2x_component.events, opal_ext2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); + OBJ_RELEASE(event); + } + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Finalize(NULL, 0); - return pmix2x_convert_rc(rc); + + return ext2x_convert_rc(rc); } -int pmix2x_initialized(void) +int ext2x_initialized(void) { + int init; + opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client initialized"); - return PMIx_Initialized(); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + init = opal_pmix_base.initialized; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + return init; } -int pmix2x_abort(int flag, const char *msg, +int ext2x_abort(int flag, const char *msg, opal_list_t *procs) { pmix_status_t rc; pmix_proc_t *parray=NULL; size_t n, cnt=0; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client abort"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return 
OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); + parray[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } } - /* call the library abort */ + /* call the library abort - this is a blocking call */ rc = PMIx_Abort(flag, msg, parray, cnt); /* release the array */ PMIX_PROC_FREE(parray, cnt); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) +int ext2x_store_local(const opal_process_name_t *proc, opal_value_t *val) { pmix_value_t kv; pmix_status_t rc; pmix_proc_t p; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { 
- if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); + if (NULL == (nsptr = ext2x_convert_jobid(proc->jobid))) { return OPAL_ERR_NOT_FOUND; } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(proc->vpid); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = ext2x_convert_opalrank(proc->vpid); } else { /* use our name */ (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(OPAL_PROC_MY_NAME.vpid); + p.rank = ext2x_convert_opalrank(OPAL_PROC_MY_NAME.vpid); } PMIX_VALUE_CONSTRUCT(&kv); - pmix2x_value_load(&kv, val); + ext2x_value_load(&kv, val); + /* call the library - this is a blocking call */ rc = PMIx_Store_internal(&p, val->key, &kv); PMIX_VALUE_DESTRUCT(&kv); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_commit(void) +int ext2x_commit(void) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_Commit(); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } static void opcbfunc(pmix_status_t status, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); + op->opcbfunc(ext2x_convert_rc(status), op->cbdata); } OBJ_RELEASE(op); } -int pmix2x_fence(opal_list_t *procs, int collect_data) +int ext2x_fence(opal_list_t *procs, int collect_data) { pmix_status_t rc; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; opal_namelist_t *ptr; + char *nsptr; + size_t cnt = 0, n; /* cnt must start at 0: it is passed to PMIx_Fence even when procs is NULL */ + pmix_proc_t *parray = NULL; pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; opal_output_verbose(1, 
opal_pmix_base_framework.framework_output, "PMIx_client fence"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); + parray[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (collect_data) { PMIX_INFO_CONSTRUCT(&info); (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); @@ -278,35 +355,35 @@ int pmix2x_fence(opal_list_t *procs, int collect_data) n = 0; } - /* call the library function */ rc = PMIx_Fence(parray, cnt, iptr, n); - - /* release the array */ - PMIX_PROC_FREE(parray, cnt); - if (NULL != iptr) { + if (collect_data) { PMIX_INFO_DESTRUCT(&info); } + if (NULL != parray) { + PMIX_PROC_FREE(parray, cnt); + } - return pmix2x_convert_rc(rc); - + return ext2x_convert_rc(rc); } -int pmix2x_fencenb(opal_list_t *procs, int collect_data, +int ext2x_fencenb(opal_list_t *procs, int collect_data, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_status_t rc; pmix_proc_t *parray=NULL; size_t n, cnt=0; opal_namelist_t *ptr; - pmix2x_opcaddy_t *op; - 
pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + ext2x_opcaddy_t *op; + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client fence_nb"); + "PMIx_client fencenb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the list of procs to an array * of pmix_proc_t */ @@ -314,228 +391,235 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); + parray[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } } - - if (collect_data) { - PMIX_INFO_CONSTRUCT(&info); - (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); - info.value.type = PMIX_BOOL; - info.value.data.flag = true; - iptr = &info; - n = 1; - } else { - iptr = NULL; - n = 0; - } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; op->procs = parray; op->nprocs = cnt; - /* call the library function */ - rc = PMIx_Fence_nb(parray, cnt, iptr, n, 
opcbfunc, op); - if (PMIX_SUCCESS != rc) { - OBJ_RELEASE(op); + if (collect_data) { + op->ninfo = 1; + PMIX_INFO_CREATE(op->info, op->ninfo); + PMIX_INFO_LOAD(&op->info[0], PMIX_COLLECT_DATA, NULL, PMIX_BOOL); } - return pmix2x_convert_rc(rc); - + /* call the library function */ + rc = PMIx_Fence_nb(op->procs, op->nprocs, op->info, op->ninfo, opcbfunc, op); + return ext2x_convert_rc(rc); } -int pmix2x_put(opal_pmix_scope_t opal_scope, +int ext2x_put(opal_pmix_scope_t opal_scope, opal_value_t *val) { pmix_value_t kv; - pmix_scope_t pmix_scope = pmix2x_convert_opalscope(opal_scope); + pmix_scope_t pmix_scope = ext2x_convert_opalscope(opal_scope); pmix_status_t rc; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client put"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIX_VALUE_CONSTRUCT(&kv); - pmix2x_value_load(&kv, val); + ext2x_value_load(&kv, val); rc = PMIx_Put(pmix_scope, val->key, &kv); PMIX_VALUE_DESTRUCT(&kv); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_get(const opal_process_name_t *proc, const char *key, +int ext2x_get(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_value_t **val) { - int ret; - pmix_value_t *kv; pmix_status_t rc; - pmix_proc_t p, *pptr; - size_t ninfo, n; - pmix_info_t *pinfo; + pmix_proc_t p; + char *nsptr; + pmix_info_t *pinfo = NULL; + size_t sz = 0, n; opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_value_t *pval = NULL; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "%s PMIx_client get on proc %s key %s", + "%s ext2x:client get on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? 
"NULL" : OPAL_NAME_PRINT(*proc), key); - /* prep default response */ - *val = NULL; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(proc->vpid); - pptr = &p; - } else { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { /* if they are asking for our jobid, then return it */ if (0 == strcmp(key, OPAL_PMIX_JOBID)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_UINT32; (*val)->data.uint32 = OPAL_PROC_MY_NAME.jobid; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; - } else if (0 == strcmp(key, OPAL_PMIX_RANK)) { + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_INT; - (*val)->data.integer = pmix2x_convert_rank(my_proc.rank); + (*val)->data.integer = ext2x_convert_rank(my_proc.rank); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - pptr = NULL; } + *val = NULL; + + if (NULL == proc) { + (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + p.rank = ext2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = ext2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = ext2x_convert_opalrank(proc->vpid); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { - ninfo = opal_list_get_size(info); - if (0 < ninfo) { - PMIX_INFO_CREATE(pinfo, ninfo); + sz = 
opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); n=0; OPAL_LIST_FOREACH(ival, info, opal_value_t) { (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, ival); + ext2x_value_load(&pinfo[n].value, ival); ++n; } - } else { - pinfo = NULL; } - } else { - pinfo = NULL; - ninfo = 0; } - /* pass the request down */ - rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv); + rc = PMIx_Get(&p, key, pinfo, sz, &pval); if (PMIX_SUCCESS == rc) { - if (NULL == kv) { - ret = OPAL_SUCCESS; - } else { - *val = OBJ_NEW(opal_value_t); - ret = pmix2x_value_unload(*val, kv); - PMIX_VALUE_FREE(kv, 1); - } - } else { - ret = pmix2x_convert_rc(rc); + ival = OBJ_NEW(opal_value_t); + ext2x_value_unload(ival, pval); + *val = ival; + PMIX_VALUE_FREE(pval, 1); } - PMIX_INFO_FREE(pinfo, ninfo); - return ret; + PMIX_INFO_FREE(pinfo, sz); + + return ext2x_convert_rc(rc); } static void val_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; int rc; opal_value_t val, *v=NULL; - rc = pmix2x_convert_opalrc(status); + OPAL_ACQUIRE_OBJECT(op); + OBJ_CONSTRUCT(&val, opal_value_t); + rc = ext2x_convert_opalrc(status); if (PMIX_SUCCESS == status && NULL != kv) { - rc = pmix2x_value_unload(&val, kv); + rc = ext2x_value_unload(&val, kv); v = &val; } if (NULL != op->valcbfunc) { op->valcbfunc(rc, v, op->cbdata); } + OBJ_DESTRUCT(&val); OBJ_RELEASE(op); } -int pmix2x_getnb(const opal_process_name_t *proc, const char *key, +int ext2x_getnb(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; + opal_value_t *val; pmix_status_t rc; + char *nsptr; size_t n; - opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access shared 
lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, "%s PMIx_client get_nb on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { + /* if they are asking for our jobid, then return it */ + if (0 == strcmp(key, OPAL_PMIX_JOBID)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_UINT32; + val->data.uint32 = OPAL_PROC_MY_NAME.jobid; + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_INT; + val->data.integer = ext2x_convert_rank(my_proc.rank); + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + } + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->valcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == proc) { + (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + op->p.rank = ext2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = ext2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN); - op->p.rank = pmix2x_convert_opalrank(proc->vpid); - } else { - (void)strncpy(op->p.nspace, 
my_proc.nspace, PMIX_MAX_NSLEN); - op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); + (void)strncpy(op->p.nspace, nsptr, PMIX_MAX_NSLEN); + op->p.rank = ext2x_convert_opalrank(proc->vpid); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { op->sz = opal_list_get_size(info); if (0 < op->sz) { PMIX_INFO_CREATE(op->info, op->sz); n=0; - OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, ival); + OPAL_LIST_FOREACH(val, info, opal_value_t) { + (void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, val); ++n; } } @@ -547,10 +631,10 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key, OBJ_RELEASE(op); } - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_publish(opal_list_t *info) +int ext2x_publish(opal_list_t *info) { pmix_info_t *pinfo; pmix_status_t ret; @@ -560,6 +644,13 @@ int pmix2x_publish(opal_list_t *info) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } @@ -570,7 +661,7 @@ int pmix2x_publish(opal_list_t *info) n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); + ext2x_value_load(&pinfo[n].value, iptr); ++n; } } else { @@ -582,26 +673,33 @@ int pmix2x_publish(opal_list_t *info) PMIX_INFO_FREE(pinfo, sz); } - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_publishnb(opal_list_t *info, +int ext2x_publishnb(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_status_t ret; opal_value_t *iptr; size_t 
n; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish_nb"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; @@ -611,66 +709,64 @@ int pmix2x_publishnb(opal_list_t *info, n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, iptr); + ext2x_value_load(&op->info[n].value, iptr); ++n; } } ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op); - if (0 < op->sz) { - PMIX_INFO_FREE(op->info, op->sz); - } - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_lookup(opal_list_t *data, opal_list_t *info) +int ext2x_lookup(opal_list_t *data, opal_list_t *info) { - pmix_pdata_t *pdata; - pmix_info_t *pinfo; - size_t sz, ninfo, n; - int rc; - pmix_status_t ret; opal_pmix_pdata_t *d; + pmix_pdata_t *pdata; + pmix_info_t *pinfo = NULL; + pmix_status_t rc; + size_t cnt, n, sz; opal_value_t *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_ext2x_jobid_trkr_t *jptr, *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access shared lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup"); + "ext2x:client lookup"); - if (NULL == data) { - return OPAL_ERR_BAD_PARAM; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - sz = opal_list_get_size(data); - 
PMIX_PDATA_CREATE(pdata, sz); - n=0; + if (NULL == data || 0 == (cnt = opal_list_get_size(data))) { + return OPAL_ERR_BAD_PARAM; + } + PMIX_PDATA_CREATE(pdata, cnt); + n = 0; OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { - (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); + (void)strncpy(pdata[n].key, d->value.key, PMIX_MAX_KEYLEN); + ++n; } if (NULL != info) { - ninfo = opal_list_get_size(info); - PMIX_INFO_CREATE(pinfo, ninfo); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); - ++n; + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&pinfo[n].value, iptr); + ++n; + } } - } else { - pinfo = NULL; - ninfo = 0; } - ret = PMIx_Lookup(pdata, sz, pinfo, ninfo); - PMIX_INFO_FREE(pinfo, ninfo); - - if (PMIX_SUCCESS == ret) { - /* transfer the data back */ + rc = PMIx_Lookup(pdata, cnt, pinfo, sz); + if (PMIX_SUCCESS == rc) { + /* load the answers back into the list */ n=0; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { if (mca_pmix_ext2x_component.native_launch) { /* if we were launched by the OMPI RTE, then @@ -683,54 +779,51 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (jptr->jobid == d->proc.jobid) { job = jptr; break; } } if (NULL == job) { - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, pdata[n].proc.nspace, PMIX_MAX_NSLEN); job->jobid = d->proc.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, 
&job->super); } - d->proc.vpid = pmix2x_convert_rank(pdata[n].proc.rank); - rc = pmix2x_value_unload(&d->value, &pdata[n].value); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - PMIX_PDATA_FREE(pdata, sz); - return OPAL_ERR_BAD_PARAM; - } - ++n; + d->proc.vpid = ext2x_convert_rank(pdata[n].proc.rank); + ext2x_value_unload(&d->value, &pdata[n].value); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - - return pmix2x_convert_rc(ret); + PMIX_PDATA_FREE(pdata, cnt); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, sz); + } + return ext2x_convert_rc(rc); } static void lk_cbfunc(pmix_status_t status, pmix_pdata_t data[], size_t ndata, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; opal_pmix_pdata_t *d; opal_list_t results, *r = NULL; int rc; size_t n; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_ext2x_jobid_trkr_t *job, *jptr; - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + OPAL_ACQUIRE_OBJECT(op); if (NULL == op->lkcbfunc) { OBJ_RELEASE(op); return; } - rc = pmix2x_convert_rc(status); + rc = ext2x_convert_rc(op->status); if (OPAL_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OBJ_CONSTRUCT(&results, opal_list_t); for (n=0; n < ndata; n++) { d = OBJ_NEW(opal_pmix_pdata_t); @@ -746,30 +839,33 @@ static void lk_cbfunc(pmix_status_t status, } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (jptr->jobid == d->proc.jobid) { job = jptr; break; } } if (NULL == job) { - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, data[n].proc.nspace, PMIX_MAX_NSLEN); job->jobid = d->proc.jobid; 
opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); } - d->proc.vpid = pmix2x_convert_rank(data[n].proc.rank); + d->proc.vpid = ext2x_convert_rank(data[n].proc.rank); d->value.key = strdup(data[n].key); - rc = pmix2x_value_unload(&d->value, &data[n].value); + rc = ext2x_value_unload(&d->value, &data[n].value); if (OPAL_SUCCESS != rc) { rc = OPAL_ERR_BAD_PARAM; OPAL_ERROR_LOG(rc); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); goto release; } } r = &results; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } -release: + + release: /* execute the callback */ op->lkcbfunc(rc, r, op->cbdata); @@ -779,20 +875,27 @@ static void lk_cbfunc(pmix_status_t status, OBJ_RELEASE(op); } -int pmix2x_lookupnb(char **keys, opal_list_t *info, +int ext2x_lookupnb(char **keys, opal_list_t *info, opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) { pmix_status_t ret; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; opal_value_t *iptr; size_t n; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup_nb"); + "ext2x:client lookup_nb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->lkcbfunc = cbfunc; op->cbdata = cbdata; @@ -803,31 +906,37 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, iptr); + ext2x_value_load(&op->info[n].value, iptr); ++n; } } } - ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_unpublish(char **keys, opal_list_t *info) +int ext2x_unpublish(char **keys, opal_list_t *info) { pmix_status_t ret; size_t ninfo, 
n; pmix_info_t *pinfo; opal_value_t *iptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != info) { ninfo = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, ninfo); n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); + ext2x_value_load(&pinfo[n].value, iptr); ++n; } } else { @@ -838,19 +947,26 @@ int pmix2x_unpublish(char **keys, opal_list_t *info) ret = PMIx_Unpublish(keys, pinfo, ninfo); PMIX_INFO_FREE(pinfo, ninfo); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_unpublishnb(char **keys, opal_list_t *info, +int ext2x_unpublishnb(char **keys, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_status_t ret; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; opal_value_t *iptr; size_t n; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; @@ -861,7 +977,7 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, iptr); + ext2x_value_load(&op->info[n].value, iptr); ++n; } } @@ -869,26 +985,35 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, ret = PMIx_Unpublish_nb(keys, op->info, op->sz, opcbfunc, op); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) +int 
ext2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) { - pmix_status_t ret; - pmix_info_t *pinfo = NULL; + pmix_status_t rc; + pmix_info_t *info = NULL; pmix_app_t *papps; - size_t napps, n, m, ninfo = 0; - char nspace[PMIX_MAX_NSLEN+1]; - opal_value_t *info; + size_t ninfo, napps, n, m; + opal_value_t *ival; opal_pmix_app_t *app; - opal_pmix2x_jobid_trkr_t *job; + char nspace[PMIX_MAX_NSLEN+1]; + opal_ext2x_jobid_trkr_t *job; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + *jobid = OPAL_JOBID_INVALID; if (NULL != job_info && 0 < (ninfo = opal_list_get_size(job_info))) { - PMIX_INFO_CREATE(pinfo, ninfo); + PMIX_INFO_CREATE(info, ninfo); n=0; - OPAL_LIST_FOREACH(info, job_info, opal_value_t) { - (void)strncpy(pinfo[n].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, info); + OPAL_LIST_FOREACH(ival, job_info, opal_value_t) { + (void)strncpy(info[n].key, ival->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&info[n].value, ival); ++n; } } @@ -898,23 +1023,28 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { papps[n].cmd = strdup(app->cmd); - papps[n].argv = opal_argv_copy(app->argv); - papps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + papps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + papps[n].env = opal_argv_copy(app->env); + } papps[n].maxprocs = app->maxprocs; if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo); m=0; - OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { - (void)strncpy(papps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&papps[n].info[m].value, info); + OPAL_LIST_FOREACH(ival, &app->info, opal_value_t) { + 
(void)strncpy(papps[n].info[m].key, ival->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&papps[n].info[m].value, ival); ++m; } } ++n; } - ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); - if (PMIX_SUCCESS == ret) { + rc = PMIx_Spawn(info, ninfo, papps, napps, nspace); + if (PMIX_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_ext2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -925,33 +1055,30 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) OPAL_HASH_JOBID(nspace, *jobid); } /* add this to our jobid tracker */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = *jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - if (0 < ninfo) { - PMIX_INFO_FREE(pinfo, ninfo); - } - PMIX_APP_FREE(papps, napps); - - return pmix2x_convert_rc(ret); + return rc; } static void spcbfunc(pmix_status_t status, char *nspace, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + opal_ext2x_jobid_trkr_t *job; + opal_jobid_t jobid; int rc; - opal_jobid_t jobid=OPAL_JOBID_INVALID; - opal_pmix2x_jobid_trkr_t *job; - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + OPAL_ACQUIRE_OBJECT(op); - rc = pmix2x_convert_rc(status); + rc = ext2x_convert_rc(status); if (PMIX_SUCCESS == status) { + /* this is in the PMIx local thread - need to protect + * the framework-level data */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_ext2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -962,27 +1089,35 @@ static void spcbfunc(pmix_status_t 
status, OPAL_HASH_JOBID(nspace, jobid); } /* add this to our jobid tracker */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } op->spcbfunc(rc, jobid, op->cbdata); OBJ_RELEASE(op); } -int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, +int ext2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata) { pmix_status_t ret; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; size_t n, m; opal_value_t *info; opal_pmix_app_t *app; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->spcbfunc = cbfunc; op->cbdata = cbdata; @@ -991,7 +1126,7 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, n=0; OPAL_LIST_FOREACH(info, job_info, opal_value_t) { (void)strncpy(op->info[n].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, info); + ext2x_value_load(&op->info[n].value, info); ++n; } } @@ -1001,15 +1136,19 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { op->apps[n].cmd = strdup(app->cmd); - op->apps[n].argv = opal_argv_copy(app->argv); - op->apps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + op->apps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + op->apps[n].env = opal_argv_copy(app->env); + } op->apps[n].maxprocs = app->maxprocs; if (0 < (op->apps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(op->apps[n].info, op->apps[n].ninfo); m=0; OPAL_LIST_FOREACH(info, &app->info, 
opal_value_t) { (void)strncpy(op->apps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->apps[n].info[m].value, info); + ext2x_value_load(&op->apps[n].info[m].value, info); ++m; } } @@ -1018,215 +1157,236 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, ret = PMIx_Spawn_nb(op->info, op->ninfo, op->apps, op->sz, spcbfunc, op); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_connect(opal_list_t *procs) +int ext2x_connect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "ext2x:client connect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - PMIX_PROC_FREE(parray, cnt); + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = 
pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Connect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Connect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_connectnb(opal_list_t *procs, +int ext2x_connectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; + ext2x_opcaddy_t *op; opal_namelist_t *ptr; - pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "ext2x:client connect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = 
ext2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); + op->procs[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Connect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - - return pmix2x_convert_rc(ret); + if (PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } + return ext2x_convert_rc(ret); } -int pmix2x_disconnect(opal_list_t *procs) +int ext2x_disconnect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "ext2x:client disconnect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - parray[n].rank = 
pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Disconnect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Disconnect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_disconnectnb(opal_list_t *procs, +int ext2x_disconnectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; + ext2x_opcaddy_t *op; opal_namelist_t *ptr; - pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "ext2x:client disconnect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == 
(nsptr = ext2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); + op->procs[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Disconnect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - - return pmix2x_convert_rc(ret); + if (PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } + return ext2x_convert_rc(ret); } - -int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, +int ext2x_resolve_peers(const char *nodename, + opal_jobid_t jobid, opal_list_t *procs) { + pmix_status_t ret; char *nspace; pmix_proc_t *array=NULL; size_t nprocs, n; opal_namelist_t *nm; - int rc; - pmix_status_t ret; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_ext2x_jobid_trkr_t *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD == jobid) { - nspace = NULL; - } else { - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (OPAL_JOBID_WILDCARD != jobid) { + if (NULL == (nspace = ext2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - nspace = job->nspace; + } else { + nspace = NULL; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs); - rc = pmix2x_convert_rc(ret); if (NULL != array && 0 < nprocs) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); for (n=0; n < nprocs; n++) { nm = 
OBJ_NEW(opal_namelist_t); opal_list_append(procs, &nm->super); @@ -1240,53 +1400,38 @@ int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, OPAL_HASH_JOBID(array[n].nspace, nm->name.jobid); } /* if we don't already have it, add this to our jobid tracker */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == nm->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); - (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = jobid; + if (NULL == ext2x_convert_jobid(nm->name.jobid)) { + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); + (void)strncpy(job->nspace, array[n].nspace, PMIX_MAX_NSLEN); + job->jobid = nm->name.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); } - nm->name.vpid = pmix2x_convert_rank(array[n].rank); + nm->name.vpid = ext2x_convert_rank(array[n].rank); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } PMIX_PROC_FREE(array, nprocs); - - return rc; + return ext2x_convert_rc(ret); } -int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist) +int ext2x_resolve_nodes(opal_jobid_t jobid, char **nodelist) { pmix_status_t ret; - char *nspace=NULL; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD != jobid) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - nspace = job->nspace; + if (NULL == (nsptr = 
ext2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Resolve_nodes(nspace, nodelist); + ret = PMIx_Resolve_nodes(nsptr, nodelist); - return pmix2x_convert_rc(ret);; + return ext2x_convert_rc(ret); } diff --git a/opal/mca/pmix/ext2x/pmix2x_component.c b/opal/mca/pmix/ext2x/pmix2x_component.c index fb1af6a74a6..3860ab677be 100644 --- a/opal/mca/pmix/ext2x/pmix2x_component.c +++ b/opal/mca/pmix/ext2x/pmix2x_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. @@ -36,7 +36,6 @@ const char *opal_pmix_ext2x_component_version_string = static int external_open(void); static int external_close(void); static int external_component_query(mca_base_module_t **module, int *priority); -static int external_register(void); /* @@ -66,7 +65,6 @@ mca_pmix_ext2x_component_t mca_pmix_ext2x_component = { .mca_open_component = external_open, .mca_close_component = external_close, .mca_query_component = external_component_query, - .mca_register_component_params = external_register, }, /* Next the MCA v1.0.0 component meta data */ .base_data = { @@ -77,27 +75,12 @@ mca_pmix_ext2x_component_t mca_pmix_ext2x_component = { .native_launch = false }; -static int external_register(void) -{ - mca_pmix_ext2x_component.cache_size = 256; - mca_base_component_var_register(&mca_pmix_ext2x_component.super.base_version, - "cache_size", "Size of the ring buffer cache for events", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &mca_pmix_ext2x_component.cache_size); - - return OPAL_SUCCESS; -} - - static int external_open(void) { mca_pmix_ext2x_component.evindex = 0; 
OBJ_CONSTRUCT(&mca_pmix_ext2x_component.jobids, opal_list_t); - OBJ_CONSTRUCT(&mca_pmix_ext2x_component.single_events, opal_list_t); - OBJ_CONSTRUCT(&mca_pmix_ext2x_component.multi_events, opal_list_t); - OBJ_CONSTRUCT(&mca_pmix_ext2x_component.default_events, opal_list_t); - OBJ_CONSTRUCT(&mca_pmix_ext2x_component.cache, opal_list_t); + OBJ_CONSTRUCT(&mca_pmix_ext2x_component.events, opal_list_t); + OBJ_CONSTRUCT(&mca_pmix_ext2x_component.dmdx, opal_list_t); return OPAL_SUCCESS; } @@ -105,10 +88,8 @@ static int external_open(void) static int external_close(void) { OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.jobids); - OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.single_events); - OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.multi_events); - OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.default_events); - OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.cache); + OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.events); + OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.dmdx); return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/ext2x/pmix2x_server_north.c b/opal/mca/pmix/ext2x/pmix2x_server_north.c index df23ab27203..f98275f6be8 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_north.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_north.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. 
@@ -29,6 +29,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -45,63 +46,73 @@ /* These are the interfaces used by the embedded PMIx server * to call up into ORTE for service requests */ - static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, - int status, const char msg[], - pmix_proc_t procs[], size_t nprocs, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - char *data, size_t ndata, - pmix_modex_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, - const pmix_info_t info[], size_t ninfo, - pmix_modex_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_publish_fn(const pmix_proc_t *proc, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, +static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, const 
pmix_info_t info[], size_t ninfo, - pmix_lookup_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys, + char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_publish_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, - const pmix_info_t job_info[], size_t ninfo, - const pmix_app_t apps[], size_t napps, - pmix_spawn_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_register_events(pmix_status_t *codes, 
size_t ncodes, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_notify_event(pmix_status_t code, - const pmix_proc_t *source, - pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_query(pmix_proc_t *proct, - pmix_query_t *queryies, size_t nqueries, - pmix_info_cbfunc_t cbfunc, +static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_notify_event(pmix_status_t code, + const pmix_proc_t *source, + pmix_data_range_t range, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_query(pmix_proc_t *proct, + pmix_query_t *queryies, size_t nqueries, + pmix_info_cbfunc_t cbfunc, + void *cbdata); +static void server_tool_connection(pmix_info_t *info, size_t ninfo, + pmix_tool_connection_cbfunc_t cbfunc, void *cbdata); - static void server_tool_connection(pmix_info_t *info, size_t ninfo, - pmix_tool_connection_cbfunc_t cbfunc, - void *cbdata); static void server_log(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, void *cbdata); - pmix_server_module_t mymodule = { +static pmix_status_t server_allocate(const pmix_proc_t *client, + pmix_alloc_directive_t directive, + const pmix_info_t data[], size_t ndata, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +static pmix_status_t server_job_control(const pmix_proc_t *requestor, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t 
ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +pmix_server_module_t mymodule = { .client_connected = server_client_connected_fn, .client_finalized = server_client_finalized_fn, .abort = server_abort_fn, @@ -118,7 +129,11 @@ static void server_log(const pmix_proc_t *client, .notify_event = server_notify_event, .query = server_query, .tool_connected = server_tool_connection, - .log = server_log + .log = server_log, + .allocate = server_allocate, + .job_control = server_job_control + /* we do not support monitoring, but use the + * PMIx internal monitoring capability */ }; opal_pmix_server_module_t *host_module = NULL; @@ -126,10 +141,11 @@ opal_pmix_server_module_t *host_module = NULL; static void opal_opcbfunc(int status, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(opalcaddy); if (NULL != opalcaddy->opcbfunc) { - opalcaddy->opcbfunc(pmix2x_convert_opalrc(status), opalcaddy->cbdata); + opalcaddy->opcbfunc(ext2x_convert_opalrc(status), opalcaddy->cbdata); } OBJ_RELEASE(opalcaddy); } @@ -139,33 +155,33 @@ static pmix_status_t server_client_connected_fn(const pmix_proc_t *p, void *serv { int rc; opal_process_name_t proc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; if (NULL == host_module || NULL == host_module->client_connected) { return PMIX_SUCCESS; } - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); /* pass it up */ rc = host_module->client_connected(&proc, server_object, opal_opcbfunc, opalcaddy); - return pmix2x_convert_opalrc(rc); + 
return ext2x_convert_opalrc(rc); } static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; if (NULL == host_module || NULL == host_module->client_finalized) { @@ -174,21 +190,25 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* pass it up */ + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s FINALIZED", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); rc = host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, @@ -200,7 +220,7 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, opal_namelist_t *nm; opal_process_name_t proc; int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; if (NULL == host_module || NULL == host_module->abort) { return PMIX_ERR_NOT_SUPPORTED; @@ -208,12 +228,17 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - 
proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED ABORT", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -223,9 +248,9 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - nm->name.vpid = pmix2x_convert_rank(procs[n].rank); + nm->name.vpid = ext2x_convert_rank(procs[n].rank); } /* pass it up */ @@ -234,12 +259,12 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void _data_release(void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; if (NULL != opalcaddy->odmdxfunc) { opalcaddy->odmdxfunc(opalcaddy->ocbdata); @@ -251,14 +276,24 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata opal_pmix_release_cbfunc_t relcbfunc, void *relcbdata) { pmix_status_t rc; - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; + opal_ext2x_dmx_trkr_t *dmdx; - rc = pmix2x_convert_rc(status); + rc = ext2x_convert_rc(status); if (NULL != opalcaddy->mdxcbfunc) { opalcaddy->odmdxfunc = relcbfunc; opalcaddy->ocbdata = relcbdata; opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata, _data_release, opalcaddy); + /* if we were collecting all data, then check for any pending + 
* dmodx requests that we cached and notify them that the + * data has arrived */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + while (NULL != (dmdx = (opal_ext2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_ext2x_component.dmdx))) { + dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL); + OBJ_RELEASE(dmdx); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } else { OBJ_RELEASE(opalcaddy); } @@ -269,18 +304,20 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, char *data, size_t ndata, pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; size_t n; opal_namelist_t *nm; opal_value_t *iptr; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s FENCE CALLED", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL == host_module || NULL == host_module->fence_nb) { return PMIX_ERR_NOT_SUPPORTED; } - /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->mdxcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -290,9 +327,9 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - nm->name.vpid = pmix2x_convert_rank(procs[n].rank); + nm->name.vpid = ext2x_convert_rank(procs[n].rank); } /* convert the array of pmix_info_t to the list of info */ @@ -300,9 +337,9 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, iptr = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OBJ_RELEASE(opalcaddy); - 
return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -312,7 +349,7 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, @@ -320,10 +357,11 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, pmix_modex_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_value_t *iptr; size_t n; + opal_ext2x_dmx_trkr_t *dmdx; if (NULL == host_module || NULL == host_module->direct_modex) { return PMIX_ERR_NOT_SUPPORTED; @@ -331,23 +369,45 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED DMODX", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->mdxcbfunc = cbfunc; opalcaddy->cbdata = cbdata; + /* this function should only get called if we are in an async modex. + * If we are also collecting data, then the fence_nb will eventually + * complete and return all the required data down to the pmix + * server beneath us. 
Thus, we only need to track the dmodex_req + * and ensure that the release gets called once the data has + * arrived - this will trigger the pmix server to tell the + * client that the data is available */ + if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + dmdx = OBJ_NEW(opal_ext2x_dmx_trkr_t); + dmdx->cbfunc = cbfunc; + dmdx->cbdata = cbdata; + opal_list_append(&mca_pmix_ext2x_component.dmdx, &dmdx->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return PMIX_SUCCESS; + } + /* convert the array of pmix_info_t to the list of info */ for (n=0; n < ninfo; n++) { iptr = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -359,7 +419,7 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, if (OPAL_ERR_IN_PROCESS == rc) { rc = OPAL_SUCCESS; } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_publish_fn(const pmix_proc_t *p, @@ -368,7 +428,7 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, { int rc; size_t n; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_value_t *oinfo; @@ -378,12 +438,17 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED PUBLISH", + 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -392,9 +457,9 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -404,21 +469,21 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void opal_lkupcbfunc(int status, opal_list_t *data, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; pmix_status_t rc; pmix_pdata_t *d=NULL; size_t nd=0, n; opal_pmix_pdata_t *p; if (NULL != opalcaddy->lkupcbfunc) { - rc = pmix2x_convert_opalrc(status); + rc = ext2x_convert_opalrc(status); /* convert any returned data */ if (NULL != data) { nd = opal_list_get_size(data); @@ -427,9 +492,9 @@ static void opal_lkupcbfunc(int status, OPAL_LIST_FOREACH(p, data, opal_pmix_pdata_t) { /* convert the jobid */ (void)opal_snprintf_jobid(d[n].proc.nspace, PMIX_MAX_NSLEN, p->proc.jobid); - d[n].proc.rank = pmix2x_convert_opalrank(p->proc.vpid); + d[n].proc.rank = ext2x_convert_opalrank(p->proc.vpid); (void)strncpy(d[n].key, p->value.key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&d[n].value, &p->value); + ext2x_value_load(&d[n].value, &p->value); } } opalcaddy->lkupcbfunc(rc, d, nd, opalcaddy->cbdata); @@ -443,7 +508,7 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, pmix_lookup_cbfunc_t cbfunc, void *cbdata) { int rc; - 
pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_value_t *iptr; size_t n; @@ -454,12 +519,17 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED LOOKUP", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->lkupcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -468,9 +538,9 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, iptr = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -480,7 +550,7 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } @@ -489,7 +559,7 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_value_t *iptr; size_t n; @@ -500,12 +570,17 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - 
return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED UNPUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -514,9 +589,9 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, iptr = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -526,17 +601,17 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void opal_spncbfunc(int status, opal_jobid_t jobid, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; pmix_status_t rc; char nspace[PMIX_MAX_NSLEN]; if (NULL != opalcaddy->spwncbfunc) { - rc = pmix2x_convert_opalrc(status); + rc = ext2x_convert_opalrc(status); /* convert the jobid */ (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); opalcaddy->spwncbfunc(rc, nspace, opalcaddy->cbdata); @@ -549,7 +624,7 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, const pmix_app_t apps[], size_t napps, pmix_spawn_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_pmix_app_t *app; opal_value_t *oinfo; @@ -562,12 +637,12 @@ static pmix_status_t server_spawn_fn(const 
pmix_proc_t *p, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->spwncbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -576,9 +651,9 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(job_info[k].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &job_info[k].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &job_info[k].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -600,9 +675,9 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&app->info, &oinfo->super); oinfo->key = strdup(apps[n].info[k].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &apps[n].info[k].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &apps[n].info[k].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } } @@ -614,7 +689,7 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } @@ -623,7 +698,7 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_namelist_t *nm; size_t n; opal_value_t *oinfo; @@ -633,7 +708,7 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, } /* setup the caddy */ - opalcaddy = 
OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -643,9 +718,9 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - nm->name.vpid = pmix2x_convert_rank(procs[n].rank); + nm->name.vpid = ext2x_convert_rank(procs[n].rank); } /* convert the info */ @@ -653,9 +728,9 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -665,7 +740,7 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } @@ -674,7 +749,7 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_namelist_t *nm; size_t n; opal_value_t *oinfo; @@ -684,7 +759,7 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -694,9 +769,9 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = 
opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - nm->name.vpid = pmix2x_convert_rank(procs[n].rank); + nm->name.vpid = ext2x_convert_rank(procs[n].rank); } /* convert the info */ @@ -704,9 +779,9 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -716,20 +791,24 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; size_t n; opal_value_t *oinfo; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s REGISTER EVENTS", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -738,9 +817,9 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ 
-750,12 +829,15 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, pmix_op_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s DEREGISTER EVENTS", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + return PMIX_ERR_NOT_SUPPORTED; } @@ -765,7 +847,7 @@ static pmix_status_t server_notify_event(pmix_status_t code, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t src; size_t n; opal_value_t *oinfo; @@ -776,19 +858,24 @@ static pmix_status_t server_notify_event(pmix_status_t code, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the code */ - status = pmix2x_convert_rc(code); + status = ext2x_convert_rc(code); /* convert the source */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&src.jobid, source->nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - src.vpid = pmix2x_convert_rank(source->rank); + src.vpid = ext2x_convert_rank(source->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED NOTIFY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(src)); /* ignore the range for now */ @@ -797,9 +884,9 @@ static pmix_status_t server_notify_event(pmix_status_t code, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return 
pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -808,12 +895,12 @@ static pmix_status_t server_notify_event(pmix_status_t code, opal_opcbfunc, opalcaddy))) { OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void _info_rel(void *cbdata) { - pmix2x_opcaddy_t *pcaddy = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *pcaddy = (ext2x_opcaddy_t*)cbdata; OBJ_RELEASE(pcaddy); } @@ -823,15 +910,15 @@ static void info_cbfunc(int status, opal_pmix_release_cbfunc_t release_fn, void *release_cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; - pmix2x_opcaddy_t *pcaddy; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; + ext2x_opcaddy_t *pcaddy; opal_value_t *kv; size_t n; - pcaddy = OBJ_NEW(pmix2x_opcaddy_t); + pcaddy = OBJ_NEW(ext2x_opcaddy_t); /* convert the status */ - pcaddy->status = pmix2x_convert_opalrc(status); + pcaddy->status = ext2x_convert_opalrc(status); /* convert the list to a pmix_info_t array */ if (NULL != info) { @@ -841,7 +928,7 @@ static void info_cbfunc(int status, n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pcaddy->info[n].value, kv); + ext2x_value_load(&pcaddy->info[n].value, kv); } } } @@ -863,7 +950,7 @@ static pmix_status_t server_query(pmix_proc_t *proct, pmix_info_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t requestor; int rc; size_t n, m; @@ -875,16 +962,21 @@ static pmix_status_t server_query(pmix_proc_t *proct, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->infocbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the requestor */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return 
ext2x_convert_opalrc(rc); } - requestor.vpid = pmix2x_convert_rank(proct->rank); + requestor.vpid = ext2x_convert_rank(proct->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED QUERY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(requestor)); /* convert the queries */ for (n=0; n < nqueries; n++) { @@ -897,9 +989,9 @@ static pmix_status_t server_query(pmix_proc_t *proct, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&q->qualifiers, &oinfo->super); oinfo->key = strdup(queries[n].qualifiers[m].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &queries[n].qualifiers[m].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &queries[n].qualifiers[m].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } } @@ -911,28 +1003,28 @@ static pmix_status_t server_query(pmix_proc_t *proct, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void toolcbfunc(int status, opal_process_name_t proc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; pmix_status_t rc; pmix_proc_t p; - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_jobid_trkr_t *job; /* convert the status */ - rc = pmix2x_convert_opalrc(status); + rc = ext2x_convert_opalrc(status); memset(&p, 0, sizeof(pmix_proc_t)); if (OPAL_SUCCESS == status) { /* convert the process name */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc.jobid); - p.rank = pmix2x_convert_opalrank(proc.vpid); + p.rank = ext2x_convert_opalrank(proc.vpid); /* store this job in our list of known nspaces */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, p.nspace, PMIX_MAX_NSLEN); job->jobid = proc.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); @@ -949,14 +1041,14 @@ static void 
server_tool_connection(pmix_info_t *info, size_t ninfo, pmix_tool_connection_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; size_t n; opal_value_t *oinfo; int rc; pmix_status_t err; /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->toolcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -965,9 +1057,9 @@ static void server_tool_connection(pmix_info_t *info, size_t ninfo, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - err = pmix2x_convert_opalrc(rc); + err = ext2x_convert_opalrc(rc); if (NULL != cbfunc) { cbfunc(err, NULL, cbdata); } @@ -983,7 +1075,7 @@ static void server_log(const pmix_proc_t *proct, const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t requestor; int rc; size_t n; @@ -998,30 +1090,31 @@ static void server_log(const pmix_proc_t *proct, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the requestor */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { OBJ_RELEASE(opalcaddy); - ret = pmix2x_convert_opalrc(rc); + ret = ext2x_convert_opalrc(rc); if (NULL != cbfunc) { cbfunc(ret, cbdata); } return; } - requestor.vpid = pmix2x_convert_rank(proct->rank); + requestor.vpid = ext2x_convert_rank(proct->rank); /* convert the data */ for (n=0; n < ndata; n++) { oinfo = OBJ_NEW(opal_value_t); + oinfo->key = strdup(data[n].key); /* we "borrow" the info field of the caddy as we and the * server function both agree on what will 
be there */ opal_list_append(&opalcaddy->info, &oinfo->super); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &data[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &data[n].value))) { OBJ_RELEASE(opalcaddy); - ret = pmix2x_convert_opalrc(rc); + ret = ext2x_convert_opalrc(rc); if (NULL != cbfunc) { cbfunc(ret, cbdata); } @@ -1035,9 +1128,9 @@ static void server_log(const pmix_proc_t *proct, /* we "borrow" the apps field of the caddy as we and the * server function both agree on what will be there */ opal_list_append(&opalcaddy->apps, &oinfo->super); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &directives[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &directives[n].value))) { OBJ_RELEASE(opalcaddy); - ret = pmix2x_convert_opalrc(rc); + ret = ext2x_convert_opalrc(rc); if (NULL != cbfunc) { cbfunc(ret, cbdata); } @@ -1051,3 +1144,117 @@ static void server_log(const pmix_proc_t *proct, &opalcaddy->apps, opal_opcbfunc, opalcaddy); } + +static pmix_status_t server_allocate(const pmix_proc_t *proct, + pmix_alloc_directive_t directive, + const pmix_info_t data[], size_t ndata, + pmix_info_cbfunc_t cbfunc, void *cbdata) +{ + ext2x_opalcaddy_t *opalcaddy; + opal_process_name_t requestor; + int rc; + size_t n; + opal_value_t *oinfo; + opal_pmix_alloc_directive_t odir; + + if (NULL == host_module || NULL == host_module->allocate) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); + opalcaddy->infocbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the requestor */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { + OBJ_RELEASE(opalcaddy); + return ext2x_convert_opalrc(rc); + } + requestor.vpid = ext2x_convert_rank(proct->rank); + + /* convert the directive */ + odir = ext2x_convert_allocdir(directive); + + /* convert the data */ + for (n=0; n < ndata; n++) { + oinfo = OBJ_NEW(opal_value_t); + 
opal_list_append(&opalcaddy->info, &oinfo->super); + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &data[n].value))) { + OBJ_RELEASE(opalcaddy); + return ext2x_convert_opalrc(rc); + } + } + + /* pass the call upwards */ + if (OPAL_SUCCESS != (rc = host_module->allocate(&requestor, odir, + &opalcaddy->info, + info_cbfunc, opalcaddy))) { + OBJ_RELEASE(opalcaddy); + return ext2x_convert_opalrc(rc); + } + + return PMIX_SUCCESS; + +} + +static pmix_status_t server_job_control(const pmix_proc_t *proct, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata) +{ + ext2x_opalcaddy_t *opalcaddy; + opal_process_name_t requestor; + int rc; + size_t n; + opal_value_t *oinfo; + opal_namelist_t *nm; + + if (NULL == host_module || NULL == host_module->job_control) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); + opalcaddy->infocbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the requestor */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { + OBJ_RELEASE(opalcaddy); + return ext2x_convert_opalrc(rc); + } + requestor.vpid = ext2x_convert_rank(proct->rank); + + /* convert the targets */ + for (n=0; n < ntargets; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, targets[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return ext2x_convert_opalrc(rc); + } + nm->name.vpid = ext2x_convert_rank(targets[n].rank); + } + + /* convert the directives */ + for (n=0; n < ndirs; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &directives[n].value))) { + OBJ_RELEASE(opalcaddy); + return ext2x_convert_opalrc(rc); + } + } + + /* pass the call upwards */ + if (OPAL_SUCCESS != (rc = 
host_module->job_control(&requestor, + &opalcaddy->procs, + &opalcaddy->info, + info_cbfunc, opalcaddy))) { + OBJ_RELEASE(opalcaddy); + return ext2x_convert_opalrc(rc); + } + + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/ext2x/pmix2x_server_south.c b/opal/mca/pmix/ext2x/pmix2x_server_south.c index 187fb81394a..dfa99695bf9 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_south.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_south.c @@ -1,12 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +32,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -49,51 +52,43 @@ extern pmix_server_module_t mymodule; extern opal_pmix_server_module_t *host_module; static char *dbgvalue=NULL; -static size_t errhdler_ref = 0; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - } while (0) static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_ext2x_event_t *ev = (opal_ext2x_event_t*)cbdata; - errhdler_ref = errhandler_ref; + OPAL_ACQUIRE_OBJECT(ev); + ev->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); - *active = false; + OPAL_POST_OBJECT(ev); + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } static void opcbfunc(pmix_status_t status, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(op); if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); - } - if (op->active) { - op->status = status; - op->active = false; - } else { - OBJ_RELEASE(op); + op->opcbfunc(ext2x_convert_rc(status), op->cbdata); } + OBJ_RELEASE(op); } -static void op2cbfunc(pmix_status_t status, void *cbdata) +static void lkcbfunc(pmix_status_t status, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata; - *active = false; + OPAL_POST_OBJECT(lk); + OPAL_PMIX_WAKEUP_THREAD(lk); } -int pmix2x_server_init(opal_pmix_server_module_t *module, +int ext2x_server_init(opal_pmix_server_module_t *module, 
opal_list_t *info) { pmix_status_t rc; @@ -101,13 +96,19 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, opal_value_t *kv; pmix_info_t *pinfo; size_t sz, n; - volatile bool active; - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_event_t *event; + opal_ext2x_jobid_trkr_t *job; + opal_pmix_lock_t lk; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + if (0 == opal_pmix_base.initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } } + ++opal_pmix_base.initialized; /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -116,7 +117,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, kv); + ext2x_value_load(&pinfo[n].value, kv); ++n; } } else { @@ -126,14 +127,15 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, /* insert ourselves into our list of jobids - it will be the * first, and so we'll check it first */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)opal_snprintf_jobid(job->nspace, PMIX_MAX_NSLEN, OPAL_PROC_MY_NAME.jobid); job->jobid = OPAL_PROC_MY_NAME.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { PMIX_INFO_FREE(pinfo, sz); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } PMIX_INFO_FREE(pinfo, sz); @@ -141,86 +143,127 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, host_module = module; /* register the default event handler */ - active = true; - PMIx_Register_event_handler(NULL, 0, NULL, 
0, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + event = OBJ_NEW(opal_ext2x_event_t); + opal_list_append(&mca_pmix_ext2x_component.events, &event->super); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-SERVER-DEFAULT", PMIX_STRING); + PMIx_Register_event_handler(NULL, 0, pinfo, 1, ext2x_event_hdlr, errreg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + PMIX_INFO_FREE(pinfo, 1); /* as we might want to use some client-side functions, be sure * to register our own nspace */ - active = true; - PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + OPAL_PMIX_CONSTRUCT_LOCK(&lk); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL); + PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, lkcbfunc, (void*)&lk); + OPAL_PMIX_WAIT_THREAD(&lk); + OPAL_PMIX_DESTRUCT_LOCK(&lk); + PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } -static void fincb(pmix_status_t status, void *cbdata) +static void dereg_cbfunc(pmix_status_t st, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; - *active = false; + opal_ext2x_event_t *ev = (opal_ext2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } -int pmix2x_server_finalize(void) +int ext2x_server_finalize(void) { pmix_status_t rc; - volatile bool active; - - /* deregister the default event handler */ - active = true; - PMIx_Deregister_event_handler(errhdler_ref, fincb, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + opal_ext2x_event_t *event, *ev2; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_ext2x_component.events, opal_ext2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + 
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); + OBJ_RELEASE(event); + } + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_server_finalize(); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_server_gen_regex(const char *input, char **regex) +int ext2x_server_gen_regex(const char *input, char **regex) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_regex(input, regex); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_server_gen_ppn(const char *input, char **ppn) +int ext2x_server_gen_ppn(const char *input, char **ppn) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_ppn(input, ppn); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -static void _reg_nspace(int sd, short args, void *cbdata) +int ext2x_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_value_t *kv, *k2; pmix_info_t *pinfo = NULL, *pmap; size_t sz, szmap, m, n; char nspace[PMIX_MAX_NSLEN]; pmix_status_t rc; opal_list_t *pmapinfo; - opal_pmix2x_jobid_trkr_t *job; - pmix2x_opcaddy_t op; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_ext2x_jobid_trkr_t *job; + opal_pmix_lock_t lock; + int ret; + + 
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the jobid */ - (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, cd->jobid); + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); /* store this job in our list of known nspaces */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = cd->jobid; + job->jobid = jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ - if (NULL != cd->info) { - sz = opal_list_get_size(cd->info); + if (NULL != info) { + sz = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, sz); n = 0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) { pinfo[n].value.type = PMIX_DATA_ARRAY; @@ -236,12 +279,12 @@ static void _reg_nspace(int sd, short args, void *cbdata) m = 0; OPAL_LIST_FOREACH(k2, pmapinfo, opal_value_t) { (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pmap[m].value, k2); + ext2x_value_load(&pmap[m].value, k2); ++m; } OPAL_LIST_RELEASE(pmapinfo); } else { - pmix2x_value_load(&pinfo[n].value, kv); + ext2x_value_load(&pinfo[n].value, kv); } ++n; } @@ -250,114 +293,67 @@ static void _reg_nspace(int sd, short args, void *cbdata) pinfo = NULL; } - OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; - rc = PMIx_server_register_nspace(nspace, cd->status, pinfo, sz, - opcbfunc, (void*)&op); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz, + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - } else { - op.status = rc; - 
} - /* ensure we execute the cbfunc so the caller doesn't hang */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(op.status), cd->cbdata); + OPAL_PMIX_WAIT_THREAD(&lock); } + OPAL_PMIX_DESTRUCT_LOCK(&lock); + if (NULL != pinfo) { PMIX_INFO_FREE(pinfo, sz); } - OBJ_DESTRUCT(&op); - OBJ_RELEASE(cd); -} - -int pmix2x_server_register_nspace(opal_jobid_t jobid, - int nlocalprocs, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->status = nlocalprocs; - cd->info = info; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - /* if the cbfunc is NULL, then the caller is in an event - * and we can directly call the processing function */ - if (NULL == cbfunc) { - _reg_nspace(0, 0, cd); - } else { - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _reg_nspace, cd); - event_active(&cd->ev, EV_WRITE, 1); - } - - return OPAL_SUCCESS; -} -static void tdcbfunc(pmix_status_t status, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + ret = ext2x_convert_rc(rc); - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(status), cd->cbdata); - } - if (cd->active) { - cd->active = false; - } else { - OBJ_RELEASE(cd); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(ret, cbdata); } + return ret; } -static void _dereg_nspace(int sd, short args, void *cbdata) +void ext2x_server_deregister_nspace(opal_jobid_t jobid, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_jobid_trkr_t *jptr; + opal_ext2x_jobid_trkr_t *jptr; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + 
cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } /* if we don't already have it, we can ignore this */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->jobid) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { + if (jptr->jobid == jobid) { /* found it - tell the server to deregister */ - cd->active = true; - PMIx_server_deregister_nspace(jptr->nspace, tdcbfunc, cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); - OBJ_RELEASE(cd); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_nspace(jptr->nspace, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); /* now get rid of it from our list */ opal_list_remove_item(&mca_pmix_ext2x_component.jobids, &jptr->super); OBJ_RELEASE(jptr); - return; + break; } } - /* must release the caller */ - tdcbfunc(PMIX_ERR_NOT_FOUND, cd); -} -void pmix2x_server_deregister_nspace(opal_jobid_t jobid, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_nspace(0, 0, cd); - } else { - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_nspace, cd); - event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } -int pmix2x_server_register_client(const opal_process_name_t *proc, +int ext2x_server_register_client(const opal_process_name_t *proc, uid_t uid, gid_t gid, void *server_object, opal_pmix_op_cbfunc_t cbfunc, @@ -365,80 +361,86 @@ int pmix2x_server_register_client(const opal_process_name_t *proc, { pmix_status_t rc; pmix_proc_t p; - pmix2x_opcaddy_t op; + opal_pmix_lock_t lock; + + 
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); - p.rank = pmix2x_convert_opalrank(proc->vpid); + p.rank = ext2x_convert_opalrank(proc->vpid); - OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; + OPAL_PMIX_CONSTRUCT_LOCK(&lock); rc = PMIx_server_register_client(&p, uid, gid, server_object, - opcbfunc, (void*)&op); + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - rc = op.status; + OPAL_PMIX_WAIT_THREAD(&lock); } - OBJ_DESTRUCT(&op); - return pmix2x_convert_rc(rc); + OPAL_PMIX_DESTRUCT_LOCK(&lock); + return ext2x_convert_rc(rc); } -static void _dereg_client(int sd, short args, void *cbdata) +/* tell the local PMIx server to cleanup this client as it is + * done executing */ +void ext2x_server_deregister_client(const opal_process_name_t *proc, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_jobid_trkr_t *jptr; + opal_ext2x_jobid_trkr_t *jptr; pmix_proc_t p; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } /* if we don't already have it, we can ignore this */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { /* found it - tell the server to deregister */ (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(cd->source->vpid); - cd->active = 
true; - PMIx_server_deregister_client(&p, tdcbfunc, (void*)cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); + p.rank = ext2x_convert_opalrank(proc->vpid); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_client(&p, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); break; } } - OBJ_RELEASE(cd); -} - -/* tell the local PMIx server to cleanup this client as it is - * done executing */ -void pmix2x_server_deregister_client(const opal_process_name_t *proc, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->source = proc; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_client(0, 0, cd); - } else { - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_client, cd); - event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } /* have the local PMIx server setup the environment for this client */ -int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env) +int ext2x_server_setup_fork(const opal_process_name_t *proc, char ***env) { pmix_status_t rc; pmix_proc_t p; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); - p.rank = pmix2x_convert_opalrank(proc->vpid); + p.rank = ext2x_convert_opalrank(proc->vpid); rc = PMIx_server_setup_fork(&p, env); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } /* this is the call back up from the embedded PMIx server that @@ -447,9 +449,9 @@ int 
pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env) static void dmdx_response(pmix_status_t status, char *data, size_t sz, void *cbdata) { int rc; - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; - rc = pmix2x_convert_rc(status); + rc = ext2x_convert_rc(status); if (NULL != op->mdxcbfunc) { op->mdxcbfunc(rc, data, sz, op->cbdata, NULL, NULL); } @@ -457,31 +459,38 @@ static void dmdx_response(pmix_status_t status, char *data, size_t sz, void *cbd } /* request modex data for a local proc from the PMIx server */ -int pmix2x_server_dmodex(const opal_process_name_t *proc, +int ext2x_server_dmodex(const opal_process_name_t *proc, opal_pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* setup the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->mdxcbfunc = cbfunc; op->cbdata = cbdata; /* convert the jobid */ (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); - op->p.rank = pmix2x_convert_opalrank(proc->vpid); + op->p.rank = ext2x_convert_opalrank(proc->vpid); /* find the internally-cached data for this proc */ rc = PMIx_server_dmodex_request(&op->p, dmdx_response, op); if (PMIX_SUCCESS != rc) { OBJ_RELEASE(op); } - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } /* tell the PMIx server to notify its local clients of an event */ -int pmix2x_server_notify_event(int status, +int ext2x_server_notify_event(int status, const opal_process_name_t *source, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) @@ -490,7 +499,14 @@ int pmix2x_server_notify_event(int status, pmix_info_t *pinfo; size_t sz, n; pmix_status_t rc; - pmix2x_opcaddy_t *op; + 
ext2x_opcaddy_t *op; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -499,7 +515,7 @@ int pmix2x_server_notify_event(int status, n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, kv); + ext2x_value_load(&pinfo[n].value, kv); ++n; } } else { @@ -507,7 +523,7 @@ int pmix2x_server_notify_event(int status, pinfo = NULL; } /* setup the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->info = pinfo; op->sz = sz; op->opcbfunc = cbfunc; @@ -515,14 +531,14 @@ int pmix2x_server_notify_event(int status, /* convert the jobid */ if (NULL == source) { (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, OPAL_JOBID_INVALID); - op->p.rank = pmix2x_convert_opalrank(OPAL_VPID_INVALID); + op->p.rank = ext2x_convert_opalrank(OPAL_VPID_INVALID); } else { (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, source->jobid); - op->p.rank = pmix2x_convert_opalrank(source->vpid); + op->p.rank = ext2x_convert_opalrank(source->vpid); } - rc = pmix2x_convert_opalrc(status); + rc = ext2x_convert_opalrc(status); /* the range is irrelevant here as the server is passing * the event down to its local clients */ rc = PMIx_Notify_event(rc, &op->p, PMIX_RANGE_LOCAL, @@ -530,5 +546,5 @@ int pmix2x_server_notify_event(int status, if (PMIX_SUCCESS != rc) { OBJ_RELEASE(op); } - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } diff --git a/opal/mca/pmix/flux/pmix_flux.c b/opal/mca/pmix/flux/pmix_flux.c index a110962bf7c..187108bcc7d 100644 --- a/opal/mca/pmix/flux/pmix_flux.c +++ b/opal/mca/pmix/flux/pmix_flux.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 
2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. @@ -35,7 +35,7 @@ #include "opal/mca/pmix/base/pmix_base_hash.h" #include "pmix_flux.h" -static int flux_init(void); +static int flux_init(opal_list_t *ilist); static int flux_fini(void); static int flux_initialized(void); static int flux_abort(int flag, const char msg[], @@ -359,7 +359,7 @@ static int cache_put_string (opal_process_name_t *id, return ret; } -static int flux_init(void) +static int flux_init(opal_list_t *ilist) { int initialized; int spawned; @@ -372,6 +372,10 @@ static int flux_init(void) opal_process_name_t wildcard_rank; char *str; + if (0 < pmix_init_count) { + return OPAL_SUCCESS; + } + if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) { OPAL_PMI_ERROR(rc, "PMI_Initialized"); return OPAL_ERROR; diff --git a/opal/mca/pmix/isolated/pmix_isolated.c b/opal/mca/pmix/isolated/pmix_isolated.c index 08860ef895f..2680496bc38 100644 --- a/opal/mca/pmix/isolated/pmix_isolated.c +++ b/opal/mca/pmix/isolated/pmix_isolated.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All * rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
@@ -38,47 +38,47 @@ #include "opal/mca/pmix/base/pmix_base_hash.h" -static int isolated_init(void); +static int isolated_init(opal_list_t *ilist); static int isolated_fini(void); static int isolated_initialized(void); static int isolated_abort(int flat, const char *msg, - opal_list_t *procs); + opal_list_t *procs); static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid); static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, - opal_pmix_spawn_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata); static int isolated_job_connect(opal_list_t *procs); static int isolated_job_disconnect(opal_list_t *procs); static int isolated_job_disconnect_nb(opal_list_t *procs, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); static int isolated_resolve_peers(const char *nodename, - opal_jobid_t jobid, - opal_list_t *procs); + opal_jobid_t jobid, + opal_list_t *procs); static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist); static int isolated_put(opal_pmix_scope_t scope, opal_value_t *kv); static int isolated_fence(opal_list_t *procs, int collect_data); static int isolated_fence_nb(opal_list_t *procs, int collect_data, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static int isolated_commit(void); static int isolated_get(const opal_process_name_t *id, - const char *key, opal_list_t *info, - opal_value_t **kv); + const char *key, opal_list_t *info, + opal_value_t **kv); static int isolated_get_nb(const opal_process_name_t *id, const char *key, - opal_list_t *info, - opal_pmix_value_cbfunc_t cbfunc, void *cbdata); + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); static int isolated_publish(opal_list_t *info); static int isolated_publish_nb(opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static int 
isolated_lookup(opal_list_t *data, opal_list_t *info); static int isolated_lookup_nb(char **keys, opal_list_t *info, - opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); static int isolated_unpublish(char **keys, opal_list_t *info); static int isolated_unpublish_nb(char **keys, opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static const char *isolated_get_version(void); static int isolated_store_local(const opal_process_name_t *proc, - opal_value_t *val); + opal_value_t *val); static const char *isolated_get_nspace(opal_jobid_t jobid); static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace); @@ -118,11 +118,15 @@ const opal_pmix_base_module_t opal_pmix_isolated_module = { static int isolated_init_count = 0; static opal_process_name_t isolated_pname; -static int isolated_init(void) +static int isolated_init(opal_list_t *ilist) { int rc; opal_value_t kv; + if (0 < isolated_init_count) { + return OPAL_SUCCESS; + } + ++isolated_init_count; /* store our name in the opal_proc_t so that @@ -133,8 +137,8 @@ static int isolated_init(void) isolated_pname.vpid = 0; opal_proc_set_name(&isolated_pname); opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:isolated: assigned tmp name %d %d", - OPAL_NAME_PRINT(isolated_pname),isolated_pname.jobid,isolated_pname.vpid); + "%s pmix:isolated: assigned tmp name %d %d", + OPAL_NAME_PRINT(isolated_pname),isolated_pname.jobid,isolated_pname.vpid); // setup hash table opal_pmix_base_hash_init(); @@ -145,9 +149,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = 1; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -157,9 +161,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; 
kv.data.uint32 = 0; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -168,9 +172,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = 1; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -179,9 +183,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = 1; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -191,9 +195,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = 1; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -202,9 +206,9 @@ static int isolated_init(void) kv.type = OPAL_STRING; kv.data.string = strdup("0"); if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -214,9 +218,9 @@ static int isolated_init(void) kv.type = OPAL_UINT64; kv.data.uint64 = 0; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } /* save our local rank */ @@ -225,9 +229,9 @@ static int isolated_init(void) kv.type = OPAL_UINT16; kv.data.uint16 = 0; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - 
OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } /* and our node rank */ @@ -236,26 +240,26 @@ static int isolated_init(void) kv.type = OPAL_UINT16; kv.data.uint16 = 0; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); return OPAL_SUCCESS; -err_exit: + err_exit: return rc; } static int isolated_fini(void) { if (0 == isolated_init_count) { - return OPAL_SUCCESS; + return OPAL_SUCCESS; } if (0 != --isolated_init_count) { - return OPAL_SUCCESS; + return OPAL_SUCCESS; } opal_pmix_base_hash_finalize(); return OPAL_SUCCESS; @@ -264,13 +268,13 @@ static int isolated_fini(void) static int isolated_initialized(void) { if (0 < isolated_init_count) { - return 1; + return 1; } return 0; } static int isolated_abort(int flag, const char *msg, - opal_list_t *procs) + opal_list_t *procs) { return OPAL_SUCCESS; } @@ -281,8 +285,8 @@ static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t } static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, - opal_pmix_spawn_cbfunc_t cbfunc, - void *cbdata) + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -298,15 +302,15 @@ static int isolated_job_disconnect(opal_list_t *procs) } static int isolated_job_disconnect_nb(opal_list_t *procs, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } static int isolated_resolve_peers(const char *nodename, - opal_jobid_t jobid, - opal_list_t *procs) + opal_jobid_t jobid, + opal_list_t *procs) { return OPAL_ERR_NOT_IMPLEMENTED; } @@ -317,16 +321,16 @@ static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist) } static int isolated_put(opal_pmix_scope_t scope, - opal_value_t *kv) + opal_value_t *kv) { int rc; 
opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:isolated isolated_put key %s scope %d\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); + "%s pmix:isolated isolated_put key %s scope %d\n", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); if (!isolated_init_count) { - return OPAL_ERROR; + return OPAL_ERROR; } rc = opal_pmix_base_store(&isolated_pname, kv); @@ -345,39 +349,39 @@ static int isolated_fence(opal_list_t *procs, int collect_data) } static int isolated_fence_nb(opal_list_t *procs, int collect_data, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_IMPLEMENTED; } static int isolated_get(const opal_process_name_t *id, - const char *key, opal_list_t *info, - opal_value_t **kv) + const char *key, opal_list_t *info, + opal_value_t **kv) { int rc; opal_list_t vals; opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:isolated getting value for proc %s key %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(*id), key); + "%s pmix:isolated getting value for proc %s key %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(*id), key); OBJ_CONSTRUCT(&vals, opal_list_t); rc = opal_pmix_base_fetch(id, key, &vals); if (OPAL_SUCCESS == rc) { - *kv = (opal_value_t*)opal_list_remove_first(&vals); - return OPAL_SUCCESS; + *kv = (opal_value_t*)opal_list_remove_first(&vals); + return OPAL_SUCCESS; } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:isolated fetch from dstore failed: %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc); + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated fetch from dstore failed: %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc); } OPAL_LIST_DESTRUCT(&vals); return rc; } static int isolated_get_nb(const opal_process_name_t *id, const char *key, - opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) + opal_list_t *info, 
opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_IMPLEMENTED; } @@ -388,7 +392,7 @@ static int isolated_publish(opal_list_t *info) } static int isolated_publish_nb(opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -399,7 +403,7 @@ static int isolated_lookup(opal_list_t *data, opal_list_t *info) } static int isolated_lookup_nb(char **keys, opal_list_t *info, - opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -410,7 +414,7 @@ static int isolated_unpublish(char **keys, opal_list_t *info) } static int isolated_unpublish_nb(char **keys, opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -421,7 +425,7 @@ static const char *isolated_get_version(void) } static int isolated_store_local(const opal_process_name_t *proc, - opal_value_t *val) + opal_value_t *val) { opal_pmix_base_store(proc, val); diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index 28da8fb9164..7e7e13fda5d 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ @@ -146,7 +146,7 @@ extern int opal_pmix_base_exchange(opal_value_t *info, OPAL_NAME_PRINT(*(p)), (s))); \ OBJ_CONSTRUCT(&(_ilist), opal_list_t); \ _info = OBJ_NEW(opal_value_t); \ - _info->key = strdup(OPAL_PMIX_OPTIONAL); \ + _info->key = strdup(OPAL_PMIX_IMMEDIATE); \ _info->type = OPAL_BOOL; \ _info->data.flag = true; \ opal_list_append(&(_ilist), &(_info)->super); \ @@ -284,7 +284,7 @@ extern int opal_pmix_base_exchange(opal_value_t *info, * If the information is not found, or the server connection fails, then * an appropriate error constant will be returned. */ -typedef int (*opal_pmix_base_module_init_fn_t)(void); +typedef int (*opal_pmix_base_module_init_fn_t)(opal_list_t *ilist); /* Finalize the PMIx client, closing the connection to the local server. * An error code will be returned if, for some reason, the connection diff --git a/opal/mca/pmix/pmix2x/pmix/AUTHORS b/opal/mca/pmix/pmix2x/pmix/AUTHORS index c429d324c00..581a22ec73a 100644 --- a/opal/mca/pmix/pmix2x/pmix/AUTHORS +++ b/opal/mca/pmix/pmix2x/pmix/AUTHORS @@ -9,22 +9,31 @@ Email Name Affiliation(s) alinask Elena Shipunova Mellanox annu13 Annapurna Dasari Intel artpol84 Artem Polyakov Mellanox +ashleypittman Ashley Pittman Intel dsolt Dave Solt IBM +garlick Jim Garlick LLNL ggouaillardet Gilles Gouaillardet RIST hjelmn Nathan Hjelm LANL igor-ivanov Igor Ivanov Mellanox jladd-mlnx Joshua Ladd Mellanox -jsquyres Jeff Squyres Cisco, IU +jjhursey Joshua Hursey IBM +jsquyres Jeff Squyres Cisco +karasevb Boris Karasev Mellanox +kawashima-fj Takahiro Kawashima Fujitsu nkogteva Nadezhda Kogteva Mellanox -rhc54 Ralph Castain LANL, Cisco, Intel +nysal Nysal Jan KA IBM +PHHargrove Paul Hargrove LBNL +rhc54 Ralph Castain Intel ------------------------------- --------------------------- ------------------- Affiliation abbreviations: -------------------------- Cisco = Cisco Systems, Inc. +Fujitsu = Fujitsu IBM = International Business Machines, Inc. Intel = Intel, Inc. 
-IU = Indiana University LANL = Los Alamos National Laboratory +LBNL = Lawrence Berkeley National Laboratory +LLNL = Lawrence Livermore National Laboratory Mellanox = Mellanox RIST = Research Organization for Information Science and Technology diff --git a/opal/mca/pmix/pmix2x/pmix/INSTALL b/opal/mca/pmix/pmix2x/pmix/INSTALL index 005301463ff..e1fc5e3f6db 100644 --- a/opal/mca/pmix/pmix2x/pmix/INSTALL +++ b/opal/mca/pmix/pmix2x/pmix/INSTALL @@ -9,7 +9,7 @@ Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. -Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +Copyright (c) 2013-2017 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -24,7 +24,7 @@ This file is a *very* short overview of building and installing the PMIx library. Much more information is available on the PMIx web site (e.g., see the FAQ section): - http://pmix.github.io/pmix/master + http://pmix.github.io/pmix/pmix Developer Builds @@ -34,7 +34,7 @@ If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked out from Git), you should read the HACKING file before attempting to build PMIx. You must then run: -shell$ ./autogen.sh +shell$ ./autogen.pl You will need very recent versions of GNU Autoconf, Automake, and Libtool. If autogen.sh fails, read the HACKING file. If anything @@ -85,4 +85,3 @@ shell$ make install Parallel make is generally only helpful in the build phase; the installation process is mostly serial and does not benefit much from parallel make. - diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index 86f4438f1bb..4df8ad3aae6 100644 --- a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -24,6 +24,65 @@ current release as well as the "stable" bug fix release branch. 
Master (not on release branches yet) ------------------------------------ + +2.0.0 +------ +**** NOTE: This release implements the complete PMIx v2.0 Standard +**** and therefore includes a number of new APIs and features. These +**** can be tracked by their RFCs in the RFC repository at: +**** https://github.com/pmix/RFCs. A formal standards document will +**** be included in a later v2.x release. Some of the changes are +**** identified below. +- Added the Modular Component Architecture (MCA) plugin manager and + converted a number of operations to plugins, thereby allowing easy + customization and extension (including proprietary offerings) +- Added support for TCP sockets instead of Unix domain sockets for + client-server communications +- Added support for on-the-fly Allocation requests, including requests + for additional resources, extension of time for currently allocated + resources, and return of identified allocated resources to the scheduler + (RFC 0005 - https://github.com/pmix/RFCs/blob/master/RFC0005.md) +- Tightened rules on the processing of PMIx_Get requests, including + reservation of the "pmix" prefix for attribute keys and specifying + behaviors associated with the PMIX_RANK_WILDCARD value + (RFC 0009 - https://github.com/pmix/RFCs/blob/master/RFC0009.md) +- Extended support for tool interactions with a PMIx server aimed at + meeting the needs of debuggers and other tools.
Includes support + for rendezvousing with a system-level PMIx server for interacting + with the system management stack (SMS) outside of an allocated + session, and adds two new APIs: + - PMIx_Query: request general information such as the process + table for a specified job, and available SMS capabilities + - PMIx_Log: log messages (e.g., application progress) to a + system-hosted persistent store + (RFC 0010 - https://github.com/pmix/RFCs/blob/master/RFC0010.md) +- Added support for fabric/network interactions associated with + "instant on" application startup + (RFC 0012 - https://github.com/pmix/RFCs/blob/master/RFC0012.md) +- Added an attribute to support getting the time remaining in an + allocation via the PMIx_Query interface + (RFC 0013 - https://github.com/pmix/RFCs/blob/master/RFC0013.md) +- Added interfaces to support job control and monitoring requests, + including heartbeat and file monitors to detect stalled applications. + Job control interface supports standard signal-related operations + (pause, kill, resume, etc.) as well as checkpoint/restart requests. + The interface can also be used by an application to indicate it is + willing to be pre-empted, with the host RM providing an event + notification when the preemption is desired. + (RFC 0015 - https://github.com/pmix/RFCs/blob/master/RFC0015.md) +- Extended the event notification system to support notifications + across threads in the same process, and the ability to direct + ordering of notifications when registering event handlers. 
+ (RFC 0018 - https://github.com/pmix/RFCs/blob/master/RFC0018.md) +- Expose the buffer manipulation functions via a new set of APIs + to support heterogeneous data transfers within the host RM + environment + (RFC 0020 - https://github.com/pmix/RFCs/blob/master/RFC0020.md) +- Fix a number of race condition issues that arose at scale +- Enable PMIx servers to generate notifications to the host RM + and to themselves + + 1.2.2 -- 21 March 2017 ---------------------- - Compiler fix for Sun/Oracle CC (PR #322) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index c6d9bba4cca..c3dd7d08258 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitd5e4801 +repo_rev=git6fb501d # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="May 30, 2017" +date="Jun 19, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -75,4 +75,4 @@ date="May 30, 2017" # Version numbers are described in the Libtool current:revision:age # format. 
-libpmix_so_version=0:0:0 +libpmix_so_version=3:0:1 diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index 395b78406fd..fe800619c6b 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -167,6 +167,8 @@ AC_DEFUN([PMIX_SETUP_CORE],[ ############################################################################ pmix_show_title "Compiler and preprocessor tests" + PMIX_SETUP_CC + # # Check for some types # diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 index 84c04741f6a..ce83b3b207b 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 @@ -95,7 +95,7 @@ EOF # PMIX_CONFIGURE_USER="`whoami`" -PMIX_CONFIGURE_HOST="`hostname | head -n 1`" +PMIX_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`" PMIX_CONFIGURE_DATE="`date`" # @@ -115,7 +115,7 @@ AC_DEFUN([PMIX_BASIC_SETUP],[ # PMIX_CONFIGURE_USER="`whoami`" -PMIX_CONFIGURE_HOST="`hostname | head -n 1`" +PMIX_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`" PMIX_CONFIGURE_DATE="`date`" # diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 16e18e68ee7..e2cc36d8a3f 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -892,81 +892,83 @@ typedef struct pmix_value { } while (0) /* release the memory in the value struct data field */ -#define PMIX_VALUE_DESTRUCT(m) \ - do { \ - size_t _n; \ - if (PMIX_STRING == (m)->type) { \ - if (NULL != (m)->data.string) { \ - free((m)->data.string); \ - } \ - } else if ((PMIX_BYTE_OBJECT == (m)->type) || \ - (PMIX_COMPRESSED_STRING == (m)->type)) { \ - if (NULL != (m)->data.bo.bytes) { \ - free((m)->data.bo.bytes); \ - } \ - } else if (PMIX_DATA_ARRAY == (m)->type) { \ - if (PMIX_STRING == 
(m)->data.darray->type) { \ - char **_str = (char**)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - if (NULL != _str[_n]) { \ - free(_str[_n]); \ - } \ - } \ - } else if (PMIX_PROC_INFO == (m)->data.darray->type) { \ - pmix_proc_info_t *_info = \ - (pmix_proc_info_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - PMIX_PROC_INFO_DESTRUCT(&_info[_n]); \ - } \ - } else if (PMIX_INFO == (m)->data.darray->type) { \ - pmix_info_t *_info = \ - (pmix_info_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - /* cannot use info destruct as that loops back */ \ - if (PMIX_STRING == _info[_n].value.type) { \ - if (NULL != _info[_n].value.data.string) { \ - free(_info[_n].value.data.string); \ - } \ - } else if (PMIX_BYTE_OBJECT == _info[_n].value.type) { \ - if (NULL != _info[_n].value.data.bo.bytes) { \ - free(_info[_n].value.data.bo.bytes); \ - } \ - } else if (PMIX_PROC_INFO == _info[_n].value.type) { \ - PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \ - } \ - } \ - } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \ - pmix_byte_object_t *_obj = \ - (pmix_byte_object_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - if (NULL != _obj[_n].bytes) { \ - free(_obj[_n].bytes); \ - } \ - } \ - } \ - if (NULL != (m)->data.darray->array) { \ - free((m)->data.darray->array); \ - } \ - free((m)->data.darray); \ - /**** DEPRECATED ****/ \ - } else if (PMIX_INFO_ARRAY == (m)->type) { \ - pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \ - for (_n=0; _n < (m)->data.array->size; _n++) { \ - if (PMIX_STRING == _p[_n].value.type) { \ - if (NULL != _p[_n].value.data.string) { \ - free(_p[_n].value.data.string); \ - } \ - } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \ - if (NULL != _p[_n].value.data.bo.bytes) { \ - free(_p[_n].value.data.bo.bytes); \ - } \ - } else if (PMIX_PROC_INFO == _p[_n].value.type) { \ - 
PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); \ - } \ - } \ - free(_p); \ - /********************/ \ - } \ +#define PMIX_VALUE_DESTRUCT(m) \ + do { \ + size_t _n; \ + if (PMIX_STRING == (m)->type) { \ + if (NULL != (m)->data.string) { \ + free((m)->data.string); \ + } \ + } else if ((PMIX_BYTE_OBJECT == (m)->type) || \ + (PMIX_COMPRESSED_STRING == (m)->type)) { \ + if (NULL != (m)->data.bo.bytes) { \ + free((m)->data.bo.bytes); \ + } \ + } else if (PMIX_DATA_ARRAY == (m)->type) { \ + if (NULL != (m)->data.darray) { \ + if (PMIX_STRING == (m)->data.darray->type) { \ + char **_str = (char**)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + if (NULL != _str[_n]) { \ + free(_str[_n]); \ + } \ + } \ + } else if (PMIX_PROC_INFO == (m)->data.darray->type) { \ + pmix_proc_info_t *_info = \ + (pmix_proc_info_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + PMIX_PROC_INFO_DESTRUCT(&_info[_n]); \ + } \ + } else if (PMIX_INFO == (m)->data.darray->type) { \ + pmix_info_t *_info = \ + (pmix_info_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + /* cannot use info destruct as that loops back */ \ + if (PMIX_STRING == _info[_n].value.type) { \ + if (NULL != _info[_n].value.data.string) { \ + free(_info[_n].value.data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == _info[_n].value.type) { \ + if (NULL != _info[_n].value.data.bo.bytes) { \ + free(_info[_n].value.data.bo.bytes); \ + } \ + } else if (PMIX_PROC_INFO == _info[_n].value.type) { \ + PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \ + } \ + } \ + } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \ + pmix_byte_object_t *_obj = \ + (pmix_byte_object_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + if (NULL != _obj[_n].bytes) { \ + free(_obj[_n].bytes); \ + } \ + } \ + } \ + if (NULL != (m)->data.darray->array) { \ + free((m)->data.darray->array); \ + } \ +
free((m)->data.darray); \ + } \ + /**** DEPRECATED ****/ \ + } else if (PMIX_INFO_ARRAY == (m)->type) { \ + pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \ + for (_n=0; _n < (m)->data.array->size; _n++) { \ + if (PMIX_STRING == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.string) { \ + free(_p[_n].value.data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.bo.bytes) { \ + free(_p[_n].value.data.bo.bytes); \ + } \ + } else if (PMIX_PROC_INFO == _p[_n].value.type) { \ + PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); \ + } \ + } \ + free(_p); \ + /********************/ \ + } \ } while (0) #define PMIX_VALUE_FREE(m, n) \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h index b4d25366000..27c18557f71 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h @@ -63,6 +63,8 @@ static inline void pmix_atomic_wmb(void) } #define PMIXMB() pmix_atomic_mb() +#define PMIXRMB() pmix_atomic_rmb() +#define PMIXWMB() pmix_atomic_wmb() /********************************************************************** * diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h index 98fbccbbfc3..9682b9e62af 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2017 IBM Corporation. All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved.
@@ -30,10 +30,8 @@ #define PMIXMB() __asm__ __volatile__ ("sync" : : : "memory") #define PMIXRMB() __asm__ __volatile__ ("lwsync" : : : "memory") -#define PMIXWMB() __asm__ __volatile__ ("eieio" : : : "memory") +#define PMIXWMB() __asm__ __volatile__ ("lwsync" : : : "memory") #define PMIXISYNC() __asm__ __volatile__ ("isync" : : : "memory") -#define PMIXSMP_SYNC "sync \n\t" -#define PMIXSMP_ISYNC "\n\tisync" /********************************************************************** diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h index 740da76ca10..da4d4ca2dd9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -127,6 +127,7 @@ #include <stdlib.h> #endif /* HAVE_STDLIB_H */ +#include "src/threads/thread_usage.h" BEGIN_C_DECLS @@ -496,7 +497,7 @@ static inline pmix_object_t *pmix_obj_new(pmix_class_t * cls) static inline int pmix_obj_update(pmix_object_t *object, int inc) __pmix_attribute_always_inline__; static inline int pmix_obj_update(pmix_object_t *object, int inc) { - return object->obj_reference_count += inc; + return PMIX_THREAD_ADD32(&object->obj_reference_count, inc); } END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c index dfd3b9a2c16..36b569051c7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c @@ -86,37 +86,42 @@ static void pmix_pointer_array_destruct(pmix_pointer_array_t *array) * A classical find first zero bit (ffs) on a large array. It checks starting * from the indicated position until it finds a zero bit. If SET is true, * the bit is set. The position of the bit is returned in store. + * + * According to Section 6.4.4.1 of the C standard we don't need to prepend a type + * indicator to constants (the type is inferred by the compiler according to + * the number of bits necessary to represent it).
*/ -#define FIND_FIRST_ZERO(START_IDX, STORE, SET) \ +#define FIND_FIRST_ZERO(START_IDX, STORE) \ do { \ uint32_t __b_idx, __b_pos; \ + if( 0 == table->number_free ) { \ + (STORE) = table->size; \ + break; \ + } \ GET_BIT_POS((START_IDX), __b_idx, __b_pos); \ - for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFULL; __b_idx++); \ + for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFu; __b_idx++); \ assert(__b_idx < (uint32_t)table->size); \ uint64_t __check_value = table->free_bits[__b_idx]; \ __b_pos = 0; \ \ - if( 0x00000000FFFFFFFFULL == (__check_value & 0x00000000FFFFFFFFULL) ) { \ + if( 0x00000000FFFFFFFFu == (__check_value & 0x00000000FFFFFFFFu) ) { \ __check_value >>= 32; __b_pos += 32; \ } \ - if( 0x000000000000FFFFULL == (__check_value & 0x000000000000FFFFULL) ) { \ + if( 0x000000000000FFFFu == (__check_value & 0x000000000000FFFFu) ) { \ __check_value >>= 16; __b_pos += 16; \ } \ - if( 0x00000000000000FFULL == (__check_value & 0x00000000000000FFULL) ) { \ + if( 0x00000000000000FFu == (__check_value & 0x00000000000000FFu) ) { \ __check_value >>= 8; __b_pos += 8; \ } \ - if( 0x000000000000000FULL == (__check_value & 0x000000000000000FULL) ) { \ + if( 0x000000000000000Fu == (__check_value & 0x000000000000000Fu) ) { \ __check_value >>= 4; __b_pos += 4; \ } \ - if( 0x0000000000000003ULL == (__check_value & 0x0000000000000003ULL) ) { \ + if( 0x0000000000000003u == (__check_value & 0x0000000000000003u) ) { \ __check_value >>= 2; __b_pos += 2; \ } \ - if( 0x0000000000000001ULL == (__check_value & 0x0000000000000001ULL) ) { \ + if( 0x0000000000000001u == (__check_value & 0x0000000000000001u) ) { \ __b_pos += 1; \ } \ - if( (SET) ) { \ - table->free_bits[__b_idx] |= (1ULL << __b_pos); \ - } \ (STORE) = (__b_idx * 8 * sizeof(uint64_t)) + __b_pos; \ } while(0) @@ -127,8 +132,8 @@ static void pmix_pointer_array_destruct(pmix_pointer_array_t *array) do { \ uint32_t __b_idx, __b_pos; \ GET_BIT_POS((IDX), __b_idx, __b_pos); \ - assert( 0 == 
(table->free_bits[__b_idx] & (1UL << __b_pos))); \ - table->free_bits[__b_idx] |= (1ULL << __b_pos); \ + assert( 0 == (table->free_bits[__b_idx] & (((uint64_t)1) << __b_pos))); \ + table->free_bits[__b_idx] |= (((uint64_t)1) << __b_pos); \ } while(0) /** @@ -138,8 +143,8 @@ static void pmix_pointer_array_destruct(pmix_pointer_array_t *array) do { \ uint32_t __b_idx, __b_pos; \ GET_BIT_POS((IDX), __b_idx, __b_pos); \ - assert( (table->free_bits[__b_idx] & (1UL << __b_pos))); \ - table->free_bits[__b_idx] ^= (1ULL << __b_pos); \ + assert( (table->free_bits[__b_idx] & (((uint64_t)1) << __b_pos))); \ + table->free_bits[__b_idx] ^= (((uint64_t)1) << __b_pos); \ } while(0) #if 0 @@ -157,9 +162,9 @@ static void pmix_pointer_array_validate(pmix_pointer_array_t *array) GET_BIT_POS(i, b_idx, p_idx); if( NULL == array->addr[i] ) { cnt++; - assert( 0 == (array->free_bits[b_idx] & (1ULL << p_idx)) ); + assert( 0 == (array->free_bits[b_idx] & (((uint64_t)1) << p_idx)) ); } else { - assert( 0 != (array->free_bits[b_idx] & (1ULL << p_idx)) ); + assert( 0 != (array->free_bits[b_idx] & (((uint64_t)1) << p_idx)) ); } } assert(cnt == array->number_free); @@ -236,7 +241,7 @@ int pmix_pointer_array_add(pmix_pointer_array_t *table, void *ptr) table->number_free--; SET_BIT(index); if (table->number_free > 0) { - FIND_FIRST_ZERO(index, table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } else { table->lowest_free = table->size; } @@ -290,7 +295,7 @@ int pmix_pointer_array_set_item(pmix_pointer_array_t *table, int index, SET_BIT(index); /* Reset lowest_free if required */ if ( index == table->lowest_free ) { - FIND_FIRST_ZERO(index, table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } } else { assert( index != table->lowest_free ); @@ -362,7 +367,7 @@ bool pmix_pointer_array_test_and_set_item (pmix_pointer_array_t *table, /* Reset lowest_free if required */ if( table->number_free > 0 ) { if ( index == table->lowest_free ) { - FIND_FIRST_ZERO(index, 
table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } } else { table->lowest_free = table->size; diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index 7c5953baee8..3bf71848cd4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -7,7 +7,7 @@ * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -88,6 +88,7 @@ static const char pmix_version_string[] = PMIX_VERSION; static void _notify_complete(pmix_status_t status, void *cbdata) { pmix_event_chain_t *chain = (pmix_event_chain_t*)cbdata; + PMIX_ACQUIRE_OBJECT(chain); PMIX_RELEASE(chain); } @@ -166,20 +167,18 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, } -pmix_client_globals_t pmix_client_globals = {{{0}}}; -pmix_mutex_t pmix_client_bootstrap_mutex = PMIX_MUTEX_STATIC_INIT; +pmix_client_globals_t pmix_client_globals = {0}; /* callback for wait completion */ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + pmix_lock_t *lock = (pmix_lock_t*)cbdata; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client wait_cbfunc received"); - - *active = false; + PMIX_WAKEUP_THREAD(lock); } /* callback to receive job info */ @@ -197,7 +196,8 @@ static void job_data(struct pmix_peer_t *pr, if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nspace, &cnt, PMIX_STRING))) { PMIX_ERROR_LOG(rc); cb->status = PMIX_ERROR; - cb->active = false; + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); return; } assert(NULL != nspace); @@ -208,7 +208,8 @@ static void job_data(struct pmix_peer_t *pr, 
pmix_job_data_htable_store(pmix_globals.myid.nspace, buf); #endif cb->status = PMIX_SUCCESS; - cb->active = false; + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT const char* PMIx_Get_version(void) @@ -216,7 +217,6 @@ PMIX_EXPORT const char* PMIx_Get_version(void) return pmix_version_string; } -volatile bool waiting_for_debugger = true; static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, @@ -225,17 +225,13 @@ static void notification_fn(size_t evhdlr_registration_id, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { + pmix_lock_t *reglock = (pmix_lock_t*)cbdata; + if (NULL != cbfunc) { cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); } - waiting_for_debugger = false; -} -static void evhandler_reg_callbk(pmix_status_t status, - size_t evhandler_ref, - void *cbdata) -{ - volatile int *active = (volatile int*)cbdata; - *active = status; + PMIX_WAKEUP_THREAD(reglock); + } typedef struct { @@ -320,19 +316,19 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_cb_t cb; pmix_buffer_t *req; pmix_cmd_t cmd = PMIX_REQ_CMD; - volatile int active; pmix_status_t code = PMIX_ERR_DEBUGGER_RELEASE; pmix_proc_t wildcard; pmix_info_t ginfo; pmix_value_t *val = NULL; + pmix_lock_t reglock; if (NULL == proc) { return PMIX_ERR_BAD_PARAM; } - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); - if (0 < pmix_globals.init_cntr || PMIX_PROC_SERVER == pmix_globals.proc_type) { + if (0 < pmix_globals.init_cntr || PMIX_PROC_IS_SERVER) { /* since we have been called before, the nspace and * rank should be known. 
So return them here if * requested */ @@ -340,19 +336,19 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); proc->rank = pmix_globals.myid.rank; } + ++pmix_globals.init_cntr; /* we also need to check the info keys to see if something need * be done with them - e.g., to notify another library that we * also have called init */ + PMIX_RELEASE_THREAD(&pmix_global_lock); if (NULL != info) { _check_for_notify(info, ninfo); } - ++pmix_globals.init_cntr; - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_SUCCESS; } /* if we don't see the required info, then we cannot init */ if (NULL == getenv("PMIX_NAMESPACE")) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INVALID_NAMESPACE; } @@ -361,13 +357,17 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_CLIENT, info, ninfo, pmix_client_notify_recv))) { PMIX_ERROR_LOG(rc); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* setup the globals */ PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); - PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t); + pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t); + if (NULL == pmix_client_globals.myserver) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOMEM; + } pmix_output_verbose(2, pmix_globals.debug_output, "pmix: init called"); @@ -375,7 +375,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* we require our nspace */ if (NULL == (evar = getenv("PMIX_NAMESPACE"))) { /* let the caller know that the server isn't available yet */ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INVALID_NAMESPACE; } if (NULL != proc) { @@ -389,7 +389,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* we also require our 
rank */ if (NULL == (evar = getenv("PMIX_RANK"))) { /* let the caller know that the server isn't available yet */ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } pmix_globals.myid.rank = strtol(evar, NULL, 10); @@ -403,25 +403,27 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, * to us at launch */ evar = getenv("PMIX_SECURITY_MODE"); if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, evar))) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* the server will be using the same */ - pmix_client_globals.myserver.compat.psec = pmix_globals.mypeer->compat.psec; + pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec; /* setup the shared memory support */ #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS != (rc = pmix_dstore_init(NULL, 0))) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } #endif /* PMIX_ENABLE_DSTORE */ /* connect to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){ + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + /* mark that we are using the same module as used for the server */ + pmix_globals.mypeer->compat.ptl = pmix_client_globals.myserver->compat.ptl; /* send a request for our job info - we do this as a non-blocking * transaction because some systems cannot handle very large @@ -430,28 +432,28 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(req, &cmd, 1, PMIX_CMD))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(req); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + 
PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* send to the server */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - cb.active = true; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, req, job_data, (void*)&cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, req, job_data, (void*)&cb))){ PMIX_DESTRUCT(&cb); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* wait for the data to return */ - PMIX_WAIT_FOR_COMPLETION(cb.active); + PMIX_WAIT_THREAD(&cb.lock); rc = cb.status; PMIX_DESTRUCT(&cb); if (PMIX_SUCCESS == rc) { pmix_globals.init_cntr++; } else { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* lood for a debugger attach key */ (void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); @@ -460,18 +462,13 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS == PMIx_Get(&wildcard, PMIX_DEBUG_STOP_IN_INIT, &ginfo, 1, &val)) { PMIX_VALUE_FREE(val, 1); // cleanup memory /* if the value was found, then we need to wait for debugger attach here */ - /* register for the debugger release notificaation */ - active = -1; + /* register for the debugger release notification */ + PMIX_CONSTRUCT_LOCK(&reglock); PMIx_Register_event_handler(&code, 1, NULL, 0, - notification_fn, evhandler_reg_callbk, (void*)&active); - while (-1 == active) { - usleep(100); - } - if (0 != active) { - return active; - } + notification_fn, NULL, (void*)&reglock); /* wait for it to arrive */ - PMIX_WAIT_FOR_COMPLETION(waiting_for_debugger); + PMIX_WAIT_THREAD(&reglock); + PMIX_DESTRUCT_LOCK(&reglock); } PMIX_INFO_DESTRUCT(&ginfo); @@ -480,46 +477,81 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, _check_for_notify(info, ninfo); } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); - return PMIX_SUCCESS; } PMIX_EXPORT int PMIx_Initialized(void) { -
pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (0 < pmix_globals.init_cntr) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return true; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return false; } +typedef struct { + pmix_lock_t lock; + pmix_event_t ev; + bool active; +} pmix_client_timeout_t; + +/* timer callback */ +static void fin_timeout(int sd, short args, void *cbdata) +{ + pmix_client_timeout_t *tev; + tev = (pmix_client_timeout_t*)cbdata; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:client finwait timeout fired"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} +/* callback for finalize completion */ +static void finwait_cbfunc(struct pmix_peer_t *pr, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_client_timeout_t *tev; + tev = (pmix_client_timeout_t*)cbdata; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:client finwait_cbfunc received"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} + PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) { pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FINALIZE_CMD; pmix_status_t rc; size_t n; - volatile bool active; + pmix_client_timeout_t tev; + struct timeval tv = {2, 0}; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client finalize called"); + "%s:%d pmix:client finalize called", + pmix_globals.myid.nspace, pmix_globals.myid.rank); /* mark that I called finalize */ pmix_globals.mypeer->finalized = true; - if ( 0 <= 
pmix_client_globals.myserver.sd ) { + if ( 0 <= pmix_client_globals.myserver->sd ) { /* check to see if we are supposed to execute a * blocking fence prior to actually finalizing */ if (NULL != info && 0 < ninfo) { @@ -540,7 +572,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) } } } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* setup a cmd message to notify the PMIx * server that we are normally terminating */ @@ -554,19 +585,33 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client sending finalize sync to server"); - + "%s:%d pmix:client sending finalize sync to server", + pmix_globals.myid.nspace, pmix_globals.myid.rank); + + /* setup a timer to protect ourselves should the server be unable + * to answer for some reason */ + PMIX_CONSTRUCT_LOCK(&tev.lock); + pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0, + fin_timeout, &tev); + tev.active = true; + PMIX_POST_OBJECT(&tev); + pmix_event_add(&tev.ev, &tv); /* send to the server */ - active = true;; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, - wait_cbfunc, (void*)&active))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, + finwait_cbfunc, (void*)&tev))){ return rc; } /* wait for the ack to return */ - PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(&tev.lock); + PMIX_DESTRUCT_LOCK(&tev.lock); + if (tev.active) { + pmix_event_del(&tev.ev); + } + pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client finalize sync received"); + "%s:%d pmix:client finalize sync received", + pmix_globals.myid.nspace, pmix_globals.myid.rank); } if (!pmix_globals.external_evbase) { @@ -577,8 +622,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) (void)pmix_progress_thread_pause(NULL); } - PMIX_DESTRUCT(&pmix_client_globals.myserver); - #if defined(PMIX_ENABLE_DSTORE) && 
(PMIX_ENABLE_DSTORE == 1) if (0 > (rc = pmix_dstore_nspace_del(pmix_globals.myid.nspace))) { PMIX_ERROR_LOG(rc); @@ -588,11 +631,16 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); - if (0 <= pmix_client_globals.myserver.sd) { - CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd); + if (0 <= pmix_client_globals.myserver->sd) { + CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd); + } + if (NULL != pmix_client_globals.myserver) { + PMIX_RELEASE(pmix_client_globals.myserver); } + pmix_rte_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } @@ -603,23 +651,23 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], pmix_buffer_t *bfr; pmix_cmd_t cmd = PMIX_ABORT_CMD; pmix_status_t rc; - volatile bool active; + pmix_lock_t reglock; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client abort called"); - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a buffer to hold the message */ bfr = PMIX_NEW(pmix_buffer_t); @@ -657,14 +705,15 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], } /* send to the server */ - active = true; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, bfr, - wait_cbfunc, (void*)&active))){ + PMIX_CONSTRUCT_LOCK(&reglock); + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, bfr, + wait_cbfunc, (void*)&reglock))){ return rc; } /* wait for the release */ -
PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(&reglock); + PMIX_DESTRUCT_LOCK(&reglock); return PMIX_SUCCESS; } @@ -677,6 +726,9 @@ static void _putfn(int sd, short args, void *cbdata) uint8_t *tmp; size_t len; + /* need to acquire the cb object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cb); + /* no need to push info that starts with "pmix" as that is * info we would have been provided at startup */ if (0 == strncmp(cb->key, "pmix", 4)) { @@ -754,7 +806,9 @@ static void _putfn(int sd, short args, void *cbdata) PMIX_RELEASE(kv); // maintain accounting } cb->pstatus = rc; - cb->active = false; + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val) @@ -766,16 +820,15 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va "pmix: executing put for key %s type %d", key, val->type); - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; cb->scope = scope; cb->key = (char*)key; cb->value = val; @@ -784,7 +837,7 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va PMIX_THREADSHIFT(cb, _putfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; PMIX_RELEASE(cb); @@ -799,6 +852,9 @@ static void _commitfn(int sd, short args, void *cbdata) pmix_buffer_t *msgout; pmix_cmd_t cmd=PMIX_COMMIT_CMD; + /* need to acquire the cb object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cb); + msgout = PMIX_NEW(pmix_buffer_t); /* pack the cmd */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &cmd, 1, PMIX_CMD))) { @@ -839,15 +895,17 @@ static void _commitfn(int sd, short args, void *cbdata) /* always send, even if we have nothing to contribute, so the server knows * that we contributed whatever we had */ - if (PMIX_SUCCESS == (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msgout, - wait_cbfunc, (void*)&cb->active))){ + if (PMIX_SUCCESS == (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msgout, + wait_cbfunc, (void*)&cb->lock))){ cb->pstatus = PMIX_SUCCESS; return; } done: cb->pstatus = rc; - cb->active = false; + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Commit(void) @@ -855,32 +913,30 @@ static void _commitfn(int sd, short args, void *cbdata) pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we are a server, or we aren't connected, don't attempt to send */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; // not an error } if (!pmix_globals.connected) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; - /* pass this into the event library for thread protection */ PMIX_THREADSHIFT(cb, _commitfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; PMIX_RELEASE(cb); @@ -898,6 +954,9 @@ static void _peersfn(int sd, short args, void *cbdata) 
#endif size_t i; + /* need to acquire the cb object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cb); + /* cycle across our known nspaces */ tmp = NULL; #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) @@ -952,7 +1011,9 @@ static void _peersfn(int sd, short args, void *cbdata) done: cb->pstatus = rc; - cb->active = false; + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, @@ -962,16 +1023,15 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; cb->key = (char*)nodename; if (NULL != nspace) { (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); @@ -981,7 +1041,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, PMIX_THREADSHIFT(cb, _peersfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; /* transfer the result */ *procs = cb->procs; @@ -1001,6 +1061,9 @@ static void _nodesfn(int sd, short args, void *cbdata) pmix_nspace_t *nsptr; pmix_nrec_t *nptr; + /* need to acquire the cb object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cb); + /* cycle across our known nspaces */ tmp = NULL; PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { @@ -1020,7 +1083,9 @@ static void _nodesfn(int sd, short args, void *cbdata) } cb->pstatus = rc; - cb->active = false; + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); + 
PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) @@ -1028,16 +1093,15 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (NULL != nspace) { (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); } @@ -1046,7 +1110,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist PMIX_THREADSHIFT(cb, _nodesfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; *nodelist = cb->key; PMIX_RELEASE(cb); diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c index 43bb9147920..50864d7fbc5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . 
@@ -51,6 +51,8 @@ #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" +#include "src/threads/threads.h" + #include "src/mca/ptl/ptl.h" #include "pmix_client_ops.h" @@ -68,23 +70,27 @@ PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs, pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: connect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Connect_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) { @@ -93,7 +99,7 @@ PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs, } /* wait for the connect to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -112,17 +118,22 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: connect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs || 0 >= nprocs) { @@ -168,7 +179,7 @@ PMIX_EXPORT pmix_status_t 
PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -177,25 +188,28 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro } PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], size_t ninfo) { pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Disconnect_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) { PMIX_RELEASE(cb); @@ -203,7 +217,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t npro } /* wait for the connect to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -222,17 +236,22 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: disconnect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send 
*/ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs || 0 >= nprocs) { @@ -278,7 +297,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -344,5 +363,6 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; cb->status = status; - cb->active = false; + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c index d22c1223d14..72ccdef2955 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . 
@@ -66,28 +66,32 @@ static void wait_cbfunc(struct pmix_peer_t *pr, static void op_cbfunc(pmix_status_t status, void *cbdata); PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], size_t ninfo) { pmix_cb_t *cb; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: executing fence"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(procs, nprocs, info, ninfo, @@ -97,7 +101,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, } /* wait for the fence to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -108,8 +112,8 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, } PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata) + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FENCENB_CMD; @@ -118,17 +122,22 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs pmix_proc_t rg, *rgs; size_t nrg; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: fence_nb called"); if (pmix_globals.init_cntr <= 
0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs && 0 != nprocs) { @@ -160,7 +169,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -252,6 +261,5 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; cb->status = status; - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } - diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index 6abfb3fac89..e0932889707 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -53,6 +53,7 @@ #include "src/class/pmix_list.h" #include "src/buffer_ops/buffer_ops.h" +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/compress.h" #include "src/util/error.h" @@ -86,22 +87,25 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], pmix_cb_t *cb; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = 
PMIx_Get_nb(proc, key, info, ninfo, _value_cbfunc, cb))) { PMIX_RELEASE(cb); return rc; } /* wait for the data to return */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; *val = cb->value; PMIX_RELEASE(cb); @@ -120,9 +124,13 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, int rank; char *nm; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* if the proc is NULL, then the caller is assuming * that the key is universally unique within the caller's @@ -168,7 +176,6 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, /* thread-shift so we can check global objects */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); cb->rank = rank; cb->key = (char*)key; @@ -186,18 +193,20 @@ static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; + PMIX_ACQUIRE_OBJECT(cb); cb->status = status; if (PMIX_SUCCESS == status) { if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&cb->value, kv, PMIX_VALUE))) { PMIX_ERROR_LOG(rc); } } - cb->active = false; + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, - const pmix_info_t info[], size_t ninfo, - pmix_cmd_t cmd) + const pmix_info_t info[], size_t ninfo, + pmix_cmd_t cmd) { pmix_buffer_t *msg; pmix_status_t rc; @@ -238,12 +247,12 @@ static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, return msg; } -/* this callback is coming from the usock recv, and thus +/* this callback is coming from the ptl recv, and thus * is occurring inside of our progress thread - hence, no * need to thread shift */ static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, - pmix_buffer_t *buf, void *cbdata) + 
pmix_buffer_t *buf, void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_cb_t *cb2; @@ -486,6 +495,9 @@ static void _getnbfn(int fd, short flags, void *cbdata) char *tmp; bool my_nspace = false, my_rank = false; + /* cb was passed to us from another thread - acquire it */ + PMIX_ACQUIRE_OBJECT(cb); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: getnbfn value for proc %s:%d key %s", cb->nspace, cb->rank, @@ -614,8 +626,8 @@ static void _getnbfn(int fd, short flags, void *cbdata) rc = pmix_dstore_fetch(cb->nspace, cb->rank, cb->key, &val); #endif if( PMIX_SUCCESS != rc && !my_nspace ){ - /* we are asking about the job-level info from other - * namespace. It seems tha we don't have it - go and + /* we are asking about the job-level info from another + * namespace. It seems that we don't have it - go and * ask server */ goto request; @@ -681,12 +693,12 @@ static void _getnbfn(int fd, short flags, void *cbdata) goto respond; } -request: + request: /* if we got here, then we don't have the data for this proc. 
If we * are a server, or we are a client and not connected, then there is * nothing more we can do */ - if (PMIX_PROC_SERVER == pmix_globals.proc_type || - (PMIX_PROC_SERVER != pmix_globals.proc_type && !pmix_globals.connected)) { + if (PMIX_PROC_IS_SERVER || + (!PMIX_PROC_IS_SERVER && !pmix_globals.connected)) { rc = PMIX_ERR_NOT_FOUND; goto respond; } @@ -694,13 +706,14 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* we also have to check the user's directives to see if they do not want * us to attempt to retrieve it from the server */ for (n=0; n < cb->ninfo; n++) { - if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) && + if ((0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) || (0 == strcmp(cb->info[n].key, PMIX_IMMEDIATE))) && (PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) { /* they don't want us to try and retrieve it */ pmix_output_verbose(2, pmix_globals.debug_output, "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", cb->key, cb->rank, cb->nspace); rc = PMIX_ERR_NOT_FOUND; + val = NULL; goto respond; } } @@ -734,16 +747,17 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* track the callback object */ pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){ pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super); rc = PMIX_ERROR; goto respond; } - + /* we made a lot of changes to cb, so ensure they get + * written out before we return */ + PMIX_POST_OBJECT(cb); return; -respond: - + respond: /* if a callback was provided, execute it */ if (NULL != cb->value_cbfunc) { if (NULL != val) { @@ -768,5 +782,4 @@ static void _getnbfn(int fd, short flags, void *cbdata) } PMIX_RELEASE(cb); return; - } diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h index 4fdcf6c2b33..ecf979572c5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h @@ -20,14 +20,12 @@ BEGIN_C_DECLS typedef struct { - pmix_peer_t myserver; // messaging support to/from my server + pmix_peer_t *myserver; // messaging support to/from my server pmix_list_t pending_requests; // list of pmix_cb_t pending data requests } pmix_client_globals_t; PMIX_EXPORT extern pmix_client_globals_t pmix_client_globals; -PMIX_EXPORT extern pmix_mutex_t pmix_client_bootstrap_mutex; - END_C_DECLS #endif /* PMIX_CLIENT_OPS_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c index 59b16100127..6981c96e1e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . 
@@ -48,6 +48,7 @@ #include "src/class/pmix_list.h" #include "src/buffer_ops/buffer_ops.h" +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -71,21 +72,25 @@ PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[], pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: publish called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object to let us know when it is done */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(info, ninfo, op_cbfunc, cb))) { PMIX_ERROR_LOG(rc); @@ -94,7 +99,7 @@ PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[], } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = (pmix_status_t)cb->status; PMIX_RELEASE(cb); @@ -109,17 +114,22 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: publish called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo cases */ if (NULL == info) { @@ -162,10 +172,9 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo cb = PMIX_NEW(pmix_cb_t); cb->op_cbfunc = cbfunc; cb->cbdata = cbdata; - cb->active = true; /* push the 
message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -181,9 +190,23 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, char **keys = NULL; size_t i; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: lookup called"); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* bozo protection */ if (NULL == pdata) { return PMIX_ERR_BAD_PARAM; @@ -202,7 +225,6 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, cb = PMIX_NEW(pmix_cb_t); cb->cbdata = (void*)pdata; cb->nvals = ndata; - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(keys, info, ninfo, lookup_cbfunc, cb))) { @@ -212,7 +234,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); /* the data has been stored in the info array by lookup_cbfunc, so * nothing more for us to do */ @@ -231,13 +253,23 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, pmix_cb_t *cb; size_t nkeys, n; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: lookup called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; 
+ } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* check for bozo cases */ if (NULL == keys) { return PMIX_ERR_BAD_PARAM; @@ -295,7 +327,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, cb->cbdata = cbdata; /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -304,19 +336,33 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, } PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], + size_t ninfo) { pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: unpublish called"); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(keys, info, ninfo, op_cbfunc, cb))) { @@ -325,7 +371,7 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys, } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -342,13 +388,23 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys, pmix_cb_t *cb; size_t i, j; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: unpublish called"); if 
(pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* create the unpublish cmd */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ @@ -397,10 +453,9 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys, cb = PMIX_NEW(pmix_cb_t); cb->op_cbfunc = cbfunc; cb->cbdata = cbdata; - cb->active = true; /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -417,6 +472,8 @@ static void wait_cbfunc(struct pmix_peer_t *pr, int ret; int32_t cnt; + PMIX_ACQUIRE_OBJECT(cb); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? -1 : (int)buf->bytes_used); @@ -437,7 +494,8 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; cb->status = status; - cb->active = false; + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } static void wait_lookup_cbfunc(struct pmix_peer_t *pr, @@ -450,6 +508,8 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, pmix_pdata_t *pdata; size_t ndata; + PMIX_ACQUIRE_OBJECT(cb); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? 
-1 : (int)buf->bytes_used); @@ -514,6 +574,7 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda pmix_pdata_t *tgt = (pmix_pdata_t*)cb->cbdata; size_t i, j; + PMIX_ACQUIRE_OBJECT(cb); cb->status = status; if (PMIX_SUCCESS == status) { /* find the matching key in the provided info array - error if not found */ @@ -530,6 +591,6 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda } } } - - cb->active = false; + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c index 71828db7367..a7842c5ffb4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . 
@@ -48,6 +48,7 @@ #include "src/class/pmix_list.h" #include "src/buffer_ops/buffer_ops.h" +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -68,17 +69,23 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: spawn called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* ensure the nspace (if provided) is initialized */ if (NULL != nspace) { @@ -87,7 +94,6 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Spawn_nb(job_info, ninfo, apps, napps, spawn_cbfunc, cb))) { PMIX_RELEASE(cb); @@ -95,7 +101,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, } /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; if (NULL != nspace) { (void)strncpy(nspace, cb->nspace, PMIX_MAX_NSLEN); @@ -114,17 +120,22 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: spawn called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); msg = PMIX_NEW(pmix_buffer_t); /* pack the 
cmd */ @@ -170,7 +181,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -189,6 +200,8 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_status_t rc, ret; int32_t cnt; + PMIX_ACQUIRE_OBJECT(cb); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? -1 : (int)buf->bytes_used); @@ -233,9 +246,11 @@ static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; + PMIX_ACQUIRE_OBJECT(cb); cb->status = status; if (NULL != nspace) { (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); } - cb->active = false; + PMIX_POST_OBJECT(cb); + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c index b0f614b582b..cf2f546f777 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c @@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -101,13 +102,23 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ pmix_status_t rc; pmix_query_caddy_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: job control called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + 
PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* if we are the server, then we just issue the request and * return the response */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { @@ -125,12 +136,6 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ } /* if we are a client, then relay this request to the server */ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { @@ -178,7 +183,7 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -195,13 +200,23 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm pmix_status_t rc; pmix_query_caddy_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: monitor called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* if we are the server, then we just issue the request and * return the response */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { @@ -217,12 +232,6 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm } /* if we are a client, then relay this request to the server 
*/ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { @@ -267,7 +276,7 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c index 6fb39262a77..92ea30189ed 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -61,13 +62,23 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata, pmix_buffer_t *msg; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:log non-blocking"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (0 == ndata || NULL == data) { return PMIX_ERR_BAD_PARAM; } @@ -126,7 +137,7 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata, pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query sending to server"); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){ PMIX_RELEASE(cd); } } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c index e4d772f821e..5eec3f79c74 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c @@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -101,13 +102,23 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque pmix_buffer_t *msg; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query non-blocking"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return 
PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (0 == nqueries || NULL == queries) { return PMIX_ERR_BAD_PARAM; } @@ -127,12 +138,6 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque rc = PMIX_SUCCESS; } else { /* if we are a client, then relay this request to the server */ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - cd = PMIX_NEW(pmix_query_caddy_t); cd->cbfunc = cbfunc; cd->cbdata = cbdata; @@ -157,7 +162,7 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query sending to server"); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){ PMIX_RELEASE(cd); } } @@ -240,7 +245,7 @@ PMIX_EXPORT pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t dire cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h index 55f3fac311f..715289f5038 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h @@ -165,6 +165,7 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); ch->timer_active = true; \ pmix_event_assign(&ch->ev, pmix_globals.evbase, 
-1, 0, \ pmix_event_timeout_cb, ch); \ + PMIX_POST_OBJECT(ch); \ pmix_event_add(&ch->ev, &pmix_globals.event_window); \ } else { \ /* add this peer to the array of sources */ \ @@ -183,6 +184,7 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); ch->ninfo = ninfo; \ /* reset the timer */ \ pmix_event_del(&ch->ev); \ + PMIX_POST_OBJECT(ch); \ pmix_event_add(&ch->ev, &pmix_globals.event_window); \ } \ } while(0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 426063dcef3..27b1ed78260 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -18,6 +18,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/error.h" #include "src/util/output.h" @@ -44,7 +45,22 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, { int rc; - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + + + if (PMIX_PROC_IS_SERVER) { rc = pmix_server_notify_client_of_event(status, source, range, info, ninfo, cbfunc, cbdata); @@ -103,10 +119,6 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_globals.myid.nspace, pmix_globals.myid.rank, PMIx_Error_string(status)); - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - if (PMIX_RANGE_PROC_LOCAL != range) { /* create the msg object */ msg = PMIX_NEW(pmix_buffer_t); @@ -175,12 +187,11 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, cd->source.rank = source->rank; } cd->range = range; - - /* check 
for directives */ - if (NULL != info) { + if (0 < chain->ninfo) { cd->ninfo = chain->ninfo; PMIX_INFO_CREATE(cd->info, cd->ninfo); - for (n=0; n < chain->ninfo; n++) { + /* need to copy the info */ + for (n=0; n < cd->ninfo; n++) { PMIX_INFO_XFER(&cd->info[n], &chain->info[n]); if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { cd->nondefault = true; @@ -205,6 +216,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, } } } + /* add to our cache */ rbout = pmix_ring_buffer_push(&pmix_globals.notifications, cd); /* if an older event was bumped, release it */ @@ -225,7 +237,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_output_verbose(2, pmix_globals.debug_output, "client: notifying server %s:%d - sending", pmix_globals.myid.nspace, pmix_globals.myid.rank); - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cb); @@ -254,6 +266,9 @@ static void progress_local_event_hdlr(pmix_status_t status, pmix_op_cbfunc_t cbfunc, void *thiscbdata, void *notification_cbdata) { + /* this may be in the host's thread, so we need to threadshift it + * before accessing our internal data */ + pmix_event_chain_t *chain = (pmix_event_chain_t*)notification_cbdata; size_t n, nsave, cnt; pmix_info_t *newinfo; @@ -285,6 +300,7 @@ static void progress_local_event_hdlr(pmix_status_t status, ++cnt; } } + /* save this handler's returned status */ if (NULL != chain->evhdlr->name) { (void)strncpy(newinfo[cnt].key, chain->evhdlr->name, PMIX_MAX_KEYLEN); @@ -768,6 +784,9 @@ static void _notify_client_event(int sd, short args, void *cbdata) size_t n; bool matched, holdcd; + /* need to acquire the object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix_server: _notify_error notifying 
clients of error %s", PMIx_Error_string(cd->status)); @@ -1056,6 +1075,9 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg) { pmix_event_chain_t *ch = (pmix_event_chain_t*)arg; + /* need to acquire the object from its originating thread */ + PMIX_ACQUIRE_OBJECT(ch); + ch->timer_active = false; /* remove it from the list */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 03767050182..21fcc381301 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -16,6 +16,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/error.h" #include "src/util/output.h" @@ -67,6 +68,7 @@ PMIX_CLASS_INSTANCE(pmix_rshift_caddy_t, pmix_object_t, rscon, rsdes); +static void check_cached_events(pmix_rshift_caddy_t *cd); static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) @@ -107,6 +109,11 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, if (NULL != cd && NULL != cd->evregcbfn) { cd->evregcbfn(ret, index, cd->cbdata); } + if (NULL != cd) { + /* check this event against anything in our cache */ + check_cached_events(cd); + } + /* release any info we brought along as they are * internally generated and not provided by the caller */ if (NULL!= rb->info) { @@ -148,6 +155,7 @@ static void reg_cbfunc(pmix_status_t status, void *cbdata) /* pass back our local index */ cd->evregcbfn(rc, index, cd->cbdata); } + /* release any info we brought along as they are * internally generated and not provided by the caller */ if (NULL!= rb->info) { @@ -197,7 +205,7 @@ static pmix_status_t _send_to_server(pmix_rshift_caddy_t *rcd) return rc; } } - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, regevents_cbfunc, rcd); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, regevents_cbfunc, 
rcd); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); @@ -280,7 +288,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) /* if we are a client, and we haven't already registered a handler of this * type with our server, or if we have directives, then we need to notify * the server */ - if (PMIX_PROC_SERVER != pmix_globals.proc_type && + if (!PMIX_PROC_IS_SERVER && (need_register || 0 < pmix_list_get_size(xfer))) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: _add_hdlr sending to server"); @@ -301,7 +309,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) /* if we are a server and are registering for events, then we only contact * our host if we want environmental events */ - if (PMIX_PROC_SERVER == pmix_globals.proc_type && cd->enviro && + if (PMIX_PROC_IS_SERVER && cd->enviro && NULL != pmix_host_server.register_events) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: _add_hdlr registering with server"); @@ -325,24 +333,94 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) return PMIX_SUCCESS; } +static void check_cached_events(pmix_rshift_caddy_t *cd) +{ + size_t i, n; + pmix_notify_caddy_t *ncd; + bool found, matched; + pmix_event_chain_t *chain; + + for (i=0; i < (size_t)pmix_globals.notifications.size; i++) { + if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { + continue; + } + found = false; + if (NULL == cd->codes) { + /* they registered a default event handler - always matches */ + found = true; + } else { + for (n=0; n < cd->ncodes; n++) { + if (cd->codes[n] == ncd->status) { + found = true; + break; + } + } + } + if (found) { + /* if we were given specific targets, check if we are one */ + if (NULL != ncd->targets) { + matched = false; + for (n=0; n < ncd->ntargets; n++) { + if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) { + continue; + } + if 
(PMIX_RANK_WILDCARD == ncd->targets[n].rank || + pmix_globals.myid.rank == ncd->targets[n].rank) { + matched = true; + break; + } + } + if (!matched) { + /* do not notify this one */ + continue; + } + } + /* all matches - notify */ + chain = PMIX_NEW(pmix_event_chain_t); + chain->status = ncd->status; + (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + chain->source.rank = pmix_globals.myid.rank; + /* we already left space for evhandler name plus + * a callback object when we cached the notification */ + chain->ninfo = ncd->ninfo; + PMIX_INFO_CREATE(chain->info, chain->ninfo); + if (0 < ncd->ninfo) { + /* need to copy the info */ + for (n=0; n < ncd->ninfo; n++) { + PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]); + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { + chain->nondefault = true; + } + } + } + /* we don't want this chain to propagate, so indicate it + * should only be run as a single-shot */ + chain->endchain = true; + /* now notify any matching registered callbacks we have */ + pmix_invoke_local_event_hdlr(chain); + } + } +} + static void reg_event_hdlr(int sd, short args, void *cbdata) { pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)cbdata; - size_t index = 0, n, i; + size_t index = 0, n; pmix_status_t rc; pmix_event_hdlr_t *evhdlr, *ev; uint8_t location = PMIX_EVENT_ORDER_NONE; char *name = NULL, *locator = NULL; bool firstoverall=false, lastoverall=false; - bool found, matched; + bool found; pmix_list_t xfer; pmix_info_caddy_t *ixfer; void *cbobject = NULL; pmix_data_range_t range = PMIX_RANGE_UNDEF; pmix_proc_t *parray = NULL; size_t nprocs; - pmix_notify_caddy_t *ncd; - pmix_event_chain_t *chain; + + /* need to acquire the object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cd); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register event_hdlr with %d infos", (int)cd->ninfo); @@ -677,63 +755,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } /* check 
if any matching notifications have been cached */ - for (i=0; i < (size_t)pmix_globals.notifications.size; i++) { - if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { - break; - } - found = false; - if (NULL == cd->codes) { - /* they registered a default event handler - always matches */ - found = true; - } else { - for (n=0; n < cd->ncodes; n++) { - if (cd->codes[n] == ncd->status) { - found = true; - break; - } - } - } - if (found) { - /* if we were given specific targets, check if we are one */ - if (NULL != ncd->targets) { - matched = false; - for (n=0; n < ncd->ntargets; n++) { - if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == ncd->targets[n].rank || - pmix_globals.myid.rank == ncd->targets[n].rank) { - matched = true; - break; - } - } - if (!matched) { - /* do not notify this one */ - continue; - } - } - /* all matches - notify */ - chain = PMIX_NEW(pmix_event_chain_t); - chain->status = ncd->status; - (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); - chain->source.rank = pmix_globals.myid.rank; - /* we already left space for evhandler name plus - * a callback object when we cached the notification */ - chain->ninfo = ncd->ninfo; - PMIX_INFO_CREATE(chain->info, chain->ninfo); - if (0 < cd->ninfo) { - /* need to copy the info */ - for (n=0; n < ncd->ninfo; n++) { - PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]); - } - } - /* we don't want this chain to propagate, so indicate it - * should only be run as a single-shot */ - chain->endchain = true; - /* now notify any matching registered callbacks we have */ - pmix_invoke_local_event_hdlr(chain); - } - } + check_cached_events(cd); /* all done */ PMIX_RELEASE(cd); @@ -747,6 +769,17 @@ PMIX_EXPORT void PMIx_Register_event_handler(pmix_status_t codes[], size_t ncode { pmix_rshift_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + + if 
(pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, 0, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to thread shift this request so we can access * our global data to register this *local* event handler */ cd = PMIX_NEW(pmix_rshift_caddy_t); @@ -775,9 +808,12 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) size_t n; pmix_active_code_t *active; + /* need to acquire the object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cd); + /* if I am not the server, then I need to notify the server * to remove my registration */ - if (PMIX_PROC_SERVER != pmix_globals.proc_type) { + if (!PMIX_PROC_IS_SERVER) { msg = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { PMIX_RELEASE(msg); @@ -922,7 +958,7 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) report: if (NULL != msg) { /* send to the server */ - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, NULL, NULL); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, NULL, NULL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } @@ -942,6 +978,16 @@ PMIX_EXPORT void PMIx_Deregister_event_handler(size_t event_hdlr_ref, { pmix_shift_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to thread shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); cd->cbfunc.opcbfn = cbfunc; diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c index 5dfbcd4d72a..85882d3e2fa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c @@ -40,10 +40,17 @@ #include "src/buffer_ops/types.h" #include 
"src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" +#include "src/threads/threads.h" + +pmix_lock_t pmix_global_lock = { + .mutex = PMIX_MUTEX_STATIC_INIT, + .cond = PMIX_CONDITION_STATIC_INIT, + .active = false +}; static void cbcon(pmix_cb_t *p) { - p->active = false; + PMIX_CONSTRUCT_LOCK(&p->lock); p->checked = false; PMIX_CONSTRUCT(&p->data, pmix_buffer_t); p->cbfunc = NULL; @@ -63,6 +70,7 @@ static void cbcon(pmix_cb_t *p) } static void cbdes(pmix_cb_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); PMIX_DESTRUCT(&p->data); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cb_t, @@ -220,7 +228,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_rank_info_t, static void scon(pmix_shift_caddy_t *p) { - p->active = false; + PMIX_CONSTRUCT_LOCK(&p->lock); p->codes = NULL; p->ncodes = 0; p->nspace = NULL; @@ -242,6 +250,7 @@ static void scon(pmix_shift_caddy_t *p) } static void scdes(pmix_shift_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); if (NULL != p->kv) { PMIX_RELEASE(p->kv); } @@ -256,6 +265,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_info_caddy_t, static void qcon(pmix_query_caddy_t *p) { + PMIX_CONSTRUCT_LOCK(&p->lock); p->queries = NULL; p->nqueries = 0; p->targets = NULL; @@ -266,9 +276,13 @@ static void qcon(pmix_query_caddy_t *p) p->cbdata = NULL; p->relcbfunc = NULL; } -PMIX_CLASS_INSTANCE(pmix_query_caddy_t, - pmix_object_t, - qcon, NULL); +static void qdes(pmix_query_caddy_t *p) +{ + PMIX_DESTRUCT_LOCK(&p->lock); +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t, + pmix_object_t, + qcon, qdes); static void jdcon(pmix_job_data_caddy_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index 0e5548f7336..5cf9886a5f2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -38,7 +38,7 @@ #include "src/class/pmix_list.h" #include "src/class/pmix_ring_buffer.h" #include "src/event/pmix_event.h" - +#include 
"src/threads/threads.h" #include "src/mca/psec/psec.h" #include "src/mca/ptl/ptl.h" @@ -214,7 +214,7 @@ PMIX_CLASS_DECLARATION(pmix_server_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; pmix_status_t status; pmix_query_t *queries; size_t nqueries; @@ -234,7 +234,7 @@ typedef struct { pmix_cmd_t type; pmix_proc_t *pcs; // copy of the original array of participants size_t npcs; // number of procs in the array - volatile bool active; // flag for waiting for completion + pmix_lock_t lock; // flag for waiting for completion bool def_complete; // all local procs have been registered and the trk definition is complete pmix_list_t ranks; // list of pmix_rank_info_t of the local participants pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants @@ -271,7 +271,7 @@ PMIX_CLASS_DECLARATION(pmix_job_data_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; pmix_status_t status; pmix_status_t *codes; size_t ncodes; @@ -305,7 +305,7 @@ PMIX_CLASS_DECLARATION(pmix_shift_caddy_t); typedef struct { pmix_list_item_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; bool checked; int status; pmix_status_t pstatus; @@ -340,9 +340,9 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); #define PMIX_THREADSHIFT(r, c) \ do { \ - (r)->active = true; \ pmix_event_assign(&((r)->ev), pmix_globals.evbase, \ -1, EV_WRITE, (c), (r)); \ + PMIX_POST_OBJECT((r)); \ pmix_event_active(&((r)->ev), EV_WRITE, 1); \ } while (0) @@ -352,8 +352,27 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); while ((a)) { \ usleep(10); \ } \ + PMIX_ACQUIRE_OBJECT((a)); \ } while (0) +typedef struct { + pmix_object_t super; + pmix_event_t ev; + pmix_lock_t lock; + pmix_status_t status; + pmix_proc_t source; + pmix_data_range_t range; + pmix_proc_t *targets; + size_t ntargets; + bool nondefault; + pmix_info_t *info; + size_t ninfo; + pmix_buffer_t *buf; 
+ pmix_op_cbfunc_t cbfunc; + void *cbdata; +} pmix_notify_caddy_t; +PMIX_CLASS_DECLARATION(pmix_notify_caddy_t); + /**** GLOBAL STORAGE ****/ /* define a global construct that includes values that must be shared @@ -382,6 +401,7 @@ typedef struct { PMIX_EXPORT extern pmix_globals_t pmix_globals; +PMIX_EXPORT extern pmix_lock_t pmix_global_lock; END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h b/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h index 78fc3f49aeb..09e581028e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +20,8 @@ #ifndef PMIX_PREFETCH_H #define PMIX_PREFETCH_H +#include + #if PMIX_C_HAVE_BUILTIN_EXPECT #define PMIX_LIKELY(expression) __builtin_expect(!!(expression), 1) #define PMIX_UNLIKELY(expression) __builtin_expect(!!(expression), 0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c index 5280c640e12..e93bb88d039 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c @@ -158,6 +158,8 @@ static void add_tracker(int sd, short flags, void *cbdata) { file_tracker_t *ft = (file_tracker_t*)cbdata; + PMIX_ACQUIRE_OBJECT(ft); + /* add the tracker to our list */ pmix_list_append(&mca_psensor_file_component.trackers, &ft->super); @@ -221,6 +223,7 @@ static pmix_status_t start(pmix_peer_t *requestor, pmix_status_t error, /* need to push into our event base to add this to our trackers */ pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1, EV_WRITE, add_tracker, ft); + PMIX_POST_OBJECT(ft); 
pmix_event_active(&ft->cdev, EV_WRITE, 1); return PMIX_SUCCESS; @@ -232,6 +235,8 @@ static void del_tracker(int sd, short flags, void *cbdata) file_caddy_t *cd = (file_caddy_t*)cbdata; file_tracker_t *ft, *ftnext; + PMIX_ACQUIRE_OBJECT(cd); + /* remove the tracker from our list */ PMIX_LIST_FOREACH_SAFE(ft, ftnext, &mca_psensor_file_component.trackers, file_tracker_t) { if (ft->requestor != cd->requestor) { @@ -258,6 +263,7 @@ static pmix_status_t stop(pmix_peer_t *requestor, char *id) /* need to push into our event base to add this to our trackers */ pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1, EV_WRITE, del_tracker, cd); + PMIX_POST_OBJECT(cd); pmix_event_active(&cd->ev, EV_WRITE, 1); return PMIX_SUCCESS; @@ -277,6 +283,8 @@ static void file_sample(int sd, short args, void *cbdata) pmix_status_t rc; pmix_proc_t source; + PMIX_ACQUIRE_OBJECT(ft); + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sampling file %s", pmix_globals.myid.nspace, pmix_globals.myid.rank, diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c index 7445ceb8d89..3147cfd738d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c @@ -150,6 +150,8 @@ static void add_tracker(int sd, short flags, void *cbdata) { pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata; + PMIX_ACQUIRE_OBJECT(ft); + /* add the tracker to our list */ pmix_list_append(&mca_psensor_heartbeat_component.trackers, &ft->super); @@ -203,6 +205,7 @@ static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error /* need to push into our event base to add this to our trackers */ pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1, EV_WRITE, add_tracker, ft); + PMIX_POST_OBJECT(ft); pmix_event_active(&ft->cdev, EV_WRITE, 1); return PMIX_SUCCESS; @@ 
-213,6 +216,8 @@ static void del_tracker(int sd, short flags, void *cbdata) heartbeat_caddy_t *cd = (heartbeat_caddy_t*)cbdata; pmix_heartbeat_trkr_t *ft, *ftnext; + PMIX_ACQUIRE_OBJECT(cd); + /* remove the tracker from our list */ PMIX_LIST_FOREACH_SAFE(ft, ftnext, &mca_psensor_heartbeat_component.trackers, pmix_heartbeat_trkr_t) { if (ft->requestor != cd->requestor) { @@ -239,6 +244,7 @@ static pmix_status_t heartbeat_stop(pmix_peer_t *requestor, char *id) /* need to push into our event base to add this to our trackers */ pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1, EV_WRITE, del_tracker, cd); + PMIX_POST_OBJECT(cd); pmix_event_active(&cd->ev, EV_WRITE, 1); return PMIX_SUCCESS; @@ -261,6 +267,8 @@ static void check_heartbeat(int fd, short dummy, void *cbdata) pmix_status_t rc; pmix_proc_t source; + PMIX_ACQUIRE_OBJECT(ft); + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sensor:check_heartbeat for proc %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, @@ -301,6 +309,8 @@ static void add_beat(int sd, short args, void *cbdata) pmix_psensor_beat_t *b = (pmix_psensor_beat_t*)cbdata; pmix_heartbeat_trkr_t *ft; + PMIX_ACQUIRE_OBJECT(b); + /* find this peer in our trackers */ PMIX_LIST_FOREACH(ft, &mca_psensor_heartbeat_component.trackers, pmix_heartbeat_trkr_t) { if (ft->requestor == b->peer) { @@ -326,5 +336,6 @@ void pmix_psensor_heartbeat_recv_beats(struct pmix_peer_t *peer, /* shift this to our thread for processing */ pmix_event_assign(&b->ev, pmix_psensor_base.evbase, -1, EV_WRITE, add_beat, b); + PMIX_POST_OBJECT(b); pmix_event_active(&b->ev, EV_WRITE, 1); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index c17029d46f8..fbcf19cb022 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -82,9 +82,11 @@ static pmix_status_t 
pmix_ptl_close(void) /* ensure the listen thread has been shut down */ pmix_ptl.stop_listening(); - if (0 <= pmix_client_globals.myserver.sd) { - CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd); - pmix_client_globals.myserver.sd = -1; + if (NULL != pmix_client_globals.myserver) { + if (0 <= pmix_client_globals.myserver->sd) { + CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd); + pmix_client_globals.myserver->sd = -1; + } } /* the components will cleanup when closed */ @@ -105,7 +107,6 @@ static pmix_status_t pmix_ptl_open(pmix_mca_base_open_flag_t flags) PMIX_CONSTRUCT(&pmix_ptl_globals.unexpected_msgs, pmix_list_t); pmix_ptl_globals.listen_thread_active = false; PMIX_CONSTRUCT(&pmix_ptl_globals.listeners, pmix_list_t); - pmix_client_globals.myserver.sd = -1; /* Open up all available components */ return pmix_mca_base_framework_components_open(&pmix_ptl_base_framework, flags); @@ -142,6 +143,7 @@ PMIX_CLASS_INSTANCE(pmix_ptl_send_t, static void rcon(pmix_ptl_recv_t *p) { + p->peer = NULL; memset(&p->hdr, 0, sizeof(pmix_ptl_hdr_t)); p->hdr.tag = UINT32_MAX; p->hdr.nbytes = 0; @@ -150,9 +152,15 @@ static void rcon(pmix_ptl_recv_t *p) p->rdptr = NULL; p->rdbytes = 0; } +static void rdes(pmix_ptl_recv_t *p) +{ + if (NULL != p->peer) { + PMIX_RELEASE(p->peer); + } +} PMIX_CLASS_INSTANCE(pmix_ptl_recv_t, pmix_list_item_t, - rcon, NULL); + rcon, rdes); static void prcon(pmix_ptl_posted_recv_t *p) { @@ -167,13 +175,20 @@ PMIX_CLASS_INSTANCE(pmix_ptl_posted_recv_t, static void srcon(pmix_ptl_sr_t *p) { + p->peer = NULL; p->bfr = NULL; p->cbfunc = NULL; p->cbdata = NULL; } +static void srdes(pmix_ptl_sr_t *p) +{ + if (NULL != p->peer) { + PMIX_RELEASE(p->peer); + } +} PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_ptl_sr_t, pmix_object_t, - srcon, NULL); + srcon, srdes); static void pccon(pmix_pending_connection_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_listener.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_listener.c index 
901679ee307..94decd2e0b7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_listener.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_listener.c @@ -284,6 +284,8 @@ static void* listen_thread(void *obj) pmix_output_verbose(8, pmix_globals.debug_output, "listen_thread: new connection: (%d, %d)", pending_connection->sd, pmix_socket_errno); + /* post the object */ + PMIX_POST_OBJECT(pending_connection); /* activate the event */ pmix_event_active(&pending_connection->ev, EV_WRITE, 1); accepted_connections++; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c index cee50a0325a..5e70a07ac2e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -70,7 +70,7 @@ int pmix_ptl_base_select(void) PMIX_LIST_FOREACH(active, &pmix_ptl_globals.actives, pmix_ptl_base_active_t) { if (newactive->pri > active->pri) { pmix_list_insert_pos(&pmix_ptl_globals.actives, - (pmix_list_item_t*)active, &newactive->super); + &active->super, &newactive->super); inserted = true; break; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index 5301d8a0216..350c4d81bda 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -173,7 +173,7 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) PMIX_DESTRUCT(&buf); /* if I called finalize, then don't generate an event */ if (!pmix_globals.mypeer->finalized) { - PMIX_REPORT_EVENT(err, &pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete); + PMIX_REPORT_EVENT(err, pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete); } } } @@ -183,6 +183,7 @@ static pmix_status_t send_msg(int sd, pmix_ptl_send_t *msg) struct iovec iov[2]; int iov_count; ssize_t remain = msg->sdbytes, rc; + iov[0].iov_base = msg->sdptr; iov[0].iov_len = msg->sdbytes; if (!msg->hdr_sent && NULL != msg->data) { @@ -297,7 +298,7 @@ static pmix_status_t read_bytes(int sd, char **buf, size_t *remain) ptr += rc; } /* we read the full data block */ -exit: + exit: *buf = ptr; return ret; } @@ -312,8 +313,12 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) pmix_ptl_send_t *msg = peer->send_msg; pmix_status_t rc; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(peer); + pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg", + "%s:%d ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg", + pmix_globals.myid.nspace, pmix_globals.myid.rank, peer->info->nptr->nspace, 
peer->info->rank, (NULL == msg) ? UINT_MAX : ntohl(msg->hdr.tag), (NULL == msg) ? "NULL" : "NON-NULL"); @@ -332,14 +337,24 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) /* exit this event and let the event lib progress */ pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:send_handler RES BUSY OR WOULD BLOCK"); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else { + pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d SEND ERROR %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + PMIx_Error_string(rc)); // report the error pmix_event_del(&peer->send_event); peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; lost_connection(peer, rc); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } @@ -358,6 +373,9 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) pmix_event_del(&peer->send_event); peer->send_ev_active = false; } + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); } /* @@ -374,8 +392,12 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) size_t nbytes; char *ptr; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(peer); + pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:recv:handler called with peer %s:%d", + "%s:%d ptl:base:recv:handler called with peer %s:%d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (NULL == peer) ? "NULL" : peer->info->nptr->nspace, (NULL == peer) ? 
PMIX_RANK_UNDEF : peer->info->rank); @@ -391,6 +413,7 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) pmix_output(0, "sptl:base:recv_handler: unable to allocate recv message\n"); goto err_close; } + PMIX_RETAIN(peer); peer->recv_msg->peer = peer; // provide a handle back to the peer object /* start by reading the header */ peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr; @@ -424,6 +447,11 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg->data = NULL; // make sure peer->recv_msg->rdptr = NULL; peer->recv_msg->rdbytes = 0; + /* post it for delivery */ + PMIX_ACTIVATE_POST_MSG(peer->recv_msg); + peer->recv_msg = NULL; + PMIX_POST_OBJECT(peer); + return; } else { pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:recv:handler allocate data region of size %lu", @@ -445,7 +473,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) * and let the caller know */ pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:msg_recv: peer closed connection"); + "ptl:base:msg_recv: peer %s:%d closed connection", + peer->info->nptr->nspace, peer->info->rank); goto err_close; } } @@ -458,29 +487,39 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) { /* we recvd all of the message */ pmix_output_verbose(2, pmix_globals.debug_output, - "RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d", + "%s:%d RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (int)peer->recv_msg->hdr.nbytes, peer->recv_msg->hdr.tag, peer->sd); /* post it for delivery */ PMIX_ACTIVATE_POST_MSG(peer->recv_msg); peer->recv_msg = NULL; + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else if (PMIX_ERR_RESOURCE_BUSY == rc || PMIX_ERR_WOULD_BLOCK == rc) 
{ /* exit this event and let the event lib progress */ + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else { /* the remote peer closed the connection - report that condition * and let the caller know */ pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:msg_recv: peer closed connection"); + "%s:%d ptl:base:msg_recv: peer %s:%d closed connection", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + peer->info->nptr->nspace, peer->info->rank); goto err_close; } } /* success */ return; - err_close: + + err_close: /* stop all events */ if (peer->recv_ev_active) { pmix_event_del(&peer->recv_event); @@ -495,6 +534,9 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg = NULL; } lost_connection(peer, PMIX_ERR_UNREACH); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); } void pmix_ptl_base_send(int sd, short args, void *cbdata) @@ -502,10 +544,16 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) pmix_ptl_queue_t *queue = (pmix_ptl_queue_t*)cbdata; pmix_ptl_send_t *snd; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(queue); + if (NULL == queue->peer || queue->peer->sd < 0 || NULL == queue->peer->info || NULL == queue->peer->info->nptr) { /* this peer has lost connection */ PMIX_RELEASE(queue); + /* ensure we post the object before another thread + * picks it back up */ + PMIX_POST_OBJECT(queue); return; } @@ -533,10 +581,12 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) } /* ensure the send event is active */ if (!(queue->peer)->send_ev_active) { - pmix_event_add(&(queue->peer)->send_event, 0); (queue->peer)->send_ev_active = true; + PMIX_POST_OBJECT(queue->peer); + pmix_event_add(&(queue->peer)->send_event, 0); } PMIX_RELEASE(queue); + PMIX_POST_OBJECT(snd); } void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) @@ -546,9 +596,15 @@ void 
pmix_ptl_base_send_recv(int fd, short args, void *cbdata) pmix_ptl_send_t *snd; uint32_t tag; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(ms); + if (ms->peer->sd < 0) { /* this peer's socket has been closed */ PMIX_RELEASE(ms); + /* ensure we post the object before another thread + * picks it back up */ + PMIX_POST_OBJECT(NULL); return; } @@ -565,6 +621,7 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) req->tag = tag; req->cbfunc = ms->cbfunc; req->cbdata = ms->cbdata; + pmix_output_verbose(5, pmix_globals.debug_output, "posting recv on tag %d", req->tag); /* add it to the list of recvs - we cannot have unexpected messages @@ -594,11 +651,13 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) } /* ensure the send event is active */ if (!ms->peer->send_ev_active) { - pmix_event_add(&ms->peer->send_event, 0); ms->peer->send_ev_active = true; + PMIX_POST_OBJECT(snd); + pmix_event_add(&ms->peer->send_event, 0); } /* cleanup */ PMIX_RELEASE(ms); + PMIX_POST_OBJECT(snd); } void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) @@ -607,8 +666,12 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) pmix_ptl_posted_recv_t *rcv; pmix_buffer_t buf; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(msg); + pmix_output_verbose(5, pmix_globals.debug_output, - "message received %d bytes for tag %u on socket %d", + "%s:%d message received %d bytes for tag %u on socket %d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (int)msg->hdr.nbytes, msg->hdr.tag, msg->sd); /* see if we have a waiting recv for this message */ @@ -628,7 +691,14 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used; } msg->data = NULL; // protect the data region + pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d EXECUTE CALLBACK for tag %u", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + msg->hdr.tag); rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata); + 
pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d CALLBACK COMPLETE", + pmix_globals.myid.nspace, pmix_globals.myid.rank); PMIX_DESTRUCT(&buf); // free's the msg data } /* done with the recv if it is a dynamic tag */ @@ -653,4 +723,7 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) /* it is possible that someone may post a recv for this message * at some point, so we have to hold onto it */ pmix_list_append(&pmix_ptl_globals.unexpected_msgs, &msg->super); + /* ensure we post the modified object before another thread + * picks it back up */ + PMIX_POST_OBJECT(msg); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h index e5571c35dbe..55e617690aa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h @@ -145,6 +145,7 @@ PMIX_CLASS_DECLARATION(pmix_ptl_sr_t); typedef struct { pmix_object_t super; + volatile bool active; pmix_event_t ev; struct pmix_peer_t *peer; pmix_buffer_t *buf; @@ -205,6 +206,7 @@ PMIX_CLASS_DECLARATION(pmix_listener_t); __FILE__, __LINE__); \ pmix_event_assign(&((ms)->ev), pmix_globals.evbase, -1, \ EV_WRITE, pmix_ptl_base_process_msg, (ms)); \ + PMIX_POST_OBJECT(ms); \ pmix_event_active(&((ms)->ev), EV_WRITE, 1); \ } while (0) @@ -247,8 +249,9 @@ PMIX_CLASS_DECLARATION(pmix_listener_t); } \ /* ensure the send event is active */ \ if (!(p)->send_ev_active && 0 <= (p)->sd) { \ - pmix_event_add(&(p)->send_event, 0); \ (p)->send_ev_active = true; \ + PMIX_POST_OBJECT(snd); \ + pmix_event_add(&(p)->send_event, 0); \ } \ } while (0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c index 2a089d8457b..8c962c0fd52 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -123,7 +123,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, /* if I am a 
client, then we need to look for the appropriate * connection info in the environment */ - if (PMIX_PROC_CLIENT == pmix_globals.proc_type) { + if (PMIX_PROC_IS_CLIENT) { if (NULL == (evar = getenv("PMIX_SERVER_URI2"))) { /* not us */ return PMIX_ERR_NOT_SUPPORTED; @@ -149,12 +149,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } *p2 = '\0'; ++p2; - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, p, PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, p, PMIX_MAX_NSLEN); /* set the server rank */ - pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10); + pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10); /* save the URI, but do not overwrite what we may have received from * the info-key directives */ @@ -163,7 +163,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } pmix_argv_free(uri); - } else if (PMIX_PROC_TOOL == pmix_globals.proc_type) { + } else if (PMIX_PROC_IS_TOOL) { /* if we already have a URI, then look no further */ if (NULL == mca_ptl_tcp_component.super.uri) { /* we have to discover the connection info, @@ -208,19 +208,16 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, *p2 = '\0'; ++p2; /* set the server nspace */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, srvr, PMIX_MAX_NSLEN); - pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + 
(void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, srvr, PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10); /* now parse the uti itself */ mca_ptl_tcp_component.super.uri = strdup(p); free(srvr); } } - /* mark that we are the active module for this server */ - pmix_client_globals.myserver.compat.ptl = &pmix_ptl_tcp_module; - /* setup the path to the daemon rendezvous point */ memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage)); if (0 == strncmp(mca_ptl_tcp_component.super.uri, "tcp4", 4)) { @@ -285,7 +282,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, PMIX_ERROR_LOG(rc); return rc; } - pmix_client_globals.myserver.sd = sd; + pmix_client_globals.myserver->sd = sd; /* send our identity and any authentication credentials to the server */ if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) { @@ -310,21 +307,22 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_ptl_base_set_nonblocking(sd); /* setup recv event */ - pmix_event_assign(&pmix_client_globals.myserver.recv_event, + pmix_event_assign(&pmix_client_globals.myserver->recv_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_READ | EV_PERSIST, - pmix_ptl_base_recv_handler, &pmix_client_globals.myserver); - pmix_event_add(&pmix_client_globals.myserver.recv_event, 0); - pmix_client_globals.myserver.recv_ev_active = true; + pmix_ptl_base_recv_handler, pmix_client_globals.myserver); + pmix_client_globals.myserver->recv_ev_active = true; + PMIX_POST_OBJECT(pmix_client_globals.myserver); + pmix_event_add(&pmix_client_globals.myserver->recv_event, 0); /* setup send event */ - pmix_event_assign(&pmix_client_globals.myserver.send_event, + pmix_event_assign(&pmix_client_globals.myserver->send_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_WRITE|EV_PERSIST, - pmix_ptl_base_send_handler, 
&pmix_client_globals.myserver); - pmix_client_globals.myserver.send_ev_active = false; + pmix_ptl_base_send_handler, pmix_client_globals.myserver); + pmix_client_globals.myserver->send_ev_active = false; return PMIX_SUCCESS; } @@ -335,19 +333,19 @@ static pmix_status_t send_recv(struct pmix_peer_t *peer, void *cbdata) { pmix_ptl_sr_t *ms; + pmix_peer_t *pr = (pmix_peer_t*)peer; pmix_output_verbose(5, pmix_globals.debug_output, "[%s:%d] post send to server", __FILE__, __LINE__); ms = PMIX_NEW(pmix_ptl_sr_t); - ms->peer = peer; + PMIX_RETAIN(pr); + ms->peer = pr; ms->bfr = bfr; ms->cbfunc = cbfunc; ms->cbdata = cbdata; - pmix_event_assign(&ms->ev, pmix_globals.evbase, -1, - EV_WRITE, pmix_ptl_base_send_recv, ms); - pmix_event_active(&ms->ev, EV_WRITE, 1); + PMIX_THREADSHIFT(ms, pmix_ptl_base_send_recv); return PMIX_SUCCESS; } @@ -363,13 +361,10 @@ static pmix_status_t send_oneway(struct pmix_peer_t *peer, * peer's send queue */ q = PMIX_NEW(pmix_ptl_queue_t); PMIX_RETAIN(pr); - q->peer = peer; + q->peer = pr; q->buf = bfr; q->tag = tag; - pmix_event_assign(&q->ev, pmix_globals.evbase, -1, - EV_WRITE, pmix_ptl_base_send, q); - pmix_event_active(&q->ev, EV_WRITE, 1); - + PMIX_THREADSHIFT(q, pmix_ptl_base_send); return PMIX_SUCCESS; } @@ -406,7 +401,7 @@ static pmix_status_t send_connect_ack(int sd) * local PMIx server, if known. Now use that module to * get a credential, if the security system provides one. 
Not * every psec module will do so, thus we must first check */ - if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(&pmix_client_globals.myserver, + if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver, PMIX_PROTOCOL_V2, &cred, &len))) { return rc; } @@ -554,7 +549,7 @@ static pmix_status_t recv_connect_ack(int sd) if (PMIX_PROC_IS_CLIENT) { /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return rc; } } else if (PMIX_SUCCESS != reply) { @@ -591,16 +586,16 @@ static pmix_status_t recv_connect_ack(int sd) pmix_globals.myid.rank = 0; /* get the server's nspace and rank so we can send to it */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver.info->nptr->nspace, PMIX_MAX_NSLEN+1); - pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver.info->rank), sizeof(int)); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver->info->nptr->nspace, PMIX_MAX_NSLEN+1); + pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver->info->rank), sizeof(int)); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - pmix_client_globals.myserver.info->nptr->nspace, - pmix_client_globals.myserver.info->rank); + pmix_client_globals.myserver->info->nptr->nspace, + pmix_client_globals.myserver->info->rank); /* get the returned status from the security handshake */ pmix_ptl_base_recv_blocking(sd, 
(char*)&reply, sizeof(pmix_status_t)); @@ -610,7 +605,7 @@ static pmix_status_t recv_connect_ack(int sd) if (NULL == pmix_psec.client_handshake) { return PMIX_ERR_HANDSHAKE_FAILED; } - if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return reply; } /* if the handshake succeeded, then fall thru to the next step */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index b20f817c213..b85fdb23c23 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -687,6 +687,9 @@ static void connection_handler(int sd, short args, void *cbdata) pmix_rank_info_t *info; pmix_proc_t proc; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(pnd); + pmix_output_verbose(8, pmix_ptl_base_framework.framework_output, "ptl:tcp:connection_handler: new connection: %d", pnd->sd); @@ -717,7 +720,7 @@ static void connection_handler(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); return; } - if (PMIX_SUCCESS != pmix_ptl_base_recv_blocking(pnd->sd, msg, hdr.nbytes)) { + if (PMIX_SUCCESS != (rc = pmix_ptl_base_recv_blocking(pnd->sd, msg, hdr.nbytes))) { /* unable to complete the recv */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "ptl:tcp:connection_handler unable to complete recv of connect-ack with client ON SOCKET %d", @@ -972,7 +975,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* tell the client all is good */ u32 = htonl(PMIX_SUCCESS); - if (PMIX_SUCCESS != pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t))) { + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { PMIX_ERROR_LOG(rc); info->proc_cnt--; PMIX_RELEASE(info); @@ -1024,7 +1027,8 @@ static void 
connection_handler(int sd, short args, void *cbdata) error: /* send an error reply to the client */ - if (PMIX_SUCCESS != pmix_ptl_base_send_blocking(pnd->sd, (char*)&rc, sizeof(int))) { + u32 = htonl(rc); + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(int)))) { PMIX_ERROR_LOG(rc); CLOSE_THE_SOCKET(pnd->sd); } @@ -1042,6 +1046,9 @@ static void process_cbfunc(int sd, short args, void *cbdata) int rc; uint32_t u32; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(cd); + /* send this status so they don't hang */ u32 = ntohl(cd->status); if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c index 95d8342e05f..0a090bb51de 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c @@ -116,12 +116,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } /* set the server nspace */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, uri[0], PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, uri[0], PMIX_MAX_NSLEN); /* set the server rank */ - pmix_client_globals.myserver.info->rank = strtoull(uri[1], NULL, 10); + pmix_client_globals.myserver->info->rank = strtoull(uri[1], NULL, 10); /* setup the path to the daemon rendezvous point */ memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage)); @@ -141,7 +141,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, PMIX_ERROR_LOG(rc); return rc; } - pmix_client_globals.myserver.sd = sd; + 
pmix_client_globals.myserver->sd = sd; /* send our identity and any authentication credentials to the server */ if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) { @@ -164,21 +164,21 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_ptl_base_set_nonblocking(sd); /* setup recv event */ - pmix_event_assign(&pmix_client_globals.myserver.recv_event, + pmix_event_assign(&pmix_client_globals.myserver->recv_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_READ | EV_PERSIST, pmix_ptl_base_recv_handler, &pmix_client_globals.myserver); - pmix_event_add(&pmix_client_globals.myserver.recv_event, 0); - pmix_client_globals.myserver.recv_ev_active = true; + pmix_event_add(&pmix_client_globals.myserver->recv_event, 0); + pmix_client_globals.myserver->recv_ev_active = true; /* setup send event */ - pmix_event_assign(&pmix_client_globals.myserver.send_event, + pmix_event_assign(&pmix_client_globals.myserver->send_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_WRITE|EV_PERSIST, pmix_ptl_base_send_handler, &pmix_client_globals.myserver); - pmix_client_globals.myserver.send_ev_active = false; + pmix_client_globals.myserver->send_ev_active = false; return PMIX_SUCCESS; } @@ -199,9 +199,7 @@ static pmix_status_t send_recv(struct pmix_peer_t *peer, ms->bfr = bfr; ms->cbfunc = cbfunc; ms->cbdata = cbdata; - pmix_event_assign(&ms->ev, pmix_globals.evbase, -1, - EV_WRITE, pmix_ptl_base_send_recv, ms); - pmix_event_active(&ms->ev, EV_WRITE, 1); + PMIX_THREADSHIFT(ms, pmix_ptl_base_send_recv); return PMIX_SUCCESS; } @@ -220,9 +218,7 @@ static pmix_status_t send_oneway(struct pmix_peer_t *peer, q->peer = peer; q->buf = bfr; q->tag = tag; - pmix_event_assign(&q->ev, pmix_globals.evbase, -1, - EV_WRITE, pmix_ptl_base_send, q); - pmix_event_active(&q->ev, EV_WRITE, 1); + PMIX_THREADSHIFT(q, pmix_ptl_base_send); return PMIX_SUCCESS; } @@ -248,7 +244,7 @@ static 
pmix_status_t send_connect_ack(int sd) /* get a credential, if the security system provides one. Not * every SPC will do so, thus we must first check */ - if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(&pmix_client_globals.myserver, + if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver, PMIX_PROTOCOL_V1, &cred, &len))) { return rc; } @@ -335,7 +331,7 @@ static pmix_status_t recv_connect_ack(int sd) /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return rc; } } else if (PMIX_SUCCESS != reply) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c index ed302d77b0b..8f363be4272 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -412,6 +412,9 @@ static void connection_handler(int sd, short args, void *cbdata) pmix_proc_t proc; size_t len; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(pnd); + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "USOCK CONNECTION FROM PEER ON SOCKET %d", pnd->sd); diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c index efa32eaa6b3..df0af87c280 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c @@ -106,7 +106,6 @@ static void* progress_engine(pmix_object_t *obj) pmix_progress_tracker_t *trk = (pmix_progress_tracker_t*)t->t_arg; while (trk->ev_active) { - pmix_event_loop(trk->ev_base, PMIX_EVLOOP_ONCE); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c 
b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index bcfe3a2c7e9..94bc36c4fe1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -141,9 +141,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, NULL }; - if (0 < pmix_globals.init_cntr) { - return PMIX_SUCCESS; - } + PMIX_ACQUIRE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server init called"); @@ -152,15 +150,18 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, * opens and initializes the required frameworks */ if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_SERVER, info, ninfo, NULL))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } if (0 != (rc = initialize_server_base(module))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } #endif /* PMIX_ENABLE_DSTORE */ @@ -174,7 +175,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) { pmix_show_help("help-pmix-server.txt", "listener-thread-start", true); - PMIx_server_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } @@ -204,7 +205,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, kv.key = NULL; kv.value = NULL; PMIX_DESTRUCT(&kv); - PMIx_server_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } } @@ -220,7 +221,14 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, /* get our available ptl modules */ ptl_mode = pmix_ptl.get_available_modules(); + /* just in case, assign our own default modules */ + if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) { + 
PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + ++pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } @@ -230,11 +238,19 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) int i; pmix_peer_t *peer; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; + PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server finalize called"); @@ -298,6 +314,8 @@ static void _register_nspace(int sd, short args, void *cbdata) int32_t cnt; #endif + PMIX_ACQUIRE_OBJECT(caddy); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _register_nspace %s", cd->proc.nspace); @@ -498,6 +516,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const char nspace[], int n { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + cd = PMIX_NEW(pmix_setup_caddy_t); (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); cd->nlocalprocs = nlocalprocs; @@ -521,6 +546,8 @@ static void _deregister_nspace(int sd, short args, void *cbdata) pmix_nspace_t *tmp; pmix_status_t rc = PMIX_SUCCESS; + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _deregister_nspace %s", cd->proc.nspace); @@ -558,6 +585,16 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[], "pmix:server deregister nspace %s", nspace); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + 
PMIX_RELEASE_THREAD(&pmix_global_lock); + cd = PMIX_NEW(pmix_setup_caddy_t); (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); cd->opcbfunc = cbfunc; @@ -578,6 +615,8 @@ void pmix_server_execute_collective(int sd, short args, void *cbdata) pmix_rank_info_t *info; pmix_value_t *val; + PMIX_ACQUIRE_OBJECT(tcd); + /* we don't need to check for non-NULL APIs here as * that was already done when the tracker was created */ if (PMIX_FENCENB_CMD == trk->type) { @@ -659,6 +698,8 @@ static void _register_client(int sd, short args, void *cbdata) bool all_def; size_t i; + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _register_client for nspace %s rank %d", cd->proc.nspace, cd->proc.rank); @@ -772,6 +813,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server register client %s:%d", proc->nspace, proc->rank); @@ -797,6 +845,8 @@ static void _deregister_client(int sd, short args, void *cbdata) pmix_rank_info_t *info; pmix_nspace_t *nptr, *tmp; + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _deregister_client for nspace %s rank %d", cd->proc.nspace, cd->proc.rank); @@ -834,6 +884,16 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server deregister client %s:%d", proc->nspace, proc->rank); @@ -856,6 +916,13 @@ PMIX_EXPORT pmix_status_t 
PMIx_server_setup_fork(const pmix_proc_t *proc, char * pmix_listener_t *lt; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server setup_fork for nspace %s rank %d", proc->nspace, proc->rank); @@ -910,6 +977,8 @@ static void _dmodex_req(int sd, short args, void *cbdata) pmix_dmdx_remote_t *dcd; pmix_status_t rc; + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "DMODX LOOKING FOR %s:%d", cd->proc.nspace, cd->proc.rank); @@ -932,7 +1001,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -944,8 +1013,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) /* execute the callback */ cd->cbfunc(PMIX_SUCCESS, data, sz, cd->cbdata); - cd->active = false; - + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -964,7 +1032,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -977,7 +1045,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -1000,7 +1068,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) if (NULL != data) { free(data); } - cd->active = false; + 
PMIX_WAKEUP_THREAD(&cd->lock); } PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, @@ -1009,6 +1077,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* protect against bozo */ if (NULL == cbfunc || NULL == proc) { return PMIX_ERR_BAD_PARAM; @@ -1028,7 +1103,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, * potential threading issues */ PMIX_THREADSHIFT(cd, _dmodex_req); - PMIX_WAIT_FOR_COMPLETION(cd->active); + PMIX_WAIT_THREAD(&cd->lock); PMIX_RELEASE(cd); return PMIX_SUCCESS; } @@ -1038,6 +1113,8 @@ static void _store_internal(int sd, short args, void *cbdata) pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; pmix_nspace_t *ns, *nsptr; + PMIX_ACQUIRE_OBJECT(cd); + ns = NULL; PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { if (0 == strncmp(cd->nspace, nsptr->nspace, PMIX_MAX_NSLEN)) { @@ -1051,7 +1128,9 @@ static void _store_internal(int sd, short args, void *cbdata) } else { cd->status = pmix_hash_store(&ns->internal, cd->rank, cd->kv); } - cd->active = false; + if (cd->lock.active) { + PMIX_WAKEUP_THREAD(&cd->lock); + } } PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, @@ -1060,6 +1139,13 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, pmix_shift_caddy_t *cd; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* setup to thread shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); cd->nspace = proc->nspace; @@ -1075,12 +1161,8 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, return rc; } - if 
(PMIX_PROC_SERVER == pmix_globals.proc_type) { - PMIX_THREADSHIFT(cd, _store_internal); - PMIX_WAIT_FOR_COMPLETION(cd->active); - } else { - _store_internal(0, 0, cd); - } + PMIX_THREADSHIFT(cd, _store_internal); + PMIX_WAIT_THREAD(&cd->lock); rc = cd->status; PMIX_RELEASE(cd); @@ -1102,6 +1184,13 @@ PMIX_EXPORT pmix_status_t PMIx_generate_regex(const char *input, char **regexp) char **regexargs = NULL, *tmp, *tmp2; char *cptr; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* define the default */ *regexp = NULL; @@ -1327,6 +1416,13 @@ PMIX_EXPORT pmix_status_t PMIx_generate_ppn(const char *input, char **regexp) char *tmp, *tmp2; char *cptr; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* define the default */ *regexp = NULL; @@ -1453,6 +1549,8 @@ static void _setup_app(int sd, short args, void *cbdata) pmix_kval_t *kv; size_t n; + PMIX_ACQUIRE_OBJECT(cd); + PMIX_CONSTRUCT(&ilist, pmix_list_t); /* pass to the network libraries */ @@ -1507,6 +1605,13 @@ pmix_status_t PMIx_server_setup_application(const char nspace[], { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to threadshift this request */ cd = PMIX_NEW(pmix_setup_caddy_t); if (NULL == cd) { @@ -1529,6 +1634,8 @@ static void _setup_local_support(int sd, short args, void *cbdata) pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_status_t rc; + PMIX_ACQUIRE_OBJECT(cd); + /* pass to the network libraries */ rc = pmix_pnet.setup_local_network(cd->nspace, cd->info, cd->ninfo); @@ -1549,6 +1656,13 @@ pmix_status_t 
PMIx_server_setup_local_support(const char nspace[], { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to threadshift this request */ cd = PMIX_NEW(pmix_setup_caddy_t); if (NULL == cd) { @@ -1611,12 +1725,14 @@ static void _spcb(int sd, short args, void *cbdata) pmix_status_t rc; char *msg; + PMIX_ACQUIRE_OBJECT(cd); + /* setup the reply with the returned status */ reply = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &cd->status, 1, PMIX_STATUS))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cd->cd); - cd->active = false; + PMIX_WAKEUP_THREAD(&cd->lock); return; } if (PMIX_SUCCESS == cd->status) { @@ -1646,7 +1762,7 @@ static void _spcb(int sd, short args, void *cbdata) PMIX_SERVER_QUEUE_REPLY(cd->cd->peer, cd->cd->hdr.tag, reply); /* cleanup */ PMIX_RELEASE(cd->cd); - cd->active = false; + PMIX_WAKEUP_THREAD(&cd->lock); } static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata) @@ -1660,7 +1776,7 @@ static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata) cd->cd = (pmix_server_caddy_t*)cbdata;; PMIX_THREADSHIFT(cd, _spcb); - PMIX_WAIT_FOR_COMPLETION(cd->active); + PMIX_WAIT_THREAD(&cd->lock); PMIX_RELEASE(cd); } @@ -1715,6 +1831,8 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) int32_t cnt = 1; char byte; + PMIX_ACQUIRE_OBJECT(scd); + /* pass the blobs being returned */ PMIX_CONSTRUCT(&xfer, pmix_buffer_t); @@ -1978,6 +2096,8 @@ static void _cnct(int sd, short args, void *cbdata) pmix_nspace_t *nptr; pmix_buffer_t *job_info_ptr; + PMIX_ACQUIRE_OBJECT(scd); + /* setup the reply, starting with the returned status */ reply = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &scd->status, 1, PMIX_STATUS))) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c 
b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c index 278176ad725..ab1915a4a06 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c @@ -63,6 +63,7 @@ extern pmix_server_module_t pmix_host_server; typedef struct { pmix_object_t super; pmix_event_t ev; + volatile bool active; pmix_status_t status; const char *data; size_t ndata; @@ -597,6 +598,8 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) pmix_nspace_t *ns, *nptr; pmix_status_t rc; + PMIX_ACQUIRE_OBJECT(caddy); + pmix_output_verbose(2, pmix_globals.debug_output, "[%s:%d] process dmdx reply from %s:%u", __FILE__, __LINE__, @@ -709,7 +712,5 @@ static void dmdx_cbfunc(pmix_status_t status, "[%s:%d] queue dmdx reply for %s:%u", __FILE__, __LINE__, caddy->lcd->proc.nspace, caddy->lcd->proc.rank); - pmix_event_assign(&caddy->ev, pmix_globals.evbase, -1, EV_WRITE, - _process_dmdx_reply, caddy); - pmix_event_active(&caddy->ev, EV_WRITE, 1); + PMIX_THREADSHIFT(caddy, _process_dmdx_reply); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 97fdd7cdfe9..5826c4b8870 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -1675,7 +1675,7 @@ static void tcon(pmix_server_trkr_t *t) { t->pcs = NULL; t->npcs = 0; - t->active = true; + PMIX_CONSTRUCT_LOCK(&t->lock); t->def_complete = false; PMIX_CONSTRUCT(&t->ranks, pmix_list_t); PMIX_CONSTRUCT(&t->local_cbs, pmix_list_t); @@ -1690,6 +1690,7 @@ static void tcon(pmix_server_trkr_t *t) } static void tdes(pmix_server_trkr_t *t) { + PMIX_DESTRUCT_LOCK(&t->lock); if (NULL != t->pcs) { free(t->pcs); } @@ -1725,7 +1726,7 @@ PMIX_CLASS_INSTANCE(pmix_snd_caddy_t, static void scadcon(pmix_setup_caddy_t *p) { memset(&p->proc, 0, sizeof(pmix_proc_t)); - p->active = true; + PMIX_CONSTRUCT_LOCK(&p->lock); p->nspace = NULL; 
p->server_object = NULL; p->nlocalprocs = 0; @@ -1738,6 +1739,7 @@ static void scadcon(pmix_setup_caddy_t *p) } static void scaddes(pmix_setup_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, pmix_object_t, @@ -1745,7 +1747,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, static void ncon(pmix_notify_caddy_t *p) { - p->active = true; + PMIX_CONSTRUCT_LOCK(&p->lock); memset(p->source.nspace, 0, PMIX_MAX_NSLEN+1); p->source.rank = PMIX_RANK_UNDEF; p->range = PMIX_RANGE_UNDEF; @@ -1758,6 +1760,7 @@ static void ncon(pmix_notify_caddy_t *p) } static void ndes(pmix_notify_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h index f978e058b33..dac731d2242 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h @@ -19,6 +19,7 @@ #include #include #include +#include "src/threads/threads.h" #include "src/util/hash.h" typedef struct { @@ -31,7 +32,7 @@ PMIX_CLASS_DECLARATION(pmix_trkr_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; char *nspace; pmix_status_t status; pmix_proc_t proc; @@ -48,24 +49,6 @@ typedef struct { } pmix_setup_caddy_t; PMIX_CLASS_DECLARATION(pmix_setup_caddy_t); -typedef struct { - pmix_object_t super; - pmix_event_t ev; - volatile bool active; - pmix_status_t status; - pmix_proc_t source; - pmix_data_range_t range; - pmix_proc_t *targets; - size_t ntargets; - bool nondefault; - pmix_info_t *info; - size_t ninfo; - pmix_buffer_t *buf; - pmix_op_cbfunc_t cbfunc; - void *cbdata; -} pmix_notify_caddy_t; -PMIX_CLASS_DECLARATION(pmix_notify_caddy_t); - typedef struct { pmix_list_item_t super; pmix_setup_caddy_t *cd; diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include 
b/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include index ba93edb67ab..d0d41f1c577 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include @@ -25,7 +25,6 @@ # Source code files headers += \ - threads/condition.h \ threads/mutex.h \ threads/mutex_unix.h \ threads/threads.h \ @@ -34,7 +33,6 @@ headers += \ threads/thread_usage.h libpmix_la_SOURCES += \ - threads/condition.c \ threads/mutex.c \ threads/thread.c \ threads/wait_sync.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c deleted file mode 100644 index 13a9d3ab164..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "pmix_config.h" - -#include "src/threads/condition.h" - - -static void pmix_condition_construct(pmix_condition_t *c) -{ - c->c_waiting = 0; - c->c_signaled = 0; -} - - -static void pmix_condition_destruct(pmix_condition_t *c) -{ -} - -PMIX_CLASS_INSTANCE(pmix_condition_t, - pmix_object_t, - pmix_condition_construct, - pmix_condition_destruct); diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h deleted file mode 100644 index 7a18660d8f2..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef PMIX_CONDITION_SPINLOCK_H -#define PMIX_CONDITION_SPINLOCK_H - -#include "pmix_config.h" -#ifdef HAVE_SYS_TIME_H -#include -#endif -#include -#include - -#include "src/threads/mutex.h" - -BEGIN_C_DECLS - -struct pmix_condition_t { - pmix_object_t super; - volatile int c_waiting; - volatile int c_signaled; -}; -typedef struct pmix_condition_t pmix_condition_t; - -PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_condition_t); - - -static inline int pmix_condition_wait(pmix_condition_t *c, pmix_mutex_t *m) -{ - int rc = 0; - c->c_waiting++; - - if (c->c_signaled) { - c->c_waiting--; - return 0; - } - - c->c_signaled--; - c->c_waiting--; - return rc; -} - -static inline int pmix_condition_signal(pmix_condition_t *c) -{ - if (c->c_waiting) { - c->c_signaled++; - } - return 0; -} - -static inline int pmix_condition_broadcast(pmix_condition_t *c) -{ - c->c_signaled = c->c_waiting; - return 0; -} - -END_C_DECLS - -#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h b/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h index b861da61dd6..d66e594ead6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h @@ -35,7 +35,6 @@ #endif #include "mutex.h" -#include "condition.h" BEGIN_C_DECLS @@ -59,61 +58,138 @@ PMIX_EXPORT extern bool pmix_debug_threads; PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_thread_t); +#define pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) +typedef pthread_cond_t pmix_condition_t; +#define pmix_condition_broadcast(a) pthread_cond_broadcast(a) +#define pmix_condition_signal(a) pthread_cond_signal(a) +#define PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + +typedef struct { + pmix_mutex_t mutex; + pmix_condition_t cond; + volatile bool active; +} pmix_lock_t; + +#define PMIX_CONSTRUCT_LOCK(l) \ + do { \ + PMIX_CONSTRUCT(&(l)->mutex, pmix_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); 
\ + (l)->active = true; \ + } while(0) + +#define PMIX_DESTRUCT_LOCK(l) \ + do { \ + PMIX_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + + +#if PMIX_ENABLE_DEBUG +#define PMIX_ACQUIRE_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + if (pmix_debug_threads) { \ + pmix_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (pmix_debug_threads) { \ + pmix_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + (lck)->active = true; \ + } while(0) +#else +#define PMIX_ACQUIRE_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + (lck)->active = true; \ + } while(0) +#endif + + #if PMIX_ENABLE_DEBUG -#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \ - do { \ - PMIX_THREAD_LOCK((lck)); \ - if (pmix_debug_threads) { \ - pmix_output(0, "Waiting for thread %s:%d", \ - __FILE__, __LINE__); \ - } \ - while (*(act)) { \ - pmix_condition_wait((cnd), (lck)); \ - } \ - if (pmix_debug_threads) { \ - pmix_output(0, "Thread obtained %s:%d", \ - __FILE__, __LINE__); \ - } \ - *(act) = true; \ - } while(0); +#define PMIX_WAIT_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + if (pmix_debug_threads) { \ + pmix_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (pmix_debug_threads) { \ + pmix_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) #else -#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \ - do { \ - PMIX_THREAD_LOCK((lck)); \ - while (*(act)) { \ - pmix_condition_wait((cnd), (lck)); \ - } \ - *(act) = true; \ - } while(0); +#define PMIX_WAIT_THREAD(lck) 
\ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) #endif #if PMIX_ENABLE_DEBUG -#define PMIX_RELEASE_THREAD(lck, cnd, act) \ +#define PMIX_RELEASE_THREAD(lck) \ do { \ if (pmix_debug_threads) { \ pmix_output(0, "Releasing thread %s:%d", \ __FILE__, __LINE__); \ } \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - PMIX_THREAD_UNLOCK((lck)); \ - } while(0); + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) #else -#define PMIX_RELEASE_THREAD(lck, cnd, act) \ - do { \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - PMIX_THREAD_UNLOCK((lck)); \ - } while(0); +#define PMIX_RELEASE_THREAD(lck) \ + do { \ + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) #endif -#define PMIX_WAKEUP_THREAD(cnd, act) \ - do { \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - } while(0); +#define PMIX_WAKEUP_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) + + +/* provide a macro for forward-proofing the shifting + * of objects between threads - at some point, we + * may revamp our threading model */ + +/* post an object to another thread - for now, we + * only have a memory barrier */ +#define PMIX_POST_OBJECT(o) pmix_atomic_wmb() + +/* acquire an object from another thread - for now, + * we only have a memory barrier */ +#define PMIX_ACQUIRE_OBJECT(o) pmix_atomic_rmb() PMIX_EXPORT int pmix_thread_start(pmix_thread_t *); diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h index 
50717a96d7e..4430912606d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h @@ -19,8 +19,9 @@ #if !defined(PMIX_THREADS_WAIT_SYNC_H) #define PMIX_THREADS_WAIT_SYNC_H +#include "src/include/prefetch.h" #include "src/atomics/sys/atomic.h" -#include "src/threads/condition.h" +#include "src/threads/threads.h" #include "src/util/error.h" #include diff --git a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c index 0f4dba4445a..196938a62bb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c @@ -158,18 +158,12 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_nspace_t *nptr, *nsptr; char hostname[PMIX_MAX_NSLEN]; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (NULL == proc) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_BAD_PARAM; } - - /* if we were given an nspace in the environment, then we - * must have been spawned by a PMIx server - so even though - * we technically will operate as a tool, we are actually - * a "client" of the PMIx server and should connect that way */ - if (NULL != getenv("PMIX_NAMESPACE")) { - return PMIx_Init(proc, info, ninfo); - } - if (0 < pmix_globals.init_cntr) { /* since we have been called before, the nspace and * rank should be known. 
So return them here if @@ -179,19 +173,30 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, proc->rank = pmix_globals.myid.rank; } ++pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } + /* if we were given an nspace in the environment, then we + * must have been spawned by a PMIx server - so even though + * we technically will operate as a tool, we are actually + * a "client" of the PMIx server and should connect that way */ + if (NULL != getenv("PMIX_NAMESPACE")) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIx_Init(proc, info, ninfo); + } + /* setup the runtime - this init's the globals, * opens and initializes the required frameworks */ if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_TOOL, info, ninfo, pmix_tool_notify_recv))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); - PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t); + pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: init called"); @@ -199,13 +204,15 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, /* select our psec module - we take the default as we cannot * do any better */ if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* the server will have to use the same */ - pmix_client_globals.myserver.compat.psec = pmix_globals.mypeer->compat.psec; + pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec; /* connect to the server - returns job info if successful */ - if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){ + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -228,6 +235,7 @@ PMIX_EXPORT int 
PMIx_tool_init(pmix_proc_t *proc, } } if (NULL == nsptr) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOT_FOUND; } @@ -239,6 +247,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(nsptr->nspace); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -251,6 +260,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.integer = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -263,6 +273,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -275,6 +286,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -287,6 +299,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup("0"); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -299,7 +312,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); - return rc; + PMIX_RELEASE_THREAD(&pmix_global_lock); } 
PMIX_RELEASE(kptr); // maintain accounting @@ -311,6 +324,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -323,6 +337,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -335,6 +350,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -348,6 +364,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -360,6 +377,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -372,6 +390,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -384,6 +403,8 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, 
kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -395,6 +416,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -407,6 +429,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -425,6 +448,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(hostname); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -442,6 +466,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(hostname); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -455,24 +480,49 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup("0"); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } -/* callback for wait completion */ -static void wait_cbfunc(struct pmix_peer_t *pr, - 
pmix_ptl_hdr_t *hdr, - pmix_buffer_t *buf, void *cbdata) +typedef struct { + pmix_lock_t lock; + pmix_event_t ev; + bool active; +} pmix_tool_timeout_t; + +/* timer callback */ +static void fin_timeout(int sd, short args, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + pmix_tool_timeout_t *tev; + tev = (pmix_tool_timeout_t*)cbdata; pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:tool wait_cbfunc received"); + "pmix:tool finwait timeout fired"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} +/* callback for finalize completion */ +static void finwait_cbfunc(struct pmix_peer_t *pr, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_tool_timeout_t *tev; + tev = (pmix_tool_timeout_t*)cbdata; - *active = false; + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:tool finwait_cbfunc received"); + if (tev->active) { + tev->active = false; + pmix_event_del(&tev->ev); // stop the timer + PMIX_WAKEUP_THREAD(&tev->lock); + } } PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) @@ -480,13 +530,17 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FINALIZE_CMD; pmix_status_t rc; - volatile bool active; + pmix_tool_timeout_t tev; + struct timeval tv = {2, 0}; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; + PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool finalize called"); @@ -505,15 +559,25 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool sending finalize sync to server"); - /* send to the server */ - active = true;; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, - wait_cbfunc, (void*)&active))){ + /* setup a timer to protect ourselves should 
the server be unable + * to answer for some reason */ + PMIX_CONSTRUCT_LOCK(&tev.lock); + pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0, + fin_timeout, &tev); + tev.active = true; + PMIX_POST_OBJECT(&tev); + pmix_event_add(&tev.ev, &tv); + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, + finwait_cbfunc, (void*)&tev))){ return rc; } /* wait for the ack to return */ - PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(&tev.lock); + PMIX_DESTRUCT_LOCK(&tev.lock); + if (tev.active) { + pmix_event_del(&tev.ev); + } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool finalize sync received"); @@ -525,7 +589,7 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) (void)pmix_progress_thread_pause(NULL); } - PMIX_DESTRUCT(&pmix_client_globals.myserver); + PMIX_RELEASE(pmix_client_globals.myserver); PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); /* shutdown services */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c index ba479ab3351..d76a45ac4a3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/output.c b/opal/mca/pmix/pmix2x/pmix/src/util/output.c index d7d36a1e92b..4ff79d596ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/output.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -89,7 +89,7 @@ static void construct(pmix_object_t *stream); static int do_open(int output_id, pmix_output_stream_t * lds); static int open_file(int i); static void free_descriptor(int output_id); -static int make_string(char **no_newline_string, output_desc_t *ldi, +static int make_string(char **out, char **no_newline_string, output_desc_t *ldi, const char *format, va_list arglist); static int output(int output_id, const char *format, va_list arglist); @@ -111,8 +111,6 @@ int pmix_output_redirected_syslog_pri = 0; static bool initialized = false; static int default_stderr_fd = -1; static output_desc_t info[PMIX_OUTPUT_MAX_STREAMS]; -static char *temp_str = 0; -static size_t temp_str_len = 0; #if defined(HAVE_SYSLOG) static bool syslog_opened = false; #endif @@ -356,50 +354,6 @@ void pmix_output_vverbose(int level, int output_id, const char *format, } -/* - * Send a message to a string if the verbose level is high enough - */ -char *pmix_output_string(int level, int output_id, const char *format, ...) 
-{ - int rc; - char *ret = NULL; - - if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && - info[output_id].ldi_verbose_level >= level) { - va_list arglist; - va_start(arglist, format); - rc = make_string(&ret, &info[output_id], format, arglist); - va_end(arglist); - if (PMIX_SUCCESS != rc) { - ret = NULL; - } - } - - return ret; -} - - -/* - * Send a message to a string if the verbose level is high enough - */ -char *pmix_output_vstring(int level, int output_id, const char *format, - va_list arglist) -{ - int rc; - char *ret = NULL; - - if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && - info[output_id].ldi_verbose_level >= level) { - rc = make_string(&ret, &info[output_id], format, arglist); - if (PMIX_SUCCESS != rc) { - ret = NULL; - } - } - - return ret; -} - - /* * Set the verbosity level of a stream */ @@ -501,11 +455,6 @@ void pmix_output_finalize(void) free (output_prefix); free (output_dir); - if(NULL != temp_str) { - free(temp_str); - temp_str = NULL; - temp_str_len = 0; - } PMIX_DESTRUCT(&verbose); } } @@ -813,14 +762,15 @@ static void free_descriptor(int output_id) } -static int make_string(char **no_newline_string, output_desc_t *ldi, +static int make_string(char **out, char **no_newline_string, output_desc_t *ldi, const char *format, va_list arglist) { - size_t len, total_len; + size_t len, total_len, temp_str_len; bool want_newline = false; + char *temp_str; /* Make the formatted string */ - + *out = NULL; if (0 > vasprintf(no_newline_string, format, arglist)) { return PMIX_ERR_NOMEM; } @@ -844,16 +794,11 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, if (NULL != ldi->ldi_suffix) { total_len += strlen(ldi->ldi_suffix); } - if (temp_str_len < total_len + want_newline) { - if (NULL != temp_str) { - free(temp_str); - } - temp_str = (char *) malloc(total_len * 2); - if (NULL == temp_str) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - temp_str_len = total_len * 2; + temp_str = (char *) malloc(total_len * 2); + if 
(NULL == temp_str) { + return PMIX_ERR_OUT_OF_RESOURCE; } + temp_str_len = total_len * 2; if (NULL != ldi->ldi_prefix && NULL != ldi->ldi_suffix) { if (want_newline) { snprintf(temp_str, temp_str_len, "%s%s%s\n", @@ -885,7 +830,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, snprintf(temp_str, temp_str_len, "%s", *no_newline_string); } } - + *out = temp_str; return PMIX_SUCCESS; } @@ -897,7 +842,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, static int output(int output_id, const char *format, va_list arglist) { int rc = PMIX_SUCCESS; - char *str, *out = NULL; + char *str=NULL, *out = NULL; output_desc_t *ldi; /* Setup */ @@ -913,8 +858,8 @@ static int output(int output_id, const char *format, va_list arglist) ldi = &info[output_id]; /* Make the strings */ - if (PMIX_SUCCESS != (rc = make_string(&str, ldi, format, arglist))) { - return rc; + if (PMIX_SUCCESS != (rc = make_string(&out, &str, ldi, format, arglist))) { + goto cleanup; } /* Syslog output -- does not use the newline-appended string */ @@ -924,15 +869,11 @@ static int output(int output_id, const char *format, va_list arglist) } #endif - /* All others (stdout, stderr, file) use temp_str, potentially - with a newline appended */ - - out = temp_str; - /* stdout output */ if (ldi->ldi_stdout) { if (0 > write(fileno(stdout), out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } fflush(stdout); } @@ -942,7 +883,8 @@ static int output(int output_id, const char *format, va_list arglist) if (0 > write((-1 == default_stderr_fd) ? 
fileno(stderr) : default_stderr_fd, out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } fflush(stderr); } @@ -964,7 +906,8 @@ static int output(int output_id, const char *format, va_list arglist) "[WARNING: %d lines lost because the PMIx process session directory did\n not exist when pmix_output() was invoked]\n", ldi->ldi_file_num_lines_lost); if (0 > write(ldi->ldi_fd, buffer, (int)strlen(buffer))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } ldi->ldi_file_num_lines_lost = 0; if (out != buffer) { @@ -974,13 +917,22 @@ static int output(int output_id, const char *format, va_list arglist) } if (ldi->ldi_fd != -1) { if (0 > write(ldi->ldi_fd, out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } } } free(str); + str = NULL; } + cleanup: + if (NULL != str) { + free(str); + } + if (NULL != out) { + free(out); + } return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/output.h b/opal/mca/pmix/pmix2x/pmix/src/util/output.h index 52a452a175c..78bbcf119ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/output.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/output.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -423,29 +423,6 @@ struct pmix_output_stream_t { void pmix_output_vverbose(int verbose_level, int output_id, const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0); - /** - * Send output to a string if the verbosity level is high enough. - * - * @param output_id Stream id returned from pmix_output_open(). - * @param level Target verbosity level. - * @param format printf-style format string. 
- * @param varargs printf-style varargs list to fill the string - * specified by the format parameter. - * - * Exactly the same as pmix_output_verbose(), except the output it - * sent to a string instead of to the stream. If the verbose - * level is not high enough, NULL is returned. The caller is - * responsible for free()'ing the returned string. - */ - char *pmix_output_string(int verbose_level, int output_id, - const char *format, ...) __pmix_attribute_format__(__printf__, 3, 4); - - /** - * Same as pmix_output_string, but accepts a va_list form of varargs. - */ - char *pmix_output_vstring(int verbose_level, int output_id, - const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0); - /** * Set the verbosity level for a stream. * @@ -567,4 +544,3 @@ PMIX_CLASS_DECLARATION(pmix_output_stream_t); END_C_DECLS #endif /* PMIX_OUTPUT_H_ */ - diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c index 003c3437e09..df50881b5c9 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c @@ -73,6 +73,54 @@ static void opcbfunc(pmix_status_t status, void *cbdata) *active = false; } +/* this is an event notification function that we explicitly request + * be called when the PMIX_MODEL_DECLARED notification is issued. + * We could catch it in the general event notification function and test + * the status to see if the status matched, but it often is simpler + * to declare a use-specific notification callback point. 
In this case, + * we are asking to know whenever a model is declared as a means + * of testing server self-notification */ +static void model_callback(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + size_t n; + + /* just let us know it was received */ + fprintf(stderr, "%s:%d Model event handler called with status %d(%s)\n", + myproc.nspace, myproc.rank, status, PMIx_Error_string(status)); + for (n=0; n < ninfo; n++) { + if (PMIX_STRING == info[n].value.type) { + fprintf(stderr, "%s:%d\t%s:\t%s\n", + myproc.nspace, myproc.rank, + info[n].key, info[n].value.data.string); + } + } + + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } +} + +/* event handler registration is done asynchronously */ +static void model_registration_callback(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + fprintf(stderr, "simpclient EVENT HANDLER REGISTRATION RETURN STATUS %d, ref=%lu\n", + status, (unsigned long)evhandler_ref); + *active = false; +} + int main(int argc, char **argv) { int rc; @@ -84,7 +132,9 @@ int main(int argc, char **argv) int cnt, j; bool doabort = false; volatile bool active; - pmix_info_t info; + pmix_info_t info, *iptr; + size_t ninfo; + pmix_status_t code; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { @@ -92,12 +142,16 @@ int main(int argc, char **argv) } } - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { + /* init us and declare we are a test programming model */ + PMIX_INFO_CREATE(iptr, 2); + PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); + 
PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } + PMIX_INFO_FREE(iptr, 2); pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* test something */ @@ -110,6 +164,19 @@ int main(int argc, char **argv) } PMIX_VALUE_RELEASE(val); + /* register a handler specifically for when models declare */ + active = true; + ninfo = 1; + PMIX_INFO_CREATE(iptr, ninfo); + PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); + code = PMIX_MODEL_DECLARED; + PMIx_Register_event_handler(&code, 1, iptr, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (active) { + usleep(10); + } + PMIX_INFO_FREE(iptr, ninfo); + /* register our errhandler */ active = true; PMIx_Register_event_handler(NULL, 0, NULL, 0, diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c index 10b236a0c51..58b89804415 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c @@ -214,9 +214,10 @@ static void model_callback(size_t evhdlr_registration_id, size_t n; /* just let us know it was received */ - fprintf(stderr, "Model event handler called with status %d(%s)\n", status, PMIx_Error_string(status)); + fprintf(stderr, "SIMPTEST: Model event handler called with status %d(%s)\n", + status, PMIx_Error_string(status)); for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (PMIX_STRING == info[n].value.type) { fprintf(stderr, "\t%s:\t%s\n", info[n].key, info[n].value.data.string); } } diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 9cd36f1001e..d30cd1547a9 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ 
-6,6 +6,8 @@ * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -117,15 +120,32 @@ const opal_pmix_base_module_t opal_pmix_pmix2x_module = { .register_jobid = pmix2x_register_jobid }; +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(op); + + if (NULL != op->opcbfunc) { + op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + + static const char *pmix2x_get_nspace(opal_jobid_t jobid) { opal_pmix2x_jobid_trkr_t *jptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return jptr->nspace; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return NULL; } @@ -133,9 +153,12 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) { opal_pmix2x_jobid_trkr_t *jptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + /* if we don't already have it, add this to our jobid tracker */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } } @@ -143,6 +166,7 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); jptr->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super); + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } static void event_hdlr_complete(pmix_status_t status, void *cbdata) @@ -162,6 +186,7 @@ static void return_local_event_hdlr(int status, opal_list_t *results, pmix_status_t pstatus; size_t n; + OPAL_ACQUIRE_OBJECT(cd); if (NULL != cd->pmixcbfunc) { op = OBJ_NEW(pmix2x_opcaddy_t); @@ -196,38 +221,6 @@ static void return_local_event_hdlr(int status, opal_list_t *results, } } -static void _event_hdlr(int sd, short args, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_event_t *event; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status); - - /* cycle thru the registrations */ - OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - if (cd->id == event->index) { - /* found it - invoke the handler, pointing its - * callback function to our callback function */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - event->handler(cd->status, &cd->pname, - cd->info, &cd->results, - return_local_event_hdlr, (void*)cd); - return; - } - } - /* if we didn't find a match, we still have to call their final callback */ - if (NULL != cd->pmixcbfunc) { - cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata); - } - OPAL_LIST_RELEASE(cd->info); - OBJ_RELEASE(cd); - return; -} - /* this function will be called by the PMIx client library * whenever it receives notification of an event. 
The * notification can come from an ORTE daemon (when launched @@ -245,15 +238,14 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, int rc; opal_value_t *iptr; size_t n; - - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + opal_pmix2x_event_t *event; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s RECEIVED NOTIFICATION OF STATUS %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + cd = OBJ_NEW(pmix2x_threadshift_t); cd->id = evhdlr_registration_id; cd->pmixcbfunc = cbfunc; @@ -273,6 +265,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&cd->pname.jobid, source->nspace))) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(cd); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } cd->pname.vpid = pmix2x_convert_rank(source->rank); @@ -307,10 +300,35 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, } } - /* now push it into the local thread */ - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _event_hdlr, cd); - event_active(&cd->ev, EV_WRITE, 1); + /* cycle thru the registrations */ + OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + if (evhdlr_registration_id == event->index) { + /* found it - invoke the handler, pointing its + * callback function to our callback function */ + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s _EVENT_HDLR CALLING EVHDLR", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL != event->handler) { + OBJ_RETAIN(event); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + event->handler(cd->status, &cd->pname, + cd->info, &cd->results, + return_local_event_hdlr, cd); + OBJ_RELEASE(event); + return; + } + } + } + + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* if we didn't find a match, we still have to call their final callback 
*/ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + OPAL_LIST_RELEASE(cd->info); + OBJ_RELEASE(cd); + return; } opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank) @@ -409,6 +427,9 @@ pmix_status_t pmix2x_convert_opalrc(int rc) case OPAL_ERR_PARTIAL_SUCCESS: return PMIX_QUERY_PARTIAL_SUCCESS; + case OPAL_ERR_MODEL_DECLARED: + return PMIX_MODEL_DECLARED; + case OPAL_ERROR: return PMIX_ERROR; case OPAL_SUCCESS: @@ -499,6 +520,10 @@ int pmix2x_convert_rc(pmix_status_t rc) case PMIX_MONITOR_FILE_ALERT: return OPAL_ERR_FILE_ALERT; + case PMIX_MODEL_DECLARED: + return OPAL_ERR_MODEL_DECLARED; + + case PMIX_ERROR: return OPAL_ERROR; case PMIX_SUCCESS: @@ -611,6 +636,20 @@ pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t persist) } } +char* pmix2x_convert_jobid(opal_jobid_t jobid) +{ + opal_pmix2x_jobid_trkr_t *jptr; + + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return jptr->nspace; + } + } + return NULL; +} + /**** RHC: NEED TO ADD SUPPORT FOR NEW PMIX DATA TYPES, INCLUDING **** CONVERSION OF PROC STATES ****/ @@ -977,6 +1016,7 @@ static void errreg_cbfunc (pmix_status_t status, { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); op->event->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu", @@ -987,39 +1027,47 @@ static void errreg_cbfunc (pmix_status_t status, OBJ_RELEASE(op); } -static void _reg_hdlr(int sd, short args, void *cbdata) +static void register_handler(opal_list_t *event_codes, + opal_list_t *info, + opal_pmix_notification_fn_t evhandler, + opal_pmix_evhandler_reg_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - pmix2x_opcaddy_t *op; - opal_value_t *kv; + pmix2x_opcaddy_t 
*op = (pmix2x_opcaddy_t*)cbdata; size_t n; + opal_value_t *kv; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s REGISTER HANDLER CODES %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == cd->event_codes) ? "NULL" : "NON-NULL"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, 0, cbdata); + } + return; + } op = OBJ_NEW(pmix2x_opcaddy_t); - op->evregcbfunc = cd->cbfunc; - op->cbdata = cd->cbdata; + op->evregcbfunc = cbfunc; + op->cbdata = cbdata; /* convert the event codes */ - if (NULL != cd->event_codes) { - op->ncodes = opal_list_get_size(cd->event_codes); + if (NULL != event_codes) { + op->ncodes = opal_list_get_size(event_codes); op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t)); n=0; - OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) { + OPAL_LIST_FOREACH(kv, event_codes, opal_value_t) { op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer); + ++n; } } /* convert the list of info to an array of pmix_info_t */ - if (NULL != cd->info) { - op->ninfo = opal_list_get_size(cd->info); + if (NULL != info) { + op->ninfo = opal_list_get_size(info); if (0 < op->ninfo) { PMIX_INFO_CREATE(op->info, op->ninfo); n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&op->info[n].value, kv); ++n; @@ -1029,59 +1077,49 @@ static void _reg_hdlr(int sd, short args, void *cbdata) /* register the event */ op->event = OBJ_NEW(opal_pmix2x_event_t); - op->event->handler = cd->evhandler; + op->event->handler = evhandler; opal_list_append(&mca_pmix_pmix2x_component.events, &op->event->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIx_Register_event_handler(op->pcodes, op->ncodes, op->info, op->ninfo, pmix2x_event_hdlr, errreg_cbfunc, op); - - 
OBJ_RELEASE(cd); return; } -static void register_handler(opal_list_t *event_codes, - opal_list_t *info, - opal_pmix_notification_fn_t evhandler, - opal_pmix_evhandler_reg_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - - OPAL_PMIX_THREADSHIFT(event_codes, info, evhandler, _reg_hdlr, cbfunc, cbdata); - return; -} - -static void _dereg_hdlr(int sd, short args, void *cbdata) +static void deregister_handler(size_t evhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + pmix2x_opcaddy_t *op; opal_pmix2x_event_t *event; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } + /* look for this event */ OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - if (cd->handler == event->index) { + if (evhandler == event->index) { opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); OBJ_RELEASE(event); break; } } - /* tell the library to deregister this handler */ - PMIx_Deregister_event_handler(cd->handler, NULL, NULL); - /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(OPAL_SUCCESS, cd->cbdata); - } - OBJ_RELEASE(cd); -} + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); -static void deregister_handler(size_t evhandler, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - OPAL_PMIX_OP_THREADSHIFT(evhandler, _dereg_hdlr, cbfunc, cbdata); + op = OBJ_NEW(pmix2x_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + /* tell the library to deregister this handler */ + PMIx_Deregister_event_handler(evhandler, 
opcbfunc, op); return; } @@ -1094,55 +1132,56 @@ static void notify_complete(pmix_status_t status, void *cbdata) OBJ_RELEASE(op); } -static void _notify(int sd, short args, void *cbdata) +static int notify_event(int status, + const opal_process_name_t *source, + opal_pmix_data_range_t range, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata; pmix2x_opcaddy_t *op; opal_value_t *kv; pmix_proc_t p, *pptr; pmix_status_t pstatus; size_t n; - int rc=OPAL_SUCCESS; pmix_data_range_t prange; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } op = OBJ_NEW(pmix2x_opcaddy_t); /* convert the status */ - pstatus = pmix2x_convert_opalrc(cd->status); + pstatus = pmix2x_convert_opalrc(status); /* convert the source */ - if (NULL == cd->source) { + if (NULL == source) { pptr = NULL; } else { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { - job = jptr; - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(source->jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - if (NULL == job) { - rc = OPAL_ERR_NOT_FOUND; - goto release; - } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(cd->source->vpid); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_opalrank(source->vpid); pptr = &p; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the range */ - prange = pmix2x_convert_opalrange(cd->range); + prange = pmix2x_convert_opalrange(range); /* convert the list of info */ - if (NULL != cd->info) { - op->ninfo = 
opal_list_get_size(cd->info); + if (NULL != info) { + op->ninfo = opal_list_get_size(info); if (0 < op->ninfo) { PMIX_INFO_CREATE(op->info, op->ninfo); n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&op->info[n].value, kv); ++n; @@ -1152,26 +1191,8 @@ static void _notify(int sd, short args, void *cbdata) /* ask the library to notify our clients */ pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op); - rc = pmix2x_convert_rc(pstatus); - release: - /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } - OBJ_RELEASE(cd); -} - -static int notify_event(int status, - const opal_process_name_t *source, - opal_pmix_data_range_t range, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata); - return OPAL_SUCCESS; + return pmix2x_convert_rc(pstatus); } static void relcbfunc(void *cbdata) @@ -1194,6 +1215,8 @@ static void infocbfunc(pmix_status_t status, opal_value_t *iptr; size_t n; + OPAL_ACQUIRE_OBJECT(cd); + /* convert the array of pmix_info_t to the list of info */ if (NULL != info) { results = OBJ_NEW(opal_list_t); @@ -1230,6 +1253,14 @@ static void pmix2x_query(opal_list_t *queries, pmix_status_t prc; opal_pmix_query_t *q; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = OPAL_ERR_NOT_INITIALIZED; + goto CLEANUP; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ cd = OBJ_NEW(pmix2x_opcaddy_t); @@ -1280,16 +1311,6 @@ static void pmix2x_query(opal_list_t *queries, return; } -static void opcbfunc(pmix_status_t 
status, void *cbdata) -{ - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; - - if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); - } - OBJ_RELEASE(op); -} - static void pmix2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { @@ -1299,6 +1320,14 @@ static void pmix2x_log(opal_list_t *info, pmix2x_opcaddy_t *cd; pmix_status_t prc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = OPAL_ERR_NOT_INITIALIZED; + goto CLEANUP; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ cd = OBJ_NEW(pmix2x_opcaddy_t); @@ -1362,27 +1391,36 @@ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, static void evcon(opal_pmix2x_event_t *p) { + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); p->handler = NULL; p->cbdata = NULL; } +static void evdes(opal_pmix2x_event_t *p) +{ + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); +} OBJ_CLASS_INSTANCE(opal_pmix2x_event_t, opal_list_item_t, - evcon, NULL); + evcon, evdes); static void opcon(pmix2x_opcaddy_t *p) { memset(&p->p, 0, sizeof(pmix_proc_t)); + p->nspace = NULL; p->procs = NULL; p->nprocs = 0; + p->pdata = NULL; + p->npdata = 0; p->error_procs = NULL; p->nerror_procs = 0; p->info = NULL; p->ninfo = 0; p->apps = NULL; p->sz = 0; - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); p->codes = NULL; p->pcodes = NULL; + p->ncodes = 0; p->queries = NULL; p->nqueries = 0; p->event = NULL; @@ -1392,17 +1430,25 @@ static void opcon(pmix2x_opcaddy_t *p) p->lkcbfunc = NULL; p->spcbfunc = NULL; p->evregcbfunc = NULL; + p->qcbfunc = NULL; p->cbdata = NULL; } static void opdes(pmix2x_opcaddy_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->nspace) { + free(p->nspace); + } if (NULL != p->procs) { PMIX_PROC_FREE(p->procs, p->nprocs); } + if (NULL != p->pdata) { + PMIX_PDATA_FREE(p->pdata, p->npdata); + } if (NULL != p->error_procs) { PMIX_PROC_FREE(p->error_procs, p->nerror_procs); } - 
if (0 < p->ninfo) { + if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } if (NULL != p->apps) { @@ -1447,7 +1493,9 @@ OBJ_CLASS_INSTANCE(pmix2x_opalcaddy_t, static void tscon(pmix2x_threadshift_t *p) { - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); + p->msg = NULL; + p->strings = NULL; p->source = NULL; p->event_codes = NULL; p->info = NULL; @@ -1460,6 +1508,10 @@ static void tscon(pmix2x_threadshift_t *p) } static void tsdes(pmix2x_threadshift_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->strings) { + free(p->strings); + } OPAL_LIST_DESTRUCT(&p->results); } OBJ_CLASS_INSTANCE(pmix2x_threadshift_t, diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index 63506b19f1f..129802f2bed 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -1,9 +1,12 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +31,7 @@ #include "opal/mca/event/event.h" #include "opal/util/proc.h" -#include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/base/base.h" #include "pmix_server.h" #include "pmix_common.h" @@ -59,6 +62,7 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t); typedef struct { opal_list_item_t super; + opal_pmix_lock_t lock; size_t index; opal_pmix_notification_fn_t handler; void *cbdata; @@ -75,17 +79,21 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_dmx_trkr_t); typedef struct { opal_object_t super; + opal_event_t ev; pmix_status_t status; + char *nspace; pmix_proc_t p; pmix_proc_t *procs; size_t nprocs; + pmix_pdata_t *pdata; + size_t npdata; pmix_proc_t *error_procs; size_t nerror_procs; pmix_info_t *info; size_t ninfo; pmix_app_t *apps; size_t sz; - volatile bool active; + opal_pmix_lock_t lock; opal_list_t *codes; pmix_status_t *pcodes; size_t ncodes; @@ -124,7 +132,9 @@ OBJ_CLASS_DECLARATION(pmix2x_opalcaddy_t); typedef struct { opal_object_t super; opal_event_t ev; - volatile bool active; + opal_pmix_lock_t lock; + const char *msg; + char *strings; size_t id; int status; opal_process_name_t pname; @@ -133,6 +143,7 @@ typedef struct { opal_pmix_data_range_t range; bool nondefault; size_t handler; + opal_value_t *val; opal_list_t *event_codes; opal_list_t *info; opal_list_t results; @@ -140,6 +151,8 @@ typedef struct { opal_pmix_evhandler_reg_cbfunc_t cbfunc; opal_pmix_op_cbfunc_t opcbfunc; pmix_event_notification_cbfunc_fn_t pmixcbfunc; + opal_pmix_value_cbfunc_t valcbfunc; + opal_pmix_lookup_cbfunc_t lkcbfunc; void *cbdata; } pmix2x_threadshift_t; OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); @@ -151,9 +164,10 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->handler = (e); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ 
+ -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) #define OPAL_PMIX_THREADSHIFT(e, i, eh, fn, cb, cd) \ @@ -165,9 +179,10 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->evhandler = (eh); \ _cd->cbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) #define OPAL_PMIX_NOTIFY_THREADSHIFT(s, sr, r, i, fn, cb, cd) \ @@ -180,13 +195,22 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->info = (i); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ + } while(0) + +#define OPAL_PMIX2X_THREADSHIFT(p, cb) \ + do { \ + opal_event_assign(&((p)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (cb), (p)); \ + OPAL_POST_OBJECT(p); \ + opal_event_active(&((p)->ev), EV_WRITE, 1); \ } while(0) /**** CLIENT FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_client_init(void); +OPAL_MODULE_DECLSPEC int pmix2x_client_init(opal_list_t *ilist); OPAL_MODULE_DECLSPEC int pmix2x_client_finalize(void); OPAL_MODULE_DECLSPEC int pmix2x_initialized(void); OPAL_MODULE_DECLSPEC int pmix2x_abort(int flag, const char *msg, @@ -290,6 +314,8 @@ OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv, OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir); +OPAL_MODULE_DECLSPEC char* pmix2x_convert_jobid(opal_jobid_t jobid); + END_C_DECLS #endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c 
b/opal/mca/pmix/pmix2x/pmix2x_client.c index d758c8f6e37..e4c73854101 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -27,6 +27,7 @@ #endif #include "opal/hash_string.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/proc.h" @@ -37,46 +38,80 @@ static pmix_proc_t my_proc; static char *dbgvalue=NULL; -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - } while (0) - - static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { opal_pmix2x_event_t *event = (opal_pmix2x_event_t*)cbdata; + OPAL_ACQUIRE_OBJECT(event); + event->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); + OPAL_POST_OBJECT(event); + OPAL_PMIX_WAKEUP_THREAD(&event->lock); } -int pmix2x_client_init(void) +int pmix2x_client_init(opal_list_t *ilist) { opal_process_name_t pname; pmix_status_t rc; int dbg; opal_pmix2x_jobid_trkr_t *job; opal_pmix2x_event_t *event; + pmix_info_t *pinfo; + size_t ninfo, n; + opal_value_t *ival; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client init"); - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } } - rc = PMIx_Init(&my_proc, NULL, 0); + /* convert the incoming list to info structs */ + if (NULL != ilist) { + ninfo = opal_list_get_size(ilist); + if (0 < ninfo) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { + (void)strncpy(pinfo[n].key, 
ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&pinfo[n].value, ival); + ++n; + } + } else { + pinfo = NULL; + ninfo = 0; + } + } else { + pinfo = NULL; + ninfo = 0; + } + + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_Init(&my_proc, pinfo, ninfo); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, ninfo); + } if (PMIX_SUCCESS != rc) { return pmix2x_convert_rc(rc); } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + ++opal_pmix_base.initialized; + if (1 < opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } /* store our jobid and rank */ if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { @@ -99,38 +134,70 @@ int pmix2x_client_init(void) pname.vpid = pmix2x_convert_rank(my_proc.rank); opal_proc_set_name(&pname); + /* release the thread in case the event handler fires when + * registered */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* register the default event handler */ event = OBJ_NEW(opal_pmix2x_event_t); opal_list_append(&mca_pmix_pmix2x_component.events, &event->super); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-DEFAULT", PMIX_STRING); PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + PMIX_INFO_FREE(pinfo, 1); + return OPAL_SUCCESS; } +static void dereg_cbfunc(pmix_status_t st, void *cbdata) +{ + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); +} + int pmix2x_client_finalize(void) { pmix_status_t rc; - opal_pmix2x_event_t *event; + opal_pmix2x_event_t *event, *ev2; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client finalize"); - /* deregister all event handlers */ - OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - PMIx_Deregister_event_handler(event->index, NULL, NULL); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + 
--opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); + OBJ_RELEASE(event); + } } - /* the list will be destructed when the component is finalized */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Finalize(NULL, 0); + return pmix2x_convert_rc(rc); } int pmix2x_initialized(void) { + int init; + opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client initialized"); - return PMIx_Initialized(); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + init = opal_pmix_base.initialized; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + return init; } int pmix2x_abort(int flag, const char *msg, @@ -140,37 +207,35 @@ int pmix2x_abort(int flag, const char *msg, pmix_proc_t *parray=NULL; size_t n, cnt=0; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client abort"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; 
- break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } - /* call the library abort */ + /* call the library abort - this is a blocking call */ rc = PMIx_Abort(flag, msg, parray, cnt); /* release the array */ @@ -184,25 +249,21 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) pmix_value_t kv; pmix_status_t rc; pmix_proc_t p; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { return OPAL_ERR_NOT_FOUND; } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); p.rank = pmix2x_convert_opalrank(proc->vpid); } else { /* use our name */ @@ -213,6 +274,7 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) PMIX_VALUE_CONSTRUCT(&kv); pmix2x_value_load(&kv, val); + /* call the library - this is a blocking call */ rc = PMIx_Store_internal(&p, val->key, &kv); PMIX_VALUE_DESTRUCT(&kv); @@ -223,6 +285,13 @@ int pmix2x_commit(void) { pmix_status_t rc; + 
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_Commit(); return pmix2x_convert_rc(rc); } @@ -231,6 +300,7 @@ static void opcbfunc(pmix_status_t status, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } @@ -240,39 +310,39 @@ static void opcbfunc(pmix_status_t status, void *cbdata) int pmix2x_fence(opal_list_t *procs, int collect_data) { pmix_status_t rc; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; opal_namelist_t *ptr; + char *nsptr; + size_t cnt, n; + pmix_proc_t *parray = NULL; pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client fence"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (collect_data) { PMIX_INFO_CONSTRUCT(&info); (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); @@ -285,17 +355,15 @@ int pmix2x_fence(opal_list_t *procs, int collect_data) n = 0; } - /* call the library function */ rc = PMIx_Fence(parray, cnt, iptr, n); - - /* release the array */ - PMIX_PROC_FREE(parray, cnt); - if (NULL != iptr) { + if (collect_data) { PMIX_INFO_DESTRUCT(&info); } + if (NULL != parray) { + PMIX_PROC_FREE(parray, cnt); + } return pmix2x_convert_rc(rc); - } int pmix2x_fencenb(opal_list_t *procs, int collect_data, @@ -306,14 +374,16 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, size_t n, cnt=0; opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client fence_nb"); + "PMIx_client fencenb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the list of procs to an array * of pmix_proc_t */ @@ -321,36 +391,17 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + 
(void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } - - if (collect_data) { - PMIX_INFO_CONSTRUCT(&info); - (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); - info.value.type = PMIX_BOOL; - info.value.data.flag = true; - iptr = &info; - n = 1; - } else { - iptr = NULL; - n = 0; - } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); @@ -359,14 +410,15 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, op->procs = parray; op->nprocs = cnt; - /* call the library function */ - rc = PMIx_Fence_nb(parray, cnt, iptr, n, opcbfunc, op); - if (PMIX_SUCCESS != rc) { - OBJ_RELEASE(op); + if (collect_data) { + op->ninfo = 1; + PMIX_INFO_CREATE(op->info, op->ninfo); + PMIX_INFO_LOAD(&op->info[0], PMIX_COLLECT_DATA, NULL, PMIX_BOOL); } + /* call the library function */ + rc = PMIx_Fence_nb(op->procs, op->nprocs, op->info, op->ninfo, opcbfunc, op); return pmix2x_convert_rc(rc); - } int pmix2x_put(opal_pmix_scope_t opal_scope, @@ -379,6 +431,13 @@ int pmix2x_put(opal_pmix_scope_t opal_scope, opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client put"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIX_VALUE_CONSTRUCT(&kv); pmix2x_value_load(&kv, val); @@ -390,87 +449,81 @@ int pmix2x_put(opal_pmix_scope_t opal_scope, int pmix2x_get(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_value_t **val) { - int ret; - pmix_value_t *kv; pmix_status_t rc; - pmix_proc_t p, *pptr; - size_t ninfo, n; - pmix_info_t *pinfo; + pmix_proc_t p; + char *nsptr; + pmix_info_t *pinfo = NULL; + size_t sz = 0, n; opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_value_t *pval = NULL; opal_output_verbose(1, 
opal_pmix_base_framework.framework_output, - "%s PMIx_client get on proc %s key %s", + "%s pmix2x:client get on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); - /* prep default response */ - *val = NULL; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(proc->vpid); - pptr = &p; - } else { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { /* if they are asking for our jobid, then return it */ if (0 == strcmp(key, OPAL_PMIX_JOBID)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_UINT32; (*val)->data.uint32 = OPAL_PROC_MY_NAME.jobid; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; - } else if (0 == strcmp(key, OPAL_PMIX_RANK)) { + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_INT; (*val)->data.integer = pmix2x_convert_rank(my_proc.rank); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - pptr = NULL; } + *val = NULL; + + if (NULL == proc) { + (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_opalrank(proc->vpid); + } + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { - ninfo = opal_list_get_size(info); - if (0 < ninfo) { - PMIX_INFO_CREATE(pinfo, ninfo); + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); n=0; OPAL_LIST_FOREACH(ival, info, opal_value_t) { (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&pinfo[n].value, ival); ++n; } - } else { - pinfo = NULL; } - } else { - pinfo = NULL; - ninfo = 0; } - /* pass the request down */ - rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv); + rc = PMIx_Get(&p, key, pinfo, sz, &pval); if (PMIX_SUCCESS == rc) { - if (NULL == kv) { - ret = OPAL_SUCCESS; - } else { - *val = OBJ_NEW(opal_value_t); - ret = pmix2x_value_unload(*val, kv); - PMIX_VALUE_FREE(kv, 1); - } - } else { - ret = pmix2x_convert_rc(rc); + ival = OBJ_NEW(opal_value_t); + pmix2x_value_unload(ival, pval); + *val = ival; + PMIX_VALUE_FREE(pval, 1); } - PMIX_INFO_FREE(pinfo, ninfo); - return ret; + PMIX_INFO_FREE(pinfo, sz); + + return pmix2x_convert_rc(rc); } static void val_cbfunc(pmix_status_t status, @@ -480,6 +533,8 @@ static void val_cbfunc(pmix_status_t status, int rc; opal_value_t val, *v=NULL; + OPAL_ACQUIRE_OBJECT(op); + OBJ_CONSTRUCT(&val, opal_value_t); rc = pmix2x_convert_opalrc(status); if (PMIX_SUCCESS == status && NULL != kv) { rc = pmix2x_value_unload(&val, kv); @@ -489,6 +544,7 @@ static void val_cbfunc(pmix_status_t status, if (NULL != op->valcbfunc) { op->valcbfunc(rc, v, op->cbdata); } + OBJ_DESTRUCT(&val); OBJ_RELEASE(op); } @@ -497,52 +553,73 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { pmix2x_opcaddy_t *op; + opal_value_t *val; pmix_status_t rc; + char *nsptr; size_t n; - opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access shared lists/objects */ opal_output_verbose(1, 
opal_pmix_base_framework.framework_output, "%s PMIx_client get_nb on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { + /* if they are asking for our jobid, then return it */ + if (0 == strcmp(key, OPAL_PMIX_JOBID)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_UINT32; + val->data.uint32 = OPAL_PROC_MY_NAME.jobid; + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_INT; + val->data.integer = pmix2x_convert_rank(my_proc.rank); + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->valcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == proc) { + (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(op->p.nspace, nsptr, PMIX_MAX_NSLEN); op->p.rank = pmix2x_convert_opalrank(proc->vpid); - } else { - (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); 
- op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { op->sz = opal_list_get_size(info); if (0 < op->sz) { PMIX_INFO_CREATE(op->info, op->sz); n=0; - OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, ival); + OPAL_LIST_FOREACH(val, info, opal_value_t) { + (void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&op->info[n].value, val); ++n; } } @@ -567,6 +644,13 @@ int pmix2x_publish(opal_list_t *info) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } @@ -603,6 +687,13 @@ int pmix2x_publishnb(opal_list_t *info, opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish_nb"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } @@ -624,60 +715,58 @@ int pmix2x_publishnb(opal_list_t *info, } ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op); - if (0 < op->sz) { - PMIX_INFO_FREE(op->info, op->sz); - } return pmix2x_convert_rc(ret); } int pmix2x_lookup(opal_list_t *data, opal_list_t *info) { - pmix_pdata_t *pdata; - pmix_info_t *pinfo; - size_t sz, ninfo, n; - int rc; - pmix_status_t ret; opal_pmix_pdata_t *d; + pmix_pdata_t *pdata; + pmix_info_t *pinfo = NULL; + pmix_status_t rc; + size_t cnt, n, sz; opal_value_t *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_pmix2x_jobid_trkr_t *jptr, *job; - /* we 
must threadshift this request as we might not be in an event - * and we are going to access shared lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup"); + "pmix2x:client lookup"); - if (NULL == data) { - return OPAL_ERR_BAD_PARAM; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - sz = opal_list_get_size(data); - PMIX_PDATA_CREATE(pdata, sz); - n=0; + if (NULL == data || 0 == (cnt = opal_list_get_size(data))) { + return OPAL_ERR_BAD_PARAM; + } + PMIX_PDATA_CREATE(pdata, cnt); + n = 0; OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { - (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); + (void)strncpy(pdata[n].key, d->value.key, PMIX_MAX_KEYLEN); + ++n; } if (NULL != info) { - ninfo = opal_list_get_size(info); - PMIX_INFO_CREATE(pinfo, ninfo); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); - ++n; + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&pinfo[n].value, iptr); + ++n; + } } - } else { - pinfo = NULL; - ninfo = 0; } - ret = PMIx_Lookup(pdata, sz, pinfo, ninfo); - PMIX_INFO_FREE(pinfo, ninfo); - - if (PMIX_SUCCESS == ret) { - /* transfer the data back */ + rc = PMIx_Lookup(pdata, cnt, pinfo, sz); + if (PMIX_SUCCESS == rc) { + /* load the answers back into the list */ n=0; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then @@ -703,17 +792,15 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) 
opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } d->proc.vpid = pmix2x_convert_rank(pdata[n].proc.rank); - rc = pmix2x_value_unload(&d->value, &pdata[n].value); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - PMIX_PDATA_FREE(pdata, sz); - return OPAL_ERR_BAD_PARAM; - } - ++n; + pmix2x_value_unload(&d->value, &pdata[n].value); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - - return pmix2x_convert_rc(ret); + PMIX_PDATA_FREE(pdata, cnt); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, sz); + } + return pmix2x_convert_rc(rc); } static void lk_cbfunc(pmix_status_t status, @@ -727,17 +814,16 @@ static void lk_cbfunc(pmix_status_t status, size_t n; opal_pmix2x_jobid_trkr_t *job, *jptr; - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + OPAL_ACQUIRE_OBJECT(op); if (NULL == op->lkcbfunc) { OBJ_RELEASE(op); return; } - rc = pmix2x_convert_rc(status); + rc = pmix2x_convert_rc(op->status); if (OPAL_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OBJ_CONSTRUCT(&results, opal_list_t); for (n=0; n < ndata; n++) { d = OBJ_NEW(opal_pmix_pdata_t); @@ -771,12 +857,15 @@ static void lk_cbfunc(pmix_status_t status, if (OPAL_SUCCESS != rc) { rc = OPAL_ERR_BAD_PARAM; OPAL_ERROR_LOG(rc); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); goto release; } } r = &results; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } -release: + + release: /* execute the callback */ op->lkcbfunc(rc, r, op->cbdata); @@ -796,7 +885,14 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup_nb"); + "pmix2x:client lookup_nb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy 
*/ op = OBJ_NEW(pmix2x_opcaddy_t); @@ -815,7 +911,6 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, } } } - ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op); return pmix2x_convert_rc(ret); @@ -828,6 +923,13 @@ int pmix2x_unpublish(char **keys, opal_list_t *info) pmix_info_t *pinfo; opal_value_t *iptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != info) { ninfo = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, ninfo); @@ -856,6 +958,13 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, opal_value_t *iptr; size_t n; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; @@ -881,21 +990,30 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) { - pmix_status_t ret; - pmix_info_t *pinfo = NULL; + pmix_status_t rc; + pmix_info_t *info = NULL; pmix_app_t *papps; - size_t napps, n, m, ninfo = 0; - char nspace[PMIX_MAX_NSLEN+1]; - opal_value_t *info; + size_t ninfo, napps, n, m; + opal_value_t *ival; opal_pmix_app_t *app; + char nspace[PMIX_MAX_NSLEN+1]; opal_pmix2x_jobid_trkr_t *job; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + *jobid = OPAL_JOBID_INVALID; + if (NULL != job_info && 0 < (ninfo = opal_list_get_size(job_info))) { - PMIX_INFO_CREATE(pinfo, ninfo); + PMIX_INFO_CREATE(info, ninfo); n=0; - OPAL_LIST_FOREACH(info, job_info, 
opal_value_t) { - (void)strncpy(pinfo[n].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, info); + OPAL_LIST_FOREACH(ival, job_info, opal_value_t) { + (void)strncpy(info[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&info[n].value, ival); ++n; } } @@ -905,23 +1023,28 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { papps[n].cmd = strdup(app->cmd); - papps[n].argv = opal_argv_copy(app->argv); - papps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + papps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + papps[n].env = opal_argv_copy(app->env); + } papps[n].maxprocs = app->maxprocs; if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo); m=0; - OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { - (void)strncpy(papps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&papps[n].info[m].value, info); + OPAL_LIST_FOREACH(ival, &app->info, opal_value_t) { + (void)strncpy(papps[n].info[m].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&papps[n].info[m].value, ival); ++m; } } ++n; } - ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); - if (PMIX_SUCCESS == ret) { + rc = PMIx_Spawn(info, ninfo, papps, napps, nspace); + if (PMIX_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -936,29 +1059,26 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = *jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - if (0 < ninfo) { - PMIX_INFO_FREE(pinfo, ninfo); - } - PMIX_APP_FREE(papps, napps); - - return pmix2x_convert_rc(ret); + 
return rc; } static void spcbfunc(pmix_status_t status, char *nspace, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; - int rc; - opal_jobid_t jobid=OPAL_JOBID_INVALID; opal_pmix2x_jobid_trkr_t *job; + opal_jobid_t jobid; + int rc; - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + OPAL_ACQUIRE_OBJECT(op); rc = pmix2x_convert_rc(status); if (PMIX_SUCCESS == status) { + /* this is in the PMIx local thread - need to protect + * the framework-level data */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -973,6 +1093,7 @@ static void spcbfunc(pmix_status_t status, (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } op->spcbfunc(rc, jobid, op->cbdata); @@ -988,6 +1109,13 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, opal_value_t *info; opal_pmix_app_t *app; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->spcbfunc = cbfunc; @@ -1008,8 +1136,12 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { op->apps[n].cmd = strdup(app->cmd); - op->apps[n].argv = opal_argv_copy(app->argv); - op->apps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + op->apps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + op->apps[n].env = opal_argv_copy(app->env); + } op->apps[n].maxprocs = app->maxprocs; if (0 < (op->apps[n].ninfo = 
opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(op->apps[n].info, op->apps[n].ninfo); @@ -1030,43 +1162,45 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, int pmix2x_connect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client connect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - PMIX_PROC_FREE(parray, cnt); + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Connect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Connect(p, nprocs, NULL, 
0); + PMIX_PROC_FREE(p, nprocs); return pmix2x_convert_rc(ret); } @@ -1075,80 +1209,96 @@ int pmix2x_connectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; - opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + opal_namelist_t *ptr; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client connect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Connect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - + if (PMIX_SUCCESS 
!= ret) { + OBJ_RELEASE(op); + } return pmix2x_convert_rc(ret); } int pmix2x_disconnect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client disconnect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Disconnect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Disconnect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); return pmix2x_convert_rc(ret); } @@ -1157,83 +1307,86 @@ int pmix2x_disconnectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; - 
opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + opal_namelist_t *ptr; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client disconnect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Disconnect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - + if (PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } return pmix2x_convert_rc(ret); } - -int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, +int pmix2x_resolve_peers(const char *nodename, + opal_jobid_t jobid, 
opal_list_t *procs) { + pmix_status_t ret; char *nspace; pmix_proc_t *array=NULL; size_t nprocs, n; opal_namelist_t *nm; - int rc; - pmix_status_t ret; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_pmix2x_jobid_trkr_t *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD == jobid) { - nspace = NULL; - } else { - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (OPAL_JOBID_WILDCARD != jobid) { + if (NULL == (nspace = pmix2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - nspace = job->nspace; + } else { + nspace = NULL; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs); - rc = pmix2x_convert_rc(ret); if (NULL != array && 0 < nprocs) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); for (n=0; n < nprocs; n++) { nm = OBJ_NEW(opal_namelist_t); opal_list_append(procs, &nm->super); @@ -1247,53 +1400,38 @@ int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, OPAL_HASH_JOBID(array[n].nspace, nm->name.jobid); } /* if we don't already have it, add this to our jobid tracker */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == nm->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == pmix2x_convert_jobid(nm->name.jobid)) { job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); - (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = jobid; + (void)strncpy(job->nspace, array[n].nspace, PMIX_MAX_NSLEN); + job->jobid = 
nm->name.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } nm->name.vpid = pmix2x_convert_rank(array[n].rank); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } PMIX_PROC_FREE(array, nprocs); - - return rc; + return pmix2x_convert_rc(ret); } int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist) { pmix_status_t ret; - char *nspace=NULL; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD != jobid) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - nspace = job->nspace; + if (NULL == (nsptr = pmix2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Resolve_nodes(nspace, nodelist); + ret = PMIx_Resolve_nodes(nsptr, nodelist); - return pmix2x_convert_rc(ret);; + return pmix2x_convert_rc(ret); } diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index 5094ef3c3bf..220893a2432 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -29,6 +29,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -142,6 +143,7 @@ static void opal_opcbfunc(int status, 
void *cbdata) { pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(opalcaddy); if (NULL != opalcaddy->opcbfunc) { opalcaddy->opcbfunc(pmix2x_convert_opalrc(status), opalcaddy->cbdata); } @@ -198,6 +200,10 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv opalcaddy->cbdata = cbdata; /* pass it up */ + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s FINALIZED", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); rc = host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); @@ -226,6 +232,11 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED ABORT", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -277,10 +288,12 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata /* if we were collecting all data, then check for any pending * dmodx requests that we cached and notify them that the * data has arrived */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); while (NULL != (dmdx = (opal_pmix2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_pmix2x_component.dmdx))) { dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL); OBJ_RELEASE(dmdx); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } else { OBJ_RELEASE(opalcaddy); } @@ -297,6 +310,9 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, opal_value_t *iptr; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s FENCE CALLED", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL == host_module || NULL == host_module->fence_nb) { return PMIX_ERR_NOT_SUPPORTED; } @@ -357,6 
+373,11 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED DMODX", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->mdxcbfunc = cbfunc; @@ -370,10 +391,12 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, * arrived - this will trigger the pmix server to tell the * client that the data is available */ if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); dmdx = OBJ_NEW(opal_pmix2x_dmx_trkr_t); dmdx->cbfunc = cbfunc; dmdx->cbdata = cbdata; opal_list_append(&mca_pmix_pmix2x_component.dmdx, &dmdx->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return PMIX_SUCCESS; } @@ -419,6 +442,11 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED PUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -495,6 +523,11 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED LOOKUP", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->lkupcbfunc = cbfunc; @@ -541,6 +574,11 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED UNPUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* 
setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -765,6 +803,10 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, opal_value_t *oinfo; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s REGISTER EVENTS", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -793,6 +835,9 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, pmix_op_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s DEREGISTER EVENTS", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + return PMIX_ERR_NOT_SUPPORTED; } @@ -827,6 +872,11 @@ static pmix_status_t server_notify_event(pmix_status_t code, } src.vpid = pmix2x_convert_rank(source->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED NOTIFY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(src)); + /* ignore the range for now */ /* convert the info */ @@ -923,6 +973,11 @@ static pmix_status_t server_query(pmix_proc_t *proct, } requestor.vpid = pmix2x_convert_rank(proct->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED QUERY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(requestor)); + /* convert the queries */ for (n=0; n < nqueries; n++) { q = OBJ_NEW(opal_pmix_query_t); diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index e1195da202a..2a26e2cdb55 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -7,6 +7,8 @@ * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +32,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -49,48 +52,40 @@ extern pmix_server_module_t mymodule; extern opal_pmix_server_module_t *host_module; static char *dbgvalue=NULL; -static size_t errhdler_ref = 0; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - } while (0) static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; - errhdler_ref = errhandler_ref; + OPAL_ACQUIRE_OBJECT(ev); + ev->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); - *active = false; + OPAL_POST_OBJECT(ev); + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } static void opcbfunc(pmix_status_t status, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); + if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } - if (op->active) { - op->status = status; - op->active = false; - } else { - OBJ_RELEASE(op); - } + OBJ_RELEASE(op); } -static void op2cbfunc(pmix_status_t status, void *cbdata) +static void lkcbfunc(pmix_status_t status, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata; - *active = false; + OPAL_POST_OBJECT(lk); + OPAL_PMIX_WAKEUP_THREAD(lk); } int pmix2x_server_init(opal_pmix_server_module_t *module, @@ -101,13 +96,19 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, 
opal_value_t *kv; pmix_info_t *pinfo; size_t sz, n; - volatile bool active; + opal_pmix2x_event_t *event; opal_pmix2x_jobid_trkr_t *job; + opal_pmix_lock_t lk; - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } } + ++opal_pmix_base.initialized; /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -130,6 +131,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, (void)opal_snprintf_jobid(job->nspace, PMIX_MAX_NSLEN, OPAL_PROC_MY_NAME.jobid); job->jobid = OPAL_PROC_MY_NAME.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { PMIX_INFO_FREE(pinfo, sz); @@ -141,37 +143,53 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, host_module = module; /* register the default event handler */ - active = true; - PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + event = OBJ_NEW(opal_pmix2x_event_t); + opal_list_append(&mca_pmix_pmix2x_component.events, &event->super); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-SERVER-DEFAULT", PMIX_STRING); + PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + PMIX_INFO_FREE(pinfo, 1); /* as we might want to use some client-side functions, be sure * to register our own nspace */ + OPAL_PMIX_CONSTRUCT_LOCK(&lk); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL); - 
active = true; - PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, op2cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, lkcbfunc, (void*)&lk); + OPAL_PMIX_WAIT_THREAD(&lk); + OPAL_PMIX_DESTRUCT_LOCK(&lk); PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } -static void fincb(pmix_status_t status, void *cbdata) +static void dereg_cbfunc(pmix_status_t st, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; - *active = false; + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } int pmix2x_server_finalize(void) { pmix_status_t rc; - volatile bool active; - - /* deregister the default event handler */ - active = true; - PMIx_Deregister_event_handler(errhdler_ref, fincb, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + opal_pmix2x_event_t *event, *ev2; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); + OBJ_RELEASE(event); + } + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_server_finalize(); return pmix2x_convert_rc(rc); @@ -181,6 +199,13 @@ int pmix2x_server_gen_regex(const char *input, char **regex) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_regex(input, regex); return pmix2x_convert_rc(rc); } @@ -190,13 +215,23 @@ int 
pmix2x_server_gen_ppn(const char *input, char **ppn) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_ppn(input, ppn); return pmix2x_convert_rc(rc); } -static void _reg_nspace(int sd, short args, void *cbdata) +int pmix2x_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_value_t *kv, *k2; pmix_info_t *pinfo = NULL, *pmap; size_t sz, szmap, m, n; @@ -204,26 +239,31 @@ static void _reg_nspace(int sd, short args, void *cbdata) pmix_status_t rc; opal_list_t *pmapinfo; opal_pmix2x_jobid_trkr_t *job; - pmix2x_opcaddy_t op; + opal_pmix_lock_t lock; + int ret; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the jobid */ - (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, cd->jobid); + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); /* store this job in our list of known nspaces */ job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = cd->jobid; + job->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ - if (NULL != cd->info) { - sz = opal_list_get_size(cd->info); + if (NULL != info) { + sz = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, sz); n = 0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { 
(void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) { pinfo[n].value.type = PMIX_DATA_ARRAY; @@ -253,110 +293,63 @@ static void _reg_nspace(int sd, short args, void *cbdata) pinfo = NULL; } - OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; - rc = PMIx_server_register_nspace(nspace, cd->status, pinfo, sz, - opcbfunc, (void*)&op); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz, + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - } else { - op.status = rc; - } - /* ensure we execute the cbfunc so the caller doesn't hang */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(op.status), cd->cbdata); + OPAL_PMIX_WAIT_THREAD(&lock); } + OPAL_PMIX_DESTRUCT_LOCK(&lock); + if (NULL != pinfo) { PMIX_INFO_FREE(pinfo, sz); } - OBJ_DESTRUCT(&op); - OBJ_RELEASE(cd); -} - -int pmix2x_server_register_nspace(opal_jobid_t jobid, - int nlocalprocs, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->status = nlocalprocs; - cd->info = info; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - /* if the cbfunc is NULL, then the caller is in an event - * and we can directly call the processing function */ - if (NULL == cbfunc) { - _reg_nspace(0, 0, cd); - } else { - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _reg_nspace, cd); - event_active(&cd->ev, EV_WRITE, 1); - } - - return OPAL_SUCCESS; -} -static void tdcbfunc(pmix_status_t status, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + ret = pmix2x_convert_rc(rc); - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(status), cd->cbdata); - } - if (cd->active) { - cd->active = false; - } else { - OBJ_RELEASE(cd); + /* release 
the caller */ + if (NULL != cbfunc) { + cbfunc(ret, cbdata); } + return ret; } -static void _dereg_nspace(int sd, short args, void *cbdata) +void pmix2x_server_deregister_nspace(opal_jobid_t jobid, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_jobid_trkr_t *jptr; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->jobid) { + if (jptr->jobid == jobid) { /* found it - tell the server to deregister */ - cd->active = true; - PMIx_server_deregister_nspace(jptr->nspace, tdcbfunc, cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); - OBJ_RELEASE(cd); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_nspace(jptr->nspace, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); /* now get rid of it from our list */ opal_list_remove_item(&mca_pmix_pmix2x_component.jobids, &jptr->super); OBJ_RELEASE(jptr); - return; + break; } } - /* must release the caller */ - tdcbfunc(PMIX_ERR_NOT_FOUND, cd); -} -void pmix2x_server_deregister_nspace(opal_jobid_t jobid, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_nspace(0, 0, cd); - } else { - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_nspace, cd); - event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + 
if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } @@ -368,65 +361,64 @@ int pmix2x_server_register_client(const opal_process_name_t *proc, { pmix_status_t rc; pmix_proc_t p; - pmix2x_opcaddy_t op; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); p.rank = pmix2x_convert_opalrank(proc->vpid); - OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; + OPAL_PMIX_CONSTRUCT_LOCK(&lock); rc = PMIx_server_register_client(&p, uid, gid, server_object, - opcbfunc, (void*)&op); + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - rc = op.status; + OPAL_PMIX_WAIT_THREAD(&lock); } - OBJ_DESTRUCT(&op); + OPAL_PMIX_DESTRUCT_LOCK(&lock); return pmix2x_convert_rc(rc); } -static void _dereg_client(int sd, short args, void *cbdata) +/* tell the local PMIx server to cleanup this client as it is + * done executing */ +void pmix2x_server_deregister_client(const opal_process_name_t *proc, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_jobid_trkr_t *jptr; pmix_proc_t p; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { + if (jptr->jobid == proc->jobid) { /* found it - tell the server to deregister */ (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); - p.rank = 
pmix2x_convert_opalrank(cd->source->vpid); - cd->active = true; - PMIx_server_deregister_client(&p, tdcbfunc, (void*)cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); + p.rank = pmix2x_convert_opalrank(proc->vpid); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_client(&p, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); break; } } - OBJ_RELEASE(cd); -} - -/* tell the local PMIx server to cleanup this client as it is - * done executing */ -void pmix2x_server_deregister_client(const opal_process_name_t *proc, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->source = proc; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_client(0, 0, cd); - } else { - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_client, cd); - event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } @@ -436,6 +428,13 @@ int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env) pmix_status_t rc; pmix_proc_t p; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); p.rank = pmix2x_convert_opalrank(proc->vpid); @@ -466,6 +465,13 @@ int pmix2x_server_dmodex(const opal_process_name_t *proc, pmix2x_opcaddy_t *op; pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* setup the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->mdxcbfunc = cbfunc; @@ -495,6 +501,13 @@ int pmix2x_server_notify_event(int status, pmix_status_t rc; pmix2x_opcaddy_t *op; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the list to an array of pmix_info_t */ if (NULL != info) { sz = opal_list_get_size(info); diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index 113ea02c330..1b8651fc3d8 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -67,7 +67,10 @@ BEGIN_C_DECLS /* identification attributes */ #define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id #define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id - +#define OPAL_PMIX_PROGRAMMING_MODEL "pmix.pgm.model" // (char*) programming model being initialized (e.g., "MPI" or "OpenMP") +#define OPAL_PMIX_MODEL_LIBRARY_NAME "pmix.mdl.name" // (char*) programming model implementation ID (e.g., "OpenMPI" or "MPICH") +#define OPAL_PMIX_MODEL_LIBRARY_VERSION "pmix.mld.vrs" // (char*) programming model version string (e.g., "2.1.1") +#define OPAL_PMIX_THREADING_MODEL "pmix.threads" // (char*) threading model used (e.g., "pthreads") /* attributes for the rendezvous socket */ #define OPAL_PMIX_USOCK_DISABLE "pmix.usock.disable" // (bool) disable legacy usock support diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c index b04b247b3cf..f68b427f716 100644 --- a/opal/mca/pmix/s1/pmix_s1.c +++ b/opal/mca/pmix/s1/pmix_s1.c @@ -31,7 +31,7 @@ #include "opal/mca/pmix/base/pmix_base_hash.h" #include "pmix_s1.h" -static int s1_init(void); +static int s1_init(opal_list_t *ilist); static int s1_fini(void); static int s1_initialized(void); static int s1_abort(int flag, const char msg[], @@ -141,7 
+141,7 @@ static int kvs_put(const char key[], const char value[]) return rc; } -static int s1_init(void) +static int s1_init(opal_list_t *ilist) { PMI_BOOL initialized; int spawned; @@ -155,6 +155,10 @@ static int s1_init(void) char **localranks=NULL; opal_process_name_t wildcard_rank; + if (0 < pmix_init_count) { + return OPAL_SUCCESS; + } + if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) { OPAL_PMI_ERROR(rc, "PMI_Initialized"); return OPAL_ERROR; diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index 130dedac5bb..02d3beceb44 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ b/opal/mca/pmix/s2/pmix_s2.c @@ -36,7 +36,7 @@ #include "opal/mca/pmix/base/pmix_base_hash.h" #include "pmix_s2.h" -static int s2_init(void); +static int s2_init(opal_list_t *ilist); static int s2_fini(void); static int s2_initialized(void); static int s2_abort(int flag, const char msg[], @@ -158,7 +158,7 @@ static int kvs_get(const char key[], char value [], int maxvalue) return OPAL_SUCCESS; } -static int s2_init(void) +static int s2_init(opal_list_t *ilist) { int spawned, size, rank, appnum; int rc, ret = OPAL_ERROR; @@ -174,6 +174,10 @@ static int s2_init(void) char nmtmp[64]; opal_process_name_t wildcard_rank; + if (0 < pmix_init_count) { + return OPAL_SUCCESS; + } + /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { return OPAL_SUCCESS; diff --git a/opal/threads/threads.h b/opal/threads/threads.h index 91aa031e908..661d6b00ee0 100644 --- a/opal/threads/threads.h +++ b/opal/threads/threads.h @@ -13,6 +13,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -114,6 +115,19 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_thread_t); opal_condition_broadcast((cnd)); \ } while(0); +/* provide a macro for forward-proofing the shifting + * of objects between libevent threads - at some point, we + * may revamp that threading model */ + +/* post an object to another thread - for now, we + * only have a memory barrier */ +#define OPAL_POST_OBJECT(o) opal_atomic_wmb() + +/* acquire an object from another thread - for now, + * we only have a memory barrier */ +#define OPAL_ACQUIRE_OBJECT(o) opal_atomic_rmb() + + OPAL_DECLSPEC int opal_thread_start(opal_thread_t *); OPAL_DECLSPEC int opal_thread_join(opal_thread_t *, void **thread_return); diff --git a/orte/include/orte/constants.h b/orte/include/orte/constants.h index 89b23e86fbb..de6c3cbb212 100644 --- a/orte/include/orte/constants.h +++ b/orte/include/orte/constants.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -142,20 +142,12 @@ enum { ORTE_ERR_ALLOCATION_PENDING = (ORTE_ERR_BASE - 43), ORTE_ERR_NO_PATH_TO_TARGET = (ORTE_ERR_BASE - 44), ORTE_ERR_OP_IN_PROGRESS = (ORTE_ERR_BASE - 45), - ORTE_ERR_OPEN_CHANNEL_PEER_FAIL = (ORTE_ERR_BASE - 46), - ORTE_ERR_OPEN_CHANNEL_PEER_REJECT = (ORTE_ERR_BASE - 47), - ORTE_ERR_QOS_TYPE_UNSUPPORTED = (ORTE_ERR_BASE - 48), - ORTE_ERR_QOS_ACK_WINDOW_FULL = (ORTE_ERR_BASE - 49), - ORTE_ERR_ACK_TIMEOUT_SENDER = (ORTE_ERR_BASE - 50), - ORTE_ERR_ACK_TIMEOUT_RECEIVER = (ORTE_ERR_BASE - 51), - ORTE_ERR_LOST_MSG_IN_WINDOW = (ORTE_ERR_BASE - 52), - ORTE_ERR_CHANNEL_BUSY = (ORTE_ERR_BASE - 53), - ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 54), - ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 55), - ORTE_ERR_OPEN_CHANNEL_DUPLICATE = (ORTE_ERR_BASE - 56), - ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 57), - ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 58), - ORTE_ERR_CONDUIT_SEND_FAIL = (ORTE_ERR_BASE - 59) + ORTE_ERR_OPEN_CONDUIT_FAIL = (ORTE_ERR_BASE - 46), + ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 47), + ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 48), + ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 49), + ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 50), + ORTE_ERR_CONDUIT_SEND_FAIL = (ORTE_ERR_BASE - 51) }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) @@ -163,4 +155,3 @@ enum { END_C_DECLS #endif /* ORTE_CONSTANTS_H */ - diff --git a/orte/mca/dfs/app/dfs_app.c b/orte/mca/dfs/app/dfs_app.c index 560c9c4e331..33676f5095a 100644 --- a/orte/mca/dfs/app/dfs_app.c +++ b/orte/mca/dfs/app/dfs_app.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -33,6 +33,7 @@ #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -507,6 +508,8 @@ static void process_opens(int fd, short args, void *cbdata) opal_list_t lt; opal_namelist_t *nm; + ORTE_ACQUIRE_OBJECT(dfs); + /* get the scheme to determine if we can process locally or not */ if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); @@ -661,7 +664,7 @@ static void dfs_open(char *uri, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_opens); + ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); } static void process_close(int fd, short args, void *cbdata) @@ -672,6 +675,8 @@ static void process_close(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(close_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s closing fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -757,7 +762,7 @@ static void dfs_close(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_close); + ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); } static void process_sizes(int fd, short args, void *cbdata) @@ -769,6 +774,8 @@ static void process_sizes(int fd, short args, void *cbdata) int rc; struct stat buf; + ORTE_ACQUIRE_OBJECT(size_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing get_size on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -881,7 +888,7 @@ static void dfs_get_file_size(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_sizes); + ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); } @@ -895,6 +902,8 @@ static void process_seeks(int fd, short args, void *cbdata) int rc; struct 
stat buf; + ORTE_ACQUIRE_OBJECT(seek_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing seek on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1035,7 +1044,7 @@ static void dfs_seek(int fd, long offset, int whence, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_seeks); + ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); } static void process_reads(int fd, short args, void *cbdata) @@ -1048,6 +1057,8 @@ static void process_reads(int fd, short args, void *cbdata) int64_t i64; int rc; + ORTE_ACQUIRE_OBJECT(read_dfs); + /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); @@ -1145,7 +1156,7 @@ static void dfs_read(int fd, uint8_t *buffer, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_reads); + ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); } static void process_posts(int fd, short args, void *cbdata) @@ -1154,6 +1165,8 @@ static void process_posts(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* we will get confirmation in our receive function, so * add this request to our list */ dfs->id = req_id++; @@ -1212,7 +1225,7 @@ static void dfs_post_file_map(opal_buffer_t *bo, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_posts); + ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); } static void process_getfm(int fd, short args, void *cbdata) @@ -1221,6 +1234,8 @@ static void process_getfm(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* we will get confirmation in our receive function, so * add this request to our list */ dfs->id = req_id++; @@ -1275,7 +1290,7 @@ static void dfs_get_file_map(orte_process_name_t *target, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, 
process_getfm); + ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); } static void dfs_load_file_maps(orte_jobid_t jobid, @@ -1298,4 +1313,3 @@ static void dfs_purge_file_maps(orte_jobid_t jobid, cbfunc(cbdata); } } - diff --git a/orte/mca/dfs/base/base.h b/orte/mca/dfs/base/base.h index cca2e8909d3..8356b488cd7 100644 --- a/orte/mca/dfs/base/base.h +++ b/orte/mca/dfs/base/base.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -76,14 +77,6 @@ typedef struct { } orte_dfs_request_t; OBJ_CLASS_DECLARATION(orte_dfs_request_t); -#define ORTE_DFS_POST_REQUEST(d, cb) \ - do { \ - opal_event_set(orte_event_base, &((d)->ev), \ - -1, OPAL_EV_WRITE, (cb), (d)); \ - opal_event_set_priority(&((d)->ev), ORTE_SYS_PRI); \ - opal_event_active(&((d)->ev), OPAL_EV_WRITE, 1); \ - } while(0); - END_C_DECLS #endif diff --git a/orte/mca/dfs/orted/dfs_orted.c b/orte/mca/dfs/orted/dfs_orted.c index f8d64a07d20..ac72ce3a412 100644 --- a/orte/mca/dfs/orted/dfs_orted.c +++ b/orte/mca/dfs/orted/dfs_orted.c @@ -2,7 +2,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -35,6 +35,7 @@ #include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -304,6 +305,8 @@ static void process_opens(int fd, short args, void *cbdata) int v; orte_node_t *node, *nptr; + ORTE_ACQUIRE_OBJECT(dfs); + /* get the scheme to determine if we can process locally or not */ if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { OBJ_RELEASE(dfs); @@ -465,7 +468,7 @@ static void dfs_open(char *uri, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_opens); + ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); } static void process_close(int fd, short args, void *cbdata) @@ -476,6 +479,8 @@ static void process_close(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(close_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s closing fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -561,7 +566,7 @@ static void dfs_close(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_close); + ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); } static void process_sizes(int fd, short args, void *cbdata) @@ -573,6 +578,8 @@ static void process_sizes(int fd, short args, void *cbdata) int rc; struct stat buf; + ORTE_ACQUIRE_OBJECT(size_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing get_size on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -665,7 +672,7 @@ static void dfs_get_file_size(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_sizes); + ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); } @@ -679,6 +686,8 @@ static void process_seeks(int fd, short args, void *cbdata) int rc; struct stat buf; + ORTE_ACQUIRE_OBJECT(seek_dfs); + 
opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing seek on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -814,7 +823,7 @@ static void dfs_seek(int fd, long offset, int whence, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_seeks); + ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); } static void process_reads(int fd, short args, void *cbdata) @@ -827,6 +836,8 @@ static void process_reads(int fd, short args, void *cbdata) int64_t i64; int rc; + ORTE_ACQUIRE_OBJECT(read_dfs); + /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); @@ -924,7 +935,7 @@ static void dfs_read(int fd, uint8_t *buffer, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_reads); + ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); } static void process_posts(int fd, short args, void *cbdata) @@ -935,6 +946,8 @@ static void process_posts(int fd, short args, void *cbdata) opal_list_item_t *item; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s posting file map containing %d bytes for target %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1009,7 +1022,7 @@ static void dfs_post_file_map(opal_buffer_t *buffer, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_posts); + ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); } static int get_job_maps(orte_dfs_jobfm_t *jfm, @@ -1057,6 +1070,8 @@ static void process_getfm(int fd, short args, void *cbdata) int32_t n, ntotal; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* if the target job is WILDCARD, then process * data for all jobids - else, find the one */ @@ -1120,7 +1135,7 @@ static void dfs_get_file_map(orte_process_name_t *target, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_getfm); + ORTE_THREADSHIFT(dfs, 
orte_event_base, process_getfm, ORTE_SYS_PRI); } static void process_load(int fd, short args, void *cbdata) @@ -1135,6 +1150,8 @@ static void process_load(int fd, short args, void *cbdata) int rc; opal_buffer_t *xfer; + ORTE_ACQUIRE_OBJECT(dfs); + /* see if we already have a tracker for this job */ jfm = NULL; for (item = opal_list_get_first(&file_maps); @@ -1233,7 +1250,7 @@ static void dfs_load_file_maps(orte_jobid_t jobid, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_load); + ORTE_THREADSHIFT(dfs, orte_event_base, process_load, ORTE_SYS_PRI); } static void process_purge(int fd, short args, void *cbdata) @@ -1242,6 +1259,8 @@ static void process_purge(int fd, short args, void *cbdata) opal_list_item_t *item; orte_dfs_jobfm_t *jfm, *jptr; + ORTE_ACQUIRE_OBJECT(dfs); + /* find the job tracker */ jfm = NULL; for (item = opal_list_get_first(&file_maps); @@ -1288,7 +1307,7 @@ static void dfs_purge_file_maps(orte_jobid_t jobid, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_purge); + ORTE_THREADSHIFT(dfs, orte_event_base, process_purge, ORTE_SYS_PRI); } @@ -2368,4 +2387,3 @@ static void remote_read(int fd, short args, void *cbdata) } OBJ_RELEASE(req); } - diff --git a/orte/mca/dfs/test/dfs_test.c b/orte/mca/dfs/test/dfs_test.c index 7ebeba37984..24392e013dc 100644 --- a/orte/mca/dfs/test/dfs_test.c +++ b/orte/mca/dfs/test/dfs_test.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -32,6 +32,7 @@ #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -449,6 +450,8 @@ static void process_opens(int fd, short args, void *cbdata) opal_list_t lt; opal_namelist_t *nm; + ORTE_ACQUIRE_OBJECT(dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s PROCESSING OPEN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -583,7 +586,7 @@ static void dfs_open(char *uri, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_opens); + ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); } static void process_close(int fd, short args, void *cbdata) @@ -594,6 +597,8 @@ static void process_close(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(close_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s closing fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -673,7 +678,7 @@ static void dfs_close(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_close); + ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); } static void process_sizes(int fd, short args, void *cbdata) @@ -684,6 +689,8 @@ static void process_sizes(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(size_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing get_size on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -775,7 +782,7 @@ static void dfs_get_file_size(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_sizes); + ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); } @@ -788,6 +795,8 @@ static void process_seeks(int fd, short args, void *cbdata) int64_t i64; int rc; + 
ORTE_ACQUIRE_OBJECT(seek_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing seek on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -885,7 +894,7 @@ static void dfs_seek(int fd, long offset, int whence, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_seeks); + ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); } static void process_reads(int fd, short args, void *cbdata) @@ -897,6 +906,8 @@ static void process_reads(int fd, short args, void *cbdata) int64_t i64; int rc; + ORTE_ACQUIRE_OBJECT(read_dfs); + /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); @@ -979,7 +990,7 @@ static void dfs_read(int fd, uint8_t *buffer, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_reads); + ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); } static void process_posts(int fd, short args, void *cbdata) @@ -988,6 +999,8 @@ static void process_posts(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* we will get confirmation in our receive function, so * add this request to our list */ dfs->id = req_id++; @@ -1046,7 +1059,7 @@ static void dfs_post_file_map(opal_buffer_t *bo, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_posts); + ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); } static void process_getfm(int fd, short args, void *cbdata) @@ -1055,6 +1068,8 @@ static void process_getfm(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* we will get confirmation in our receive function, so * add this request to our list */ dfs->id = req_id++; @@ -1109,7 +1124,7 @@ static void dfs_get_file_map(orte_process_name_t *target, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_getfm); + 
ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); } static void dfs_load_file_maps(orte_jobid_t jobid, @@ -1132,4 +1147,3 @@ static void dfs_purge_file_maps(orte_jobid_t jobid, cbfunc(cbdata); } } - diff --git a/orte/mca/errmgr/base/Makefile.am b/orte/mca/errmgr/base/Makefile.am index b901a8b4656..8fd7d3b3d45 100644 --- a/orte/mca/errmgr/base/Makefile.am +++ b/orte/mca/errmgr/base/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,5 +27,4 @@ headers += \ libmca_errmgr_la_SOURCES += \ base/errmgr_base_select.c \ base/errmgr_base_frame.c \ - base/errmgr_base_fns.c \ - base/errmgr_base_tool.c + base/errmgr_base_fns.c diff --git a/orte/mca/errmgr/base/errmgr_base_fns.c b/orte/mca/errmgr/base/errmgr_base_fns.c index 87da0a8b5d5..8ce8794f295 100644 --- a/orte/mca/errmgr/base/errmgr_base_fns.c +++ b/orte/mca/errmgr/base/errmgr_base_fns.c @@ -13,7 +13,7 @@ * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -82,99 +82,6 @@ #include "orte/mca/errmgr/base/base.h" #include "orte/mca/errmgr/base/errmgr_private.h" -/* - * Object stuff - */ -void orte_errmgr_predicted_proc_construct(orte_errmgr_predicted_proc_t *item); -void orte_errmgr_predicted_proc_destruct( orte_errmgr_predicted_proc_t *item); - -OBJ_CLASS_INSTANCE(orte_errmgr_predicted_proc_t, - opal_list_item_t, - orte_errmgr_predicted_proc_construct, - orte_errmgr_predicted_proc_destruct); - -void orte_errmgr_predicted_proc_construct(orte_errmgr_predicted_proc_t *item) -{ - item->proc_name.vpid = ORTE_VPID_INVALID; - item->proc_name.jobid = ORTE_JOBID_INVALID; -} - -void orte_errmgr_predicted_proc_destruct( orte_errmgr_predicted_proc_t *item) -{ - item->proc_name.vpid = ORTE_VPID_INVALID; - item->proc_name.jobid = ORTE_JOBID_INVALID; -} - -void orte_errmgr_predicted_node_construct(orte_errmgr_predicted_node_t *item); -void orte_errmgr_predicted_node_destruct( orte_errmgr_predicted_node_t *item); - -OBJ_CLASS_INSTANCE(orte_errmgr_predicted_node_t, - opal_list_item_t, - orte_errmgr_predicted_node_construct, - orte_errmgr_predicted_node_destruct); - -void orte_errmgr_predicted_node_construct(orte_errmgr_predicted_node_t *item) -{ - item->node_name = NULL; -} - -void orte_errmgr_predicted_node_destruct( orte_errmgr_predicted_node_t *item) -{ - if( NULL != item->node_name ) { - free(item->node_name); - item->node_name = NULL; - } -} - -void orte_errmgr_predicted_map_construct(orte_errmgr_predicted_map_t *item); -void orte_errmgr_predicted_map_destruct( orte_errmgr_predicted_map_t *item); - -OBJ_CLASS_INSTANCE(orte_errmgr_predicted_map_t, - opal_list_item_t, - orte_errmgr_predicted_map_construct, - orte_errmgr_predicted_map_destruct); - -void orte_errmgr_predicted_map_construct(orte_errmgr_predicted_map_t *item) -{ - item->proc_name.vpid = ORTE_VPID_INVALID; - item->proc_name.jobid = ORTE_JOBID_INVALID; - - item->node_name = NULL; - - item->map_proc_name.vpid = ORTE_VPID_INVALID; - 
item->map_proc_name.jobid = ORTE_JOBID_INVALID; - - item->map_node_name = NULL; - item->off_current_node = false; - item->pre_map_fixed_node = NULL; -} - -void orte_errmgr_predicted_map_destruct( orte_errmgr_predicted_map_t *item) -{ - item->proc_name.vpid = ORTE_VPID_INVALID; - item->proc_name.jobid = ORTE_JOBID_INVALID; - - if( NULL != item->node_name ) { - free(item->node_name); - item->node_name = NULL; - } - - item->map_proc_name.vpid = ORTE_VPID_INVALID; - item->map_proc_name.jobid = ORTE_JOBID_INVALID; - - if( NULL != item->map_node_name ) { - free(item->map_node_name); - item->map_node_name = NULL; - } - - item->off_current_node = false; - - if( NULL != item->pre_map_fixed_node ) { - free(item->pre_map_fixed_node); - item->pre_map_fixed_node = NULL; - } -} - /* * Public interfaces */ @@ -231,12 +138,6 @@ void orte_errmgr_base_abort(int error_code, char *fmt, ...) /* No way to reach here */ } -void orte_errmgr_base_register_migration_warning(struct timeval *tv) -{ - /* stub function - ignore */ - return; -} - int orte_errmgr_base_abort_peers(orte_process_name_t *procs, orte_std_cntr_t num_procs, int error_code) @@ -244,195 +145,6 @@ int orte_errmgr_base_abort_peers(orte_process_name_t *procs, return ORTE_ERR_NOT_IMPLEMENTED; } -int orte_errmgr_base_register_error_callback(orte_errmgr_error_callback_fn_t *cbfunc, - orte_errmgr_error_order_t order) -{ - orte_errmgr_cback_t *cb, *cbcur; - - /* check the order to see what to do */ - switch(order) { - case ORTE_ERRMGR_CALLBACK_FIRST: - /* only one can be so designated */ - if (NULL != (cb = (orte_errmgr_cback_t*)opal_list_get_first(&orte_errmgr_base.error_cbacks))) { - if (ORTE_ERRMGR_CALLBACK_FIRST == cb->order) { - return ORTE_ERR_NOT_SUPPORTED; - } - } - cb = OBJ_NEW(orte_errmgr_cback_t); - cb->order = order; - cb->callback =cbfunc; - opal_list_prepend(&orte_errmgr_base.error_cbacks, &cb->super); - break; - case ORTE_ERRMGR_CALLBACK_LAST: - /* only one can be so designated */ - if (NULL != (cb = 
(orte_errmgr_cback_t*)opal_list_get_last(&orte_errmgr_base.error_cbacks))) { - if (ORTE_ERRMGR_CALLBACK_LAST == cb->order) { - return ORTE_ERR_NOT_SUPPORTED; - } - } - cb = OBJ_NEW(orte_errmgr_cback_t); - cb->order = order; - cb->callback = cbfunc; - opal_list_append(&orte_errmgr_base.error_cbacks, &cb->super); - break; - case ORTE_ERRMGR_CALLBACK_PREPEND: - cb = OBJ_NEW(orte_errmgr_cback_t); - cb->order = order; - cb->callback =cbfunc; - if (NULL != (cbcur = (orte_errmgr_cback_t*)opal_list_get_first(&orte_errmgr_base.error_cbacks)) && - ORTE_ERRMGR_CALLBACK_FIRST == cbcur->order) { - opal_list_insert(&orte_errmgr_base.error_cbacks, &cb->super, 1); - } else { - opal_list_prepend(&orte_errmgr_base.error_cbacks, &cb->super); - } - break; - case ORTE_ERRMGR_CALLBACK_APPEND: - cb = OBJ_NEW(orte_errmgr_cback_t); - cb->order = order; - cb->callback =cbfunc; - if (NULL != (cbcur = (orte_errmgr_cback_t*)opal_list_get_last(&orte_errmgr_base.error_cbacks)) && - ORTE_ERRMGR_CALLBACK_LAST == cbcur->order) { - opal_list_insert_pos(&orte_errmgr_base.error_cbacks, &cbcur->super, &cb->super); - } else { - opal_list_append(&orte_errmgr_base.error_cbacks, &cb->super); - } - opal_list_append(&orte_errmgr_base.error_cbacks, &cb->super); - break; - } - return ORTE_SUCCESS; -} - -void orte_errmgr_base_execute_error_callbacks(opal_pointer_array_t *errors) -{ - orte_errmgr_cback_t *cb; - char *errstring=NULL; - orte_error_t *err; - int errcode = ORTE_ERROR_DEFAULT_EXIT_CODE; - - /* if no callbacks have been provided, then we abort */ - if (0 == opal_list_get_size(&orte_errmgr_base.error_cbacks)) { - /* take the first entry, if available */ - if (NULL != errors && - (NULL != (err = (orte_error_t*)opal_pointer_array_get_item(errors, 0)))) { - errstring = (char*)ORTE_ERROR_NAME(err->errcode); - errcode = err->errcode; - } - if (NULL == errstring) { - /* if the error is silent, say nothing */ - orte_errmgr.abort(errcode, NULL); - } - orte_errmgr.abort(errcode, "Executing default error 
callback: %s", errstring); - } - - /* cycle across the provided callbacks until we complete the list - * or one reports that no further action is required - */ - OPAL_LIST_FOREACH(cb, &orte_errmgr_base.error_cbacks, orte_errmgr_cback_t) { - if (ORTE_SUCCESS == cb->callback(errors)) { - break; - } - } -} - -/******************** - * Utility functions - ********************/ -#if OPAL_ENABLE_FT_CR - -void orte_errmgr_base_migrate_state_notify(int state) -{ - switch(state) { - case ORTE_ERRMGR_MIGRATE_STATE_ERROR: - case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS: - opal_output(0, "%d: Migration failed for process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); - break; - case ORTE_ERRMGR_MIGRATE_STATE_FINISH: - opal_output(0, "%d: Migration successful for process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); - break; - - case ORTE_ERRMGR_MIGRATE_STATE_NONE: - case ORTE_ERRMGR_MIGRATE_STATE_REQUEST: - case ORTE_ERRMGR_MIGRATE_STATE_RUNNING: - case ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT: - case ORTE_ERRMGR_MIGRATE_STATE_STARTUP: - case ORTE_ERRMGR_MIGRATE_MAX: - default: - break; - } -} - -void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc) -{ - if (NULL != proc) { - switch(state) { - case ORTE_PROC_STATE_ABORTED: - case ORTE_PROC_STATE_ABORTED_BY_SIG: - case ORTE_PROC_STATE_TERM_WO_SYNC: - case ORTE_PROC_STATE_TERMINATED: - case ORTE_PROC_STATE_KILLED_BY_CMD: - case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED: - opal_output(0, "%d: Process %s is dead.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - break; - - case ORTE_PROC_STATE_HEARTBEAT_FAILED: - opal_output(0, "%d: Process %s is unreachable.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - - case ORTE_PROC_STATE_COMM_FAILED: - opal_output(0, "%d: Failed to communicate with process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - break; - - case ORTE_PROC_STATE_CALLED_ABORT: - case 
ORTE_PROC_STATE_FAILED_TO_START: - opal_output(0, "%d: Process %s has called abort.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - break; - case ORTE_PROC_STATE_MIGRATING: - default: - break; - } - } -} - -int orte_errmgr_base_migrate_state_str(char ** state_str, int state) -{ - switch(state) { - case ORTE_ERRMGR_MIGRATE_STATE_NONE: - *state_str = strdup(" -- "); - break; - case ORTE_ERRMGR_MIGRATE_STATE_REQUEST: - *state_str = strdup("Requested"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_RUNNING: - *state_str = strdup("Running"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT: - *state_str = strdup("Checkpointing"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_STARTUP: - *state_str = strdup("Restarting"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_FINISH: - *state_str = strdup("Finished"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_ERROR: - *state_str = strdup("Error"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS: - *state_str = strdup("Error: Migration in progress"); - break; - default: - asprintf(state_str, "Unknown %d", state); - break; - } - - return ORTE_SUCCESS; -} -#endif #if OPAL_ENABLE_FT_CR int orte_errmgr_base_update_app_context_for_cr_recovery(orte_job_t *jobdata, diff --git a/orte/mca/errmgr/base/errmgr_base_frame.c b/orte/mca/errmgr/base/errmgr_base_frame.c index 455779cdc13..d9a29cb6403 100644 --- a/orte/mca/errmgr/base/errmgr_base_frame.c +++ b/orte/mca/errmgr/base/errmgr_base_frame.c @@ -12,7 +12,7 @@ * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -53,17 +53,11 @@ orte_errmgr_base_t orte_errmgr_base = {{{0}}}; /* Public module provides a wrapper around previous functions */ orte_errmgr_base_module_t orte_errmgr_default_fns = { - NULL, /* init */ - NULL, /* finalize */ - orte_errmgr_base_log, - orte_errmgr_base_abort, - orte_errmgr_base_abort_peers, - NULL, /* predicted_fault */ - NULL, /* suggest_map_targets */ - NULL, /* ft_event */ - orte_errmgr_base_register_migration_warning, - orte_errmgr_base_register_error_callback, - orte_errmgr_base_execute_error_callbacks + .init = NULL, /* init */ + .finalize = NULL, /* finalize */ + .logfn = orte_errmgr_base_log, + .abort = orte_errmgr_base_abort, + .abort_peers = orte_errmgr_base_abort_peers }; /* NOTE: ABSOLUTELY MUST initialize this * struct to include the log function as it @@ -71,16 +65,7 @@ orte_errmgr_base_module_t orte_errmgr_default_fns = { * opened yet due to error */ orte_errmgr_base_module_t orte_errmgr = { - NULL, - NULL, - orte_errmgr_base_log, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL + .logfn = orte_errmgr_base_log }; static int orte_errmgr_base_close(void) @@ -118,7 +103,3 @@ static int orte_errmgr_base_open(mca_base_open_flag_t flags) MCA_BASE_FRAMEWORK_DECLARE(orte, errmgr, "ORTE Error Manager", NULL, orte_errmgr_base_open, orte_errmgr_base_close, mca_errmgr_base_static_components, 0); - -OBJ_CLASS_INSTANCE(orte_errmgr_cback_t, - opal_list_item_t, - NULL, NULL); diff --git a/orte/mca/errmgr/base/errmgr_base_tool.c b/orte/mca/errmgr/base/errmgr_base_tool.c deleted file mode 100644 index 6fe45e0d913..00000000000 --- a/orte/mca/errmgr/base/errmgr_base_tool.c +++ /dev/null @@ -1,441 +0,0 @@ -/* - * Copyright (c) 2009-2010 The Trustees of Indiana University. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#if HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#if HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_DIRENT_H -#include -#endif /* HAVE_DIRENT_H */ -#include - -#include "opal/dss/dss.h" - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/util/os_dirpath.h" -#include "opal/util/output.h" -#include "opal/util/basename.h" -#include "opal/util/argv.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/snapc/snapc.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -/** - * This file contains function for the HNP to communicate with the - * orte-migrate command. 
- */ -#if OPAL_ENABLE_FT_CR - -/****************** - * Local Functions - ******************/ -static int errmgr_base_tool_start_cmdline_listener(void); -static int errmgr_base_tool_stop_cmdline_listener(void); - -static void errmgr_base_tool_cmdline_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata); - -/****************** - * Object stuff - ******************/ -static orte_process_name_t errmgr_cmdline_sender = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}; -static bool errmgr_cmdline_recv_issued = false; -static int errmgr_tool_initialized = false; - -/******************** - * Module Functions - ********************/ -int orte_errmgr_base_tool_init(void) -{ - int ret; - - if( (++errmgr_tool_initialized) != 1 ) { - if( errmgr_tool_initialized < 1 ) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; - } - - /* Only HNP communicates with tools */ - if (! ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * Setup command line migrate tool request listener - */ - if( ORTE_SUCCESS != (ret = errmgr_base_tool_start_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - return ORTE_SUCCESS; -} - -int orte_errmgr_base_tool_finalize(void) -{ - int ret; - - if( (--errmgr_tool_initialized) != 0 ) { - if( errmgr_tool_initialized < 0 ) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; - } - - /* Only HNP communicates with tools */ - if (! ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * Clean up listeners - */ - if( ORTE_SUCCESS != (ret = errmgr_base_tool_stop_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - return ORTE_SUCCESS; -} - -int orte_errmgr_base_migrate_update(int status) -{ - int ret, exit_status = ORTE_SUCCESS; - opal_buffer_t *loc_buffer = NULL; - orte_errmgr_tool_cmd_flag_t command = ORTE_ERRMGR_MIGRATE_TOOL_UPDATE_CMD; - - /* Only HNP communicates with tools */ - if (! 
ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * If this is an invalid state, then return an error - */ - if( ORTE_ERRMGR_MIGRATE_MAX < status ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: Invalid state %d < (Max %d)", - status, ORTE_ERRMGR_MIGRATE_MAX); - return ORTE_ERR_BAD_PARAM; - } - - /* - * Report the status over the notifier interface - */ - orte_errmgr_base_migrate_state_notify(status); - - /* - * If the caller is indicating that they are finished and ready for another - * command, then repost the RML listener. - */ - if( ORTE_ERRMGR_MIGRATE_STATE_NONE == status ) { - if( ORTE_SUCCESS != (ret = errmgr_base_tool_start_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - return ORTE_SUCCESS; - } - - /* - * Noop if invalid peer, or peer not specified - */ - if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, &errmgr_cmdline_sender) ) { - return ORTE_SUCCESS; - } - - /* - * Do not send to self, as that is silly. 
- */ - if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, &errmgr_cmdline_sender) ) { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Warning: Do not send to self!\n")); - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Sending update command \n", - status)); - - /******************** - * Send over the status of the checkpoint - * - migration state - ********************/ - if (NULL == (loc_buffer = OBJ_NEW(opal_buffer_t))) { - exit_status = ORTE_ERROR; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(loc_buffer, &command, 1, ORTE_ERRMGR_MIGRATE_TOOL_CMD)) ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: DSS Pack (cmd) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(loc_buffer, &status, 1, OPAL_INT))) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: DSS Pack (status) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&errmgr_cmdline_sender, - loc_buffer, ORTE_RML_TAG_MIGRATE, - orte_rml_send_callback, NULL))) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: Send (status) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - if(NULL != loc_buffer) { - OBJ_RELEASE(loc_buffer); - loc_buffer = NULL; - } - - return exit_status; -} - -/******************** - * Utility functions - ********************/ - -/******************** - * Local Functions - ********************/ -static int errmgr_base_tool_start_cmdline_listener(void) -{ - if (errmgr_cmdline_recv_issued && ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - 
} - - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool: Startup Command Line Channel")); - - /* - * Coordinator command listener - */ - errmgr_cmdline_sender.jobid = ORTE_JOBID_INVALID; - errmgr_cmdline_sender.vpid = ORTE_VPID_INVALID; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MIGRATE, - 0, errmgr_base_tool_cmdline_recv, NULL); - - errmgr_cmdline_recv_issued = true; - - return ORTE_SUCCESS; -} - - -static int errmgr_base_tool_stop_cmdline_listener(void) -{ - int exit_status = ORTE_SUCCESS; - - if (!errmgr_cmdline_recv_issued && ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool: Shutdown Command Line Channel")); - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MIGRATE); - - errmgr_cmdline_recv_issued = false; - - return exit_status; -} - -/***************** - * Listener Callbacks - *****************/ -static void errmgr_base_tool_cmdline_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - int ret; - orte_process_name_t swap_dest; - orte_errmgr_tool_cmd_flag_t command; - orte_std_cntr_t count = 1; - char *off_nodes = NULL; - char *off_procs = NULL; - char *onto_nodes = NULL; - char **split_off_nodes = NULL; - char **split_off_procs = NULL; - char **split_onto_nodes = NULL; - opal_list_t *proc_list = NULL; - opal_list_t *node_list = NULL; - opal_list_t *suggested_map_list = NULL; - orte_errmgr_predicted_proc_t *off_proc = NULL; - orte_errmgr_predicted_node_t *off_node = NULL; - orte_errmgr_predicted_map_t *onto_map = NULL; - int cnt = 0, i; - - - if( ORTE_RML_TAG_MIGRATE != tag ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Error: Unknown tag: Received a command message from %s (tag = %d).", - ORTE_NAME_PRINT(sender), tag); - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return; - } - - OPAL_OUTPUT_VERBOSE((10, 
orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command Line: Start a migration operation [Sender = %s]", - ORTE_NAME_PRINT(sender))); - - errmgr_cmdline_recv_issued = false; /* Not a persistent RML message */ - - /* - * If we are already interacting with a command line tool then reject this - * request. Since we only allow the processing of one tool command at a - * time. - */ - if( OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, &errmgr_cmdline_sender) ) { - swap_dest.jobid = errmgr_cmdline_sender.jobid; - swap_dest.vpid = errmgr_cmdline_sender.vpid; - - errmgr_cmdline_sender = *sender; - orte_errmgr_base_migrate_update(ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS); - - errmgr_cmdline_sender.jobid = swap_dest.jobid; - errmgr_cmdline_sender.vpid = swap_dest.vpid; - - return; - } - - errmgr_cmdline_sender = *sender; - - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_ERRMGR_MIGRATE_TOOL_CMD))) { - ORTE_ERROR_LOG(ret); - return; - } - - /* - * orte-migrate has requested that a checkpoint be taken - */ - if (ORTE_ERRMGR_MIGRATE_TOOL_INIT_CMD == command) { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command line requested process migration [command %d]\n", - command)); - - /* - * Unpack the buffer from the orte-migrate command - */ - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(off_procs), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(off_nodes), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(onto_nodes), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - /* - * Parse the comma separated list - */ - proc_list = OBJ_NEW(opal_list_t); - node_list = OBJ_NEW(opal_list_t); - suggested_map_list = OBJ_NEW(opal_list_t); - - split_off_procs = 
opal_argv_split(off_procs, ','); - cnt = opal_argv_count(split_off_procs); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - off_proc = OBJ_NEW(orte_errmgr_predicted_proc_t); - off_proc->proc_name.vpid = atoi(split_off_procs[i]); - opal_list_append(proc_list, &(off_proc->super)); - } - } - - split_off_nodes = opal_argv_split(off_nodes, ','); - cnt = opal_argv_count(split_off_nodes); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - off_node = OBJ_NEW(orte_errmgr_predicted_node_t); - off_node->node_name = strdup(split_off_nodes[i]); - opal_list_append(node_list, &(off_node->super)); - } - } - - split_onto_nodes = opal_argv_split(onto_nodes, ','); - cnt = opal_argv_count(split_onto_nodes); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - onto_map = OBJ_NEW(orte_errmgr_predicted_map_t); - onto_map->map_node_name = strdup(split_onto_nodes[i]); - opal_list_append(suggested_map_list, &(onto_map->super)); - } - } - - /* - * Pass to the predicted fault function to see how they would like to progress - */ - orte_errmgr.predicted_fault(proc_list, node_list, suggested_map_list); - } - /* - * Unknown command - */ - else { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command line sent an unknown command (command %d)\n", - command)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); - } - - return; -} -#endif diff --git a/orte/mca/errmgr/base/errmgr_private.h b/orte/mca/errmgr/base/errmgr_private.h index b49bb57478a..8dd6967743f 100644 --- a/orte/mca/errmgr/base/errmgr_private.h +++ b/orte/mca/errmgr/base/errmgr_private.h @@ -12,6 +12,7 @@ * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -53,14 +54,6 @@ typedef struct { ORTE_DECLSPEC extern orte_errmgr_base_t orte_errmgr_base; -/* define a struct to hold registered error callbacks */ -typedef struct { - opal_list_item_t super; - orte_errmgr_error_order_t order; - orte_errmgr_error_callback_fn_t *callback; -} orte_errmgr_cback_t; -OBJ_CLASS_DECLARATION(orte_errmgr_cback_t); - /* declare the base default module */ ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr_default_fns; @@ -75,12 +68,5 @@ ORTE_DECLSPEC int orte_errmgr_base_abort_peers(orte_process_name_t *procs, orte_std_cntr_t num_procs, int error_code); -ORTE_DECLSPEC void orte_errmgr_base_register_migration_warning(struct timeval *tv); - -ORTE_DECLSPEC int orte_errmgr_base_register_error_callback(orte_errmgr_error_callback_fn_t *cbfunc, - orte_errmgr_error_order_t order); - -ORTE_DECLSPEC void orte_errmgr_base_execute_error_callbacks(opal_pointer_array_t *errors); - END_C_DECLS #endif diff --git a/orte/mca/errmgr/base/help-errmgr-base.txt b/orte/mca/errmgr/base/help-errmgr-base.txt index 1470bd13a93..07a9f71909f 100644 --- a/orte/mca/errmgr/base/help-errmgr-base.txt +++ b/orte/mca/errmgr/base/help-errmgr-base.txt @@ -98,3 +98,10 @@ then it could be an internal programming error that should be reported to the developers. In the meantime, a workaround may be to set the MCA param routed=direct on the command line or in your environment. +# +[simple-message] +An internal error has occurred in ORTE: + +%s + +This is something that should be reported to the developers. diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.c b/orte/mca/errmgr/default_app/errmgr_default_app.c index 8e605bf1737..065f2de28e3 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app.c @@ -9,7 +9,7 @@ * reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. 
All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +32,9 @@ #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wait.h" #include "orte/mca/rml/rml.h" #include "orte/mca/odls/odls_types.h" #include "orte/mca/state/state.h" @@ -55,17 +57,11 @@ * HNP module ******************/ orte_errmgr_base_module_t orte_errmgr_default_app_module = { - init, - finalize, - orte_errmgr_base_log, - orte_errmgr_base_abort, - abort_peers, - NULL, - NULL, - NULL, - orte_errmgr_base_register_migration_warning, - orte_errmgr_base_register_error_callback, - orte_errmgr_base_execute_error_callbacks + .init = init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = orte_errmgr_base_abort, + .abort_peers = abort_peers }; static void proc_errors(int fd, short args, void *cbdata); @@ -74,7 +70,10 @@ static size_t myerrhandle = SIZE_MAX; static void register_cbfunc(int status, size_t errhndler, void *cbdata) { + orte_lock_t *lk = (orte_lock_t*)cbdata; myerrhandle = errhndler; + ORTE_POST_OBJECT(lk); + ORTE_WAKEUP_THREAD(lk); } static void notify_cbfunc(int status, @@ -109,7 +108,7 @@ static void notify_cbfunc(int status, } /* push it into our event base */ - ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, state); + ORTE_ACTIVATE_PROC_STATE((orte_process_name_t*)source, state); } /************************ @@ -117,11 +116,25 @@ static void notify_cbfunc(int status, ************************/ static int init(void) { + opal_list_t directives; + orte_lock_t lock; + opal_value_t *kv; + /* setup state machine to trap proc errors */ orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); /* tie the default PMIx event handler back to us */ - opal_pmix.register_evhandler(NULL, NULL, notify_cbfunc, register_cbfunc, NULL); + 
ORTE_CONSTRUCT_LOCK(&lock); + OBJ_CONSTRUCT(&directives, opal_list_t); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME); + kv->type = OPAL_STRING; + kv->data.string = strdup("ORTE-APP-DEFAULT"); + opal_list_append(&directives, &kv->super); + opal_pmix.register_evhandler(NULL, &directives, notify_cbfunc, register_cbfunc, (void*)&lock); + ORTE_WAIT_THREAD(&lock); + ORTE_DESTRUCT_LOCK(&lock); + OPAL_LIST_DESTRUCT(&directives); return ORTE_SUCCESS; } @@ -138,8 +151,8 @@ static void proc_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; char *nodename; - orte_error_t err; - opal_pointer_array_t errors; + + ORTE_ACQUIRE_OBJECT(caddy); OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:default_app: proc %s state %s", @@ -155,14 +168,6 @@ static void proc_errors(int fd, short args, void *cbdata) return; } - /* pass the error to the error_callbacks for processing */ - OBJ_CONSTRUCT(&errors, opal_pointer_array_t); - opal_pointer_array_init(&errors, 1, INT_MAX, 1); - err.errcode = caddy->proc_state; - err.proc = caddy->name; - opal_pointer_array_add(&errors, &err); - - if (ORTE_PROC_STATE_UNABLE_TO_SEND_MSG == caddy->proc_state) { /* we can't send a message - print a message */ nodename = orte_get_proc_hostname(&caddy->name); @@ -181,9 +186,6 @@ static void proc_errors(int fd, short args, void *cbdata) orte_abnormal_term_ordered = true; } - orte_errmgr_base_execute_error_callbacks(&errors); - OBJ_DESTRUCT(&errors); - OBJ_RELEASE(caddy); } diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index 59c8e87a0f1..9c653910655 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -50,6 +50,7 @@ #include "orte/util/proc_info.h" #include "orte/util/show_help.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include 
"orte/runtime/orte_globals.h" #include "orte/runtime/orte_locks.h" @@ -64,33 +65,17 @@ static int init(void); static int finalize(void); - -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map); - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list); - -static int ft_event(int state); - +static void hnp_abort(int error_code, char *fmt, ...); /****************** * default_hnp module ******************/ orte_errmgr_base_module_t orte_errmgr_default_hnp_module = { - init, - finalize, - orte_errmgr_base_log, - orte_errmgr_base_abort, - orte_errmgr_base_abort_peers, - predicted_fault, - suggest_map_targets, - ft_event, - orte_errmgr_base_register_migration_warning, - NULL, - orte_errmgr_base_execute_error_callbacks + .init = init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = hnp_abort, + .abort_peers = orte_errmgr_base_abort_peers }; @@ -125,6 +110,81 @@ static int finalize(void) return ORTE_SUCCESS; } +static void wakeup(int sd, short args, void *cbdata) +{ + /* nothing more we can do */ + ORTE_ACQUIRE_OBJECT(cbdata); + orte_quit(0, 0, NULL); +} + +/* this function only gets called when FORCED_TERMINATE + * has been invoked, which means that there is some + * internal failure (e.g., to pack/unpack a correct value). + * We could just exit, but that doesn't result in any + * meaningful error message to the user. Likewise, just + * printing something to stdout/stderr won't necessarily + * get back to the user. Instead, we will send an error + * report to mpirun and give it a chance to order our + * termination. In order to ensure we _do_ terminate, + * we set a timer - if it fires before we receive the + * termination command, then we will exit on our own. This + * protects us in the case that the failure is in the + * messaging system itself */ +static void hnp_abort(int error_code, char *fmt, ...) 
+{ + va_list arglist; + char *outmsg = NULL; + orte_timer_t *timer; + + /* only do this once */ + if (orte_abnormal_term_ordered) { + return; + } + + /* ensure we exit with non-zero status */ + ORTE_UPDATE_EXIT_STATUS(error_code); + + /* set the aborting flag */ + orte_abnormal_term_ordered = true; + + /* If there was a message, construct it */ + va_start(arglist, fmt); + if (NULL != fmt) { + vasprintf(&outmsg, fmt, arglist); + } + va_end(arglist); + + /* use the show-help system to get the message out */ + orte_show_help("help-errmgr-base.txt", "simple-message", true, outmsg); + + /* this could have happened very early, so see if it happened + * before we started anything - if so, we can just finalize */ + if (orte_never_launched) { + orte_quit(0, 0, NULL); + return; + } + + /* tell the daemons to terminate */ + if (ORTE_SUCCESS != orte_plm.terminate_orteds()) { + orte_quit(0, 0, NULL); + return; + } + + /* set a timer for exiting - this also gives the message a chance + * to get out! */ + if (NULL == (timer = OBJ_NEW(orte_timer_t))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return; + } + timer->tv.tv_sec = 5; + timer->tv.tv_usec = 0; + opal_event_evtimer_set(orte_event_base, timer->ev, wakeup, NULL); + opal_event_set_priority(timer->ev, ORTE_ERROR_PRI); + ORTE_POST_OBJECT(timer); + opal_event_evtimer_add(timer->ev, &timer->tv); +} + + static void job_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; @@ -136,6 +196,8 @@ static void job_errors(int fd, short args, void *cbdata) int32_t rc, ret; int room, *rmptr; + ORTE_ACQUIRE_OBJECT(caddy); + /* * if orte is trying to shutdown, just let it */ @@ -297,6 +359,8 @@ static void proc_errors(int fd, short args, void *cbdata) int32_t i32, *i32ptr; char *rtmod; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:default_hnp: for proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -431,7 +495,7 @@ static 
void proc_errors(int fd, short args, void *cbdata) } } - keep_going: + keep_going: /* if this is a continuously operating job, then there is nothing more * to do - we let the job continue to run */ if (orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL)) { @@ -732,25 +796,6 @@ static void proc_errors(int fd, short args, void *cbdata) OBJ_RELEASE(caddy); } -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int ft_event(int state) -{ - return ORTE_SUCCESS; -} - /***************** * Local Functions *****************/ diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index a58733020e7..05e5e3e414a 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -33,6 +33,7 @@ #include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/rml/rml.h" @@ -59,33 +60,17 @@ */ static int init(void); static int finalize(void); - -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map); - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list); - -static int ft_event(int state); - +static void orted_abort(int error_code, char *fmt, ...); /****************** * default_orted module ******************/ orte_errmgr_base_module_t orte_errmgr_default_orted_module = { - init, - finalize, - orte_errmgr_base_log, - orte_errmgr_base_abort, - orte_errmgr_base_abort_peers, - predicted_fault, - suggest_map_targets, - ft_event, - 
orte_errmgr_base_register_migration_warning, - NULL, - orte_errmgr_base_execute_error_callbacks + .init = init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = orted_abort, + .abort_peers = orte_errmgr_base_abort_peers }; /* Local functions */ @@ -122,6 +107,129 @@ static int finalize(void) return ORTE_SUCCESS; } +static void wakeup(int sd, short args, void *cbdata) +{ + /* nothing more we can do */ + ORTE_ACQUIRE_OBJECT(cbdata); + orte_quit(0, 0, NULL); +} + +/* this function only gets called when FORCED_TERMINATE + * has been invoked, which means that there is some + * internal failure (e.g., to pack/unpack a correct value). + * We could just exit, but that doesn't result in any + * meaningful error message to the user. Likewise, just + * printing something to stdout/stderr won't necessarily + * get back to the user. Instead, we will send an error + * report to mpirun and give it a chance to order our + * termination. In order to ensure we _do_ terminate, + * we set a timer - if it fires before we receive the + * termination command, then we will exit on our own. This + * protects us in the case that the failure is in the + * messaging system itself */ +static void orted_abort(int error_code, char *fmt, ...) 
+{ + va_list arglist; + char *outmsg = NULL; + orte_plm_cmd_flag_t cmd; + opal_buffer_t *alert; + orte_vpid_t null=ORTE_VPID_INVALID; + orte_proc_state_t state = ORTE_PROC_STATE_CALLED_ABORT; + orte_timer_t *timer; + int rc; + + /* only do this once */ + if (orte_abnormal_term_ordered) { + return; + } + + /* set the aborting flag */ + orte_abnormal_term_ordered = true; + + /* If there was a message, construct it */ + va_start(arglist, fmt); + if (NULL != fmt) { + vasprintf(&outmsg, fmt, arglist); + } + va_end(arglist); + + /* use the show-help system to get the message out */ + orte_show_help("help-errmgr-base.txt", "simple-message", true, outmsg); + + /* tell the HNP we are in distress */ + alert = OBJ_NEW(opal_buffer_t); + /* pack update state command */ + cmd = ORTE_PLM_UPDATE_PROC_STATE; + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &ORTE_PROC_MY_NAME->jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack our vpid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &ORTE_PROC_MY_NAME->vpid, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack our pid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &orte_process_info.pid, 1, OPAL_PID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack our state */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &state, 1, ORTE_PROC_STATE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack our exit code */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &error_code, 1, ORTE_EXIT_CODE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* flag that this job is complete so the receiver can know */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &null, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + 
OBJ_RELEASE(alert); + goto cleanup; + } + + /* send it */ + if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, + ORTE_PROC_MY_HNP, alert, + ORTE_RML_TAG_PLM, + orte_rml_send_callback, NULL))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + /* we can't communicate, so give up */ + orte_quit(0, 0, NULL); + return; + } + + cleanup: + /* set a timer for exiting - this also gives the message a chance + * to get out! */ + if (NULL == (timer = OBJ_NEW(orte_timer_t))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return; + } + timer->tv.tv_sec = 5; + timer->tv.tv_usec = 0; + opal_event_evtimer_set(orte_event_base, timer->ev, wakeup, NULL); + opal_event_set_priority(timer->ev, ORTE_ERROR_PRI); + ORTE_POST_OBJECT(timer); + opal_event_evtimer_add(timer->ev, &timer->tv); + +} + static void job_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; @@ -131,6 +239,8 @@ static void job_errors(int fd, short args, void *cbdata) orte_plm_cmd_flag_t cmd; opal_buffer_t *alert; + ORTE_ACQUIRE_OBJECT(caddy); + /* * if orte is trying to shutdown, just let it */ @@ -217,6 +327,8 @@ static void proc_errors(int fd, short args, void *cbdata) int rc=ORTE_SUCCESS; int i; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default_orted:proc_errors process %s error state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -259,7 +371,7 @@ static void proc_errors(int fd, short args, void *cbdata) /* terminate - our routed children will see * us leave and automatically die */ - ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + orte_quit(0, 0, NULL); goto cleanup; } @@ -607,30 +719,10 @@ static void proc_errors(int fd, short args, void *cbdata) return; } - cleanup: + cleanup: OBJ_RELEASE(caddy); } -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int suggest_map_targets(orte_proc_t 
*proc, - orte_node_t *oldnode, - opal_list_t *node_list) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int ft_event(int state) -{ - return ORTE_SUCCESS; -} - - /***************** * Local Functions *****************/ diff --git a/orte/mca/errmgr/default_tool/errmgr_default_tool.c b/orte/mca/errmgr/default_tool/errmgr_default_tool.c index 7ecc82a916c..68dba9cfe34 100644 --- a/orte/mca/errmgr/default_tool/errmgr_default_tool.c +++ b/orte/mca/errmgr/default_tool/errmgr_default_tool.c @@ -31,6 +31,7 @@ #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" #include "orte/mca/odls/odls_types.h" @@ -54,17 +55,11 @@ static int abort_peers(orte_process_name_t *procs, * HNP module ******************/ orte_errmgr_base_module_t orte_errmgr_default_tool_module = { - init, - finalize, - orte_errmgr_base_log, - orte_errmgr_base_abort, - abort_peers, - NULL, - NULL, - NULL, - orte_errmgr_base_register_migration_warning, - orte_errmgr_base_register_error_callback, - orte_errmgr_base_execute_error_callbacks + .init= init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = orte_errmgr_base_abort, + .abort_peers = abort_peers }; static void proc_errors(int fd, short args, void *cbdata); @@ -89,6 +84,8 @@ static void proc_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:default_tool: proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -106,6 +103,7 @@ static void proc_errors(int fd, short args, void *cbdata) /* if we lost our lifeline, then just stop the event loop * so the main program can cleanly terminate */ if (ORTE_PROC_STATE_LIFELINE_LOST == caddy->proc_state) { + ORTE_POST_OBJECT(caddy); orte_event_base_active = false; } else { /* all other 
errors require abort */ diff --git a/orte/mca/errmgr/dvm/errmgr_dvm.c b/orte/mca/errmgr/dvm/errmgr_dvm.c index ccb2684e738..60604e15346 100644 --- a/orte/mca/errmgr/dvm/errmgr_dvm.c +++ b/orte/mca/errmgr/dvm/errmgr_dvm.c @@ -50,6 +50,7 @@ #include "orte/util/proc_info.h" #include "orte/util/show_help.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_locks.h" @@ -65,32 +66,15 @@ static int init(void); static int finalize(void); -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map); - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list); - -static int ft_event(int state); - - /****************** * dvm module ******************/ orte_errmgr_base_module_t orte_errmgr_dvm_module = { - init, - finalize, - orte_errmgr_base_log, - orte_errmgr_base_abort, - orte_errmgr_base_abort_peers, - predicted_fault, - suggest_map_targets, - ft_event, - orte_errmgr_base_register_migration_warning, - NULL, - orte_errmgr_base_execute_error_callbacks + .init = init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = orte_errmgr_base_abort, + .abort_peers = orte_errmgr_base_abort_peers }; @@ -146,6 +130,8 @@ static void job_errors(int fd, short args, void *cbdata) int32_t rc, ret; int room, *rmptr; + ORTE_ACQUIRE_OBJECT(caddy); + /* * if orte is trying to shutdown, just let it */ @@ -248,6 +234,8 @@ static void proc_errors(int fd, short args, void *cbdata) int32_t i32, *i32ptr; char *rtmod; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:dvm: for proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -386,7 +374,7 @@ static void proc_errors(int fd, short args, void *cbdata) } } - keep_going: + keep_going: /* ensure we record the failed proc properly so we can report * the error once we terminate */ @@ -643,22 +631,3 @@ static 
void proc_errors(int fd, short args, void *cbdata) cleanup: OBJ_RELEASE(caddy); } - -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int ft_event(int state) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/errmgr.h b/orte/mca/errmgr/errmgr.h index de27a379195..e9dd10db21d 100644 --- a/orte/mca/errmgr/errmgr.h +++ b/orte/mca/errmgr/errmgr.h @@ -14,7 +14,7 @@ * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * @@ -63,70 +63,6 @@ BEGIN_C_DECLS -/* - * Structure to describe a predicted process fault. - * - * This can be expanded in the future to support assurance levels, and - * additional information that may wish to be conveyed. - */ -struct orte_errmgr_predicted_proc_t { - /** This is an object, so must have a super */ - opal_list_item_t super; - - /** Process Name */ - orte_process_name_t proc_name; -}; -typedef struct orte_errmgr_predicted_proc_t orte_errmgr_predicted_proc_t; -OBJ_CLASS_DECLARATION(orte_errmgr_predicted_proc_t); - -/* - * Structure to describe a predicted node fault. - * - * This can be expanded in the future to support assurance levels, and - * additional information that may wish to be conveyed. 
- */ -struct orte_errmgr_predicted_node_t { - /** This is an object, so must have a super */ - opal_list_item_t super; - - /** Node Name */ - char * node_name; -}; -typedef struct orte_errmgr_predicted_node_t orte_errmgr_predicted_node_t; -OBJ_CLASS_DECLARATION(orte_errmgr_predicted_node_t); - -/* - * Structure to describe a suggested remapping element for a predicted fault. - * - * This can be expanded in the future to support weights , and - * additional information that may wish to be conveyed. - */ -struct orte_errmgr_predicted_map_t { - /** This is an object, so must have a super */ - opal_list_item_t super; - - /** Process Name (predicted to fail) */ - orte_process_name_t proc_name; - - /** Node Name (predicted to fail) */ - char * node_name; - - /** Process Name (Map to) */ - orte_process_name_t map_proc_name; - - /** Node Name (Map to) */ - char * map_node_name; - - /** Just off current node */ - bool off_current_node; - - /** Pre-map fixed node assignment */ - char * pre_map_fixed_node; -}; -typedef struct orte_errmgr_predicted_map_t orte_errmgr_predicted_map_t; -OBJ_CLASS_DECLARATION(orte_errmgr_predicted_map_t); - - /* * Macro definitions */ @@ -183,84 +119,6 @@ typedef int (*orte_errmgr_base_module_abort_peers_fn_t)(orte_process_name_t *pro orte_std_cntr_t num_procs, int error_code); -/** - * Predicted process/node failure notification - * - * @param[in] proc_list List of processes (or NULL if none) - * @param[in] node_list List of nodes (or NULL if none) - * @param[in] suggested_map List of mapping suggestions to use on recovery (or NULL if none) - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_errmgr_base_module_predicted_fault_fn_t)(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map); - -/** - * Suggest a node to map a restarting process onto - * - * @param[in] proc Process that is being mapped - * @param[in] oldnode Previous node 
where this process resided - * @param[in|out] node_list List of nodes to select from - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_errmgr_base_module_suggest_map_targets_fn_t)(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list); - -/** - * Handle fault tolerance updates - * - * @param[in] state Fault tolerance state update - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_errmgr_base_module_ft_event_fn_t)(int state); - -/** - * Function to perform actions that require the rest of the ORTE layer to be up - * and running. - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecified error occured - */ -typedef void (*orte_errmgr_base_module_register_migration_warning_fn_t)(struct timeval *tv); - -typedef enum { - ORTE_ERRMGR_CALLBACK_FIRST, - ORTE_ERRMGR_CALLBACK_LAST, - ORTE_ERRMGR_CALLBACK_PREPEND, - ORTE_ERRMGR_CALLBACK_APPEND -} orte_errmgr_error_order_t; - -/** - * Register a callback function for faults. - * - * This callback function will be used anytime (other than during finalize) the - * runtime detects and handles a critical failure. The runtime will complete all - * its stabilization before cycling thru all registered callbacks. The order of - * the callbacks will proceed in the indicated order with which they were registered. - * - * The parameter to the callback function will be the orte_process_name_t - * of the process involved in the error. - * - * @param[in] cbfunc The callback function. 
- * - */ -typedef struct { - orte_process_name_t proc; - int errcode; -} orte_error_t; - -typedef int (orte_errmgr_error_callback_fn_t)(opal_pointer_array_t *errors); -typedef int (*orte_errmgr_base_module_register_error_callback_fn_t)(orte_errmgr_error_callback_fn_t *cbfunc, - orte_errmgr_error_order_t order); -typedef void (*orte_errmgr_base_module_execute_error_callbacks_fn_t)(opal_pointer_array_t *errors); - /* * Module Structure */ @@ -273,21 +131,6 @@ struct orte_errmgr_base_module_2_3_0_t { orte_errmgr_base_module_log_fn_t logfn; orte_errmgr_base_module_abort_fn_t abort; orte_errmgr_base_module_abort_peers_fn_t abort_peers; - - /** Predicted process/node failure notification */ - orte_errmgr_base_module_predicted_fault_fn_t predicted_fault; - /** Suggest a node to map a restarting process onto */ - orte_errmgr_base_module_suggest_map_targets_fn_t suggest_map_targets; - - /** Handle any FT Notifications */ - orte_errmgr_base_module_ft_event_fn_t ft_event; - - /* Register to be warned of impending migration */ - orte_errmgr_base_module_register_migration_warning_fn_t register_migration_warning; - - /* Register a callback function */ - orte_errmgr_base_module_register_error_callback_fn_t register_error_callback; - orte_errmgr_base_module_execute_error_callbacks_fn_t execute_error_callbacks; }; typedef struct orte_errmgr_base_module_2_3_0_t orte_errmgr_base_module_2_3_0_t; typedef orte_errmgr_base_module_2_3_0_t orte_errmgr_base_module_t; diff --git a/orte/mca/ess/base/Makefile.am b/orte/mca/ess/base/Makefile.am index 9e2d31367a5..db1903699cf 100644 --- a/orte/mca/ess/base/Makefile.am +++ b/orte/mca/ess/base/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -26,10 +26,9 @@ headers += \ libmca_ess_la_SOURCES += \ base/ess_base_frame.c \ base/ess_base_select.c \ - base/ess_base_get.c \ - base/ess_base_std_tool.c \ - base/ess_base_std_app.c \ - base/ess_base_std_orted.c \ + base/ess_base_get.c \ + base/ess_base_std_tool.c \ + base/ess_base_std_app.c \ + base/ess_base_std_orted.c \ base/ess_base_std_prolog.c \ base/ess_base_fns.c - diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index 2fefed08455..de3734b0ed3 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -52,6 +52,7 @@ ORTE_DECLSPEC extern int orte_ess_base_std_buffering; ORTE_DECLSPEC extern int orte_ess_base_num_procs; ORTE_DECLSPEC extern char *orte_ess_base_jobid; ORTE_DECLSPEC extern char *orte_ess_base_vpid; +ORTE_DECLSPEC extern opal_list_t orte_ess_base_signals; /* * Internal helper functions used by components @@ -82,6 +83,13 @@ ORTE_DECLSPEC int orte_ess_env_put(orte_std_cntr_t num_procs, orte_std_cntr_t num_local_procs, char ***env); +typedef struct { + opal_list_item_t super; + char *signame; + int signal; +} orte_ess_base_signal_t; +OBJ_CLASS_DECLARATION(orte_ess_base_signal_t); + END_C_DECLS #endif diff --git a/orte/mca/ess/base/ess_base_frame.c b/orte/mca/ess/base/ess_base_frame.c index c05f6b7d6ff..0eba2c98e91 100644 --- a/orte/mca/ess/base/ess_base_frame.c +++ b/orte/mca/ess/base/ess_base_frame.c @@ -9,8 +9,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,9 +23,13 @@ #include "orte_config.h" #include "orte/constants.h" +#include + #include "orte/mca/mca.h" +#include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" +#include "orte/util/show_help.h" #include "orte/mca/ess/base/base.h" @@ -46,6 +51,7 @@ int orte_ess_base_std_buffering = -1; int orte_ess_base_num_procs = -1; char *orte_ess_base_jobid = NULL; char *orte_ess_base_vpid = NULL; +opal_list_t orte_ess_base_signals = {0}; static mca_base_var_enum_value_t stream_buffering_values[] = { {-1, "default"}, @@ -55,6 +61,9 @@ static mca_base_var_enum_value_t stream_buffering_values[] = { {0, NULL} }; +static int setup_signals(void); +static char *forwarded_signals = NULL; + static int orte_ess_base_register(mca_base_register_flag_t flags) { mca_base_var_enum_t *new_enum; @@ -96,16 +105,38 @@ static int orte_ess_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &orte_ess_base_num_procs); mca_base_var_register_synonym(ret, "orte", "orte", "ess", "num_procs", 0); + forwarded_signals = NULL; + ret = mca_base_var_register ("orte", "ess", "base", "forward_signals", + "Comma-delimited list of additional signals (names or integers) to forward to " + "application processes [\"none\" => forward nothing]. 
Signals provided by " + "default include SIGTSTP, SIGUSR1, SIGUSR2, SIGABRT, SIGALRM, and SIGCONT", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY, + &forwarded_signals); + mca_base_var_register_synonym(ret, "orte", "ess", "hnp", "forward_signals", 0); + + return ORTE_SUCCESS; } static int orte_ess_base_close(void) { + OPAL_LIST_DESTRUCT(&orte_ess_base_signals); + return mca_base_framework_components_close(&orte_ess_base_framework, NULL); } static int orte_ess_base_open(mca_base_open_flag_t flags) { + int rc; + + OBJ_CONSTRUCT(&orte_ess_base_signals, opal_list_t); + + if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { + if (ORTE_SUCCESS != (rc = setup_signals())) { + return rc; + } + } return mca_base_framework_components_open(&orte_ess_base_framework, flags); } @@ -113,4 +144,161 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, ess, "ORTE Environmenal System Setup", orte_ess_base_register, orte_ess_base_open, orte_ess_base_close, mca_ess_base_static_components, 0); +/* signal forwarding */ + +/* setup signal forwarding list */ +struct known_signal { + /** signal number */ + int signal; + /** signal name */ + char *signame; + /** can this signal be forwarded */ + bool can_forward; +}; + +static struct known_signal known_signals[] = { + {SIGTERM, "SIGTERM", false}, + {SIGHUP, "SIGHUP", false}, + {SIGINT, "SIGINT", false}, + {SIGKILL, "SIGKILL", false}, +#ifdef SIGSYS + {SIGSYS, "SIGSYS", true}, +#endif +#ifdef SIGXCPU + {SIGXCPU, "SIGXCPU", true}, +#endif + {SIGXFSZ, "SIGXFSZ", true}, +#ifdef SIGVTALRM + {SIGVTALRM, "SIGVTALRM", true}, +#endif +#ifdef SIGPROF + {SIGPROF, "SIGPROF", true}, +#endif +#ifdef SIGINFO + {SIGINFO, "SIGINFO", true}, +#endif +#ifdef SIGPWR + {SIGPWR, "SIGPWR", true}, +#endif +#ifdef SIGURG + {SIGURG, "SIGURG", true}, +#endif +#ifdef SIGUSR1 + {SIGUSR1, "SIGUSR1", true}, +#endif +#ifdef SIGUSR2 + {SIGUSR2, "SIGUSR2", true}, +#endif + {0, NULL}, +}; + +#define ESS_ADDSIGNAL(x, s) \ + do { \ + orte_ess_base_signal_t 
*_sig; \ + _sig = OBJ_NEW(orte_ess_base_signal_t); \ + _sig->signal = (x); \ + _sig->signame = strdup((s)); \ + opal_list_append(&orte_ess_base_signals, &_sig->super); \ + } while(0) + +static int setup_signals(void) +{ + int i, sval, nsigs; + char **signals, *tmp; + orte_ess_base_signal_t *sig; + bool ignore, found; + + /* if they told us "none", then nothing to do */ + if (NULL != forwarded_signals && + 0 == strcmp(forwarded_signals, "none")) { + return ORTE_SUCCESS; + } + + /* we know that some signals are (nearly) always defined, regardless + * of environment, so add them here */ + nsigs = sizeof(known_signals) / sizeof(struct known_signal); + for (i=0; i < nsigs; i++) { + if (known_signals[i].can_forward) { + ESS_ADDSIGNAL(known_signals[i].signal, known_signals[i].signame); + } + } + + /* see if they asked for anything beyond those - note that they may + * have asked for some we already cover, and so we ignore any duplicates */ + if (NULL != forwarded_signals) { + /* if they told us "none", then dump the list */ + signals = opal_argv_split(forwarded_signals, ','); + for (i=0; NULL != signals[i]; i++) { + sval = 0; + if (0 != strncmp(signals[i], "SIG", 3)) { + /* treat it like a number */ + errno = 0; + sval = strtoul(signals[i], &tmp, 10); + if (0 != errno || '\0' != *tmp) { + orte_show_help("help-ess-base.txt", "ess-base:unknown-signal", + true, signals[i], forwarded_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + } + + /* see if it is one we already covered */ + ignore = false; + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { + if (0 == strcasecmp(signals[i], sig->signame) || sval == sig->signal) { + /* got it - we will ignore */ + ignore = true; + break; + } + } + + if (ignore) { + continue; + } + + /* see if they gave us a signal name */ + found = false; + for (int j = 0 ; known_signals[j].signame ; ++j) { + if (0 == strcasecmp (signals[i], known_signals[j].signame) || sval == known_signals[j].signal) { + if 
(!known_signals[j].can_forward) { + orte_show_help("help-ess-base.txt", "ess-base:cannot-forward", + true, known_signals[j].signame, forwarded_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + found = true; + ESS_ADDSIGNAL(known_signals[j].signal, known_signals[j].signame); + break; + } + } + + if (!found) { + if (0 == strncmp(signals[i], "SIG", 3)) { + orte_show_help("help-ess-base.txt", "ess-base:unknown-signal", + true, signals[i], forwarded_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + ESS_ADDSIGNAL(sval, signals[i]); + } + } + opal_argv_free (signals); + } + return ORTE_SUCCESS; +} + +/* instantiate the class */ +static void scon(orte_ess_base_signal_t *t) +{ + t->signame = NULL; +} +static void sdes(orte_ess_base_signal_t *t) +{ + if (NULL != t->signame) { + free(t->signame); + } +} +OBJ_CLASS_INSTANCE(orte_ess_base_signal_t, + opal_list_item_t, + scon, sdes); diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 5fff0ce3d01..79e3a1fe486 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -12,7 +12,7 @@ * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
@@ -223,13 +223,21 @@ int orte_ess_base_app_setup(bool db_restrict_local) OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - orte_mgmt_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_mgmt_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - orte_coll_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_coll_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); /* diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index a3e3e2d44fc..167c308ae16 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -424,13 +424,21 @@ int orte_ess_base_orted_setup(void) OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - orte_mgmt_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_mgmt_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - orte_coll_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = 
"orte_rml_open_coll_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); /* add our contact info to our proc object */ diff --git a/orte/mca/ess/base/help-ess-base.txt b/orte/mca/ess/base/help-ess-base.txt index 257a64a7279..ba33cb2d165 100644 --- a/orte/mca/ess/base/help-ess-base.txt +++ b/orte/mca/ess/base/help-ess-base.txt @@ -10,6 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -32,3 +33,19 @@ and got the error %s. This could mean that your PATH or executable name is wrong, or that you do not have the necessary permissions. Please ensure that the executable is able to be found and executed as it is required for singleton operations. +[ess-base:cannot-forward] +The system does not support trapping and forwarding of the +specified signal: + + signal: %s + param: %s + +Please remove that signal from the ess_base_forward_signals MCA parameter. +[ess-base:unknown-signal] +The following signal was included in the ess_base_forward_signals +MCA parameter: + + signal: %s + param: %s + +This is not a recognized signal value. Please fix or remove it. 
diff --git a/orte/mca/ess/hnp/Makefile.am b/orte/mca/ess/hnp/Makefile.am index 88a92ed56fc..4280bb0472b 100644 --- a/orte/mca/ess/hnp/Makefile.am +++ b/orte/mca/ess/hnp/Makefile.am @@ -20,8 +20,6 @@ # $HEADER$ # -dist_ortedata_DATA = help-ess-hnp.txt - sources = \ ess_hnp.h \ ess_hnp_component.c \ diff --git a/orte/mca/ess/hnp/ess_hnp.h b/orte/mca/ess/hnp/ess_hnp.h index 0c177210ef5..a26321edcd1 100644 --- a/orte/mca/ess/hnp/ess_hnp.h +++ b/orte/mca/ess/hnp/ess_hnp.h @@ -28,19 +28,8 @@ BEGIN_C_DECLS /* * Module open / close */ -typedef struct { - opal_list_item_t super; - char *signame; - int signal; -} ess_hnp_signal_t; -OBJ_CLASS_DECLARATION(ess_hnp_signal_t); -typedef struct { - orte_ess_base_component_t base; - opal_list_t signals; -} orte_ess_hnp_component_t; - -ORTE_MODULE_DECLSPEC extern orte_ess_hnp_component_t mca_ess_hnp_component; +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_hnp_component; END_C_DECLS diff --git a/orte/mca/ess/hnp/ess_hnp_component.c b/orte/mca/ess/hnp/ess_hnp_component.c index 84d8d4da191..b6b33476640 100644 --- a/orte/mca/ess/hnp/ess_hnp_component.c +++ b/orte/mca/ess/hnp/ess_hnp_component.c @@ -41,187 +41,36 @@ #include "orte/runtime/orte_globals.h" extern orte_ess_base_module_t orte_ess_hnp_module; -static int hnp_component_register (void); static int hnp_component_open(void); static int hnp_component_close(void); static int hnp_component_query(mca_base_module_t **module, int *priority); -struct known_signal { - /** signal number */ - int signal; - /** signal name */ - char *signame; - /** can this signal be forwarded */ - bool can_forward; -}; - -static struct known_signal known_signals[] = { - {SIGTERM, "SIGTERM", false}, - {SIGHUP, "SIGHUP", false}, - {SIGINT, "SIGINT", false}, - {SIGKILL, "SIGKILL", false}, -#ifdef SIGSYS - {SIGSYS, "SIGSYS", true}, -#endif -#ifdef SIGXCPU - {SIGXCPU, "SIGXCPU", true}, -#endif - {SIGXFSZ, "SIGXFSZ", true}, -#ifdef SIGVTALRM - {SIGVTALRM, "SIGVTALRM", true}, -#endif -#ifdef 
SIGPROF - {SIGPROF, "SIGPROF", true}, -#endif -#ifdef SIGINFO - {SIGINFO, "SIGINFO", true}, -#endif -#ifdef SIGPWR - {SIGPWR, "SIGPWR", true}, -#endif - {0, NULL}, -}; - /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_ess_hnp_component_t mca_ess_hnp_component = { - .base = { - .base_version = { - ORTE_ESS_BASE_VERSION_3_0_0, - - /* Component name and version */ - .mca_component_name = "hnp", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = hnp_component_open, - .mca_close_component = hnp_component_close, - .mca_query_component = hnp_component_query, - .mca_register_component_params = hnp_component_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } +orte_ess_base_component_t mca_ess_hnp_component = { + .base_version = { + ORTE_ESS_BASE_VERSION_3_0_0, + + /* Component name and version */ + .mca_component_name = "hnp", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = hnp_component_open, + .mca_close_component = hnp_component_close, + .mca_query_component = hnp_component_query + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT } }; -static char *additional_signals; - -static int hnp_component_register (void) -{ - additional_signals = NULL; - (void) mca_base_component_var_register (&mca_ess_hnp_component.base.base_version, - "forward_signals", "Comma-delimited list " - "of additional signals (names or integers) to forward to " - "application processes [\"none\" => forward nothing]", MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY, - &additional_signals); - - return ORTE_SUCCESS; -} - -#define ESS_ADDSIGNAL(x, s) 
\ - do { \ - ess_hnp_signal_t *_sig; \ - _sig = OBJ_NEW(ess_hnp_signal_t); \ - _sig->signal = (x); \ - _sig->signame = strdup((s)); \ - opal_list_append(&mca_ess_hnp_component.signals, &_sig->super); \ - } while(0) - static int hnp_component_open(void) { - int i, sval; - char **signals, *tmp; - ess_hnp_signal_t *sig; - bool ignore, found; - - OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t); - - /* we know that some signals are (nearly) always defined, regardless - * of environment, so add them here */ - ESS_ADDSIGNAL(SIGTSTP, "SIGTSTP"); - ESS_ADDSIGNAL(SIGUSR1, "SIGUSR1"); - ESS_ADDSIGNAL(SIGUSR2, "SIGUSR2"); - ESS_ADDSIGNAL(SIGABRT, "SIGABRT"); - ESS_ADDSIGNAL(SIGALRM, "SIGALRM"); - ESS_ADDSIGNAL(SIGCONT, "SIGCONT"); -#ifdef SIGURG - ESS_ADDSIGNAL(SIGURG, "SIGURG"); -#endif - - /* see if they asked for anything beyond those - note that they may - * have asked for some we already cover, and so we ignore any duplicates */ - if (NULL != additional_signals) { - /* if they told us "none", then dump the list */ - if (0 == strcmp(additional_signals, "none")) { - OPAL_LIST_DESTRUCT(&mca_ess_hnp_component.signals); - /* need to reconstruct it for when we close */ - OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t); - return ORTE_SUCCESS; - } - signals = opal_argv_split(additional_signals, ','); - for (i=0; NULL != signals[i]; i++) { - sval = 0; - if (0 != strncmp(signals[i], "SIG", 3)) { - /* treat it like a number */ - errno = 0; - sval = strtoul(signals[i], &tmp, 10); - if (0 != errno || '\0' != *tmp) { - orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal", - true, signals[i], additional_signals); - opal_argv_free(signals); - return OPAL_ERR_SILENT; - } - } - - /* see if it is one we already covered */ - ignore = false; - OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { - if (0 == strcasecmp(signals[i], sig->signame) || sval == sig->signal) { - /* got it - we will ignore */ - ignore = true; - break; - } - } - - if 
(ignore) { - continue; - } - - /* see if they gave us a signal name */ - found = false; - for (int j = 0 ; known_signals[j].signame ; ++j) { - if (0 == strcasecmp (signals[i], known_signals[j].signame) || sval == known_signals[j].signal) { - if (!known_signals[j].can_forward) { - orte_show_help("help-ess-hnp.txt", "ess-hnp:cannot-forward", - true, known_signals[j].signame, additional_signals); - opal_argv_free(signals); - return OPAL_ERR_SILENT; - } - found = true; - ESS_ADDSIGNAL(known_signals[j].signal, known_signals[j].signame); - break; - } - } - - if (!found) { - if (0 == strncmp(signals[i], "SIG", 3)) { - orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal", - true, signals[i], additional_signals); - opal_argv_free(signals); - return OPAL_ERR_SILENT; - } - - ESS_ADDSIGNAL(sval, signals[i]); - } - } - opal_argv_free (signals); - } return ORTE_SUCCESS; } @@ -250,18 +99,3 @@ static int hnp_component_close(void) { return ORTE_SUCCESS; } - -/* instantiate the class */ -static void scon(ess_hnp_signal_t *t) -{ - t->signame = NULL; -} -static void sdes(ess_hnp_signal_t *t) -{ - if (NULL != t->signame) { - free(t->signame); - } -} -OBJ_CLASS_INSTANCE(ess_hnp_signal_t, - opal_list_item_t, - scon, sdes); diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 6e5b221d4e9..f240daaa38c 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -149,7 +149,7 @@ static int rte_init(void) int idx; orte_topology_t *t; opal_list_t transports; - ess_hnp_signal_t *sig; + orte_ess_base_signal_t *sig; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -193,7 +193,7 @@ static int rte_init(void) signal(SIGHUP, abort_signal_callback); /** setup callbacks for signals we should forward */ - if (0 < (idx = opal_list_get_size(&mca_ess_hnp_component.signals))) { + if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) { forward_signals_events = 
(opal_event_t*)malloc(sizeof(opal_event_t) * idx); if (NULL == forward_signals_events) { ret = ORTE_ERR_OUT_OF_RESOURCE; @@ -201,7 +201,7 @@ static int rte_init(void) goto error; } idx = 0; - OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback); ++idx; } @@ -355,13 +355,21 @@ static int rte_init(void) OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - orte_mgmt_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_mgmt_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - orte_coll_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_coll_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); /* @@ -789,7 +797,7 @@ static int rte_finalize(void) char *contact_path; orte_job_t *jdata; uint32_t key; - ess_hnp_signal_t *sig; + orte_ess_base_signal_t *sig; unsigned int i; if (signals_set) { @@ -799,7 +807,7 @@ static int rte_finalize(void) opal_event_del(&term_handler); /** Remove the USR signal handlers */ i = 0; - OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { opal_event_signal_del(forward_signals_events + i); ++i; } diff --git a/orte/mca/ess/hnp/help-ess-hnp.txt b/orte/mca/ess/hnp/help-ess-hnp.txt deleted file mode 100644 index 
7bce2ccbb62..00000000000 --- a/orte/mca/ess/hnp/help-ess-hnp.txt +++ /dev/null @@ -1,27 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for the SDS base. -# -[ess-hnp:cannot-forward] -The system does not support trapping and forwarding of the -specified signal: - - signal: %s - param: %s - -Please remove that signal from the ess_hnp_forward_signals MCA parameter. -[ess-hnp:unknown-signal] -The following signal was included in the ess_hnp_forward_signals -MCA parameter: - - signal: %s - param: %s - -This is not a recognized signal value. Please fix or remove it. diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 6ed504f3413..4ad414236af 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -124,7 +124,7 @@ static int rte_init(void) /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); /* initialize the selected module */ - if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { + if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) { /* we cannot run */ error = "pmix init"; goto error; diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 2f2e5376ac8..6ddca461244 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -189,7 +189,7 @@ static int rte_init(void) /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); /* initialize the selected module */ - if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { + if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) { /* we cannot run */ error = "pmix init"; goto error; diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c 
index c645c4ecaa0..7982fe10aa0 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -39,6 +39,7 @@ #include "orte/util/regex.h" #include "orte/util/show_help.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" @@ -58,10 +59,24 @@ orte_ess_base_module_t orte_ess_slurm_module = { NULL /* ft_event */ }; +static void signal_forward_callback(int fd, short event, void *arg); +static opal_event_t *forward_signals_events = NULL; +static bool signals_set=false; + +static void setup_sighandler(int signal, opal_event_t *ev, + opal_event_cbfunc_t cbfunc) +{ + opal_event_signal_set(orte_event_base, ev, signal, cbfunc, ev); + opal_event_set_priority(ev, ORTE_ERROR_PRI); + opal_event_signal_add(ev, NULL); +} + static int rte_init(void) { int ret; char *error = NULL; + orte_ess_base_signal_t *sig; + int idx; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -76,11 +91,29 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { + /** setup callbacks for signals we should forward */ + if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) { + forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx); + if (NULL == forward_signals_events) { + ret = ORTE_ERR_OUT_OF_RESOURCE; + error = "unable to malloc"; + goto error; + } + idx = 0; + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { + setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback); + ++idx; + } + } + signals_set = true; + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } + /* setup the signal handlers */ + return ORTE_SUCCESS; } @@ -112,9 +145,23 @@ static int rte_init(void) static int rte_finalize(void) { int ret; + orte_ess_base_signal_t *sig; + unsigned int i; /* if I am a daemon, 
finalize using the default procedure */ if (ORTE_PROC_IS_DAEMON) { + if (signals_set) { + /** Remove the USR signal handlers */ + i = 0; + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { + opal_event_signal_del(forward_signals_events + i); + ++i; + } + free (forward_signals_events); + forward_signals_events = NULL; + signals_set = false; + } + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { ORTE_ERROR_LOG(ret); return ret; @@ -199,3 +246,52 @@ static int slurm_set_name(void) return ORTE_SUCCESS; } + +/* Pass user signals to the local application processes */ +static void signal_forward_callback(int fd, short event, void *arg) +{ + opal_event_t *signal = (opal_event_t*)arg; + int32_t signum, rc; + opal_buffer_t *cmd; + orte_daemon_cmd_flag_t command=ORTE_DAEMON_SIGNAL_LOCAL_PROCS; + orte_jobid_t job = ORTE_JOBID_WILDCARD; + + signum = OPAL_EVENT_SIGNAL(signal); + if (!orte_execute_quiet){ + fprintf(stderr, "%s: Forwarding signal %d to job\n", + orte_basename, signum); + } + + cmd = OBJ_NEW(opal_buffer_t); + + /* pack the command */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* pack the signal */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &signum, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* send it to ourselves */ + if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, + ORTE_PROC_MY_NAME, cmd, + ORTE_RML_TAG_DAEMON, + NULL, NULL))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + } + +} diff --git a/orte/mca/filem/base/filem_base_frame.c b/orte/mca/filem/base/filem_base_frame.c index b4a8479ee8a..5ee6219d8f9 100644 --- a/orte/mca/filem/base/filem_base_frame.c +++ b/orte/mca/filem/base/filem_base_frame.c @@ -9,6 +9,7 @@ * All rights 
reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,18 +33,18 @@ * Globals */ ORTE_DECLSPEC orte_filem_base_module_t orte_filem = { - orte_filem_base_module_init, - orte_filem_base_module_finalize, - orte_filem_base_none_put, - orte_filem_base_none_put_nb, - orte_filem_base_none_get, - orte_filem_base_none_get_nb, - orte_filem_base_none_rm, - orte_filem_base_none_rm_nb, - orte_filem_base_none_wait, - orte_filem_base_none_wait_all, - orte_filem_base_none_preposition_files, - orte_filem_base_none_link_local_files + .filem_init = orte_filem_base_module_init, + .filem_finalize = orte_filem_base_module_finalize, + .put = orte_filem_base_none_put, + .put_nb = orte_filem_base_none_put_nb, + .get = orte_filem_base_none_get, + .get_nb = orte_filem_base_none_get_nb, + .rm = orte_filem_base_none_rm, + .rm_nb = orte_filem_base_none_rm_nb, + .wait = orte_filem_base_none_wait, + .wait_all = orte_filem_base_none_wait_all, + .preposition_files = orte_filem_base_none_preposition_files, + .link_local_files = orte_filem_base_none_link_local_files }; bool orte_filem_base_is_active = false; @@ -69,4 +70,3 @@ static int orte_filem_base_open(mca_base_open_flag_t flags) MCA_BASE_FRAMEWORK_DECLARE(orte, filem, NULL, NULL, orte_filem_base_open, orte_filem_base_close, mca_filem_base_static_components, 0); - diff --git a/orte/mca/filem/raw/filem_raw_module.c b/orte/mca/filem/raw/filem_raw_module.c index c810998c624..90f7322b96d 100644 --- a/orte/mca/filem/raw/filem_raw_module.c +++ b/orte/mca/filem/raw/filem_raw_module.c @@ -2,7 +2,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -49,6 +49,7 @@ #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/base/base.h" @@ -61,14 +62,6 @@ static int raw_init(void); static int raw_finalize(void); -static int raw_put(orte_filem_base_request_t *req); -static int raw_put_nb(orte_filem_base_request_t *req); -static int raw_get(orte_filem_base_request_t *req); -static int raw_get_nb(orte_filem_base_request_t *req); -static int raw_rm(orte_filem_base_request_t *req); -static int raw_rm_nb(orte_filem_base_request_t *req); -static int raw_wait(orte_filem_base_request_t *req); -static int raw_wait_all(opal_list_t *reqs); static int raw_preposition_files(orte_job_t *jdata, orte_filem_completion_cbfunc_t cbfunc, void *cbdata); @@ -76,20 +69,20 @@ static int raw_link_local_files(orte_job_t *jdata, orte_app_context_t *app); orte_filem_base_module_t mca_filem_raw_module = { - raw_init, - raw_finalize, + .filem_init = raw_init, + .filem_finalize = raw_finalize, /* we don't use any of the following */ - raw_put, - raw_put_nb, - raw_get, - raw_get_nb, - raw_rm, - raw_rm_nb, - raw_wait, - raw_wait_all, + .put = orte_filem_base_none_put, + .put_nb = orte_filem_base_none_put_nb, + .get = orte_filem_base_none_get, + .get_nb = orte_filem_base_none_get_nb, + .rm = orte_filem_base_none_rm, + .rm_nb = orte_filem_base_none_rm_nb, + .wait = orte_filem_base_none_wait, + .wait_all = orte_filem_base_none_wait_all, /* now the APIs we *do* use */ - raw_preposition_files, - raw_link_local_files + .preposition_files = raw_preposition_files, + .link_local_files = raw_link_local_files }; static opal_list_t outbound_files; @@ -164,46 +157,6 @@ static int raw_finalize(void) return ORTE_SUCCESS; } -static int 
raw_put(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_put_nb(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_get(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_get_nb(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_rm(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_rm_nb(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_wait(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_wait_all(opal_list_t *reqs) -{ - return ORTE_SUCCESS; -} - static void xfer_complete(int status, orte_filem_raw_xfer_t *xfer) { orte_filem_raw_outbound_t *outbound = xfer->outbound; @@ -586,8 +539,9 @@ static int raw_preposition_files(orte_job_t *jdata, opal_list_append(&outbound->xfers, &xfer->super); opal_event_set(orte_event_base, &xfer->ev, fd, OPAL_EV_READ, send_chunk, xfer); opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - opal_event_add(&xfer->ev, 0); xfer->pending = true; + ORTE_POST_OBJECT(xfer); + opal_event_add(&xfer->ev, 0); OBJ_RELEASE(item); } OBJ_DESTRUCT(&fsets); @@ -804,6 +758,8 @@ static void send_chunk(int fd, short argc, void *cbdata) opal_buffer_t chunk; orte_grpcomm_signature_t *sig; + ORTE_ACQUIRE_OBJECT(rev); + /* flag that event has fired */ rev->pending = false; @@ -815,6 +771,7 @@ static void send_chunk(int fd, short argc, void *cbdata) /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { + ORTE_POST_OBJECT(rev); opal_event_add(&rev->ev, 0); return; } @@ -891,8 +848,9 @@ static void send_chunk(int fd, short argc, void *cbdata) return; } else { /* restart the read event */ - opal_event_add(&rev->ev, 0); rev->pending = true; + ORTE_POST_OBJECT(rev); + opal_event_add(&rev->ev, 0); } } @@ -1116,7 +1074,8 @@ static void recv_files(int status, orte_process_name_t* sender, } } free(tmp); - opal_event_set(orte_event_base, &incoming->ev, 
incoming->fd, OPAL_EV_WRITE, write_handler, incoming); + opal_event_set(orte_event_base, &incoming->ev, incoming->fd, + OPAL_EV_WRITE, write_handler, incoming); opal_event_set_priority(&incoming->ev, ORTE_MSG_PRI); } /* create an output object for this data */ @@ -1135,8 +1094,9 @@ static void recv_files(int status, orte_process_name_t* sender, if (!incoming->pending) { /* add the event */ - opal_event_add(&incoming->ev, 0); incoming->pending = true; + ORTE_POST_OBJECT(incoming); + opal_event_add(&incoming->ev, 0); } /* cleanup */ @@ -1154,6 +1114,8 @@ static void write_handler(int fd, short event, void *cbdata) char homedir[MAXPATHLEN]; int rc; + ORTE_ACQUIRE_OBJECT(sink); + OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output, "%s write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1226,8 +1188,9 @@ static void write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready. */ - opal_event_add(&sink->ev, 0); sink->pending = true; + ORTE_POST_OBJECT(sink); + opal_event_add(&sink->ev, 0); return; } /* otherwise, something bad happened so all we can do is abort @@ -1250,8 +1213,9 @@ static void write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready */ - opal_event_add(&sink->ev, 0); sink->pending = true; + ORTE_POST_OBJECT(sink); + opal_event_add(&sink->ev, 0); return; } OBJ_RELEASE(output); diff --git a/orte/mca/grpcomm/base/grpcomm_base_stubs.c b/orte/mca/grpcomm/base/grpcomm_base_stubs.c index 7ff8e9afa5b..b787a502913 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_stubs.c +++ b/orte/mca/grpcomm/base/grpcomm_base_stubs.c @@ -44,6 +44,7 @@ #include "orte/mca/state/state.h" #include "orte/util/name_fns.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/grpcomm/grpcomm.h" @@ -144,6 +145,8 @@ static void 
allgather_stub(int fd, short args, void *cbdata) orte_grpcomm_coll_t *coll; uint32_t *seq_number; + ORTE_ACQUIRE_OBJECT(cd); + OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, "%s grpcomm:base:allgather stub", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -212,6 +215,7 @@ int orte_grpcomm_API_allgather(orte_grpcomm_signature_t *sig, cd->cbdata = cbdata; opal_event_set(orte_event_base, &cd->ev, -1, OPAL_EV_WRITE, allgather_stub, cd); opal_event_set_priority(&cd->ev, ORTE_MSG_PRI); + ORTE_POST_OBJECT(cd); opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); return ORTE_SUCCESS; } diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index 0621d5db124..c247c854f4b 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -383,6 +383,10 @@ static void xcast_recv(int status, orte_process_name_t* sender, if (ORTE_DAEMON_EXIT_CMD == command || ORTE_DAEMON_HALT_VM_CMD == command) { orte_orteds_term_ordered = true; + if (ORTE_DAEMON_HALT_VM_CMD == command) { + /* this is an abnormal termination */ + orte_abnormal_term_ordered = true; + } /* copy the msg for relay to ourselves */ relay = OBJ_NEW(opal_buffer_t); /* repack the command */ @@ -522,18 +526,26 @@ static void xcast_recv(int status, orte_process_name_t* sender, */ jdata = orte_get_job_data_object(nm->name.jobid); if (NULL == (rec = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, nm->name.vpid))) { - opal_output(0, "%s grpcomm:direct:send_relay proc %s not found - cannot relay", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); + if (!orte_abnormal_term_ordered && !orte_orteds_term_ordered) { + opal_output(0, "%s grpcomm:direct:send_relay proc %s not found - cannot relay", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); + } OBJ_RELEASE(rly); OBJ_RELEASE(item); + ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); continue; } - if (ORTE_PROC_STATE_RUNNING < rec->state || + if 
((ORTE_PROC_STATE_RUNNING < rec->state && + ORTE_PROC_STATE_CALLED_ABORT != rec->state) || !ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE)) { - opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); + if (!orte_abnormal_term_ordered && !orte_orteds_term_ordered) { + opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay: %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name), + ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE) ? orte_proc_state_to_str(rec->state) : "NOT ALIVE"); + } OBJ_RELEASE(rly); OBJ_RELEASE(item); + ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); continue; } if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(orte_coll_conduit, @@ -542,6 +554,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); OBJ_RELEASE(rly); OBJ_RELEASE(item); + ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); continue; } OBJ_RELEASE(item); diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index 1bd87921cb2..a67043ff53f 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -52,6 +52,7 @@ #include "orte/mca/iof/iof.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml_types.h" +#include "orte/util/threads.h" BEGIN_C_DECLS @@ -163,6 +164,7 @@ typedef struct orte_iof_base_t orte_iof_base_t; opal_event_set_priority(ep->wev->ev, ORTE_MSG_PRI); \ } \ *(snk) = ep; \ + ORTE_POST_OBJECT(ep); \ } while(0); /* add list of structs that has name of proc + orte_iof_tag_t - when @@ -192,6 +194,7 @@ typedef struct orte_iof_base_t orte_iof_base_t; opal_event_set_priority(rev->ev, ORTE_MSG_PRI); \ if ((actv)) { \ rev->active = true; \ + ORTE_POST_OBJECT(rev); \ opal_event_add(rev->ev, 0); \ } \ } while(0); diff --git a/orte/mca/iof/base/iof_base_output.c b/orte/mca/iof/base/iof_base_output.c index 24d9176f2ba..844a3fc6fc0 100644 --- a/orte/mca/iof/base/iof_base_output.c +++ 
b/orte/mca/iof/base/iof_base_output.c @@ -38,6 +38,7 @@ #include "opal/util/output.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/state/state.h" @@ -147,7 +148,7 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s output->numbytes = numbytes; goto process; -construct: + construct: starttaglen = strlen(starttag); endtaglen = strlen(endtag); endtagged = false; @@ -249,7 +250,7 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s } output->numbytes = k; -process: + process: /* add this data to the write list for this fd */ opal_list_append(&channel->outputs, &output->super); @@ -262,8 +263,9 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s write:output adding write event", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - opal_event_add(channel->ev, 0); channel->pending = true; + ORTE_POST_OBJECT(channel); + opal_event_add(channel->ev, 0); } return num_buffered; @@ -303,6 +305,8 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) orte_iof_write_output_t *output; int num_written; + ORTE_ACQUIRE_OBJECT(sink); + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -356,8 +360,8 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) } OBJ_RELEASE(output); } -ABORT: + ABORT: opal_event_del(wev->ev); wev->pending = false; - + ORTE_POST_OBJECT(wev); } diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index 249e84718ea..cbcddd0012d 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -47,6 +47,7 @@ #include "orte/mca/ess/ess.h" #include "orte/mca/rml/rml.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include 
"orte/mca/odls/odls_types.h" #include "orte/mca/iof/base/base.h" @@ -214,10 +215,13 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, } } proct->revstdout->active = true; + ORTE_POST_OBJECT(proct->revstdout); opal_event_add(proct->revstdout->ev, 0); proct->revstderr->active = true; + ORTE_POST_OBJECT(proct->revstderr); opal_event_add(proct->revstderr->ev, 0); proct->revstddiag->active = true; + ORTE_POST_OBJECT(proct->revstddiag); opal_event_add(proct->revstddiag->ev, 0); } return ORTE_SUCCESS; @@ -299,6 +303,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, */ if (!(src_tag & ORTE_IOF_STDIN) || orte_iof_hnp_stdin_check(fd)) { mca_iof_hnp_component.stdinev->active = true; + ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); } } else { @@ -515,6 +520,8 @@ static void stdin_write_handler(int fd, short event, void *cbdata) orte_iof_write_output_t *output; int num_written; + ORTE_ACQUIRE_OBJECT(sink); + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s hnp:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -558,6 +565,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) * when the fd is ready. */ wev->pending = true; + ORTE_POST_OBJECT(wev); opal_event_add(wev->ev, 0); goto CHECK; } @@ -583,13 +591,14 @@ static void stdin_write_handler(int fd, short event, void *cbdata) * when the fd is ready. 
*/ wev->pending = true; + ORTE_POST_OBJECT(wev); opal_event_add(wev->ev, 0); goto CHECK; } OBJ_RELEASE(output); } -CHECK: + CHECK: if (NULL != mca_iof_hnp_component.stdinev && !orte_abnormal_term_ordered && !mca_iof_hnp_component.stdinev->active) { @@ -610,6 +619,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "restarting read event")); mca_iof_hnp_component.stdinev->active = true; + ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); } } diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index 8e73d3c72be..55978e527d0 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -35,6 +35,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/odls/odls_types.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -48,10 +49,13 @@ static void restart_stdin(int fd, short event, void *cbdata) { orte_timer_t *tm = (orte_timer_t*)cbdata; + ORTE_ACQUIRE_OBJECT(tm); + if (NULL != mca_iof_hnp_component.stdinev && !orte_job_term_ordered && !mca_iof_hnp_component.stdinev->active) { mca_iof_hnp_component.stdinev->active = true; + ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); } @@ -74,7 +78,11 @@ bool orte_iof_hnp_stdin_check(int fd) void orte_iof_hnp_stdin_cb(int fd, short event, void *cbdata) { - bool should_process = orte_iof_hnp_stdin_check(0); + bool should_process; + + ORTE_ACQUIRE_OBJECT(mca_iof_hnp_component.stdinev); + + should_process = orte_iof_hnp_stdin_check(0); if (should_process) { mca_iof_hnp_component.stdinev->active = true; @@ -99,6 +107,8 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) bool exclusive; orte_iof_sink_t *sink; + ORTE_ACQUIRE_OBJECT(rev); + /* 
read up to the fragment size */ numbytes = read(fd, data, sizeof(data)); @@ -293,6 +303,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) } /* re-add the event */ + ORTE_POST_OBJECT(rev); opal_event_add(rev->ev, 0); return; diff --git a/orte/mca/iof/hnp/iof_hnp_receive.c b/orte/mca/iof/hnp/iof_hnp_receive.c index 5fd27a004a0..17307ba6f6d 100644 --- a/orte/mca/iof/hnp/iof_hnp_receive.c +++ b/orte/mca/iof/hnp/iof_hnp_receive.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,6 +41,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/iof/iof.h" @@ -81,6 +82,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, !orte_job_term_ordered && !mca_iof_hnp_component.stdinev->active) { mca_iof_hnp_component.stdinev->active = true; + ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); } goto CLEAN_RETURN; diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index 266e5d2cc5e..ddfec3e073c 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -42,6 +42,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/odls/odls_types.h" #include "orte/mca/rml/rml.h" @@ -190,10 +191,13 @@ static int orted_push(const orte_process_name_t* dst_name, */ if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) { proct->revstdout->active = true; + ORTE_POST_OBJECT(proct->revstdout); 
opal_event_add(proct->revstdout->ev, 0); proct->revstderr->active = true; + ORTE_POST_OBJECT(proct->revstderr); opal_event_add(proct->revstderr->ev, 0); proct->revstddiag->active = true; + ORTE_POST_OBJECT(proct->revstddiag); opal_event_add(proct->revstddiag->ev, 0); } return ORTE_SUCCESS; @@ -367,6 +371,8 @@ static void stdin_write_handler(int fd, short event, void *cbdata) orte_iof_write_output_t *output; int num_written; + ORTE_ACQUIRE_OBJECT(sink); + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s orted:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -400,6 +406,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) * when the fd is ready. */ wev->pending = true; + ORTE_POST_OBJECT(wev); opal_event_add(wev->ev, 0); goto CHECK; } @@ -430,6 +437,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) * when the fd is ready. */ wev->pending = true; + ORTE_POST_OBJECT(wev); opal_event_add(wev->ev, 0); goto CHECK; } diff --git a/orte/mca/iof/orted/iof_orted_read.c b/orte/mca/iof/orted/iof_orted_read.c index 4901285a449..728f21162ff 100644 --- a/orte/mca/iof/orted/iof_orted_read.c +++ b/orte/mca/iof/orted/iof_orted_read.c @@ -35,6 +35,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/odls/odls_types.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/runtime/orte_globals.h" @@ -52,6 +53,8 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) int32_t numbytes; orte_iof_proc_t *proct = (orte_iof_proc_t*)rev->proc; + ORTE_ACQUIRE_OBJECT(rev); + /* read up to the fragment size */ #if !defined(__WINDOWS__) numbytes = read(fd, data, sizeof(data)); @@ -100,6 +103,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) } if (!proct->copy) { /* re-add the event */ + ORTE_POST_OBJECT(rev); opal_event_add(rev->ev, 0); return; } @@ -137,6 +141,7 @@ void orte_iof_orted_read_handler(int 
fd, short event, void *cbdata) orte_rml_send_callback, NULL); /* re-add the event */ + ORTE_POST_OBJECT(rev); opal_event_add(rev->ev, 0); return; diff --git a/orte/mca/notifier/base/notifier_base_fns.c b/orte/mca/notifier/base/notifier_base_fns.c index 61e139807ff..1a6751a2085 100644 --- a/orte/mca/notifier/base/notifier_base_fns.c +++ b/orte/mca/notifier/base/notifier_base_fns.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +25,7 @@ #include "opal/util/argv.h" #include "orte/util/attr.h" +#include "orte/util/threads.h" #include "orte/mca/notifier/base/base.h" @@ -38,6 +39,8 @@ void orte_notifier_base_log(int sd, short args, void *cbdata) orte_notifier_active_module_t *imod; int i; + ORTE_ACQUIRE_OBJECT(req); + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; @@ -74,6 +77,8 @@ void orte_notifier_base_event(int sd, short args, void *cbdata) orte_notifier_active_module_t *imod; int i; + ORTE_ACQUIRE_OBJECT(req); + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; @@ -110,6 +115,8 @@ void orte_notifier_base_report(int sd, short args, void *cbdata) orte_notifier_active_module_t *imod; int i; + ORTE_ACQUIRE_OBJECT(req); + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; diff --git a/orte/mca/notifier/notifier.h b/orte/mca/notifier/notifier.h index cc40297c574..d7ca73e7e69 100644 --- a/orte/mca/notifier/notifier.h +++ b/orte/mca/notifier/notifier.h @@ -13,7 +13,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All Rights Reserved. 
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,6 +51,7 @@ #include "orte/types.h" #include "orte/runtime/orte_globals.h" +#include "orte/util/threads.h" BEGIN_C_DECLS @@ -63,7 +64,7 @@ ORTE_DECLSPEC extern int orte_notifier_debug_output; * The code has NOT been auditied for use of malloc, so this still * may fail to get the "OUT_OF_RESOURCE" message out. Oh Well. */ -#define ORTE_NOTIFIER_MAX_BUF 512 +#define ORTE_NOTIFIER_MAX_BUF 512 /* Severities */ typedef enum { @@ -136,6 +137,7 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ OPAL_EV_WRITE, orte_notifier_base_log, (_n)); \ opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ + ORTE_POST_OBJECT(_n); \ opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ } while(0); @@ -160,6 +162,7 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ OPAL_EV_WRITE, orte_notifier_base_report, (_n)); \ opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ + ORTE_POST_OBJECT(_n); \ opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ } while(0); @@ -183,6 +186,7 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ OPAL_EV_WRITE, orte_notifier_base_event, (_n)); \ opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ + ORTE_POST_OBJECT(_n); \ opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ } while(0); diff --git a/orte/mca/notifier/smtp/notifier_smtp_module.c b/orte/mca/notifier/smtp/notifier_smtp_module.c index 53a035fe881..666fd080281 100644 --- a/orte/mca/notifier/smtp/notifier_smtp_module.c +++ b/orte/mca/notifier/smtp/notifier_smtp_module.c @@ 
-11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -50,18 +50,10 @@ /* Static API's */ static void mylog(orte_notifier_base_severity_t severity, int errcode, const char *msg, va_list ap); -static void myhelplog(orte_notifier_base_severity_t severity, int errcode, - const char *filename, - const char *topic, va_list ap); -static void mypeerlog(orte_notifier_base_severity_t severity, int errcode, - orte_process_name_t *peer_proc, - const char *msg, va_list ap); /* Module */ orte_notifier_base_module_t orte_notifier_smtp_module = { - NULL, - NULL, - mylog, + .log = mylog }; typedef enum { diff --git a/orte/mca/notifier/syslog/notifier_syslog_module.c b/orte/mca/notifier/syslog/notifier_syslog_module.c index a8121685a33..d488ca392f0 100644 --- a/orte/mca/notifier/syslog/notifier_syslog_module.c +++ b/orte/mca/notifier/syslog/notifier_syslog_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,11 +48,11 @@ static void myreport(orte_notifier_request_t *req); /* Module def */ orte_notifier_base_module_t orte_notifier_syslog_module = { - init, - finalize, - mylog, - myevent, - myreport + .init = init, + .finalize = finalize, + .log = mylog, + .event = myevent, + .report = myreport }; @@ -130,4 +130,3 @@ static void myreport(orte_notifier_request_t *req) orte_job_state_to_str(req->state), (NULL == req->msg) ? 
"" : req->msg); } - diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 932980d3e15..54f1b53e00b 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -81,6 +81,7 @@ #include "orte/util/proc_info.h" #include "orte/util/nidmap.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/orted/orted.h" @@ -278,6 +279,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, int rc; orte_std_cntr_t cnt; orte_job_t *jdata=NULL, *daemons; + orte_node_t *node; int32_t n, k; opal_buffer_t *bptr; orte_proc_t *pptr, *dmn; @@ -435,7 +437,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, /* not ready for use yet */ continue; } - if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + if (!ORTE_PROC_IS_HNP && + orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { /* the parser will have already made the connection, but the fully described * case won't have done it, so connect the proc to its node here */ opal_output_verbose(5, orte_odls_base_framework.framework_output, @@ -456,6 +459,17 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } OBJ_RETAIN(dmn->node); pptr->node = dmn->node; + /* add the node to the job map, if needed */ + if (!ORTE_FLAG_TEST(pptr->node, ORTE_NODE_FLAG_MAPPED)) { + OBJ_RETAIN(pptr->node); + opal_pointer_array_add(jdata->map->nodes, pptr->node); + jdata->map->num_nodes++; + ORTE_FLAG_SET(pptr->node, ORTE_NODE_FLAG_MAPPED); + } + /* add this proc to that node */ + OBJ_RETAIN(pptr); + opal_pointer_array_add(pptr->node->procs, pptr); + pptr->node->num_procs++; } /* see if it belongs to us */ if (pptr->parent == ORTE_PROC_MY_NAME->vpid) { @@ -484,6 +498,14 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t 
*buffer, ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE); } } + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + /* reset the mapped flags */ + for (n=0; n < jdata->map->nodes->size; n++) { + if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) { + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); + } + } + } if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { /* compute and save bindings of local children */ @@ -582,6 +604,8 @@ static void timer_cb(int fd, short event, void *cbdata) orte_timer_t *tm = (orte_timer_t*)cbdata; orte_odls_launch_local_t *ll = (orte_odls_launch_local_t*)tm->payload; + ORTE_ACQUIRE_OBJECT(tm); + /* increment the number of retries */ ll->retries++; @@ -629,6 +653,8 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) char *pathenv = NULL, *mpiexec_pathenv = NULL; char *full_search; + ORTE_ACQUIRE_OBJECT(cd); + /* thread-protect common values */ cd->env = opal_argv_copy(app->env); @@ -820,6 +846,8 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) opal_event_base_t *evb; char *effective_dir = NULL; + ORTE_ACQUIRE_OBJECT(caddy); + opal_output_verbose(5, orte_odls_base_framework.framework_output, "%s local:launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); diff --git a/orte/mca/odls/base/odls_base_frame.c b/orte/mca/odls/base/odls_base_frame.c index 919e303c6b5..810cf43131a 100644 --- a/orte/mca/odls/base/odls_base_frame.c +++ b/orte/mca/odls/base/odls_base_frame.c @@ -86,6 +86,15 @@ static int orte_odls_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &orte_odls_globals.num_threads); + orte_odls_globals.signal_direct_children_only = false; + (void) mca_base_var_register("orte", "odls", "base", "signal_direct_children_only", + "Whether to restrict signals (e.g., SIGTERM) to direct children, or " + "to apply them as well to any children spawned by those processes", + 
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &orte_odls_globals.signal_direct_children_only); + return ORTE_SUCCESS; } diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index 4d93c2ceb2c..81cf44e30a6 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -62,6 +62,7 @@ typedef struct { opal_event_base_t **ev_bases; // event base array for progress threads char** ev_threads; // event progress thread names int next_base; // counter to load-level thread use + bool signal_direct_children_only; } orte_odls_globals_t; ORTE_DECLSPEC extern orte_odls_globals_t orte_odls_globals; diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 6eb4f4280f5..6ef8aa683dc 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -127,6 +127,7 @@ #include "orte/mca/plm/plm.h" #include "orte/mca/rtc/rtc.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/odls/base/odls_private.h" @@ -157,11 +158,11 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) * Module */ orte_odls_base_module_t orte_odls_default_module = { - orte_odls_base_default_get_add_procs_data, - orte_odls_default_launch_local_procs, - orte_odls_default_kill_local_procs, - orte_odls_default_signal_local_procs, - orte_odls_default_restart_proc + .get_add_procs_data = orte_odls_base_default_get_add_procs_data, + .launch_local_procs = orte_odls_default_launch_local_procs, + .kill_local_procs = orte_odls_default_kill_local_procs, + .signal_local_procs = orte_odls_default_signal_local_procs, + .restart_proc = orte_odls_default_restart_proc }; @@ -643,9 +644,22 @@ int orte_odls_default_launch_local_procs(opal_buffer_t *data) * Send a signal to a pid. 
Note that if we get an error, we set the * return value and let the upper layer print out the message. */ -static int send_signal(pid_t pid, int signal) +static int send_signal(pid_t pd, int signal) { int rc = ORTE_SUCCESS; + pid_t pid; + + if (orte_odls_globals.signal_direct_children_only) { + pid = pd; + } else { +#if HAVE_SETPGID + /* send to the process group so that any children of our children + * also receive the signal*/ + pid = -pd; +#else + pid = pd; +#endif + } OPAL_OUTPUT_VERBOSE((1, orte_odls_base_framework.framework_output, "%s sending signal %d to pid %ld", diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index 322ba0be1b1..c7b634b6ace 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -42,9 +42,11 @@ #include "opal/class/opal_hash_table.h" #include "opal/class/opal_list.h" #include "opal/util/timings.h" -#include "orte/mca/mca.h" #include "opal/mca/event/event.h" +#include "orte/mca/mca.h" +#include "orte/util/threads.h" + #include "orte/mca/oob/oob.h" BEGIN_C_DECLS @@ -119,11 +121,8 @@ ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata); __FILE__, __LINE__); \ cd = OBJ_NEW(orte_oob_send_t); \ cd->msg = (m); \ - opal_event_set(orte_oob_base.ev_base, &cd->ev, -1, \ - OPAL_EV_WRITE, \ - orte_oob_base_send_nb, cd); \ - opal_event_set_priority(&cd->ev, ORTE_MSG_PRI); \ - opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT(cd, orte_oob_base.ev_base, \ + orte_oob_base_send_nb, ORTE_MSG_PRI); \ }while(0) /* Our contact info is actually subject to change as transports @@ -168,11 +167,11 @@ typedef struct { } mca_oob_uri_req_t; OBJ_CLASS_DECLARATION(mca_oob_uri_req_t); -#define ORTE_OOB_SET_URI(u) \ - do { \ - mca_oob_uri_req_t *rq; \ - rq = OBJ_NEW(mca_oob_uri_req_t); \ - rq->uri = strdup((u)); \ +#define ORTE_OOB_SET_URI(u) \ + do { \ + mca_oob_uri_req_t *rq; \ + rq = OBJ_NEW(mca_oob_uri_req_t); \ + rq->uri = strdup((u)); \ orte_oob_base_set_addr(0, 0, (void*)rq); \ }while(0) diff 
--git a/orte/mca/oob/base/oob_base_stubs.c b/orte/mca/oob/base/oob_base_stubs.c index ccc333ba43e..e6da454ed83 100644 --- a/orte/mca/oob/base/oob_base_stubs.c +++ b/orte/mca/oob/base/oob_base_stubs.c @@ -21,7 +21,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/state/state.h" #include "orte/mca/rml/rml.h" - +#include "orte/util/threads.h" #include "orte/mca/oob/base/base.h" #if OPAL_ENABLE_FT_CR == 1 #include "orte/mca/state/base/base.h" @@ -32,7 +32,7 @@ static void process_uri(char *uri); void orte_oob_base_send_nb(int fd, short args, void *cbdata) { orte_oob_send_t *cd = (orte_oob_send_t*)cbdata; - orte_rml_send_t *msg = cd->msg; + orte_rml_send_t *msg; mca_base_component_list_item_t *cli; orte_oob_base_peer_t *pr; int rc; @@ -42,7 +42,10 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) bool reachable; char *uri; + ORTE_ACQUIRE_OBJECT(cd); + /* done with this. release it now */ + msg = cd->msg; OBJ_RELEASE(cd); opal_output_verbose(5, orte_oob_base_framework.framework_output, @@ -276,7 +279,7 @@ void orte_oob_base_get_addr(char **uri) } } - unblock: + unblock: *uri = final; } @@ -303,7 +306,10 @@ OBJ_CLASS_INSTANCE(mca_oob_uri_req_t, void orte_oob_base_set_addr(int fd, short args, void *cbdata) { mca_oob_uri_req_t *req = (mca_oob_uri_req_t*)cbdata; - char *uri = req->uri; + char *uri; + + ORTE_ACQUIRE_OBJECT(req); + uri = req->uri; opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s: set_addr to uri %s", diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index 6d7e9c8c35a..d5f5ce9c55d 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -62,6 +62,7 @@ #include "orte/util/name_fns.h" #include "orte/util/parse_options.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/oob/tcp/oob_tcp.h" @@ -253,6 +254,8 @@ static void recv_handler(int sd, short flg, void *cbdata) mca_oob_tcp_hdr_t hdr; mca_oob_tcp_peer_t 
*peer; + ORTE_ACQUIRE_OBJECT(op); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler called", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 27810ec2457..0915e726e61 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -74,6 +74,7 @@ #include "orte/util/name_fns.h" #include "orte/util/parse_options.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -184,7 +185,7 @@ static int tcp_component_open(void) static int tcp_component_close(void) { /* cleanup listen event list */ - OBJ_DESTRUCT(&mca_oob_tcp_component.listeners); + OPAL_LIST_DESTRUCT(&mca_oob_tcp_component.listeners); OBJ_DESTRUCT(&mca_oob_tcp_component.peers); @@ -694,24 +695,11 @@ static int component_startup(void) return rc; } -static void cleanup(int sd, short args, void *cbdata) -{ - opal_list_item_t * item; - bool *active = (bool*)cbdata; - while (NULL != (item = opal_list_remove_first(&mca_oob_tcp_component.listeners))) { - OBJ_RELEASE(item); - } - if (NULL != active) { - *active = false; - } -} - static void component_shutdown(void) { mca_oob_tcp_peer_t *peer; uint64_t ui64; int i = 0; - bool active; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN", @@ -746,23 +734,6 @@ static void component_shutdown(void) "no hnp or not active"); } - /* because the listeners are in a separate - * async thread for apps, we can't just release them here. 
- * Instead, we push it into that event thread and release - * them there */ - if (ORTE_PROC_IS_APP) { - opal_event_t ev; - active = true; - opal_event_set(orte_event_base, &ev, -1, - OPAL_EV_WRITE, cleanup, &active); - opal_event_set_priority(&ev, ORTE_ERROR_PRI); - opal_event_active(&ev, OPAL_EV_WRITE, 1); - ORTE_WAIT_FOR_COMPLETION(active); - } else { - /* we can call the destruct directly */ - cleanup(0, 0, NULL); - } - opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN done", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -1062,6 +1033,8 @@ void mca_oob_tcp_component_set_module(int fd, short args, void *cbdata) int rc; orte_oob_base_peer_t *bpr; + ORTE_ACQUIRE_OBJECT(pop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:set_module called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1093,6 +1066,8 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata) orte_oob_base_peer_t *bpr; int rc; + ORTE_ACQUIRE_OBJECT(pop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:lost connection called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1128,6 +1103,8 @@ void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata) int rc; orte_oob_base_peer_t *bpr; + ORTE_ACQUIRE_OBJECT(mop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:no route called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1162,6 +1139,8 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) orte_rml_send_t *snd; orte_oob_base_peer_t *bpr; + ORTE_ACQUIRE_OBJECT(mop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:unknown hop called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1235,6 +1214,8 @@ void mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata) { mca_oob_tcp_peer_op_t *pop = 
(mca_oob_tcp_peer_op_t*)cbdata; + ORTE_ACQUIRE_OBJECT(pop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:failed_to_connect called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/oob/tcp/oob_tcp_connection.c b/orte/mca/oob/tcp/oob_tcp_connection.c index 704398649ad..14f606640fe 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.c +++ b/orte/mca/oob/tcp/oob_tcp_connection.c @@ -63,6 +63,7 @@ #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" @@ -152,7 +153,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer) void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) { mca_oob_tcp_conn_op_t *op = (mca_oob_tcp_conn_op_t*)cbdata; - mca_oob_tcp_peer_t *peer = op->peer; + mca_oob_tcp_peer_t *peer; int rc; opal_socklen_t addrlen = 0; mca_oob_tcp_addr_t *addr; @@ -160,6 +161,9 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) mca_oob_tcp_send_t *snd; bool connected = false; + ORTE_ACQUIRE_OBJECT(op); + peer = op->peer; + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s orte_tcp_peer_try_connect: " "attempting to connect to proc %s", @@ -586,8 +590,9 @@ void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t *peer) ORTE_NAME_PRINT(&(peer->name))); if (!peer->recv_ev_active) { - opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->recv_event, 0); } } else { opal_output(0, "%s tcp_peer_complete_connect: unable to send connect ack to %s", @@ -608,6 +613,8 @@ static int tcp_peer_send_blocking(int sd, void* data, size_t size) size_t cnt = 0; int retval; + ORTE_ACQUIRE_OBJECT(ptr); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s send blocking of %"PRIsize_t" bytes to 
socket %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -949,8 +956,9 @@ static void tcp_peer_connected(mca_oob_tcp_peer_t* peer) opal_list_remove_first(&peer->send_queue); } if (NULL != peer->send_msg && !peer->send_ev_active) { - opal_event_add(&peer->send_event, 0); peer->send_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->send_event, 0); } } @@ -1214,8 +1222,9 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer) tcp_peer_connected(peer); if (!peer->recv_ev_active) { - opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->recv_event, 0); } if (OOB_TCP_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) { mca_oob_tcp_peer_dump(peer, "accepted"); diff --git a/orte/mca/oob/tcp/oob_tcp_connection.h b/orte/mca/oob/tcp/oob_tcp_connection.h index dc172e627d7..e1392fe781c 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.h +++ b/orte/mca/oob/tcp/oob_tcp_connection.h @@ -32,6 +32,7 @@ #include #endif +#include "orte/util/threads.h" #include "oob_tcp.h" #include "oob_tcp_peer.h" @@ -59,10 +60,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t); ORTE_NAME_PRINT((&(p)->name))); \ cop = OBJ_NEW(mca_oob_tcp_conn_op_t); \ cop->peer = (p); \ - opal_event_set((p)->ev_base, &cop->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), cop); \ - opal_event_set_priority(&cop->ev, ORTE_MSG_PRI); \ - opal_event_active(&cop->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT(cop, (p)->ev_base, (cbfunc), ORTE_MSG_PRI); \ } while(0); #define ORTE_ACTIVATE_TCP_ACCEPT_STATE(s, a, cbfunc) \ @@ -72,6 +70,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t); opal_event_set(orte_oob_base.ev_base, &cop->ev, s, \ OPAL_EV_READ, (cbfunc), cop); \ opal_event_set_priority(&cop->ev, ORTE_MSG_PRI); \ + ORTE_POST_OBJECT(cop); \ opal_event_add(&cop->ev, 0); \ } while(0); @@ -88,6 +87,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t); opal_event_evtimer_set((p)->ev_base, \ &cop->ev, \ (cbfunc), cop); \ + 
ORTE_POST_OBJECT(cop); \ opal_event_evtimer_add(&cop->ev, (tv)); \ } while(0); diff --git a/orte/mca/oob/tcp/oob_tcp_listener.c b/orte/mca/oob/tcp/oob_tcp_listener.c index 1312ce0b69c..f452f7b5ef5 100644 --- a/orte/mca/oob/tcp/oob_tcp_listener.c +++ b/orte/mca/oob/tcp/oob_tcp_listener.c @@ -66,6 +66,7 @@ #include "orte/util/name_fns.h" #include "orte/util/parse_options.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/oob/tcp/oob_tcp.h" @@ -162,6 +163,7 @@ int orte_oob_tcp_start_listening(void) connection_event_handler, 0); opal_event_set_priority(&listener->event, ORTE_MSG_PRI); + ORTE_POST_OBJECT(listener); opal_event_add(&listener->event, 0); } @@ -816,6 +818,7 @@ static void* listen_thread(opal_object_t *obj) } /* activate the event */ + ORTE_POST_OBJECT(pending_connection); opal_event_active(&pending_connection->ev, OPAL_EV_WRITE, 1); accepted_connections++; } @@ -858,6 +861,8 @@ static void connection_handler(int sd, short flags, void* cbdata) new_connection = (mca_oob_tcp_pending_connection_t*)cbdata; + ORTE_ACQUIRE_OBJECT(new_connection); + opal_output_verbose(4, orte_oob_base_framework.framework_output, "%s connection_handler: working connection " "(%d, %d) %s:%d\n", diff --git a/orte/mca/oob/tcp/oob_tcp_peer.h b/orte/mca/oob/tcp/oob_tcp_peer.h index 12bcf05bec8..8d04fd44387 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.h +++ b/orte/mca/oob/tcp/oob_tcp_peer.h @@ -27,6 +27,7 @@ #include "opal/mca/event/event.h" +#include "orte/util/threads.h" #include "oob_tcp.h" #include "oob_tcp_sendrecv.h" @@ -87,10 +88,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t); if (NULL != proxy) { \ pop->rtmod = strdup(proxy); \ } \ - opal_event_set(orte_oob_base.ev_base, &pop->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), pop); \ - opal_event_set_priority(&pop->ev, ORTE_MSG_PRI); \ - opal_event_active(&pop->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT(pop, orte_oob_base.ev_base, \ + (cbfunc), ORTE_MSG_PRI); \ } 
while(0); #endif /* _MCA_OOB_TCP_PEER_H_ */ diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 70a4c134128..6db0243ed5d 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -64,6 +64,7 @@ #include "opal/mca/event/event.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ess/ess.h" @@ -82,7 +83,10 @@ void mca_oob_tcp_queue_msg(int sd, short args, void *cbdata) { mca_oob_tcp_send_t *snd = (mca_oob_tcp_send_t*)cbdata; - mca_oob_tcp_peer_t *peer = (mca_oob_tcp_peer_t*)snd->peer; + mca_oob_tcp_peer_t *peer; + + ORTE_ACQUIRE_OBJECT(snd); + peer = (mca_oob_tcp_peer_t*)snd->peer; /* if there is no message on-deck, put this one there */ if (NULL == peer->send_msg) { @@ -99,8 +103,9 @@ void mca_oob_tcp_queue_msg(int sd, short args, void *cbdata) } else { /* ensure the send event is active */ if (!peer->send_ev_active) { - opal_event_add(&peer->send_event, 0); peer->send_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->send_event, 0); } } } @@ -196,9 +201,12 @@ static int send_msg(mca_oob_tcp_peer_t* peer, mca_oob_tcp_send_t* msg) void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) { mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata; - mca_oob_tcp_send_t* msg = peer->send_msg; + mca_oob_tcp_send_t* msg; int rc; + ORTE_ACQUIRE_OBJECT(peer); + msg = peer->send_msg; + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:send_handler called to send to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -424,6 +432,8 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) int rc; orte_rml_send_t *snd; + ORTE_ACQUIRE_OBJECT(peer); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -437,8 +447,9 
@@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* we connected! Start the send/recv events */ if (!peer->recv_ev_active) { - opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->recv_event, 0); } if (peer->timer_ev_active) { opal_event_del(&peer->timer_event); @@ -449,8 +460,9 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) peer->send_msg = (mca_oob_tcp_send_t*)opal_list_remove_first(&peer->send_queue); } if (NULL != peer->send_msg && !peer->send_ev_active) { - opal_event_add(&peer->send_event, 0); peer->send_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->send_event, 0); } /* update our state */ peer->state = MCA_OOB_TCP_CONNECTED; diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index 64deb35a96f..9412a4e0fd6 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -28,7 +28,7 @@ #include "opal/class/opal_list.h" #include "orte/mca/rml/base/base.h" - +#include "orte/util/threads.h" #include "oob_tcp.h" #include "oob_tcp_hdr.h" @@ -82,10 +82,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); do { \ (s)->peer = (struct mca_oob_tcp_peer_t*)(p); \ (s)->activate = (f); \ - opal_event_set((p)->ev_base, &(s)->ev, -1, \ - OPAL_EV_WRITE, mca_oob_tcp_queue_msg, (s)); \ - opal_event_set_priority(&(s)->ev, ORTE_MSG_PRI); \ - opal_event_active(&(s)->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT((s), (p)->ev_base, \ + mca_oob_tcp_queue_msg, ORTE_MSG_PRI); \ } while(0) /* queue a message to be sent by one of our modules - must @@ -134,7 +132,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->sdbytes = sizeof(mca_oob_tcp_hdr_t); \ /* add to the msg queue for this peer */ \ MCA_OOB_TCP_QUEUE_MSG((p), _s, true); \ - }while(0); + } while(0) /* queue a message to be sent by one of our modules upon completing * the connection process - must provide the 
following params: @@ -182,7 +180,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->sdbytes = sizeof(mca_oob_tcp_hdr_t); \ /* add to the msg queue for this peer */ \ MCA_OOB_TCP_QUEUE_MSG((p), _s, false); \ - }while(0); + } while(0) /* queue a message for relay by one of our modules - must * provide the following params: @@ -217,7 +215,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->sdbytes = sizeof(mca_oob_tcp_hdr_t); \ /* add to the msg queue for this peer */ \ MCA_OOB_TCP_QUEUE_MSG((p), _s, true); \ - }while(0); + } while(0) /* State machine for processing message */ typedef struct { @@ -237,10 +235,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_op_t); ORTE_NAME_PRINT(&((ms)->dst))); \ mop = OBJ_NEW(mca_oob_tcp_msg_op_t); \ mop->msg = (ms); \ - opal_event_set((ms)->peer->ev_base, &mop->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), mop); \ - opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \ - opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT(mop, (ms)->peer->ev_base, \ + (cbfunc), ORTE_MSG_PRI); \ } while(0); typedef struct { @@ -285,11 +281,9 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_error_t); mop->hop.jobid = (h)->jobid; \ mop->hop.vpid = (h)->vpid; \ /* this goes to the OOB framework, so use that event base */ \ - opal_event_set(orte_oob_base.ev_base, &mop->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), mop); \ - opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \ - opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \ - } while(0); + ORTE_THREADSHIFT(mop, orte_oob_base.ev_base, \ + (cbfunc), ORTE_MSG_PRI); \ + } while(0) #define ORTE_ACTIVATE_TCP_NO_ROUTE(r, h, c) \ do { \ @@ -305,10 +299,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_error_t); mop->hop.vpid = (h)->vpid; \ /* this goes to the component, so use the framework \ * event base */ \ - opal_event_set(orte_oob_base.ev_base, &mop->ev, -1, \ - OPAL_EV_WRITE, (c), mop); \ - opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \ - opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \ - } while(0); + ORTE_THREADSHIFT(mop, 
orte_oob_base.ev_base, \ + (c), ORTE_MSG_PRI); \ + } while(0) #endif /* _MCA_OOB_TCP_SENDRECV_H_ */ diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c index 61b1c32dba6..c77704e6da0 100644 --- a/orte/mca/plm/alps/plm_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -55,13 +55,14 @@ #include "opal/mca/installdirs/installdirs.h" #include "opal/util/argv.h" #include "opal/util/output.h" -#include "orte/util/show_help.h" #include "opal/util/opal_environ.h" #include "opal/util/path.h" #include "opal/util/basename.h" #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" +#include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/rmaps.h" @@ -187,6 +188,8 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; char *ltmp; + ORTE_ACQUIRE_OBJECT(state); + /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 0c54807a7e6..6fcb44ae6fc 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -74,6 +74,7 @@ #include "orte/util/pre_condition_transports.h" #include "orte/util/proc_info.h" #include "orte/util/regex.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/mca/state/base/base.h" #include "orte/util/hostfile/hostfile.h" @@ -129,6 +130,8 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) orte_node_t *node; int i; + ORTE_ACQUIRE_OBJECT(caddy); + /* if we are not launching, then we just assume that all * daemons share our topology */ if (orte_do_not_launch) { @@ -182,6 +185,8 @@ void orte_plm_base_allocation_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = 
(orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* move the state machine along */ caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE; ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS); @@ -194,6 +199,8 @@ void orte_plm_base_daemons_launched(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* do NOT increment the state - we wait for the * daemons to report that they have actually * started before moving to the right state @@ -217,6 +224,8 @@ void orte_plm_base_vm_ready(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* progress the job */ caddy->jdata->state = ORTE_JOB_STATE_VM_READY; @@ -233,6 +242,8 @@ void orte_plm_base_mapping_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* move the state machine along */ caddy->jdata->state = ORTE_JOB_STATE_MAP_COMPLETE; ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_SYSTEM_PREP); @@ -252,6 +263,8 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata) orte_job_t *parent; orte_process_name_t name, *nptr; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "%s plm:base:setup_job", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -357,6 +370,8 @@ void orte_plm_base_setup_job_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* nothing to do here but move along */ ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_ALLOCATE); OBJ_RELEASE(caddy); @@ -372,6 +387,8 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata) int i, rc; char *serial_number; + ORTE_ACQUIRE_OBJECT(caddy); + opal_output_verbose(5, orte_plm_base_framework.framework_output, "%s complete_setup on job %s", 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -465,6 +482,8 @@ static void timer_cb(int fd, short event, void *cbdata) orte_job_t *jdata = (orte_job_t*)cbdata; orte_timer_t *timer=NULL; + ORTE_ACQUIRE_OBJECT(jdata); + /* declare launch failed */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START); @@ -486,6 +505,8 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) orte_timer_t *timer; orte_grpcomm_signature_t *sig; + ORTE_ACQUIRE_OBJECT(caddy); + /* convenience */ jdata = caddy->jdata; @@ -587,6 +608,7 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) timer->tv.tv_sec = orte_startup_timeout; timer->tv.tv_usec = 0; orte_set_attribute(&jdata->attributes, ORTE_JOB_FAILURE_TIMER_EVENT, ORTE_ATTR_LOCAL, timer, OPAL_PTR); + ORTE_POST_OBJECT(timer); opal_event_evtimer_add(timer->ev, &timer->tv); } @@ -605,6 +627,8 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata) opal_buffer_t *answer; int room, *rmptr; + ORTE_ACQUIRE_OBJECT(caddy); + /* convenience */ jdata = caddy->jdata; @@ -720,6 +744,8 @@ void orte_plm_base_registered(int fd, short args, void *cbdata) opal_buffer_t *answer; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* convenience */ jdata = caddy->jdata; @@ -793,7 +819,7 @@ void orte_plm_base_registered(int fd, short args, void *cbdata) return; } - cleanup: + cleanup: /* if this wasn't a debugger job, then need to init_after_spawn for debuggers */ if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_READY_FOR_DEBUGGERS); @@ -2125,11 +2151,13 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) } /* ensure we are not on the list */ - item = opal_list_get_first(&nodes); - node = (orte_node_t*)item; - if (0 == node->index) { - opal_list_remove_item(&nodes, item); - OBJ_RELEASE(item); + if (0 < opal_list_get_size(&nodes)) { + item = opal_list_get_first(&nodes); + node = (orte_node_t*)item; + if (0 == 
node->index) { + opal_list_remove_item(&nodes, item); + OBJ_RELEASE(item); + } } /* if we didn't get anything, then we are the only node in the diff --git a/orte/mca/plm/isolated/plm_isolated.c b/orte/mca/plm/isolated/plm_isolated.c index 4663e9554ed..f237a503b09 100644 --- a/orte/mca/plm/isolated/plm_isolated.c +++ b/orte/mca/plm/isolated/plm_isolated.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 IBM Corporation. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +38,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" +#include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/state/state.h" @@ -114,6 +115,8 @@ static void launch_daemons(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(state); + /* there are no daemons to launch, so just trigger the * daemon-launch-complete state */ diff --git a/orte/mca/plm/lsf/plm_lsf_module.c b/orte/mca/plm/lsf/plm_lsf_module.c index 461feda8684..c3429c1893b 100644 --- a/orte/mca/plm/lsf/plm_lsf_module.c +++ b/orte/mca/plm/lsf/plm_lsf_module.c @@ -66,6 +66,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/rmaps.h" #include "orte/mca/state/state.h" +#include "orte/util/threads.h" #include "orte/mca/plm/plm.h" #include "orte/mca/plm/base/base.h" @@ -171,7 +172,10 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_std_cntr_t nnode; orte_job_t *daemons; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = state->jdata; + orte_job_t *jdata; + + ORTE_ACQUIRE_OBJECT(state); + jdata = state->jdata; /* start by setting up the virtual machine */ daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); diff 
--git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index 9164f5870fa..92ee33e21d2 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -80,6 +80,7 @@ #include "orte/util/name_fns.h" #include "orte/util/nidmap.h" #include "orte/util/proc_info.h" +#include "orte/util/threads.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" @@ -926,6 +927,8 @@ static void process_launch_list(int fd, short args, void *cbdata) pid_t pid; orte_plm_rsh_caddy_t *caddy; + ORTE_ACQUIRE_OBJECT(caddy); + while (num_in_progress < mca_plm_rsh_component.num_concurrent) { item = opal_list_remove_first(&launch_list); if (NULL == item) { @@ -1021,6 +1024,8 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_namelist_t *child; char *rtmod; + ORTE_ACQUIRE_OBJECT(state); + /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ @@ -1285,6 +1290,7 @@ static void launch_daemons(int fd, short args, void *cbdata) OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:rsh: activating launch event", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + ORTE_POST_OBJECT(state); opal_event_active(&launch_event, EV_WRITE, 1); /* now that we've launched the daemons, let the daemon callback diff --git a/orte/mca/plm/slurm/help-plm-slurm.txt b/orte/mca/plm/slurm/help-plm-slurm.txt index 9cc5af5b444..fac0b9b67dd 100644 --- a/orte/mca/plm/slurm/help-plm-slurm.txt +++ b/orte/mca/plm/slurm/help-plm-slurm.txt @@ -49,3 +49,7 @@ are running. Please consult with your system administrator about obtaining such support. +[no-srun] +The SLURM process starter for OpenMPI was unable to locate a +usable "srun" command in its path. Please check your path +and try again. 
diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 4c5e7e11672..2944a86f57f 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -61,6 +61,7 @@ #include "orte/types.h" #include "orte/util/show_help.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_quit.h" @@ -108,7 +109,6 @@ orte_plm_base_module_1_0_0_t orte_plm_slurm_module = { */ static pid_t primary_srun_pid = 0; static bool primary_pid_set = false; -static bool launching_daemons; static void launch_daemons(int fd, short args, void *cbdata); /** @@ -189,6 +189,8 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_job_t *daemons; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(state); + OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:slurm: LAUNCH DAEMONS CALLED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -545,27 +547,18 @@ static void srun_wait_cb(orte_proc_t *proc, void* cbdata){ jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - /* if we are in the launch phase, then any termination is bad */ - if (launching_daemons) { - /* report that one or more daemons failed to launch so we can exit */ + /* abort only if the status returned is non-zero - i.e., if + * the orteds exited with an error + */ + if (0 != proc->exit_code) { + /* an orted must have died unexpectedly - report + * that the daemon has failed so we exit + */ OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, - "%s plm:slurm: daemon failed during launch", + "%s plm:slurm: daemon failed while running", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* notify the error manager */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ABORTED); } else { - /* if this is after launch, then we need to 
abort only if the status - * returned is non-zero - i.e., if the orteds exited with an error - */ - if (0 != proc->exit_code) { - /* an orted must have died unexpectedly after launch - report - * that the daemon has failed so we exit - */ - OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, - "%s plm:slurm: daemon failed while running", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ABORTED); - } /* otherwise, check to see if this is the primary pid */ if (primary_srun_pid == proc->pid) { /* in this case, we just want to fire the proper trigger so @@ -579,6 +572,7 @@ static void srun_wait_cb(orte_proc_t *proc, void* cbdata){ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED); } } + /* done with this dummy */ OBJ_RELEASE(proc); } @@ -593,7 +587,8 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env, orte_proc_t *dummy; if (NULL == exec_argv) { - return ORTE_ERR_NOT_FOUND; + orte_show_help("help-plm-slurm.txt", "no-srun", true); + return ORTE_ERR_SILENT; } srun_pid = fork(); @@ -602,6 +597,13 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env, free(exec_argv); return ORTE_ERR_SYS_LIMITS_CHILDREN; } + /* if this is the primary launch - i.e., not a comm_spawn of a + * child job - then save the pid + */ + if (0 < srun_pid && !primary_pid_set) { + primary_srun_pid = srun_pid; + primary_pid_set = true; + } /* setup a dummy proc object to track the srun */ dummy = OBJ_NEW(orte_proc_t); @@ -692,14 +694,6 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env, sides of the fork... 
*/ setpgid(srun_pid, srun_pid); - /* if this is the primary launch - i.e., not a comm_spawn of a - * child job - then save the pid - */ - if (!primary_pid_set) { - primary_srun_pid = srun_pid; - primary_pid_set = true; - } - free(exec_argv); } diff --git a/orte/mca/plm/tm/plm_tm_module.c b/orte/mca/plm/tm/plm_tm_module.c index 915d78aa0ea..c3ec16d8a6f 100644 --- a/orte/mca/plm/tm/plm_tm_module.c +++ b/orte/mca/plm/tm/plm_tm_module.c @@ -63,6 +63,7 @@ #include "opal/util/basename.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/errmgr/errmgr.h" @@ -185,6 +186,8 @@ static void launch_daemons(int fd, short args, void *cbdata) int32_t launchid, *ldptr; char *prefix_dir = NULL; + ORTE_ACQUIRE_OBJECT(state); + jdata = state->jdata; /* if we are launching debugger daemons, then just go @@ -403,7 +406,7 @@ static void launch_daemons(int fd, short args, void *cbdata) "%s plm:tm:launch: finished spawning orteds", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - cleanup: + cleanup: /* cleanup */ OBJ_RELEASE(state); @@ -421,6 +424,8 @@ static void poll_spawns(int fd, short args, void *cbdata) int local_err; tm_event_t event; + ORTE_ACQUIRE_OBJECT(state); + /* TM poll for all the spawns */ for (i = 0; i < launched; ++i) { rc = tm_poll(TM_NULL_EVENT, &event, 1, &local_err); @@ -435,7 +440,7 @@ static void poll_spawns(int fd, short args, void *cbdata) } failed_launch = false; - cleanup: + cleanup: /* cleanup */ OBJ_RELEASE(state); diff --git a/orte/mca/ras/base/ras_base_allocate.c b/orte/mca/ras/base/ras_base_allocate.c index 0cf4eefcd4e..436c0e1ea8f 100644 --- a/orte/mca/ras/base/ras_base_allocate.c +++ b/orte/mca/ras/base/ras_base_allocate.c @@ -45,6 +45,7 @@ #include "orte/util/proc_info.h" #include "orte/util/comm/comm.h" #include "orte/util/error_strings.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/runtime/orte_quit.h" @@ -115,6 +116,8 
@@ void orte_ras_base_allocate(int fd, short args, void *cbdata) orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; char *hosts=NULL; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index d5e2ac304dc..209a651ae2d 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -36,6 +36,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/mca/rmaps/base/base.h" @@ -45,7 +46,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; + orte_job_t *jdata; orte_node_t *node; int rc, i, ppx = 0; bool did_map, given, pernode = false; @@ -54,6 +55,9 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) orte_vpid_t nprocs; orte_app_context_t *app; + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; + jdata->state = ORTE_JOB_STATE_MAP; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, @@ -374,26 +378,18 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) */ if (ORTE_ERR_TAKE_NEXT_OPTION != rc) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } } - /* reset any node map flags we used so the next job will start clean */ - for (i=0; i < jdata->map->nodes->size; i++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); - } - } if (did_map && ORTE_ERR_RESOURCE_BUSY == rc) { /* the map was done but nothing could be mapped * for launch as all the resources were busy */ 
orte_show_help("help-orte-rmaps-base.txt", "cannot-launch", true); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } /* if we get here without doing the map, or with zero procs in @@ -403,9 +399,8 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) orte_show_help("help-orte-rmaps-base.txt", "failed-map", true, did_map ? "mapped" : "unmapped", jdata->num_procs, jdata->map->num_nodes); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } /* if any node is oversubscribed, then check to see if a binding @@ -419,28 +414,38 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) } if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + /* we didn't add the nodes to the node map as it would cause them to + * be in a different order than on the backend if this is a dynamic + * spawn (which means we may have started somewhere other than at + * the beginning of the allocation) */ + for (i=0; i < orte_node_pool->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { + OBJ_RETAIN(node); + opal_pointer_array_add(jdata->map->nodes, node); + } + } /* compute and save location assignments */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } } else { /* compute and save local ranks */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } /* compute and save bindings */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(caddy); 
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } } @@ -461,6 +466,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) /* set the job state to the next position */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE); + cleanup: + /* reset any node map flags we used so the next job will start clean */ + for (i=0; i < jdata->map->nodes->size; i++) { + if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); + } + } + /* cleanup */ OBJ_RELEASE(caddy); } diff --git a/orte/mca/rmaps/ppr/rmaps_ppr.c b/orte/mca/rmaps/ppr/rmaps_ppr.c index 41523de3b6b..6524337dfd3 100644 --- a/orte/mca/rmaps/ppr/rmaps_ppr.c +++ b/orte/mca/rmaps/ppr/rmaps_ppr.c @@ -275,12 +275,7 @@ static int ppr_mapper(orte_job_t *jdata) } /* add the node to the map, if needed */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(rc); - goto error; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ jdata->map->num_nodes++; } /* if we are mapping solely at the node level, just put @@ -407,7 +402,7 @@ static int ppr_mapper(orte_job_t *jdata) } return ORTE_SUCCESS; - error: + error: while (NULL != (item = opal_list_remove_first(&node_list))) { OBJ_RELEASE(item); } diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index 505e05b35e8..e4799856a25 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -43,7 +43,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, orte_std_cntr_t num_slots, orte_vpid_t num_procs) { - int rc, i, nprocs_mapped; + int i, nprocs_mapped; orte_node_t *node; orte_proc_t *proc; int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0; @@ -94,12 +94,7 @@ int 
orte_rmaps_rr_byslot(orte_job_t *jdata, for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) { /* add this node to the map - do it only once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(rc); - return rc; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) { @@ -149,12 +144,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, /* add this node to the map - do it only once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(rc); - return rc; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } if (add_one) { @@ -221,7 +211,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, int j, nprocs_mapped, nnodes; orte_node_t *node; orte_proc_t *proc; - int num_procs_to_assign, navg, idx; + int num_procs_to_assign, navg; int extra_procs_to_assign=0, nxtra_nodes=0; hwloc_obj_t obj=NULL; float balance; @@ -293,12 +283,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, } /* add this node to the map, but only do so once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(idx); - return idx; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } if (oversubscribed) { @@ -456,7 +441,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, orte_node_t *node; orte_proc_t *proc; int nprocs, start; - int idx; hwloc_obj_t obj=NULL; unsigned int nobjs; bool add_one; @@ -547,12 +531,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, } /* add this node to the map, if 
reqd */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(idx); - return idx; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } nmapped = 0; @@ -638,7 +617,6 @@ static int byobj_span(orte_job_t *jdata, orte_node_t *node; orte_proc_t *proc; int nprocs, nxtra_objs; - int idx; hwloc_obj_t obj=NULL; unsigned int nobjs; @@ -699,12 +677,7 @@ static int byobj_span(orte_job_t *jdata, OPAL_LIST_FOREACH(node, node_list, orte_node_t) { /* add this node to the map, if reqd */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(idx); - return idx; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } /* get the number of objects of this type on this node */ diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index f8cc4b1c0b9..253e3904967 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -202,9 +202,9 @@ OBJ_CLASS_DECLARATION(orte_self_send_xfer_t); do { \ orte_rml_recv_t *msg; \ opal_output_verbose(5, orte_rml_base_framework.framework_output, \ - "%s Message posted at %s:%d", \ + "%s Message posted at %s:%d for tag %d", \ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__); \ + __FILE__, __LINE__, (t)); \ msg = OBJ_NEW(orte_rml_recv_t); \ msg->sender.jobid = (p)->jobid; \ msg->sender.vpid = (p)->vpid; \ diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 803bf2db975..790b90b72a9 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -29,6 +29,7 @@ #include "orte/mca/state/state.h" #include "orte/runtime/orte_wait.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include 
"orte/mca/rml/base/base.h" @@ -85,17 +86,19 @@ static int orte_rml_base_register(mca_base_register_flag_t flags) static void cleanup(int sd, short args, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + orte_lock_t *lk = (orte_lock_t*)cbdata; + ORTE_ACQUIRE_OBJECT(active); OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs); - if (NULL != active) { - *active = false; + if (NULL != lk) { + ORTE_POST_OBJECT(lk); + ORTE_WAKEUP_THREAD(lk); } } static int orte_rml_base_close(void) { - volatile bool active; + orte_lock_t lock; int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits); orte_rml_base_module_t *mod; orte_rml_component_t *comp; @@ -124,12 +127,14 @@ static int orte_rml_base_close(void) * it there */ if (ORTE_PROC_IS_APP) { opal_event_t ev; - active = true; + ORTE_CONSTRUCT_LOCK(&lock); opal_event_set(orte_event_base, &ev, -1, - OPAL_EV_WRITE, cleanup, (void*)&active); + OPAL_EV_WRITE, cleanup, (void*)&lock); opal_event_set_priority(&ev, ORTE_ERROR_PRI); + ORTE_POST_OBJECT(ev); opal_event_active(&ev, OPAL_EV_WRITE, 1); - ORTE_WAIT_FOR_COMPLETION(active); + ORTE_WAIT_THREAD(&lock); + ORTE_DESTRUCT_LOCK(&lock); } else { /* we can call the destruct directly */ cleanup(0, 0, NULL); @@ -146,7 +151,7 @@ static int orte_rml_base_open(mca_base_open_flag_t flags) OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t); - opal_pointer_array_init(&orte_rml_base.conduits,1,INT_MAX,1); + opal_pointer_array_init(&orte_rml_base.conduits,1,INT16_MAX,1); /* Open up all available components */ return mca_base_framework_components_open(&orte_rml_base_framework, flags); @@ -243,12 +248,14 @@ void orte_rml_recv_callback(int status, orte_process_name_t* sender, { orte_rml_recv_cb_t *blob = (orte_rml_recv_cb_t*)cbdata; + ORTE_ACQUIRE_OBJECT(blob); /* transfer the sender */ blob->name.jobid = sender->jobid; blob->name.vpid = 
sender->vpid; /* just copy the payload to the buf */ opal_dss.copy_payload(&blob->data, buffer); /* flag as complete */ + ORTE_POST_OBJECT(blob); blob->active = false; } diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 0772a5d3a6e..69c2ade7ae1 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -42,6 +42,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/base/base.h" @@ -57,6 +58,8 @@ void orte_rml_base_post_recv(int sd, short args, void *cbdata) orte_rml_posted_recv_t *post, *recv; orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL | ORTE_NS_CMP_WILD; + ORTE_ACQUIRE_OBJECT(req); + opal_output_verbose(5, orte_rml_base_framework.framework_output, "%s posting recv", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -159,6 +162,8 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL | ORTE_NS_CMP_WILD; opal_buffer_t buf; + ORTE_ACQUIRE_OBJECT(msg); + OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, "%s message received from %s for tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/rml/base/rml_base_stubs.c b/orte/mca/rml/base/rml_base_stubs.c index 9197e10423c..7224fe653d1 100644 --- a/orte/mca/rml/base/rml_base_stubs.c +++ b/orte/mca/rml/base/rml_base_stubs.c @@ -5,7 +5,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -30,6 +30,7 @@ #include "orte/mca/state/state.h" #include "orte/runtime/orte_wait.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/rml/base/base.h" @@ -82,10 +83,14 @@ orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes) if (NULL != ourmod) { /* we got an answer - store this conduit in our array */ rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod); + if (rc < 0) { + return ORTE_RML_CONDUIT_INVALID; + } return rc; } /* we get here if nobody could support it */ - return ORTE_ERR_NOT_SUPPORTED; + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + return ORTE_RML_CONDUIT_INVALID; } @@ -265,11 +270,7 @@ void orte_rml_API_recv_nb(orte_process_name_t* peer, req->post->persistent = persistent; req->post->cbfunc.iov = cbfunc; req->post->cbdata = cbdata; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); } /** Receive non-blocking buffer message */ @@ -296,11 +297,7 @@ void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer, req->post->persistent = persistent; req->post->cbfunc.buffer = cbfunc; req->post->cbdata = cbdata; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); } /** Cancel posted non-blocking receive */ @@ -312,6 +309,8 @@ void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) "%s rml_recv_cancel for peer %s tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer), tag); + + ORTE_ACQUIRE_OBJECT(orte_event_base_active); if (!orte_event_base_active) { /* no event will be processed any more, so simply return. 
*/ return; @@ -324,11 +323,7 @@ void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) req->post->peer.jobid = peer->jobid; req->post->peer.vpid = peer->vpid; req->post->tag = tag; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); } /** Purge information */ diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c index 7c5ffac6d20..1bd744450d6 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -207,7 +207,8 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) NULL != comp_attrib) { comps = opal_argv_split(comp_attrib, ','); for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "Ethernet")) { + if (0 == strcasecmp(comps[i], "Ethernet") || + 0 == strcasecmp(comps[i], "oob")) { /* we are a candidate */ opal_argv_free(comps); md = make_module(); @@ -254,7 +255,14 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) opal_argv_free(comps); free(comp_attrib); return NULL; + } + /* if they didn't specify a protocol or a transport, then we can be considered */ + if (!orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, NULL, OPAL_STRING) || + !orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, NULL, OPAL_STRING)) { + md = make_module(); + md->routed = orte_routed.assign_module(NULL); + return md; } /* if we get here, we cannot handle it */ diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index 7b56c60bdae..7e5330e944f 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -29,6 +29,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/oob/base/base.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/base/base.h" @@ -39,6 +40,8 @@ static void send_self_exe(int fd, short args, void* data) { orte_self_send_xfer_t *xfer = (orte_self_send_xfer_t*)data; + ORTE_ACQUIRE_OBJECT(xfer); + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, "%s rml_send_to_self callback executing for tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), xfer->tag)); @@ -130,9 +133,7 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, xfer->tag = tag; xfer->cbdata = cbdata; /* setup the event for the send callback */ - opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); - opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - 
opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(xfer, orte_event_base, send_self_exe, ORTE_MSG_PRI); /* copy the message for the recv */ rcv = OBJ_NEW(orte_rml_recv_t); @@ -235,9 +236,7 @@ int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, xfer->tag = tag; xfer->cbdata = cbdata; /* setup the event for the send callback */ - opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); - opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(xfer, orte_event_base, send_self_exe, ORTE_MSG_PRI); /* copy the message for the recv */ rcv = OBJ_NEW(orte_rml_recv_t); diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index 9efe8416417..5cfbb07072c 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -198,6 +198,7 @@ typedef uint32_t orte_rml_tag_t; /* Conduit ID */ typedef uint16_t orte_rml_conduit_t; +#define ORTE_RML_CONDUIT_INVALID 0xffff /* define an object for reporting transports */ typedef struct { diff --git a/orte/mca/rtc/hwloc/rtc_hwloc.c b/orte/mca/rtc/hwloc/rtc_hwloc.c index 8c56efa9793..6a84a7daf76 100644 --- a/orte/mca/rtc/hwloc/rtc_hwloc.c +++ b/orte/mca/rtc/hwloc/rtc_hwloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. 
All rights reserved * $COPYRIGHT$ * @@ -39,11 +39,9 @@ static void set(orte_job_t *jdata, int write_fd); orte_rtc_base_module_t orte_rtc_hwloc_module = { - init, - finalize, - NULL, - set, - NULL + .init = init, + .finalize = finalize, + .set = set }; static int init(void) diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index e01198a7c97..af733b8825f 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -970,7 +970,9 @@ static int setup_fork(orte_job_t *jdata, * any binding policy was applied by us (e.g., so that * MPI_INIT doesn't try to bind itself) */ - opal_setenv("OMPI_MCA_orte_bound_at_launch", "1", true, &app->env); + if (OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(jdata->map->binding)) { + opal_setenv("OMPI_MCA_orte_bound_at_launch", "1", true, &app->env); + } /* tell the ESS to avoid the singleton component - but don't override * anything that may have been provided elsewhere diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index dc4de766730..1fc9ece4fb9 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -23,6 +23,7 @@ #include "opal/class/opal_list.h" #include "opal/mca/event/event.h" #include "opal/mca/pmix/pmix.h" +#include "opal/util/argv.h" #include "orte/orted/pmix/pmix_server_internal.h" #include "orte/runtime/orte_data_server.h" @@ -36,6 +37,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/base/state_private.h" @@ -78,9 +80,7 @@ void orte_state_base_activate_job_state(orte_job_t *jdata, caddy->job_state = state; OBJ_RETAIN(jdata); } - opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy); - opal_event_set_priority(&caddy->ev, s->priority); - opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(caddy, 
orte_event_base, s->cbfunc, s->priority); return; } } @@ -107,14 +107,12 @@ void orte_state_base_activate_job_state(orte_job_t *jdata, caddy->job_state = state; OBJ_RETAIN(jdata); } - OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output, - "%s ACTIVATING JOB %s STATE %s PRI %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid), - orte_job_state_to_str(state), s->priority)); - opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy); - opal_event_set_priority(&caddy->ev, s->priority); - opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); + OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output, + "%s ACTIVATING JOB %s STATE %s PRI %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid), + orte_job_state_to_str(state), s->priority)); + ORTE_THREADSHIFT(caddy, orte_event_base, s->cbfunc, s->priority); } @@ -262,9 +260,7 @@ void orte_state_base_activate_proc_state(orte_process_name_t *proc, caddy = OBJ_NEW(orte_state_caddy_t); caddy->name = *proc; caddy->proc_state = state; - opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy); - opal_event_set_priority(&caddy->ev, s->priority); - opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(caddy, orte_event_base, s->cbfunc, s->priority); return; } } @@ -288,14 +284,12 @@ void orte_state_base_activate_proc_state(orte_process_name_t *proc, caddy = OBJ_NEW(orte_state_caddy_t); caddy->name = *proc; caddy->proc_state = state; - OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output, - "%s ACTIVATING PROC %s STATE %s PRI %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - orte_proc_state_to_str(state), s->priority)); - opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy); - opal_event_set_priority(&caddy->ev, s->priority); - opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); + 
OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output, + "%s ACTIVATING PROC %s STATE %s PRI %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc), + orte_proc_state_to_str(state), s->priority)); + ORTE_THREADSHIFT(caddy, orte_event_base, s->cbfunc, s->priority); } int orte_state_base_add_proc_state(orte_proc_state_t state, @@ -443,7 +437,10 @@ void orte_state_base_local_launch_complete(int fd, short argc, void *cbdata) void orte_state_base_cleanup_job(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; + orte_job_t *jdata; + + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, "%s state:base:cleanup on job %s", @@ -460,9 +457,12 @@ void orte_state_base_cleanup_job(int fd, short argc, void *cbdata) void orte_state_base_report_progress(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; + orte_job_t *jdata; - opal_output(orte_clean_output, "App launch reported: %d (out of %d) daemons - %d (out of %d) procs", + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; + + opal_output(orte_clean_output, "App launch reported: %d (out of %d) daemons - %d (out of %d) procs", (int)jdata->num_daemons_reported, (int)orte_process_info.num_procs, (int)jdata->num_launched, (int)jdata->num_procs); OBJ_RELEASE(caddy); @@ -659,14 +659,18 @@ static void _send_notification(int status, void orte_state_base_track_procs(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_process_name_t *proc = &caddy->name; - orte_proc_state_t state = caddy->proc_state; + orte_process_name_t *proc; + orte_proc_state_t state; orte_job_t *jdata; orte_proc_t *pdata; int i; char *rtmod; orte_process_name_t parent, target, *npptr; + ORTE_ACQUIRE_OBJECT(caddy); + proc = &caddy->name; + state = caddy->proc_state; + 
opal_output_verbose(5, orte_state_base_framework.framework_output, "%s state:base:track_procs called for proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -811,8 +815,7 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) void orte_state_base_check_all_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; - + orte_job_t *jdata; orte_proc_t *proc; int i; orte_std_cntr_t j; @@ -827,6 +830,9 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) void *nptr; char *rtmod; + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; + opal_output_verbose(2, orte_state_base_framework.framework_output, "%s state:base:check_job_complete on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index df74280669c..7eae2838545 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -31,6 +31,7 @@ #include "orte/mca/routed/routed.h" #include "orte/util/nidmap.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_quit.h" #include "orte/runtime/orte_wait.h" @@ -223,6 +224,8 @@ static void init_complete(int sd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* nothing to do here but move along - if it is the * daemon job, then next step is allocate */ if (caddy->jdata->jobid == ORTE_PROC_MY_NAME->jobid) { @@ -249,6 +252,8 @@ static void vm_ready(int fd, short args, void *cbdata) int32_t numbytes; char *nidmap; + ORTE_ACQUIRE_OBJECT(caddy); + /* if this is my job, then we are done */ if (ORTE_PROC_MY_NAME->jobid == caddy->jdata->jobid) { /* send the daemon map to every daemon in this DVM - we @@ -353,8 +358,7 @@ static void vm_ready(int fd, short args, void *cbdata) static void check_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = 
(orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; - + orte_job_t *jdata; orte_proc_t *proc; int i; orte_node_t *node; @@ -362,6 +366,9 @@ static void check_complete(int fd, short args, void *cbdata) orte_std_cntr_t index; char *rtmod; + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; + opal_output_verbose(2, orte_state_base_framework.framework_output, "%s state:dvm:check_job_complete on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -472,7 +479,10 @@ static void check_complete(int fd, short args, void *cbdata) static void cleanup_job(int sd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; + orte_job_t *jdata; + + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; /* remove this object from the job array */ opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, NULL); diff --git a/orte/mca/state/novm/state_novm.c b/orte/mca/state/novm/state_novm.c index 72d7c0bd397..2bc36181a3c 100644 --- a/orte/mca/state/novm/state_novm.c +++ b/orte/mca/state/novm/state_novm.c @@ -26,6 +26,7 @@ #include "orte/mca/rmaps/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_quit.h" #include "orte/mca/state/state.h" @@ -196,12 +197,15 @@ static int finalize(void) static void allocation_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = state->jdata; + orte_job_t *jdata; orte_job_t *daemons; orte_topology_t *t; orte_node_t *node; int i; + ORTE_ACQUIRE_OBJECT(state); + jdata = state->jdata; + jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE; /* get the daemon job object */ @@ -252,7 +256,10 @@ static void allocation_complete(int fd, short args, void *cbdata) static void map_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = state->jdata; + orte_job_t 
*jdata; + + ORTE_ACQUIRE_OBJECT(state); + jdata = state->jdata; jdata->state = ORTE_JOB_STATE_MAP_COMPLETE; /* move to the map stage */ @@ -265,7 +272,10 @@ static void map_complete(int fd, short args, void *cbdata) static void vm_ready(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = state->jdata; + orte_job_t *jdata; + + ORTE_ACQUIRE_OBJECT(state); + jdata = state->jdata; /* now that the daemons are launched, we are ready * to roll diff --git a/orte/mca/state/orted/state_orted.c b/orte/mca/state/orted/state_orted.c index 1c9243b3a42..39b02485889 100644 --- a/orte/mca/state/orted/state_orted.c +++ b/orte/mca/state/orted/state_orted.c @@ -27,6 +27,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/orted/pmix/pmix_server_internal.h" #include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_quit.h" @@ -165,6 +166,8 @@ static void track_jobs(int fd, short argc, void *cbdata) orte_proc_t *child; orte_vpid_t null=ORTE_VPID_INVALID; + ORTE_ACQUIRE_OBJECT(caddy); + if (ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE == caddy->job_state) { OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output, "%s state:orted:track_jobs sending local launch complete for job %s", @@ -251,8 +254,8 @@ static void track_jobs(int fd, short argc, void *cbdata) static void track_procs(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_process_name_t *proc = &caddy->name; - orte_proc_state_t state = caddy->proc_state; + orte_process_name_t *proc; + orte_proc_state_t state; orte_job_t *jdata; orte_proc_t *pdata, *pptr; opal_buffer_t *alert; @@ -264,6 +267,10 @@ static void track_procs(int fd, short argc, void *cbdata) orte_node_t *node; orte_process_name_t target; + ORTE_ACQUIRE_OBJECT(caddy); + proc = &caddy->name; + state = caddy->proc_state; + OPAL_OUTPUT_VERBOSE((5, 
orte_state_base_framework.framework_output, "%s state:orted:track_procs called for proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/state/state.h b/orte/mca/state/state.h index 964d563b221..ee3ec8378cd 100644 --- a/orte/mca/state/state.h +++ b/orte/mca/state/state.h @@ -48,6 +48,7 @@ #include "opal/class/opal_list.h" #include "opal/mca/event/event.h" +#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/plm/plm_types.h" #include "orte/runtime/orte_globals.h" @@ -64,42 +65,40 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_state_base_framework; /* For ease in debugging the state machine, it is STRONGLY recommended * that the functions be accessed using the following macros */ -#define ORTE_FORCED_TERMINATE(x) \ - do { \ - if (!orte_abnormal_term_ordered) { \ - opal_output_verbose(1, orte_state_base_framework.framework_output, \ - "%s FORCE-TERMINATE AT %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__); \ - ORTE_UPDATE_EXIT_STATUS(x); \ - ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FORCED_EXIT); \ - } \ +#define ORTE_FORCED_TERMINATE(x) \ + do { \ + if (!orte_abnormal_term_ordered) { \ + orte_errmgr.abort((x), "%s FORCE-TERMINATE AT %s:%d - error %s(%d)", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + ORTE_ERROR_NAME((x)), (x)); \ + } \ } while(0); -#define ORTE_ACTIVATE_JOB_STATE(j, s) \ - do { \ - orte_job_t *shadow=(j); \ - opal_output_verbose(1, orte_state_base_framework.framework_output, \ - "%s ACTIVATE JOB %s STATE %s AT %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - (NULL == shadow) ? 
"NULL" : \ - ORTE_JOBID_PRINT(shadow->jobid), \ - orte_job_state_to_str((s)), \ - __FILE__, __LINE__); \ - orte_state.activate_job_state(shadow, (s)); \ +#define ORTE_ACTIVATE_JOB_STATE(j, s) \ + do { \ + orte_job_t *shadow=(j); \ + opal_output_verbose(1, orte_state_base_framework.framework_output, \ + "%s ACTIVATE JOB %s STATE %s AT %s:%d", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + (NULL == shadow) ? "NULL" : \ + ORTE_JOBID_PRINT(shadow->jobid), \ + orte_job_state_to_str((s)), \ + __FILE__, __LINE__); \ + orte_state.activate_job_state(shadow, (s)); \ } while(0); -#define ORTE_ACTIVATE_PROC_STATE(p, s) \ - do { \ - orte_process_name_t *shadow=(p); \ - opal_output_verbose(1, orte_state_base_framework.framework_output, \ - "%s ACTIVATE PROC %s STATE %s AT %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - (NULL == shadow) ? "NULL" : \ - ORTE_NAME_PRINT(shadow), \ - orte_proc_state_to_str((s)), \ - __FILE__, __LINE__); \ - orte_state.activate_proc_state(shadow, (s)); \ +#define ORTE_ACTIVATE_PROC_STATE(p, s) \ + do { \ + orte_process_name_t *shadow=(p); \ + opal_output_verbose(1, orte_state_base_framework.framework_output, \ + "%s ACTIVATE PROC %s STATE %s AT %s:%d", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + (NULL == shadow) ? 
"NULL" : \ + ORTE_NAME_PRINT(shadow), \ + orte_proc_state_to_str((s)), \ + __FILE__, __LINE__); \ + orte_state.activate_proc_state(shadow, (s)); \ } while(0); /** diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index c21e0f54f66..bab19c67390 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -77,6 +77,7 @@ #include "orte/mca/rml/base/rml_contact.h" #include "orte/util/pre_condition_transports.h" #include "orte/util/compress.h" +#include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ess/ess.h" @@ -919,6 +920,7 @@ int orte_daemon(int argc, char *argv[]) while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); /* ensure all local procs are dead */ orte_odls.kill_local_procs(NULL); diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 32e7410609e..d5aaa2468d4 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -68,6 +68,7 @@ #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "pmix_server.h" @@ -350,6 +351,8 @@ static void _mdxresp(int sd, short args, void *cbdata) int rc; opal_buffer_t *reply; + ORTE_ACQUIRE_OBJECT(req); + /* check us out of the hotel */ opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); @@ -399,6 +402,8 @@ static void modex_resp(int status, pmix_server_req_t *req = (pmix_server_req_t*)cbdata; opal_buffer_t xfer; + ORTE_ACQUIRE_OBJECT(req); + req->status = status; /* we need to preserve the data as the caller * will free it upon our return */ @@ -413,6 +418,7 @@ static void modex_resp(int status, opal_event_set(orte_event_base, &(req->ev), -1, OPAL_EV_WRITE, _mdxresp, req); opal_event_set_priority(&(req->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(req); opal_event_active(&(req->ev), OPAL_EV_WRITE, 1); } static void 
pmix_server_dmdx_recv(int status, orte_process_name_t* sender, diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 389c65a5fc8..8eacbbfe401 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -44,6 +44,7 @@ #include "orte/mca/rmaps/base/base.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" @@ -103,6 +104,8 @@ static void spawn(int sd, short args, void *cbdata) opal_buffer_t *buf; orte_plm_cmd_flag_t command; + ORTE_ACQUIRE_OBJECT(req); + /* add this request to our tracker hotel */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); @@ -276,11 +279,20 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, jdata->num_apps++; if (NULL != papp->cmd) { app->app = strdup(papp->cmd); + } else if (NULL == papp->argv || + NULL == papp->argv[0]) { + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + OBJ_RELEASE(jdata); + return ORTE_ERR_BAD_PARAM; } else { app->app = strdup(papp->argv[0]); } - app->argv = opal_argv_copy(papp->argv); - app->env = opal_argv_copy(papp->env); + if (NULL != papp->argv) { + app->argv = opal_argv_copy(papp->argv); + } + if (NULL != papp->env) { + app->env = opal_argv_copy(papp->env); + } if (NULL != papp->cwd) { app->cwd = strdup(papp->cwd); } @@ -351,6 +363,8 @@ static void _cnlk(int status, opal_list_t *data, void *cbdata) orte_job_t *jdata; opal_buffer_t buf; + ORTE_ACQUIRE_OBJECT(cd); + /* if we failed to get the required data, then just inform * the embedded server that the connect cannot succeed */ if (ORTE_SUCCESS != status || NULL == data) { @@ -402,6 +416,8 @@ static void _cnct(int sd, short args, void *cbdata) orte_job_t *jdata; int rc = ORTE_SUCCESS; + ORTE_ACQUIRE_OBJECT(cd); + /* at some point, we need 
to add bookeeping to track which * procs are "connected" so we know who to notify upon * termination or failure. For now, we have to ensure @@ -477,6 +493,8 @@ static void mdxcbfunc(int status, { orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(cd); + /* ack the call */ if (NULL != cd->cbfunc) { cd->cbfunc(status, cd->cbdata); diff --git a/orte/orted/pmix/pmix_server_fence.c b/orte/orted/pmix/pmix_server_fence.c index 750ad09b398..59d5cd1902d 100644 --- a/orte/orted/pmix/pmix_server_fence.c +++ b/orte/orted/pmix/pmix_server_fence.c @@ -38,6 +38,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/rml/rml.h" @@ -59,6 +60,8 @@ static void pmix_server_release(int status, opal_buffer_t *buf, void *cbdata) int32_t ndata = 0; int rc = OPAL_SUCCESS; + ORTE_ACQUIRE_OBJECT(cd); + /* unload the buffer */ if (NULL != buf) { rc = opal_dss.unload(buf, (void**)&data, &ndata); @@ -135,6 +138,8 @@ static void dmodex_req(int sd, short args, void *cbdata) uint8_t *data=NULL; int32_t sz=0; + ORTE_ACQUIRE_OBJECT(rq); + /* a race condition exists here because of the thread-shift - it is * possible that data for the specified proc arrived while we were * waiting to be serviced. 
In that case, the tracker that would have diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 9f2ae9eb76c..7cff1dcbf30 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -43,6 +43,7 @@ #include "orte/mca/schizo/schizo.h" #include "orte/mca/state/state.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" #include "orte/mca/plm/plm.h" @@ -57,6 +58,8 @@ static void _client_conn(int sd, short args, void *cbdata) orte_proc_t *p, *ptr; int i; + ORTE_ACQUIRE_OBJECT(cd); + if (NULL != cd->server_object) { /* we were passed back the orte_proc_t */ p = (orte_proc_t*)cd->server_object; @@ -106,6 +109,8 @@ static void _client_finalized(int sd, short args, void *cbdata) orte_proc_t *p, *ptr; int i; + ORTE_ACQUIRE_OBJECT(cd); + if (NULL != cd->server_object) { /* we were passed back the orte_proc_t */ p = (orte_proc_t*)cd->server_object; @@ -164,6 +169,8 @@ static void _client_abort(int sd, short args, void *cbdata) orte_proc_t *p, *ptr; int i; + ORTE_ACQUIRE_OBJECT(cd); + if (NULL != cd->server_object) { p = (orte_proc_t*)cd->server_object; } else { @@ -214,6 +221,8 @@ static void _register_events(int sd, short args, void *cbdata) orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; opal_value_t *info; + ORTE_ACQUIRE_OBJECT(cd); + /* the OPAL layer "owns" the list, but let's deconstruct it * here so we don't have to duplicate the data */ while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) { @@ -246,6 +255,8 @@ static void _deregister_events(int sd, short args, void *cbdata) orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; opal_value_t *info, *iptr, *nptr; + ORTE_ACQUIRE_OBJECT(cd); + /* the OPAL layer "owns" the list, but let's deconstruct it * here for consistency */ while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) { @@ -281,6 +292,8 
@@ static void _notify_release(int status, void *cbdata) { orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(cd); + if (NULL != cd->info) { OPAL_LIST_RELEASE(cd->info); } @@ -465,6 +478,8 @@ static void _query(int sd, short args, void *cbdata) opal_pstats_t pstat; float pss; + ORTE_ACQUIRE_OBJECT(cd); + opal_output_verbose(2, orte_pmix_server_globals.output, "%s processing query", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -654,6 +669,7 @@ int pmix_server_query_fn(opal_process_name_t *requestor, opal_event_set(orte_event_base, &(cd->ev), -1, OPAL_EV_WRITE, _query, cd); opal_event_set_priority(&(cd->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(cd); opal_event_active(&(cd->ev), OPAL_EV_WRITE, 1); return ORTE_SUCCESS; @@ -669,6 +685,8 @@ static void _toolconn(int sd, short args, void *cbdata) orte_process_name_t tool; int rc; + ORTE_ACQUIRE_OBJECT(cd); + opal_output_verbose(2, orte_pmix_server_globals.output, "%s TOOL CONNECTION PROCESSING", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -768,6 +786,7 @@ void pmix_tool_connected_fn(opal_list_t *info, opal_event_set(orte_event_base, &(cd->ev), -1, OPAL_EV_WRITE, _toolconn, cd); opal_event_set_priority(&(cd->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(cd); opal_event_active(&(cd->ev), OPAL_EV_WRITE, 1); } diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 7046cc0a17f..d923c6a89f4 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -43,9 +43,11 @@ #include "opal/mca/event/event.h" #include "opal/mca/pmix/pmix.h" #include "opal/util/proc.h" +#include "opal/sys/atomic.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/runtime/orte_globals.h" +#include "orte/util/threads.h" BEGIN_C_DECLS @@ -119,6 +121,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); opal_event_set(orte_event_base, &(_req->ev), \ -1, OPAL_EV_WRITE, (cf), _req); \ opal_event_set_priority(&(_req->ev), ORTE_MSG_PRI); \ + 
ORTE_POST_OBJECT(_req); \ opal_event_active(&(_req->ev), OPAL_EV_WRITE, 1); \ } while(0); @@ -133,6 +136,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); opal_event_set(orte_event_base, &(_req->ev), \ -1, OPAL_EV_WRITE, (cf), _req); \ opal_event_set_priority(&(_req->ev), ORTE_MSG_PRI); \ + ORTE_POST_OBJECT(_req); \ opal_event_active(&(_req->ev), OPAL_EV_WRITE, 1); \ } while(0); @@ -147,6 +151,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); opal_event_set(orte_event_base, &(_cd->ev), -1, \ OPAL_EV_WRITE, (fn), _cd); \ opal_event_set_priority(&(_cd->ev), ORTE_MSG_PRI); \ + ORTE_POST_OBJECT(_cd); \ opal_event_active(&(_cd->ev), OPAL_EV_WRITE, 1); \ } while(0); @@ -165,6 +170,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); opal_event_set(orte_event_base, &(_cd->ev), -1, \ OPAL_EV_WRITE, (fn), _cd); \ opal_event_set_priority(&(_cd->ev), ORTE_MSG_PRI); \ + ORTE_POST_OBJECT(_cd); \ opal_event_active(&(_cd->ev), OPAL_EV_WRITE, 1); \ } while(0); diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index 4f44799979a..42cc8f70ceb 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -39,6 +39,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" @@ -150,6 +151,8 @@ static void execute(int sd, short args, void *cbdata) opal_buffer_t *xfer; orte_process_name_t *target; + ORTE_ACQUIRE_OBJECT(req); + if (!orte_pmix_server_globals.pubsub_init) { /* we need to initialize our connection to the server */ if (ORTE_SUCCESS != (rc = init_server())) { @@ -298,6 +301,7 @@ int pmix_server_publish_fn(opal_process_name_t *proc, opal_event_set(orte_event_base, &(req->ev), -1, OPAL_EV_WRITE, execute, req); opal_event_set_priority(&(req->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(req); opal_event_active(&(req->ev), OPAL_EV_WRITE, 1); 
return OPAL_SUCCESS; @@ -395,6 +399,7 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys, opal_event_set(orte_event_base, &(req->ev), -1, OPAL_EV_WRITE, execute, req); opal_event_set_priority(&(req->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(req); opal_event_active(&(req->ev), OPAL_EV_WRITE, 1); return OPAL_SUCCESS; @@ -483,6 +488,7 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys, opal_event_set(orte_event_base, &(req->ev), -1, OPAL_EV_WRITE, execute, req); opal_event_set_priority(&(req->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(req); opal_event_active(&(req->ev), OPAL_EV_WRITE, 1); return OPAL_SUCCESS; diff --git a/orte/runtime/data_type_support/orte_dt_packing_fns.c b/orte/runtime/data_type_support/orte_dt_packing_fns.c index 04e434645f6..b0550f18464 100644 --- a/orte/runtime/data_type_support/orte_dt_packing_fns.c +++ b/orte/runtime/data_type_support/orte_dt_packing_fns.c @@ -64,7 +64,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src, int32_t num_vals, opal_data_type_t type) { int rc; - int32_t i, j, count; + int32_t i, j, count, bookmark; orte_job_t **jobs; orte_app_context_t *app; orte_proc_t *proc; @@ -241,7 +241,16 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src, } } - /* do not pack the bookmark or oversubscribe_override flags */ + /* pack the bookmark */ + if (NULL == jobs[i]->bookmark) { + bookmark = -1; + } else { + bookmark = jobs[i]->bookmark->index; + } + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &bookmark, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + return rc; + } /* pack the job state */ if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, diff --git a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c index 6e49c160520..954b741c318 100644 --- a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c +++ b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c @@ -61,7 +61,7 @@ int orte_dt_unpack_job(opal_buffer_t 
*buffer, void *dest, int32_t *num_vals, opal_data_type_t type) { int rc; - int32_t i, k, n, count; + int32_t i, k, n, count, bookmark; orte_job_t **jobs; orte_app_idx_t j; orte_attribute_t *kv; @@ -237,7 +237,17 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest, } } - /* no bookmark of oversubscribe_override flags to unpack */ + /* unpack the bookmark */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, + &bookmark, &n, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (0 <= bookmark) { + /* retrieve it */ + jobs[i]->bookmark = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, bookmark); + } /* unpack the job state */ n = 1; diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index a4e4bee5969..5f415a71965 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -152,7 +152,6 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) opal_snprintf_jobid = orte_util_snprintf_jobid; opal_convert_string_to_jobid = _convert_string_to_jobid; - /* initialize the opal layer */ if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) { error = "opal_init"; diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 3e642ac5bb6..35f82413224 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -768,7 +768,7 @@ int orte_register_params(void) MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &orte_coll_transport); - orte_mgmt_transport = "oob,ethernet"; + orte_mgmt_transport = "oob"; (void) mca_base_var_register("orte", "orte", "mgmt", "transports", "Comma-separated list of transports to use for ORTE management messages", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, @@ -777,7 +777,8 @@ int orte_register_params(void) /* Amount of time to wait for a stack trace to return from the daemons */ orte_stack_trace_wait_timeout = 30; (void) mca_base_var_register ("orte", "orte", NULL, "timeout_for_stack_trace", - 
"Seconds to wait for stack traces to return before terminating the job (<= 0 wait forever)", + "Seconds to wait for stack traces to return before terminating " + "the job (<= 0 wait forever)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &orte_stack_trace_wait_timeout); @@ -796,6 +797,5 @@ int orte_register_params(void) OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL, &orte_data_server_uri); - return ORTE_SUCCESS; } diff --git a/orte/runtime/orte_quit.c b/orte/runtime/orte_quit.c index d665556d13e..1a952ffb8c0 100644 --- a/orte/runtime/orte_quit.c +++ b/orte/runtime/orte_quit.c @@ -54,6 +54,7 @@ #include "orte/util/session_dir.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" @@ -75,6 +76,8 @@ void orte_quit(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* cleanup */ if (NULL != caddy) { OBJ_RELEASE(caddy); @@ -135,6 +138,7 @@ void orte_quit(int fd, short args, void *cbdata) * so we will exit */ orte_event_base_active = false; + ORTE_POST_OBJECT(orte_event_base_active); /* break out of the event loop */ opal_event_base_loopbreak(orte_event_base); } diff --git a/orte/runtime/orte_wait.c b/orte/runtime/orte_wait.c index c22681a7363..2e10e8770df 100644 --- a/orte/runtime/orte_wait.c +++ b/orte/runtime/orte_wait.c @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2008 Institut National de Recherche en Informatique * et Automatique. All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -60,6 +60,7 @@ #include "orte/constants.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -188,6 +189,8 @@ static void cancel_callback(int fd, short args, void *cbdata) orte_wait_tracker_t *trk = (orte_wait_tracker_t*)cbdata; orte_wait_tracker_t *t2; + ORTE_ACQUIRE_OBJECT(trk); + OPAL_LIST_FOREACH(t2, &pending_cbs, orte_wait_tracker_t) { if (t2->child == trk->child) { opal_list_remove_item(&pending_cbs, &t2->super); @@ -214,9 +217,7 @@ void orte_wait_cb_cancel(orte_proc_t *child) trk = OBJ_NEW(orte_wait_tracker_t); OBJ_RETAIN(child); // protect against race conditions trk->child = child; - opal_event_set(orte_event_base, &trk->ev, -1, OPAL_EV_WRITE, cancel_callback, trk); - opal_event_set_priority(&trk->ev, ORTE_SYS_PRI); - opal_event_active(&trk->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(trk, orte_event_base, cancel_callback, ORTE_SYS_PRI); } @@ -228,6 +229,8 @@ static void wait_signal_callback(int fd, short event, void *arg) pid_t pid; orte_wait_tracker_t *t2; + ORTE_ACQUIRE_OBJECT(signal); + if (SIGCHLD != OPAL_EVENT_SIGNAL(signal)) { return; } diff --git a/orte/runtime/orte_wait.h b/orte/runtime/orte_wait.h index 5290b36d492..b8283f15ba1 100644 --- a/orte/runtime/orte_wait.h +++ b/orte/runtime/orte_wait.h @@ -13,7 +13,7 @@ * et Automatique. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,6 +48,7 @@ #include "orte/types.h" #include "orte/mca/rml/rml_types.h" #include "orte/runtime/orte_globals.h" +#include "orte/util/threads.h" BEGIN_C_DECLS @@ -95,6 +96,7 @@ ORTE_DECLSPEC void orte_wait_cb_cancel(orte_proc_t *proc); struct timespec tp = {0, 100000}; \ nanosleep(&tp, NULL); \ } \ + ORTE_ACQUIRE_OBJECT(flg); \ }while(0); /** @@ -135,6 +137,7 @@ ORTE_DECLSPEC void orte_wait_cb_cancel(orte_proc_t *proc); "defining timeout: %ld sec %ld usec at %s:%d", \ (long)tmp->tv.tv_sec, (long)tmp->tv.tv_usec, \ __FILE__, __LINE__)); \ + ORTE_POST_OBJECT(tmp); \ opal_event_evtimer_add(tmp->ev, &tmp->tv); \ }while(0); \ @@ -161,6 +164,7 @@ ORTE_DECLSPEC void orte_wait_cb_cancel(orte_proc_t *proc); "defining timer event: %ld sec %ld usec at %s:%d", \ (long)tm->tv.tv_sec, (long)tm->tv.tv_usec, \ __FILE__, __LINE__)); \ + ORTE_POST_OBJECT(tm); \ opal_event_evtimer_add(tm->ev, &tm->tv); \ }while(0); \ diff --git a/orte/test/mpi/Makefile.include b/orte/test/mpi/Makefile.include index 8f033e185a9..45160a8f31c 100644 --- a/orte/test/mpi/Makefile.include +++ b/orte/test/mpi/Makefile.include @@ -12,6 +12,7 @@ # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -55,5 +56,5 @@ EXTRA_DIST += \ test/mpi/singleton_client_server.c \ test/mpi/spawn_tree.c \ test/mpi/info_spawn.c \ - test/mpi/pmix.c - + test/mpi/pmix.c \ + test/mpi/xlib.c diff --git a/orte/test/mpi/nonzero.c b/orte/test/mpi/nonzero.c new file mode 100644 index 00000000000..4b7ff266bfe --- /dev/null +++ b/orte/test/mpi/nonzero.c @@ -0,0 +1,23 @@ +#include +#include +#include + +int main(int argc, char **argv) +{ + int rank; + + if(argc < 2) { + return 0; + } + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD,&rank); + + int i = atoi(argv[1]); + + MPI_Finalize(); + + if (i != rank) { + sleep(1); + } + return i; +} diff --git a/orte/test/mpi/simple_spawn.c b/orte/test/mpi/simple_spawn.c index 81ec1a11ff8..4809d0d7645 100644 --- a/orte/test/mpi/simple_spawn.c +++ b/orte/test/mpi/simple_spawn.c @@ -1,8 +1,8 @@ -#include "orte_config.h" - #include +#include #include #include +#include #include @@ -11,11 +11,17 @@ int main(int argc, char* argv[]) int msg, rc; MPI_Comm parent, child; int rank, size; - char hostname[OPAL_MAXHOSTNAMELEN]; + char hostname[MAXHOSTNAMELEN]; pid_t pid; + char *env_rank,*env_nspace; + env_rank = getenv("PMIX_RANK"); + env_nspace = getenv("PMIX_NAMESPACE"); pid = getpid(); - printf("[pid %ld] starting up!\n", (long)pid); + gethostname(hostname, sizeof(hostname)); + + printf("[%s:%s pid %ld] starting up on node %s!\n", env_nspace, env_rank, (long)pid, hostname); + MPI_Init(NULL, NULL); MPI_Comm_rank(MPI_COMM_WORLD, &rank); printf("%d completed MPI_Init\n", rank); @@ -43,7 +49,6 @@ int main(int argc, char* argv[]) else { MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); - gethostname(hostname, sizeof(hostname)); pid = getpid(); printf("Hello from the child %d of %d on host %s pid %ld\n", rank, 3, hostname, (long)pid); if (0 == rank) { diff --git a/orte/test/mpi/xlib.c b/orte/test/mpi/xlib.c new file mode 100644 index 00000000000..e75a874fab0 --- /dev/null +++ 
b/orte/test/mpi/xlib.c @@ -0,0 +1,217 @@ +#include +#include +#include +#include + +#define SIZE 20 +#define POS 10 +#define INITIAL_VALUE 10 + +static pmix_proc_t myproc; + +/* this is the event notification function we pass down below + * when registering for general events - i.e., the default + * handler. We don't technically need to register one, but it + * is usually good practice to catch any events that occur */ +static void notification_fn(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + /* this example only logs default events; it takes no other action on them */ + fprintf(stderr, "Default event handler called with status %s\n", PMIx_Error_string(status)); + + if (NULL != cbfunc) { + cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); + } +} + +/* this is an event notification function that we explicitly request + * be called when the PMIX_MODEL_DECLARED notification is issued. + * We could catch it in the general event notification function and test + * the status to see if the status matched, but it often is simpler + * to declare a use-specific notification callback point.
In this case, + * we are asking to know whenever a programming model library is + * instantiated */ +static void model_callback(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + size_t n; + + fprintf(stderr, "Model event handler called with status %d(%s)\n", status, PMIx_Error_string(status)); + + /* check to see what model declared itself */ + for (n=0; n < ninfo; n++) { + if (PMIX_STRING == info[n].value.type) { + fprintf(stderr, "\t%s:\t%s\n", info[n].key, info[n].value.data.string); + } + } + + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } +} + +/* event handler registration is done asynchronously because it + * may involve the PMIx server registering with the host RM for + * external events. So we provide a callback function that returns + * the status of the request (success or an error), plus a numerical index + * to the registered event. 
The index is used later on to deregister + * an event handler - if we don't explicitly deregister it, then the + * PMIx server will do so when it see us exit */ +static void model_registration_callback(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + if (PMIX_SUCCESS != status) { + fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", + myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref); + } + *active = status; +} + +int main(int argc, char *argv[]) +{ + int i, rank, size, next, prev, tag = 201; + int array_size = SIZE; + int pos = POS; + int *send_array; + int *recv_array; + pmix_info_t *info; + size_t ninfo; + pmix_status_t code = PMIX_MODEL_DECLARED; + pmix_status_t rc; + volatile int active; + + + if (1 < argc) { + fprintf(stderr, "Declaring ourselves\n"); + /* declare ourselves as a non-MPI library prior to MPI_Init */ + ninfo = 4; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_PROGRAMMING_MODEL, "EXAMPLE", PMIX_STRING); + PMIX_INFO_LOAD(&info[1], PMIX_MODEL_LIBRARY_NAME, "FOOL", PMIX_STRING); + PMIX_INFO_LOAD(&info[2], PMIX_MODEL_LIBRARY_VERSION, "1.2.3", PMIX_STRING); + PMIX_INFO_LOAD(&info[3], PMIX_THREADING_MODEL, "NONE", PMIX_STRING); + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, info, ninfo))) { + fprintf(stderr, "PMIx Init failed: %s\n", PMIx_Error_string(rc)); + exit(1); + } + PMIX_INFO_FREE(info, ninfo); + + /* register a handler specifically for when models declare */ + active = -1; + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "APP-MODEL", PMIX_STRING); + PMIx_Register_event_handler(&code, 1, info, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (-1 == active) { + usleep(10); + } + PMIX_INFO_FREE(info, ninfo); + if (0 != active) { + exit(active); + } + } + + /* initialize the MPI library - it will declare itself */ + MPI_Init(&argc, 
&argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (argc <= 1) { + fprintf(stderr, "Registering handler\n"); + /* register a handler specifically for when models declare */ + active = -1; + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "APP-MODEL", PMIX_STRING); + + PMIx_Register_event_handler(&code, 1, info, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (-1 == active) { + usleep(10); + } + PMIX_INFO_FREE(info, ninfo); + if (0 != active) { + exit(active); + } + } + + fprintf(stderr, "Rank %d has cleared MPI_Init\n", rank); + + next = (rank + 1) % size; + prev = (rank + size - 1) % size; + send_array = malloc(sizeof(int) * SIZE); + recv_array = malloc(sizeof(int) * SIZE); + + for (i = 0; i < array_size; ++i) { + send_array[i] = 17; + recv_array[i] = -1; + } + + if (0 == rank) { + send_array[pos] = INITIAL_VALUE; + MPI_Send(send_array, array_size, MPI_INT, next, tag, + MPI_COMM_WORLD); + } + + /* if we didn't already do it, declare another model now */ + if (argc <= 1) { + fprintf(stderr, "Declaring ourselves\n"); + /* declare ourselves as a non-MPI library after MPI_Init */ + ninfo = 4; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_PROGRAMMING_MODEL, "EXAMPLE", PMIX_STRING); + PMIX_INFO_LOAD(&info[1], PMIX_MODEL_LIBRARY_NAME, "FOOL", PMIX_STRING); + PMIX_INFO_LOAD(&info[2], PMIX_MODEL_LIBRARY_VERSION, "1.2.3", PMIX_STRING); + PMIX_INFO_LOAD(&info[3], PMIX_THREADING_MODEL, "NONE", PMIX_STRING); + + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, info, ninfo))) { + fprintf(stderr, "PMIx Init failed: %s\n", PMIx_Error_string(rc)); + exit(1); + } + PMIX_INFO_FREE(info, ninfo); + } + + while (1) { + recv_array[pos] = -1; + MPI_Recv(recv_array, array_size, MPI_INT, prev, tag, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + send_array[pos] = recv_array[pos]; + if (rank == 0) { + --send_array[pos]; + } + MPI_Send(send_array, array_size, MPI_INT, 
next, tag, MPI_COMM_WORLD); + if (0 == send_array[pos]) { + break; + } + } + + if (rank == 0) { + MPI_Recv(recv_array, array_size, MPI_INT, prev, tag, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + fprintf(stderr, "Rank %d has completed ring\n", rank); + MPI_Barrier(MPI_COMM_WORLD); + fprintf(stderr, "Rank %d has completed MPI_Barrier\n", rank); + + /* decrement the PMIx refcount */ + PMIx_Finalize(NULL, 0); + MPI_Finalize(); + return 0; +} diff --git a/orte/test/system/threads.c b/orte/test/system/threads.c new file mode 100644 index 00000000000..3b5c813037d --- /dev/null +++ b/orte/test/system/threads.c @@ -0,0 +1,335 @@ +/* + * Test program for memory consistency in a thread shifting design + * + * + * Run: + * ./threads ITERATIONS [MODE] + * ./threads 9000000 3 + * + * Example: + * ./threads 9000000 0 --> Will fail, no memory barriers + * ./threads 9000000 1 --> Will fail, no WMB + * ./threads 9000000 2 --> Will fail, no RMB + * ./threads 9000000 3 --> Success + * ./threads 9000000 4 --> Success + * ./threads 9000000 5 --> N/A + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "opal/sys/atomic.h" + + +// Max value for an int16_t +#define MAX_VAL 32767 + +typedef struct { + int type; + union { + bool flag; + int integer; + int8_t int8; + int16_t int16; + int32_t int32; + int64_t int64; + //char padding[1]; + } data; +} my_value_t; + +// Structure to handoff work to the peer thread +typedef struct { + volatile bool working; + void *ptr; // Note that adding a volatile here has no effect +} thread_handoff_t; + +// Shared object to handoff work +thread_handoff_t handoff; + +// Indicates if the test has finished +bool time_to_stop = false; + +// Progress reporting +#define PERC_INC 10.0 +double perc_report_after = PERC_INC; +double perc_current = 0.0; + +// Memory barrier modes +#define MB_MODE_NONE 0x0 +#define MB_MODE_RMB 0x1 +#define MB_MODE_WMB 0x2 +#define MB_MODE_MB 0x4 +#define MB_MODE_XMB 0x8 +#define MB_MODE_ALL 
(MB_MODE_RMB | MB_MODE_WMB) +int mb_mode = MB_MODE_ALL; + + +// Shared hwloc topology (so we only have to read it once) +static hwloc_topology_t topo; +// Which object we are binding to +// 4 - sockets with 5 cores each +// 20 - cores with 8 PUs each +//#define OBJ_TYPE HWLOC_OBJ_SOCKET +#define OBJ_TYPE HWLOC_OBJ_CORE + +/* + * Some basic timing support + */ +double acc_time, start_time, stop_time, delta; +static double get_ts_gettimeofday(void) { + double ret; + struct timeval tv; + gettimeofday(&tv, NULL); + ret = tv.tv_sec; + ret += (double)tv.tv_usec / 1000000.0; + return ret; +} + +/* + * Bind either the main or support thread far away from each other + */ +void bind_me_to(bool main_thread); + +/* + * Support thread to do the memory allocation and xfer + */ +void *value_xfer_thread(void *arg); + +/* + * Main thread + */ +int main(int argc, char **argv) { + pthread_t support_thread; + int rc, i, max_iters = 10, cur_iter; + my_value_t *val = NULL; + int mode; + + /* + * Parse command line arguments + */ + if( argc > 1 ) { + max_iters = atoi(argv[1]); + } + if( argc > 2 ) { + mode = atoi(argv[2]); + if( 0 > mode || mode > 5 ) { + printf("Error: Invalid mode %d\n" + "\tNone = 0\n" + "\tRMB = 1\n" + "\tWMB = 2\n" + "\tBoth = 3\n" + "\tMB Only = 4\n", + "\tXMB Only = 5\n", + mode); + exit(-1); + } + } + else { + mode = 3; + } + switch(mode) { + case 0: + mb_mode = MB_MODE_NONE; + break; + case 1: + mb_mode = MB_MODE_RMB; + break; + case 2: + mb_mode = MB_MODE_WMB; + break; + case 3: + mb_mode = MB_MODE_ALL; + break; + case 4: + mb_mode = MB_MODE_MB; + break; + case 5: + mb_mode = MB_MODE_XMB; + break; + } + + // Load hwloc topology + hwloc_topology_init(&topo); + hwloc_topology_load(topo); + + // Display banner + printf("---------------------------\n"); + printf("Iterations: %10d\n", max_iters); + printf("Mode R MB : %10s\n", (mb_mode & MB_MODE_RMB ? "Enabled" : "Disabled") ); + printf("Mode W MB : %10s\n", (mb_mode & MB_MODE_WMB ? 
"Enabled" : "Disabled") ); + printf("Mode - MB : %10s\n", (mb_mode & MB_MODE_MB ? "Enabled" : "Disabled") ); + printf("Mode X MB : %10s\n", (mb_mode & MB_MODE_XMB ? "Enabled" : "Disabled") ); + printf("---------------------------\n"); + + bind_me_to(true); + handoff.working = false; + + /* + * Launch supporting thread + */ + rc = pthread_create(&support_thread, NULL, value_xfer_thread, NULL); + if( 0 != rc ) { + printf("Error: Failed to create a thread! %d\n", rc); + exit(-1); + } + + /* + * Main work loop + */ + acc_time = 0.0; + for(cur_iter = 0; cur_iter < max_iters; ++cur_iter) { + perc_current = (cur_iter / ((double)max_iters)) * 100.0; + if( perc_current > perc_report_after ) { + delta = (acc_time / cur_iter) * 1000000; + printf("%6.1f %% complete : Iteration %10d / %10d : %6.1f usec / iter\n", + perc_current, cur_iter+1, max_iters, delta); + perc_report_after += PERC_INC; + } + + start_time = get_ts_gettimeofday(); + // Initialize values + val = NULL; + handoff.ptr = &val; + if( mb_mode & MB_MODE_RMB ) { + opal_atomic_rmb(); + } + if( mb_mode & MB_MODE_MB ) { + opal_atomic_mb(); + } + handoff.working = true; + + // Wait for work to finish + while( handoff.working ) { + usleep(1); + } + if( mb_mode & MB_MODE_WMB ) { + opal_atomic_wmb(); + } + if( mb_mode & MB_MODE_MB ) { + opal_atomic_mb(); + } + + // Inspect values for correctness + if( NULL == val ) { + printf("[%10d / %10d] Error: val = %s\n", cur_iter+1, max_iters, + (NULL == val ? "NULL" : "Valid") ); + exit(-1); + } + else if( 999 != val->type ) { + printf("[%10d / %10d] Error: val->type = %d\n", cur_iter+1, max_iters, val->type); + exit(-1); + } + else if( (cur_iter+1)%MAX_VAL != val->data.int16 ) { + printf("[%10d / %10d] Error: val->data.int16 = %d\n", cur_iter+1, max_iters, val->data.int16); + exit(-1); + } + + stop_time = get_ts_gettimeofday(); + acc_time += (stop_time - start_time); + + // Yes, this is a memory leak! 
+ // I need to make sure that the supporting thread is not reusing a + // previous storage location when it calls malloc. This is to emulate + // a program that calls malloc after the value was acquired, possibly + // reusing this memory location. + //free(val); + val = NULL; + } + delta = (acc_time / max_iters) * 1000000; + + /* + * All done - Cleanup + */ + time_to_stop = true; + + rc = pthread_join(support_thread, NULL); + if( 0 != rc ) { + printf("Error: Failed to join a thread! %d\n", rc); + exit(-1); + } + + hwloc_topology_destroy(topo); + + printf("Success - %6.1f usec / iter\n", delta); + + return 0; +} + +void *value_xfer_thread(void *arg) { + my_value_t **val = NULL; + static int var = 0; + + // Bind this thread away from the main thread + bind_me_to(false); + + while( !time_to_stop ) { + if( handoff.working ) { + // Make sure I have the right pointer + if( mb_mode & MB_MODE_WMB ) { + opal_atomic_wmb(); + } + if( mb_mode & MB_MODE_MB ) { + opal_atomic_mb(); + } + + // Allocate and set the value + val = (my_value_t**)handoff.ptr; + (*val) = malloc(sizeof(my_value_t)); + (*val)->type = 999; + (*val)->data.int16 = (++var)%MAX_VAL; + + // Make sure main thread can see the value + // See 'Examples' -> 'Global thread flag' discussion here: + // https://www.ibm.com/developerworks/systems/articles/powerpc.html + if( mb_mode & MB_MODE_RMB ) { + opal_atomic_rmb(); + } + if( mb_mode & MB_MODE_MB ) { + opal_atomic_mb(); + } + // Release main thread + handoff.working = false; + } + else { + // wait for work + usleep(1); + } + } + pthread_exit(NULL); +} + +void bind_me_to(bool main_thread) { + int num_objs; + hwloc_cpuset_t set; + char *buffer = NULL; + hwloc_obj_t obj; + + num_objs = hwloc_get_nbobjs_by_type(topo, OBJ_TYPE); + + if( main_thread ) { + obj = hwloc_get_obj_by_type(topo, OBJ_TYPE, 0); + } + else { + obj = hwloc_get_obj_by_type(topo, OBJ_TYPE, num_objs-1); + } + + if( obj->type == OBJ_TYPE ) { + hwloc_set_cpubind(topo, obj->cpuset, HWLOC_CPUBIND_THREAD); + 
} + else { + printf("Error: Invalid object\n"); + exit(-1); + } + + set = hwloc_bitmap_alloc(); + hwloc_get_cpubind(topo, set, HWLOC_CPUBIND_THREAD); + hwloc_bitmap_asprintf(&buffer, set); + printf("%s : [objs = %d] : cpuset is %s\n", (main_thread ? "Main" : "Peer"), num_objs, buffer); + free(buffer); + hwloc_bitmap_free(set); +} diff --git a/orte/tools/orte-clean/orte-clean.c b/orte/tools/orte-clean/orte-clean.c index 2d681a2883a..c69620ab6b7 100644 --- a/orte/tools/orte-clean/orte-clean.c +++ b/orte/tools/orte-clean/orte-clean.c @@ -16,7 +16,7 @@ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 UT-Battelle, LLC. All rights reserved. * $COPYRIGHT$ * @@ -128,6 +128,7 @@ main(int argc, char *argv[]) #if OPAL_ENABLE_FT_CR == 1 char *tmp_env_var; #endif + char *legacy; /* This is needed so we can print the help message */ if (ORTE_SUCCESS != (ret = opal_init_util(&argc, &argv))) { @@ -174,6 +175,18 @@ main(int argc, char *argv[]) } opal_os_dirpath_destroy(orte_process_info.top_session_dir, true, NULL); + /* also get rid of any legacy session directories */ + asprintf(&legacy, "%s/openmpi-sessions-%d@%s_0", + orte_process_info.tmpdir_base, + (int)geteuid(), orte_process_info.nodename); + opal_os_dirpath_destroy(legacy, true, NULL); + free(legacy); + + /* and finally get rid of any lingering pmix-related artifacts */ + asprintf(&legacy, "rm -f %s/pmix*", orte_process_info.tmpdir_base); + system(legacy); + free(legacy); + /* now kill any lingering procs, if we can */ kill_procs(); @@ -415,7 +428,7 @@ void kill_procs(void) { } } free(inputline); - free(procname); + free(procname); } free(this_user); pclose(psfile); diff --git a/orte/tools/orte-dvm/orte-dvm.c b/orte/tools/orte-dvm/orte-dvm.c index 
901cb90acd8..c6db9658029 100644 --- a/orte/tools/orte-dvm/orte-dvm.c +++ b/orte/tools/orte-dvm/orte-dvm.c @@ -84,6 +84,7 @@ #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/orted/orted.h" @@ -490,6 +491,7 @@ int main(int argc, char *argv[]) while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); /* cleanup and leave */ orte_finalize(); diff --git a/orte/tools/orte-info/Makefile.am b/orte/tools/orte-info/Makefile.am index 70e41435ca9..64613e30a13 100644 --- a/orte/tools/orte-info/Makefile.am +++ b/orte/tools/orte-info/Makefile.am @@ -11,6 +11,9 @@ # All rights reserved. # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -23,7 +26,7 @@ AM_CFLAGS = \ -DOPAL_CONFIGURE_HOST="\"@OPAL_CONFIGURE_HOST@\"" \ -DOPAL_CONFIGURE_DATE="\"@OPAL_CONFIGURE_DATE@\"" \ -DOMPI_BUILD_USER="\"$$USER\"" \ - -DOMPI_BUILD_HOST="\"`hostname`\"" \ + -DOMPI_BUILD_HOST="\"`(hostname || uname -n) | sed 1q`\"" \ -DOMPI_BUILD_DATE="\"`date`\"" \ -DOMPI_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DOMPI_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ diff --git a/orte/tools/orte-server/orte-server.c b/orte/tools/orte-server/orte-server.c index a556cfc7bd0..d31a4c07946 100644 --- a/orte/tools/orte-server/orte-server.c +++ b/orte/tools/orte-server/orte-server.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -54,6 +54,7 @@ #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" +#include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" #include "orte/orted/orted.h" @@ -283,6 +284,7 @@ int main(int argc, char *argv[]) while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); /* should never get here, but if we do... */ diff --git a/orte/tools/orte-top/orte-top.c b/orte/tools/orte-top/orte-top.c index f6af0e21d90..38727bc656d 100644 --- a/orte/tools/orte-top/orte-top.c +++ b/orte/tools/orte-top/orte-top.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -58,6 +58,7 @@ #include "orte/util/name_fns.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/runtime/orte_quit.h" @@ -532,6 +533,7 @@ main(int argc, char *argv[]) while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); /*************** * Cleanup diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 1ff6a98a34d..92220f07118 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -87,6 +87,7 @@ #include "orte/mca/state/state.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" @@ -198,6 +199,7 @@ int orterun(int argc, char *argv[]) while (orte_event_base_active && 
launchst.active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); if (orte_debug_flag) { opal_output(0, "Job %s has launched", (NULL == launchst.jdata) ? "UNKNOWN" : ORTE_JOBID_PRINT(launchst.jdata->jobid)); @@ -209,6 +211,7 @@ int orterun(int argc, char *argv[]) while (orte_event_base_active && completest.active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); if (ORTE_PROC_IS_HNP) { /* ensure all local procs are dead */ diff --git a/orte/util/Makefile.am b/orte/util/Makefile.am index 2eb7ef5e485..9ef926a6ce8 100644 --- a/orte/util/Makefile.am +++ b/orte/util/Makefile.am @@ -43,14 +43,14 @@ AM_LFLAGS = -Porte_util_hostfile_ LEX_OUTPUT_ROOT = lex.orte_util_hostfile_ headers += \ - util/name_fns.h \ + util/name_fns.h \ util/proc_info.h \ util/session_dir.h \ util/show_help.h \ util/error_strings.h \ - util/context_fns.h \ - util/parse_options.h \ - util/pre_condition_transports.h \ + util/context_fns.h \ + util/parse_options.h \ + util/pre_condition_transports.h \ util/hnp_contact.h \ util/hostfile/hostfile.h \ util/hostfile/hostfile_lex.h \ @@ -60,7 +60,8 @@ headers += \ util/regex.h \ util/attr.h \ util/listener.h \ - util/compress.h + util/compress.h \ + util/threads.h lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ util/error_strings.c \ @@ -68,9 +69,9 @@ lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ util/proc_info.c \ util/session_dir.c \ util/show_help.c \ - util/context_fns.c \ - util/parse_options.c \ - util/pre_condition_transports.c \ + util/context_fns.c \ + util/parse_options.c \ + util/pre_condition_transports.c \ util/hnp_contact.c \ util/hostfile/hostfile_lex.l \ util/hostfile/hostfile.c \ diff --git a/orte/util/comm/comm.c b/orte/util/comm/comm.c index 426cbc4a69c..b4944db05de 100644 --- a/orte/util/comm/comm.c +++ b/orte/util/comm/comm.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2010-2012 Los Alamos National Security, LLC. 
* All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +38,7 @@ #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/routed/routed.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -807,4 +808,3 @@ int orte_util_comm_halt_vm(const orte_process_name_t *hnp) CLEANUP: return rc; } - diff --git a/orte/util/error_strings.c b/orte/util/error_strings.c index 801373cb669..30fc3c51820 100644 --- a/orte/util/error_strings.c +++ b/orte/util/error_strings.c @@ -195,39 +195,12 @@ int orte_err2str(int errnum, const char **errmsg) case ORTE_ERR_OP_IN_PROGRESS: retval = "Operation in progress"; break; - case ORTE_ERR_OPEN_CHANNEL_PEER_FAIL: - retval = "Open channel to peer failed"; - break; - case ORTE_ERR_OPEN_CHANNEL_PEER_REJECT: - retval = "Open channel to peer was rejected"; - break; - case ORTE_ERR_QOS_TYPE_UNSUPPORTED: - retval = "QoS type unsupported"; - break; - case ORTE_ERR_QOS_ACK_WINDOW_FULL: - retval = "QoS ack window full"; - break; - case ORTE_ERR_ACK_TIMEOUT_SENDER: - retval = "Send ack timed out"; - break; - case ORTE_ERR_ACK_TIMEOUT_RECEIVER: - retval = "Recv ack timed out"; - break; - case ORTE_ERR_LOST_MSG_IN_WINDOW: - retval = "Msg lost in window"; - break; - case ORTE_ERR_CHANNEL_BUSY: - retval = "Channel busy"; - break; - case ORTE_ERR_DUPLICATE_MSG: - retval = "Duplicate message"; + case ORTE_ERR_OPEN_CONDUIT_FAIL: + retval = "Open messaging conduit failed"; break; case ORTE_ERR_OUT_OF_ORDER_MSG: retval = "Out of order message"; break; - case ORTE_ERR_OPEN_CHANNEL_DUPLICATE: - retval = "Duplicate channel open request"; - break; case ORTE_ERR_FORCE_SELECT: retval = "Force select"; break; diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index cba8139224d..799fea8764c 100644 --- a/orte/util/nidmap.c +++ 
b/orte/util/nidmap.c @@ -599,7 +599,8 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) opal_output_verbose(5, orte_nidmap_output, "%s CONTINUE TOPOLOGY RANGE (%d) WITH NODE %s: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - tp->cnt, nptr->name, tp->t->sig); + tp->cnt, nptr->name, + (NULL == tp->t) ? "N/A" : tp->t->sig); } else { /* need to start another range */ tp = OBJ_NEW(orte_regex_range_t); diff --git a/orte/util/threads.h b/orte/util/threads.h new file mode 100644 index 00000000000..5bd1be82b5b --- /dev/null +++ b/orte/util/threads.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_THREADS_H +#define ORTE_THREADS_H + +#include "orte_config.h" + +#include "opal/sys/atomic.h" +#include "opal/threads/threads.h" + +/* provide macros for forward-proofing the shifting + * of objects between threads - at some point, we + * may revamp our threading model */ + +/* post an object to another thread - for now, we + * only have a write memory barrier; the argument o is not used by the expansion */ +#define ORTE_POST_OBJECT(o) opal_atomic_wmb() + +/* acquire an object from another thread - for now, + * we only have a read memory barrier; the argument o is not used by the expansion */ +#define ORTE_ACQUIRE_OBJECT(o) opal_atomic_rmb() + +#define orte_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) /* b must expose an m_lock_pthread member; the macros in this header pass the opal_mutex_t inside orte_lock_t */ +typedef pthread_cond_t orte_condition_t; /* condition variables are plain pthread condition variables */ +#define orte_condition_broadcast(a) pthread_cond_broadcast(a) +#define orte_condition_signal(a) pthread_cond_signal(a) +#define ORTE_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + +/* threadshift macro: set up the event (x)->ev on event base eb with f and x as its last two arguments (presumably callback and callback argument - confirm against opal_event_set), give it priority p, issue ORTE_POST_OBJECT, then mark the event active */ +#define ORTE_THREADSHIFT(x, eb, f, p) \ + do { \ + opal_event_set((eb), &((x)->ev), -1, OPAL_EV_WRITE, (f), (x)); \ + opal_event_set_priority(&((x)->ev), (p)); \ + ORTE_POST_OBJECT((x)); \ + opal_event_active(&((x)->ev), OPAL_EV_WRITE, 1); \ + } while(0) + +typedef struct { + opal_mutex_t mutex; /* locked by the ORTE_*_THREAD macros before they read or write active */ + orte_condition_t cond; /* waited on while active is true; broadcast when active is cleared */ + volatile bool active; /* set true by ORTE_CONSTRUCT_LOCK; the wait loops spin on the condition until it is false */ +} orte_lock_t; + +#define
ORTE_CONSTRUCT_LOCK(l) /* construct the mutex and init the condition variable of l; active starts out true, so the wait loops below block until it is cleared */ \ + do { \ + OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + } while(0) + +#define ORTE_DESTRUCT_LOCK(l) /* destruct the mutex and destroy the condition variable of l */ \ + do { \ + OBJ_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + +/* ORTE_ACQUIRE_THREAD: lock lck->mutex, wait on lck->cond until active is false, then set active to true again; the macro does not unlock, so the mutex is still held when it finishes. The debug build additionally traces via opal_output when opal_debug_threads is set */ +#if OPAL_ENABLE_DEBUG +#define ORTE_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = true; \ + OPAL_ACQUIRE_OBJECT(lck); \ + } while(0) +#else +#define ORTE_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + (lck)->active = true; \ + OPAL_ACQUIRE_OBJECT(lck); \ + } while(0) +#endif + +/* ORTE_WAIT_THREAD: lock lck->mutex, wait on lck->cond until active is false, then unlock; unlike ORTE_ACQUIRE_THREAD it neither sets active nor keeps the mutex */ +#if OPAL_ENABLE_DEBUG +#define ORTE_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + OPAL_ACQUIRE_OBJECT(lck); /* pass the lock pointer itself, exactly as the non-debug variant does */ \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define ORTE_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + OPAL_ACQUIRE_OBJECT(lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + +/* ORTE_RELEASE_THREAD: clear active, broadcast on lck->cond and unlock lck->mutex; it never locks the mutex itself, so it is presumably paired with ORTE_ACQUIRE_THREAD, which finishes with the mutex held (confirm against callers) */ +#if OPAL_ENABLE_DEBUG +#define ORTE_RELEASE_THREAD(lck) \ + do { \ + if (opal_debug_threads) { \ + opal_output(0, "Releasing thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ +
orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define ORTE_RELEASE_THREAD(lck) /* non-debug variant: clear active, post, broadcast, unlock - the debug variant without the opal_output trace */ \ + do { \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + +/* ORTE_WAKEUP_THREAD: lock lck->mutex, clear active, broadcast on lck->cond and unlock; unlike ORTE_RELEASE_THREAD it takes the mutex itself */ +#define ORTE_WAKEUP_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) + +#endif /* ORTE_THREADS_H */