Skip to content

Commit 2c086e5

Browse files
author
Ralph Castain
committed
Add an experimental ability to skip the RTE barriers at the end of MPI_Init and the beginning of MPI_Finalize
1 parent b85a5e6 commit 2c086e5

File tree

4 files changed

+58
-29
lines changed

4 files changed

+58
-29
lines changed

ompi/runtime/ompi_mpi_finalize.c

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -246,26 +246,28 @@ int ompi_mpi_finalize(void)
246246
del_procs behavior around May of 2014 (see
247247
https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
248248
more details). */
249-
if (NULL != opal_pmix.fence_nb) {
250-
active = true;
251-
/* Note that use of the non-blocking PMIx fence will
252-
* allow us to lazily cycle calling
253-
* opal_progress(), which will allow any other pending
254-
* communications/actions to complete. See
255-
* https://github.com/open-mpi/ompi/issues/1576 for the
256-
* original bug report. */
257-
opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
258-
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
259-
} else {
260-
/* However, we cannot guarantee that the provided PMIx has
261-
* fence_nb. If it doesn't, then do the best we can: an MPI
262-
* barrier on COMM_WORLD (which isn't the best because of the
263-
* reasons cited above), followed by a blocking PMIx fence
264-
* (which does not call opal_progress()). */
265-
ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
266-
comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
267-
268-
opal_pmix.fence(NULL, 0);
249+
if (!ompi_async_mpi_finalize) {
250+
if (NULL != opal_pmix.fence_nb) {
251+
active = true;
252+
/* Note that use of the non-blocking PMIx fence will
253+
* allow us to lazily cycle calling
254+
* opal_progress(), which will allow any other pending
255+
* communications/actions to complete. See
256+
* https://github.com/open-mpi/ompi/issues/1576 for the
257+
* original bug report. */
258+
opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
259+
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
260+
} else {
261+
/* However, we cannot guarantee that the provided PMIx has
262+
* fence_nb. If it doesn't, then do the best we can: an MPI
263+
* barrier on COMM_WORLD (which isn't the best because of the
264+
* reasons cited above), followed by a blocking PMIx fence
265+
* (which does not call opal_progress()). */
266+
ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
267+
comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
268+
269+
opal_pmix.fence(NULL, 0);
270+
}
269271
}
270272

271273
/* check for timing request - get stop time and report elapsed

ompi/runtime/ompi_mpi_init.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -819,14 +819,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
819819
/* wait for everyone to reach this point - this is a hard
820820
* barrier requirement at this time, though we hope to relax
821821
* it at a later point */
822-
active = true;
823-
opal_pmix.commit();
824-
if (NULL != opal_pmix.fence_nb) {
825-
opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
826-
fence_release, (void*)&active);
827-
OMPI_WAIT_FOR_COMPLETION(active);
828-
} else {
829-
opal_pmix.fence(NULL, opal_pmix_collect_all_data);
822+
if (!ompi_async_mpi_init) {
823+
active = true;
824+
if (NULL != opal_pmix.fence_nb) {
825+
opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
826+
fence_release, (void*)&active);
827+
OMPI_WAIT_FOR_COMPLETION(active);
828+
} else {
829+
opal_pmix.fence(NULL, opal_pmix_collect_all_data);
830+
}
830831
}
831832

832833
/* check for timing request - get stop time and report elapsed

ompi/runtime/ompi_mpi_params.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
1515
* reserved.
1616
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
17-
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
17+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
1818
* Copyright (c) 2015 Mellanox Technologies, Inc.
1919
* All rights reserved.
2020
* $COPYRIGHT$
@@ -65,6 +65,9 @@ char *ompi_mpi_show_mca_params_string = NULL;
6565
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
6666
bool ompi_mpi_preconnect_mpi = false;
6767

68+
bool ompi_async_mpi_init = false;
69+
bool ompi_async_mpi_finalize = false;
70+
6871
#define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0
6972
uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT;
7073
bool ompi_mpi_dynamics_enabled = true;
@@ -282,6 +285,22 @@ int ompi_mpi_register_params(void)
282285
MCA_BASE_VAR_SCOPE_READONLY,
283286
&ompi_mpi_dynamics_enabled);
284287

288+
ompi_async_mpi_init = false;
289+
(void) mca_base_var_register("ompi", "async", "mpi", "init",
290+
"Do not perform a barrier at the end of MPI_Init",
291+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
292+
OPAL_INFO_LVL_9,
293+
MCA_BASE_VAR_SCOPE_READONLY,
294+
&ompi_async_mpi_init);
295+
296+
ompi_async_mpi_finalize = false;
297+
(void) mca_base_var_register("ompi", "async", "mpi", "finalize",
298+
"Do not perform a barrier at the beginning of MPI_Finalize",
299+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
300+
OPAL_INFO_LVL_9,
301+
MCA_BASE_VAR_SCOPE_READONLY,
302+
&ompi_async_mpi_finalize);
303+
285304
value = mca_base_var_find ("opal", "opal", NULL, "abort_delay");
286305
if (0 <= value) {
287306
(void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay",

ompi/runtime/params.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,13 @@ OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;
135135
*/
136136
OMPI_DECLSPEC extern bool ompi_mpi_dynamics_enabled;
137137

138+
/* EXPERIMENTAL: do not perform an RTE barrier at the end of MPI_Init */
139+
OMPI_DECLSPEC extern bool ompi_async_mpi_init;
140+
141+
/* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */
142+
OMPI_DECLSPEC extern bool ompi_async_mpi_finalize;
143+
144+
138145
/**
139146
* Register MCA parameters used by the MPI layer.
140147
*

0 commit comments

Comments (0)