Skip to content

Commit 6b19e4a

Browse files
committed
Pull in the topology early and set cache line size
Initialize PMIx and get the topology immediately after setting up the OPAL utility layer (opal_init_util), as neither of those operations requires any real OPAL support. This fills in the cache line size before anything else is opened. Signed-off-by: Ralph Castain <[email protected]>
1 parent c747a1b commit 6b19e4a

File tree

4 files changed

+39
-31
lines changed

4 files changed

+39
-31
lines changed

contrib/platform/intel/bend/mac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ enable_ipv6=no
1212
enable_man_pages=no
1313
enable_mpi_fortran=no
1414
enable_memchecker=no
15-
enable_mca_no_build=memchecker,coll-adapt,coll-cuda,coll-demo,coll-ftagree,coll-han,coll-hcoll,coll-inter,coll-libnbc,coll-monitoring,coll-portals4,coll-tuned,common-monitoring,common-ompio,fbtl,fcoll,fs,io,mtl,osc,pml-cm,pml-monitoring,pml-ucx,pml-v,sharedfp,topo,vprotocol,btl-ofi,btl-portals4,btl-smcuda,btl-uct,btl-ugni,btl-usnic,common-cuda,common-ofi,common-ucx
15+
enable_mca_no_build=memchecker,common-monitoring,common-ompio,fbtl,fcoll,fs,io,mtl,osc,pml-cm,pml-monitoring,pml-ucx,pml-v,sharedfp,topo,vprotocol,btl-ofi,btl-portals4,btl-smcuda,btl-uct,btl-ugni,btl-usnic,common-cuda,common-ofi,common-ucx
1616
enable_contrib_no_build=libompitrace
1717
with_memory_manager=no
1818
with_devel_headers=yes

ompi/runtime/ompi_mpi_init.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
2727
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates.
2828
* All Rights reserved.
29+
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
2930
* $COPYRIGHT$
3031
*
3132
* Additional copyrights may follow
@@ -402,6 +403,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
402403
pmix_status_t rc;
403404
OMPI_TIMING_INIT(64);
404405
opal_pmix_lock_t mylock;
406+
opal_process_name_t pname;
405407

406408
ompi_hook_base_mpi_init_top(argc, argv, requested, provided);
407409

@@ -445,6 +447,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
445447
ompi_mpi_thread_level(requested, provided);
446448

447449
/* Setup enough to check get/set MCA params */
450+
memset(&opal_process_info, 0, sizeof(opal_process_info));
448451
if (OPAL_SUCCESS != (ret = opal_init_util(&argc, &argv))) {
449452
error = "ompi_mpi_init: opal_init_util failed";
450453
goto error;
@@ -470,6 +473,40 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
470473
goto error;
471474
}
472475

476+
/* setup our internal nspace hack */
477+
opal_pmix_setup_nspace_tracker();
478+
/* init PMIx */
479+
if (PMIX_SUCCESS != (ret = PMIx_Init(&opal_process_info.myprocid, NULL, 0))) {
480+
/* if we get PMIX_ERR_UNREACH indicating that we cannot reach the
481+
* server, then we assume we are operating as a singleton */
482+
if (PMIX_ERR_UNREACH == ret) {
483+
ompi_singleton = true;
484+
} else {
485+
/* we cannot run - this could be due to being direct launched
486+
* without the required PMI support being built, so print
487+
* out a help message indicating it */
488+
opal_show_help("help-mpi-runtime.txt", "no-pmi", true, PMIx_Error_string(ret));
489+
return OPAL_ERR_SILENT;
490+
}
491+
}
492+
/* setup the process name fields - also registers the new nspace */
493+
OPAL_PMIX_CONVERT_PROCT(ret, &pname, &opal_process_info.myprocid);
494+
if (OPAL_SUCCESS != ret) {
495+
error = "ompi_mpi_init: converting process name";
496+
goto error;
497+
}
498+
OPAL_PROC_MY_NAME.jobid = pname.jobid;
499+
OPAL_PROC_MY_NAME.vpid = pname.vpid;
500+
opal_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid;
501+
opal_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid;
502+
503+
/* get our topology and cache line size */
504+
ret = opal_hwloc_base_get_topology();
505+
if (OPAL_SUCCESS != ret) {
506+
error = "ompi_mpi_init: get topology";
507+
goto error;
508+
}
509+
473510
if (OPAL_SUCCESS != (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) {
474511
error = "ompi_mpi_init: opal_arch_set_fortran_logical_size failed";
475512
goto error;

ompi/runtime/ompi_rte.c

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
543543

544544
u32ptr = &u32;
545545
u16ptr = &u16;
546-
memset(&opal_process_info, 0, sizeof(opal_process_info));
547546

548547
/* Convince OPAL to use our naming scheme */
549548
opal_process_name_print = _process_name_print_for_opal;
@@ -561,34 +560,6 @@ int ompi_rte_init(int *pargc, char ***pargv)
561560
goto error;
562561
}
563562

564-
/* setup our internal nspace hack */
565-
opal_pmix_setup_nspace_tracker();
566-
567-
/* initialize the selected module */
568-
if (PMIX_SUCCESS != (ret = PMIx_Init(&opal_process_info.myprocid, NULL, 0))) {
569-
/* if we get PMIX_ERR_UNREACH indicating that we cannot reach the
570-
* server, then we assume we are operating as a singleton */
571-
if (PMIX_ERR_UNREACH == ret) {
572-
ompi_singleton = true;
573-
} else {
574-
/* we cannot run - this could be due to being direct launched
575-
* without the required PMI support being built, so print
576-
* out a help message indicating it */
577-
opal_show_help("help-mpi-runtime.txt", "no-pmi", true, PMIx_Error_string(ret));
578-
return OPAL_ERR_SILENT;
579-
}
580-
}
581-
582-
/* setup the process name fields - also registers the new nspace */
583-
OPAL_PMIX_CONVERT_PROCT(rc, &pname, &opal_process_info.myprocid);
584-
if (OPAL_SUCCESS != rc) {
585-
return rc;
586-
}
587-
OPAL_PROC_MY_NAME.jobid = pname.jobid;
588-
OPAL_PROC_MY_NAME.vpid = pname.vpid;
589-
opal_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid;
590-
opal_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid;
591-
592563
/* set our hostname */
593564
ev1 = NULL;
594565
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_HOSTNAME, &OPAL_PROC_MY_NAME,

0 commit comments

Comments
 (0)