From 4c1847c2402d6c5a7f4788692f81f63a1b576cd8 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Sun, 16 Mar 2025 22:10:05 -0400 Subject: [PATCH 1/5] Extract the install prefix from the shared library. If we detect a static build, don't try to provide a prefix. This is part of a multi-project effort; a similar PR will be created in OpenPMIX and OMPI. The goal of each of these changes is the same: instead of using a build-time generated prefix that ignores a relocation of the install tree, take the prefix from the shared library of each project and derive the necessary paths from it. The user can, however, override this using environment variables and the configuration files. Signed-off-by: George Bosilca --- opal/mca/installdirs/runtime/Makefile.am | 14 +++ opal/mca/installdirs/runtime/configure.m4 | 45 +++++++++ .../runtime/opal_installdirs_runtime.c | 92 +++++++++++++++++++ opal/mca/installdirs/runtime/owner.txt | 7 ++ 4 files changed, 158 insertions(+) create mode 100644 opal/mca/installdirs/runtime/Makefile.am create mode 100644 opal/mca/installdirs/runtime/configure.m4 create mode 100644 opal/mca/installdirs/runtime/opal_installdirs_runtime.c create mode 100644 opal/mca/installdirs/runtime/owner.txt diff --git a/opal/mca/installdirs/runtime/Makefile.am b/opal/mca/installdirs/runtime/Makefile.am new file mode 100644 index 00000000000..1ba674c606f --- /dev/null +++ b/opal/mca/installdirs/runtime/Makefile.am @@ -0,0 +1,14 @@ +# +# Copyright (c) 2025 NVIDIA Corporation. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +noinst_LTLIBRARIES = libmca_installdirs_runtime.la + +libmca_installdirs_runtime_la_SOURCES = \ + opal_installdirs_runtime.c +libmca_installdirs_runtime_la_CPPFLAGS = -DOPAL_LIB_NAME=\"@OPAL_LIB_NAME@\" diff --git a/opal/mca/installdirs/runtime/configure.m4 b/opal/mca/installdirs/runtime/configure.m4 new file mode 100644 index 00000000000..b64cd461846 --- /dev/null +++ b/opal/mca/installdirs/runtime/configure.m4 @@ -0,0 +1,45 @@ +# -*- shell-script -*- +# +# Copyright (c) 2025 NVIDIA Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AC_DEFUN([MCA_opal_installdirs_runtime_PRIORITY], [5]) + +AC_DEFUN([MCA_opal_installdirs_runtime_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + +# MCA_installdirs_config_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_installdirs_runtime_CONFIG], [ + # Check if we are building a shared library or not. 
Disable if static + AC_MSG_CHECKING([if shared libraries are enabled]) + AS_IF([test "$enable_shared" != "yes"], + [installdirs_runtime_happy="no"], + [installdirs_runtime_happy="yes"]) + AC_MSG_RESULT([$installdirs_runtime_happy]) + + # Check if dladdr is available + AS_IF([test "$installdirs_runtime_happy" = "yes"], + [AC_CHECK_HEADERS([dlfcn.h], + [], + [installdirs_runtime_happy="no"])]) + AS_IF([test "$installdirs_runtime_happy" = "yes"], + [AC_CHECK_LIB([dl], [dladdr], + [], + [installdirs_runtime_happy="no"]) + ]) + # + AS_IF([test "$installdirs_runtime_happy" = "yes"], + [AC_CONFIG_FILES([opal/mca/installdirs/runtime/Makefile]) + $1], [$2]) +]) + diff --git a/opal/mca/installdirs/runtime/opal_installdirs_runtime.c b/opal/mca/installdirs/runtime/opal_installdirs_runtime.c new file mode 100644 index 00000000000..f286d590b47 --- /dev/null +++ b/opal/mca/installdirs/runtime/opal_installdirs_runtime.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#include + +#include "opal/constants.h" +#include "opal/mca/installdirs/installdirs.h" +#include "opal/runtime/opal.h" + +static int installdirs_runtime_open(void); + +opal_installdirs_base_component_t mca_installdirs_runtime_component = { + /* First, the mca_component_t struct containing meta information + about the component itself */ + {OPAL_INSTALLDIRS_BASE_VERSION_2_0_0, + + /* Component name and version */ + "runtime", OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, OPAL_RELEASE_VERSION, + + /* Component open and close functions */ + installdirs_runtime_open, NULL}, + {/* This component is checkpointable */ + MCA_BASE_METADATA_PARAM_CHECKPOINT}, + + /* Next the opal_install_dirs_t install_dirs_data information */ + { + NULL, + }, +}; + +#include +#include "opal/util/basename.h" + +/** + * We are trying to solve a particular use case here, when the entire install tree + * of Open MPI (and its dependencies) has been moved into another location. Nothing + * fancy, the entire install tree has maintained his shape but changed the prefix. + */ +static int installdirs_runtime_open(void) +{ + Dl_info info; + void* opal_fct; + + /* Casting from void* to fct pointer according to POSIX.1-2001 and POSIX.1-2008 */ + *(void **)&opal_fct = dlsym(RTLD_DEFAULT, "opal_init_util"); + + if( 0 == dladdr(opal_fct, &info) ) { + /* Can't find the symbol */ + return OPAL_ERROR; + } + + /* If this build was both static and shared then this compoenent will be build and will exists + * even in the static library. We need to prevent setting a prefix for the OMPI library that + * is actually the application path. Check, if the name points to a library. 
+ */ + char* libname = opal_basename(info.dli_fname); + if( strncmp(libname, "lib", 3)) { /* not a shared library */ + free(libname); + return OPAL_ERROR; + } +#if defined(OPAL_LIB_NAME) + /* Extra check using the installed name of the OPAL library */ + if( strncmp(libname+3, OPAL_LIB_NAME, strlen(OPAL_LIB_NAME)) ) { /* not a shared library */ + free(libname); + return OPAL_ERROR; + } +#endif /* defined(OPAL_LIB_NAME) */ + /* Remove the shared library name and it's first dirname to obtain a prefix. This + * is true in most cases, especially when the install directory was just moved + * moved around, but it is not necessarily always true. + */ + char* dname = opal_dirname(info.dli_fname); + char* prefix = opal_dirname(dname); + + free(libname); + free(dname); + + mca_installdirs_runtime_component.install_dirs_data.prefix = prefix; + + return OPAL_SUCCESS; +} diff --git a/opal/mca/installdirs/runtime/owner.txt b/opal/mca/installdirs/runtime/owner.txt new file mode 100644 index 00000000000..0177f4391ab --- /dev/null +++ b/opal/mca/installdirs/runtime/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: NVIDIA +status: active From f682f4e70e8d51f862bacb623ecadcfeeb8e2df8 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 17 Mar 2025 10:45:58 -0400 Subject: [PATCH 2/5] Add a way to complain if install directories do not exist. Unset by default, but can be used to check installations. 
Signed-off-by: George Bosilca --- .../base/installdirs_base_components.c | 63 +++++++++++++------ 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/opal/mca/installdirs/base/installdirs_base_components.c b/opal/mca/installdirs/base/installdirs_base_components.c index 1daae990df0..f254a829987 100644 --- a/opal/mca/installdirs/base/installdirs_base_components.c +++ b/opal/mca/installdirs/base/installdirs_base_components.c @@ -21,7 +21,11 @@ #include "opal/mca/installdirs/installdirs.h" #include "opal/mca/mca.h" +#include +#include + opal_install_dirs_t opal_install_dirs = {0}; +static int opal_install_dir_warn_if_non_existent = 0; #define CONDITIONAL_COPY(target, origin, field) \ do { \ @@ -30,6 +34,23 @@ opal_install_dirs_t opal_install_dirs = {0}; } \ } while (0) +#define CHECK_AND_COMPLAIN(field) \ + do { \ + if(!directory_exists(opal_install_dirs.field)) { \ + fprintf(stderr, #field ": %s\n", opal_install_dirs.field); \ + } \ + } while (0) + +static int directory_exists(const char *path) +{ + struct stat info; + + if (0 != stat(path, &info)) { + return 0; + } + return S_ISDIR(info.st_mode); +} + static int opal_installdirs_base_open(mca_base_open_flag_t flags) { mca_base_component_list_item_t *component_item; @@ -87,26 +108,30 @@ static int opal_installdirs_base_open(mca_base_open_flag_t flags) opal_install_dirs.opalincludedir = opal_install_dirs_expand_setup( opal_install_dirs.opalincludedir); -#if 0 - fprintf(stderr, "prefix: %s\n", opal_install_dirs.prefix); - fprintf(stderr, "exec_prefix: %s\n", opal_install_dirs.exec_prefix); - fprintf(stderr, "bindir: %s\n", opal_install_dirs.bindir); - fprintf(stderr, "sbindir: %s\n", opal_install_dirs.sbindir); - fprintf(stderr, "libexecdir: %s\n", opal_install_dirs.libexecdir); - fprintf(stderr, "datarootdir: %s\n", opal_install_dirs.datarootdir); - fprintf(stderr, "datadir: %s\n", opal_install_dirs.datadir); - fprintf(stderr, "sysconfdir: %s\n", opal_install_dirs.sysconfdir); - fprintf(stderr, 
"sharedstatedir: %s\n", opal_install_dirs.sharedstatedir); - fprintf(stderr, "localstatedir: %s\n", opal_install_dirs.localstatedir); - fprintf(stderr, "libdir: %s\n", opal_install_dirs.libdir); - fprintf(stderr, "includedir: %s\n", opal_install_dirs.includedir); - fprintf(stderr, "infodir: %s\n", opal_install_dirs.infodir); - fprintf(stderr, "mandir: %s\n", opal_install_dirs.mandir); - fprintf(stderr, "pkgdatadir: %s\n", opal_install_dirs.pkgdatadir); - fprintf(stderr, "pkglibdir: %s\n", opal_install_dirs.pkglibdir); - fprintf(stderr, "pkgincludedir: %s\n", opal_install_dirs.pkgincludedir); -#endif + (void)mca_base_var_register("opal", "opal", "installdir", "warn", + "Print a warning is any of the OMPI necessary paths cannot be found", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &opal_install_dir_warn_if_non_existent); + if(opal_install_dir_warn_if_non_existent) { + CHECK_AND_COMPLAIN(prefix); + CHECK_AND_COMPLAIN(exec_prefix); + CHECK_AND_COMPLAIN(bindir); + //CHECK_AND_COMPLAIN(sbindir); + //CHECK_AND_COMPLAIN(libexecdir); + CHECK_AND_COMPLAIN(datarootdir); + CHECK_AND_COMPLAIN(datadir); + CHECK_AND_COMPLAIN(sysconfdir); + //CHECK_AND_COMPLAIN(sharedstatedir); + //CHECK_AND_COMPLAIN(localstatedir); + CHECK_AND_COMPLAIN(libdir); + CHECK_AND_COMPLAIN(includedir); + //CHECK_AND_COMPLAIN(infodir); + CHECK_AND_COMPLAIN(mandir); + CHECK_AND_COMPLAIN(opaldatadir); + CHECK_AND_COMPLAIN(opallibdir); + CHECK_AND_COMPLAIN(opalincludedir); + } /* NTH: Is it ok not to close the components? If not we can add a flag to mca_base_framework_components_close to indicate not to deregister variable groups */ From 3fab7209d7f60e622237eb09d4f7c99d66e6fe7c Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 19 Mar 2025 17:26:38 -0400 Subject: [PATCH 3/5] Move the install_dirs.h.in outside of the config component. This is the only way to make the default install path available for all components in the installdir framework. 
Signed-off-by: George Bosilca --- opal/mca/installdirs/config/Makefile.am | 2 +- opal/mca/installdirs/config/configure.m4 | 2 +- opal/mca/installdirs/config/opal_installdirs_config.c | 2 +- opal/mca/installdirs/{config => }/install_dirs.h.in | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename opal/mca/installdirs/{config => }/install_dirs.h.in (100%) diff --git a/opal/mca/installdirs/config/Makefile.am b/opal/mca/installdirs/config/Makefile.am index 2ec99d31234..b0136a7c78b 100644 --- a/opal/mca/installdirs/config/Makefile.am +++ b/opal/mca/installdirs/config/Makefile.am @@ -18,4 +18,4 @@ libmca_installdirs_config_la_SOURCES = \ # This file is generated; we do not want to include it in the tarball nodist_libmca_installdirs_config_la_SOURCES = \ - install_dirs.h + ../install_dirs.h diff --git a/opal/mca/installdirs/config/configure.m4 b/opal/mca/installdirs/config/configure.m4 index bc910fdcc5d..88489e9aeca 100644 --- a/opal/mca/installdirs/config/configure.m4 +++ b/opal/mca/installdirs/config/configure.m4 @@ -24,6 +24,6 @@ AC_DEFUN([MCA_opal_installdirs_config_COMPILE_MODE], [ # ------------------------------------------------ AC_DEFUN([MCA_opal_installdirs_config_CONFIG],[ AC_CONFIG_FILES([opal/mca/installdirs/config/Makefile - opal/mca/installdirs/config/install_dirs.h]) + opal/mca/installdirs/install_dirs.h]) ]) diff --git a/opal/mca/installdirs/config/opal_installdirs_config.c b/opal/mca/installdirs/config/opal_installdirs_config.c index 201cc497f16..1f79aff88be 100644 --- a/opal/mca/installdirs/config/opal_installdirs_config.c +++ b/opal/mca/installdirs/config/opal_installdirs_config.c @@ -10,7 +10,7 @@ #include "opal_config.h" -#include "opal/mca/installdirs/config/install_dirs.h" +#include "opal/mca/installdirs/install_dirs.h" #include "opal/mca/installdirs/installdirs.h" const opal_installdirs_base_component_t mca_installdirs_config_component = { diff --git a/opal/mca/installdirs/config/install_dirs.h.in b/opal/mca/installdirs/install_dirs.h.in 
similarity index 100% rename from opal/mca/installdirs/config/install_dirs.h.in rename to opal/mca/installdirs/install_dirs.h.in From 76f3d0e7c9cb1550932dfa24bad78b9ba58d0f10 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 19 Mar 2025 17:27:59 -0400 Subject: [PATCH 4/5] Propagate the user provided `--libdir` to the internal libevent Signed-off-by: George Bosilca --- config/opal_config_libevent.m4 | 1 + 1 file changed, 1 insertion(+) diff --git a/config/opal_config_libevent.m4 b/config/opal_config_libevent.m4 index 4e16d899696..cea427df2c6 100644 --- a/config/opal_config_libevent.m4 +++ b/config/opal_config_libevent.m4 @@ -205,6 +205,7 @@ AC_DEFUN([_OPAL_CONFIG_LIBEVENT_INTERNAL], [ OPAL_VAR_SCOPE_PUSH(subconfig_happy subconfig_prefix internal_libevent_location) AS_IF([test ! -z $prefix], [subconfig_prefix="--prefix=$prefix"]) + AS_IF([test ! -z $libdir], [subconfig_prefix="--libdir=$libdir"]) # Note: To update the version of libevent shipped, update the # constant in autogen.pl. From 5384920ceab7c589b2896f80ce287d48a6ada7ad Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 19 Mar 2025 17:29:05 -0400 Subject: [PATCH 5/5] Better prefix extraction For the libdir, it now matches the libdir prefix from install_dirs.h in order to find the real prefix. It does the same for executables, allowing us to execute our compiler wrappers and other tools from a relocated directory. The problem here is that all our internal executables are statically linked against libopal_core, so we will find the basic symbols not in a shared library but directly in the executable. This works fine, but we need a way to prevent this mechanism from triggering on a statically built user application. 
Signed-off-by: George Bosilca --- .../runtime/opal_installdirs_runtime.c | 49 +++++++++++++------ 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/opal/mca/installdirs/runtime/opal_installdirs_runtime.c b/opal/mca/installdirs/runtime/opal_installdirs_runtime.c index f286d590b47..e5cb7cd1a35 100644 --- a/opal/mca/installdirs/runtime/opal_installdirs_runtime.c +++ b/opal/mca/installdirs/runtime/opal_installdirs_runtime.c @@ -41,6 +41,7 @@ opal_installdirs_base_component_t mca_installdirs_runtime_component = { #include #include "opal/util/basename.h" +#include "opal/mca/installdirs/install_dirs.h" /** * We are trying to solve a particular use case here, when the entire install tree @@ -51,42 +52,60 @@ static int installdirs_runtime_open(void) { Dl_info info; void* opal_fct; + char* libname = NULL; + const char* base_prefix_path = OPAL_LIBDIR; /* Casting from void* to fct pointer according to POSIX.1-2001 and POSIX.1-2008 */ *(void **)&opal_fct = dlsym(RTLD_DEFAULT, "opal_init_util"); if( 0 == dladdr(opal_fct, &info) ) { - /* Can't find the symbol */ - return OPAL_ERROR; + goto bail_out_with_no_data; } /* If this build was both static and shared then this compoenent will be build and will exists * even in the static library. We need to prevent setting a prefix for the OMPI library that * is actually the application path. Check, if the name points to a library. 
*/ - char* libname = opal_basename(info.dli_fname); + libname = opal_basename(info.dli_fname); if( strncmp(libname, "lib", 3)) { /* not a shared library */ - free(libname); - return OPAL_ERROR; - } + base_prefix_path = OPAL_BINDIR; + } else { #if defined(OPAL_LIB_NAME) - /* Extra check using the installed name of the OPAL library */ - if( strncmp(libname+3, OPAL_LIB_NAME, strlen(OPAL_LIB_NAME)) ) { /* not a shared library */ - free(libname); - return OPAL_ERROR; - } + /* Extra check using the installed name of the OPAL library */ + if( strncmp(libname+3, OPAL_LIB_NAME, strlen(OPAL_LIB_NAME)) ) { /* not a shared library */ + goto bail_out_with_no_data; + } #endif /* defined(OPAL_LIB_NAME) */ + } /* Remove the shared library name and it's first dirname to obtain a prefix. This * is true in most cases, especially when the install directory was just moved * moved around, but it is not necessarily always true. */ - char* dname = opal_dirname(info.dli_fname); - char* prefix = opal_dirname(dname); + char *prefix = NULL, *dname = opal_dirname(info.dli_fname); + int dname_idx = strlen(dname), dname_token = dname_idx; + for( int i = strlen(base_prefix_path); (i > 0) && (dname_idx > 0); i-- ) { + if( dname[dname_idx] != base_prefix_path[i] ) { + dname[dname_token] = '\0'; + prefix = dname; + dname = NULL; /* the string is now attached to the component, prevent it from being freed */ + break; + } + if( dname[dname_idx] == OPAL_PATH_SEP[0] ) + dname_token = dname_idx; + dname_idx--; + } + + mca_installdirs_runtime_component.install_dirs_data.prefix = prefix; + + /* If we goto here, there is some error. Unfortunately, we can't return an error from + * this function, the MCA infrastructure is not yet completely setup, and a call to + * mca_base_component_close will break. + * So, return success but provide no meaningfull data in the component. 
+ */ + bail_out_with_no_data: free(libname); free(dname); - mca_installdirs_runtime_component.install_dirs_data.prefix = prefix; - return OPAL_SUCCESS; }