From 3c7ea15b6095e691d81df8d030376e0ee7d40ec3 Mon Sep 17 00:00:00 2001 From: Austen Lauria Date: Thu, 18 Mar 2021 13:24:21 -0400 Subject: [PATCH] Powerpc atomics: Force usage of powerpc assembly. The builtins used by default on Power have been shown to perform poorly. For the time being, force all compilers to use the inline assembly until atomic builtins catch up. This changes the defaults for all compilers except xl (including gcc, clang, and pgi) to use the assembly. Previously, all of the above were using C11 or the gcc builtins. Bonus: Add a configure flag to force Power machines to use the builtins/C11, depending on what is available. This will make future testing easier. Signed-off-by: Austen Lauria (cherry picked from commit e3f3c5bd3eff2890d3ea993ef1e4443ebfb86a0c) --- config/opal_config_asm.m4 | 12 ++++++++++++ config/opal_configure_options.m4 | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 2c7bcfd480e..5183c7e0828 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -1136,6 +1136,18 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long]) fi OPAL_GCC_INLINE_ASSIGN='"1: li %0,0" : "=&r"(ret)' + + # See the following github PR and some performance numbers/discussion: + # https://github.com/open-mpi/ompi/pull/8649 + AC_MSG_CHECKING([$opal_cv_asm_arch: Checking if force gcc atomics requested]) + if test $force_gcc_atomics_ppc = 0 ; then + AC_MSG_RESULT([no]) + opal_cv_asm_builtin="BUILTIN_NO" + else + AC_MSG_RESULT([Yes]) + AC_MSG_WARN([$opal_cv_asm_arch: gcc atomics have been known to perform poorly on powerpc.]) + fi + ;; # There is no current difference between s390 and s390x # But use two different defines in case some come later diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index 43fcaf3469d..b2312ff3cde 100644 --- a/config/opal_configure_options.m4 +++ 
b/config/opal_configure_options.m4 @@ -84,6 +84,13 @@ else WANT_BRANCH_PROBABILITIES=0 fi +AC_ARG_ENABLE([builtin-atomics-for-ppc],[AS_HELP_STRING([--enable-builtin-atomics-for-ppc], + [POWER architectures only: Force use of builtin atomics if available. This could either be gcc builtins or C11 atomics, depending on what is available on your system. Enabling this is known to cause poor performance in atomic operations on Power machines. (default: disabled)])]) +if test "x$enable_builtin_atomics_for_ppc" = "xyes" ; then +force_gcc_atomics_ppc=1 +else +force_gcc_atomics_ppc=0 +fi # # Memory debugging