diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4
index aed2139a88b..da52039b01c 100644
--- a/config/opal_config_asm.m4
+++ b/config/opal_config_asm.m4
@@ -1192,6 +1192,18 @@ AC_DEFUN([OPAL_CONFIG_ASM],[
                 AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long])
             fi
             OPAL_GCC_INLINE_ASSIGN='"1: li %0,0" : "=&r"(ret)'
+
+            # See the following github PR and some performance numbers/discussion:
+            # https://github.com/open-mpi/ompi/pull/8649
+            AC_MSG_CHECKING([$opal_cv_asm_arch: Checking if force gcc atomics requested])
+            if test "$force_gcc_atomics_ppc" = "0" ; then
+                AC_MSG_RESULT([no])
+                opal_cv_asm_builtin="BUILTIN_NO"
+            else
+                AC_MSG_RESULT([yes])
+                AC_MSG_WARN([$opal_cv_asm_arch: gcc atomics have been known to perform poorly on powerpc.])
+            fi
+
             ;;
         *)
             if test $opal_cv_have___atomic = "yes" ; then
@@ -1285,6 +1297,8 @@ int main(int argc, char* argv[])
                        [Whether to use builtin atomics])
     AC_SUBST([OPAL_ASSEMBLY_BUILTIN])
 
+    OPAL_SUMMARY_ADD([[Atomics]],[[OMPI]],[],[$opal_cv_asm_builtin])
+
     OPAL_ASM_FIND_FILE
 
     unset result asm_format
diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4
index 78dc6ba413c..734e91dadfb 100644
--- a/config/opal_configure_options.m4
+++ b/config/opal_configure_options.m4
@@ -84,6 +84,13 @@ else
     WANT_BRANCH_PROBABILITIES=0
 fi
 
+AC_ARG_ENABLE([builtin-atomics-for-ppc],[AS_HELP_STRING([--enable-builtin-atomics-for-ppc],
+    [POWER architectures only: Force use of builtin atomics if available. This could either be gcc builtins or C11 atomics, depending on what is available on your system. Enabling this is known to cause poor performance in atomic operations on Power machines. (default: disabled)])])
+if test "x$enable_builtin_atomics_for_ppc" = "xyes" ; then
+    force_gcc_atomics_ppc=1
+else
+    force_gcc_atomics_ppc=0
+fi
 
 #
 # Memory debugging
diff --git a/test/threads/Makefile.am b/test/threads/Makefile.am
index 024b4f3ae0c..ffac218740d 100644
--- a/test/threads/Makefile.am
+++ b/test/threads/Makefile.am
@@ -10,7 +10,8 @@
 # University of Stuttgart. All rights reserved.
 # Copyright (c) 2004-2005 The Regents of the University of California.
 # All rights reserved.
-# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2021 IBM Corporation. All rights reserved.
 # $COPYRIGHT$
 #
 # Additional copyrights may follow
@@ -24,7 +25,8 @@ AM_LDFLAGS = -lpthread
 
 check_PROGRAMS = \
 	opal_thread \
-	opal_condition
+	opal_condition \
+	opal_atomic_thread_bench
 
 # JMS possibly to be re-added when #1232 is fixed
 #TESTS = $(check_PROGRAMS)
@@ -42,5 +44,11 @@ opal_condition_LDADD = \
         $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
 opal_condition_DEPENDENCIES = $(opal_condition_LDADD)
 
+opal_atomic_thread_bench_SOURCES = opal_atomic_thread_bench.c
+opal_atomic_thread_bench_LDADD = \
+        $(top_builddir)/test/support/libsupport.a \
+        $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
+opal_atomic_thread_bench_DEPENDENCIES = $(opal_atomic_thread_bench_LDADD)
+
 distclean:
 	rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile
diff --git a/test/threads/opal_atomic_thread_bench.c b/test/threads/opal_atomic_thread_bench.c
new file mode 100644
index 00000000000..2989be55dba
--- /dev/null
+++ b/test/threads/opal_atomic_thread_bench.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2021 IBM Corporation. All rights reserved.
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "support.h"
+#include "opal/runtime/opal.h"
+#include "opal/constants.h"
+#include "opal/mca/threads/threads.h"
+#include "opal/mca/threads/condition.h"
+#include "opal/sys/atomic.h"
+
+/*
+ * Multi-threaded micro-benchmark for the OPAL atomic operations. Every
+ * worker thread times ITERATIONS calls of each primitive against a shared
+ * counter and prints its own result; a barrier keeps the threads in step
+ * between phases.
+ */
+
+#define OPAL_TEST_THREAD_COUNT 8
+#define ITERATIONS 1000000
+#define ITEM_COUNT 100
+
+static opal_atomic_int64_t var_64 = 0;
+static opal_atomic_int32_t var_32 = 0;
+static pthread_barrier_t barrier;
+
+#if !defined(timersub)
+#define timersub(a, b, r) \
+    do { \
+        (r)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+        if ((a)->tv_usec < (b)->tv_usec) { \
+            (r)->tv_sec--; \
+            (a)->tv_usec += 1000000; \
+        } \
+        (r)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+    } while (0)
+#endif
+
+#if !defined(OPAL_TEST_DONE)
+/* Stop the clock, print this thread's timing for `func`, clear the timers
+ * and wait for the other threads. Relies on the caller's start, stop, total
+ * and timing locals. `val` is only referenced by the disabled debug print. */
+#define OPAL_TEST_DONE(func, val) \
+    do { \
+        gettimeofday (&stop, NULL); \
+        timersub(&stop, &start, &total); \
+        timing = ((double) total.tv_sec + (double) total.tv_usec * 1e-6) / (double) ITERATIONS; \
+        printf ("%s() thread finished. Time: %d s %d us %d nsec/per\n", func, (int) total.tv_sec, \
+                (int)total.tv_usec, (int)(timing / 1e-9)); \
+        memset(&stop, 0, sizeof(struct timeval)); \
+        memset(&start, 0, sizeof(struct timeval)); \
+        memset(&total, 0, sizeof(struct timeval)); \
+        /* printf("%ld\n", val); */ \
+        fflush(stdout); \
+        pthread_barrier_wait (&barrier); \
+    } while (0)
+#endif
+
+#if !defined(OPAL_RESET_VAR)
+/* Zero the shared counter and wait for the other threads before the next phase. */
+#define OPAL_RESET_VAR(var) \
+    do { \
+        var = 0; \
+        pthread_barrier_wait (&barrier); \
+    } while (0)
+#endif
+
+/*
+ * Worker body: run each atomic primitive ITERATIONS times on the shared
+ * counters and report the elapsed time after every phase.
+ *
+ * NOTE(review): the compare-exchange loops pass the address of the loop
+ * index as the expected value; confirm the call may update it as intended.
+ */
+static void *thread_test (void *arg) {
+    struct timeval start, stop, total;
+    double timing;
+
+    (void) arg;
+
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_compare_exchange_strong_64(&var_64, &i, i+1);
+    }
+    OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_compare_exchange_strong_rel_64(&var_64, &i, i+1);
+    }
+    OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_rel_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_compare_exchange_strong_acq_64(&var_64, &i, i+1);
+    }
+    OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_acq_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_fetch_add_64(&var_64, 1);
+    }
+    OPAL_TEST_DONE("opal_atomic_fetch_add_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_fetch_sub_64(&var_64, 1);
+    }
+    OPAL_TEST_DONE("opal_atomic_fetch_sub_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_fetch_xor_64(&var_64, i);
+    }
+    OPAL_TEST_DONE("opal_atomic_fetch_xor_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_swap_64(&var_64, i);
+    }
+    OPAL_TEST_DONE("opal_atomic_swap_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+
+#if OPAL_HAVE_ATOMIC_LLSC_64
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        int ret;
+        opal_atomic_sc_64(&var_64, i, ret);
+    }
+    OPAL_TEST_DONE("opal_atomic_sc_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+
+    gettimeofday (&start, NULL);
+    for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
+        int64_t ret;
+        opal_atomic_ll_64(&var_64, ret);
+        (void) ret;
+    }
+    OPAL_TEST_DONE("opal_atomic_ll_64", var_64);
+
+    OPAL_RESET_VAR(var_64);
+#endif
+
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_compare_exchange_strong_32(&var_32, &i, i+1);
+    }
+    OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_compare_exchange_strong_rel_32(&var_32, &i, i+1);
+    }
+    OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_rel_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_compare_exchange_strong_acq_32(&var_32, &i, i+1);
+    }
+    OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_acq_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_fetch_add_32(&var_32, 1);
+    }
+    OPAL_TEST_DONE("opal_atomic_fetch_add_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_fetch_sub_32(&var_32, 1);
+    }
+    OPAL_TEST_DONE("opal_atomic_fetch_sub_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_fetch_xor_32(&var_32, i);
+    }
+    OPAL_TEST_DONE("opal_atomic_fetch_xor_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        opal_atomic_swap_32(&var_32, i);
+    }
+    OPAL_TEST_DONE("opal_atomic_swap_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+
+#if OPAL_HAVE_ATOMIC_LLSC_32
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        int ret;
+        opal_atomic_sc_32(&var_32, i, ret);
+    }
+    OPAL_TEST_DONE("opal_atomic_sc_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+
+    gettimeofday (&start, NULL);
+    for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
+        int32_t ret;
+        opal_atomic_ll_32(&var_32, ret);
+        (void) ret;
+    }
+    OPAL_TEST_DONE("opal_atomic_ll_32", var_32);
+
+    OPAL_RESET_VAR(var_32);
+#endif
+
+    return NULL;
+}
+
+/*
+ * Spawn OPAL_TEST_THREAD_COUNT workers and wait for them to finish.
+ * Returns 0 on success, 1 if the barrier or a thread cannot be created.
+ */
+int main(void) {
+    pthread_t ts[OPAL_TEST_THREAD_COUNT];
+    int rc;
+
+    rc = pthread_barrier_init (&barrier, NULL, OPAL_TEST_THREAD_COUNT);
+    if (0 != rc) {
+        fprintf (stderr, "pthread_barrier_init failed: %s\n", strerror (rc));
+        return 1;
+    }
+
+    for (int i = 0 ; i < OPAL_TEST_THREAD_COUNT ; i++) {
+        rc = pthread_create (&ts[i], NULL, &thread_test, NULL);
+        if (0 != rc) {
+            /* The barrier expects every worker, so a partial set would
+             * block forever; returning from main ends the process. */
+            fprintf (stderr, "pthread_create failed: %s\n", strerror (rc));
+            return 1;
+        }
+    }
+
+    for (int i = 0 ; i < OPAL_TEST_THREAD_COUNT ; i++) {
+        pthread_join (ts[i], NULL);
+    }
+
+    pthread_barrier_destroy (&barrier);
+    return 0;
+}