From 9dd2ce5fab2e83d39fa86444a98ab15177688ce5 Mon Sep 17 00:00:00 2001 From: Austen Lauria Date: Mon, 22 Feb 2021 13:34:58 -0500 Subject: [PATCH] xl: Use C11 builtin atomics if available. If unavailable, fall back to gcc builtins. Some configury work was needed to force include stdatomic.h. Currently xl doesn't have it in its search path. Tested with xl V16.1.1. Signed-off-by: Austen Lauria --- config/opal_setup_cc.m4 | 54 ++- ompi/mca/osc/rdma/osc_rdma_active_target.c | 14 + opal/include/opal/opal_portable_platform.h | 2 +- opal/include/opal/sys/atomic.h | 9 +- opal/include/opal/sys/atomic_stdc.h | 42 ++- opal/include/opal/sys/powerpc/Makefile.am | 1 - opal/include/opal/sys/powerpc/atomic.h | 408 --------------------- opal/include/opal/sys/powerpc/update.sh | 3 +- opal/include/opal_stdatomic.h | 16 + opal/mca/threads/thread_usage.h | 40 +- 10 files changed, 162 insertions(+), 427 deletions(-) delete mode 100644 opal/include/opal/sys/powerpc/atomic.h diff --git a/config/opal_setup_cc.m4 b/config/opal_setup_cc.m4 index c8f3bb7b8a8..699be65b2cd 100644 --- a/config/opal_setup_cc.m4 +++ b/config/opal_setup_cc.m4 @@ -47,10 +47,46 @@ AC_DEFUN([OPAL_PROG_CC_C11_HELPER],[ opal_prog_cc_c11_helper_CFLAGS_save=$CFLAGS CFLAGS="$CFLAGS $1" + OPAL_C_COMPILER_VENDOR([opal_c_vendor]) OPAL_CC_HELPER([if $CC $1 supports C11 _Thread_local], [opal_prog_cc_c11_helper__Thread_local_available], [],[[static _Thread_local int foo = 1;++foo;]]) + + OPAL_CC_HELPER([if $CC $1 has stdatomic.h], [opal_prog_cc_c11_helper_atomic_has_stdatomic_h], + [[#include <stdatomic.h>]], []) + if test $opal_prog_cc_c11_helper_atomic_has_stdatomic_h -eq 0; then + if test "$opal_cv_c_compiler_vendor" = "ibm"; then + AC_REQUIRE([AC_PROG_GREP]) + tmp=`which gcc` + if test $? 
-eq 0; then + stdatomic_include_path="" + AC_MSG_CHECKING([if gcc has stdatomic.h]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([ + #include <stdatomic.h> + int main() { } + ] + )]) + stdatomic_include_path=`gcc -M conftest.c | $GREP stdatomic.h` + if test -z "$stdatomic_include_path"; then + AC_MSG_WARN([stdatomic.h cannot be found. Fallback to C99 atomics.]) + else + stdatomic_include_path="${stdatomic_include_path#"${stdatomic_include_path%%[![:space:]]*}"}" + stdatomic_include_path=${stdatomic_include_path%stdatomic.h} + CFLAGS="$CFLAGS -I$stdatomic_include_path" + opal_prog_cc_c11_helper_CFLAGS_save="$opal_prog_cc_c11_helper_CFLAGS_save -I$stdatomic_include_path" + opal_prog_cc_c11_helper_atomic_has_stdatomic_h=1 + AC_MSG_RESULT([stdatomic.h is available]) + AC_MSG_RESULT([Adding $stdatomic_include_path to include path. Using C11 atomics.]) + fi + else + AC_MSG_WARN([No gcc found. Not checking for stdatomic.h include. Fallback to C99 atomics.]) + fi + else + AC_MSG_WARN([Not checking for gcc stdatomic.h include. 
Fallback to C99 atomics.]) + fi + fi + OPAL_CC_HELPER([if $CC $1 supports C11 atomic variables], [opal_prog_cc_c11_helper_atomic_var_available], [[#include <stdatomic.h>]], [[static atomic_long foo = 1;++foo;]]) @@ -65,9 +101,9 @@ AC_DEFUN([OPAL_PROG_CC_C11_HELPER],[ OPAL_CC_HELPER([if $CC $1 supports C11 atomic_fetch_xor_explicit], [opal_prog_cc_c11_helper_atomic_fetch_xor_explicit_available], [[#include <stdatomic.h> -#include <stdint.h>]],[[_Atomic uint32_t a; uint32_t b; atomic_fetch_xor_explicit(&a, b, memory_order_relaxed);]]) + #include <stdint.h>]],[[_Atomic uint32_t a; uint32_t b; atomic_fetch_xor_explicit(&a, b, memory_order_relaxed);]]) - AS_IF([test $opal_prog_cc_c11_helper__Thread_local_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_var_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_fetch_xor_explicit_available -eq 1], + AS_IF([test $opal_prog_cc_c11_helper__Thread_local_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_var_available -eq 1], [$2], [$3]) @@ -166,7 +202,7 @@ AC_DEFUN([OPAL_SETUP_CC],[ if test $opal_cv_c11_supported = no ; then # It is not currently an error if C11 support is not available. Uncomment the # following lines and update the warning when we require a C11 compiler. 
- # AC_MSG_WARNING([Open MPI requires a C11 (or newer) compiler]) + # AC_MSG_WARN([Open MPI requires a C11 (or newer) compiler]) # AC_MSG_ERROR([Aborting.]) # From Open MPI 1.7 on we require a C99 compiant compiler AC_PROG_CC_C99 @@ -287,16 +323,20 @@ AC_DEFUN([OPAL_SETUP_CC],[ OPAL_CFLAGS_BEFORE_PICKY="$CFLAGS" if test $WANT_PICKY_COMPILER -eq 1; then - _OPAL_CHECK_SPECIFIC_CFLAGS(-Wundef, Wundef) + if test "$opal_cv_c_compiler_vendor" != "portland group"; then + _OPAL_CHECK_SPECIFIC_CFLAGS(-Wundef, Wundef) + _OPAL_CHECK_SPECIFIC_CFLAGS(-Wmissing-prototypes, Wmissing_prototypes) + _OPAL_CHECK_SPECIFIC_CFLAGS(-Wstrict-prototypes, Wstrict_prototypes) + fi _OPAL_CHECK_SPECIFIC_CFLAGS(-Wno-long-long, Wno_long_long, int main() { long long x; }) _OPAL_CHECK_SPECIFIC_CFLAGS(-Wsign-compare, Wsign_compare) - _OPAL_CHECK_SPECIFIC_CFLAGS(-Wmissing-prototypes, Wmissing_prototypes) - _OPAL_CHECK_SPECIFIC_CFLAGS(-Wstrict-prototypes, Wstrict_prototypes) _OPAL_CHECK_SPECIFIC_CFLAGS(-Wcomment, Wcomment) _OPAL_CHECK_SPECIFIC_CFLAGS(-Werror-implicit-function-declaration, Werror_implicit_function_declaration) _OPAL_CHECK_SPECIFIC_CFLAGS(-Wno-long-double, Wno_long_double, int main() { long double x; }) _OPAL_CHECK_SPECIFIC_CFLAGS(-fno-strict-aliasing, fno_strict_aliasing, int main() { long double x; }) - _OPAL_CHECK_SPECIFIC_CFLAGS(-pedantic, pedantic) + if test "$opal_cv_c_compiler_vendor" != "ibm" && test "$opal_cv_c_compiler_vendor" != "portland group"; then + _OPAL_CHECK_SPECIFIC_CFLAGS(-pedantic, pedantic) + fi _OPAL_CHECK_SPECIFIC_CFLAGS(-Wall, Wall) fi diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index f6cf6bb2820..031ce31208c 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -240,6 +240,20 @@ static void ompi_osc_rdma_check_posts (ompi_osc_rdma_module_t *module) } } +#if defined(__ibmxl__) +// Work around an xl optimization bug that would cause the compiler to 
segv. +// +// xl doesn't like something about caching and checking the return value of +// ompi_osc_rdma_lock_compare_exchange() below when the opt-level is high. +// For now work around this bug by lowering the optimization on this function +// with xl compilers. +// +// Found on: +// $. xlc --version +// IBM XL C/C++ for Linux, V16.1.1 (5725-C73, 5765-J13) +// Version: 16.01.0001.0008 +#pragma option_override(ompi_osc_rdma_post_peer, "opt(level,0)") +#endif static int ompi_osc_rdma_post_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer) { uint64_t target = (uint64_t) (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, post_index); diff --git a/opal/include/opal/opal_portable_platform.h b/opal/include/opal/opal_portable_platform.h index 5eaa28998b3..0820b4c5f7e 100644 --- a/opal/include/opal/opal_portable_platform.h +++ b/opal/include/opal/opal_portable_platform.h @@ -180,7 +180,7 @@ # else # define PLATFORM_COMPILER_XLC_C 1 # endif -# define PLATFORM_COMPILER_VERSION __xlC__ +# define PLATFORM_COMPILER_VERSION __ibmxl_version__ # define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ ( ((maj) << 8) | ((min) << 4) | (pat) ) diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index e17212e9fee..4fdc2c150fa 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -97,6 +97,7 @@ BEGIN_C_DECLS * to use an int or unsigned char as the lock value - the user is not * informed either way. 
*/ +#ifndef OPAL_USE_ATOMIC_FLAG_OPAL_LOCK struct opal_atomic_lock_t { union { opal_atomic_int32_t lock; /**< The lock address (an integer) */ @@ -105,7 +106,7 @@ struct opal_atomic_lock_t { } u; }; typedef struct opal_atomic_lock_t opal_atomic_lock_t; - +#endif /********************************************************************** * * Set or unset these macros in the architecture-specific atomic.h @@ -164,7 +165,7 @@ enum { *********************************************************************/ #if defined(DOXYGEN) /* don't include system-level gorp when generating doxygen files */ -#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC +#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC || OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 || OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 #include "opal/sys/gcc_builtin/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 #include "opal/sys/x86_64/atomic.h" @@ -174,10 +175,6 @@ enum { #include "opal/sys/arm64/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 #include "opal/sys/ia32/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 -#include "opal/sys/powerpc/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 -#include "opal/sys/powerpc/atomic.h" #endif #ifndef DOXYGEN diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 1c31df8b7b2..ed2a75ef157 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -97,10 +97,28 @@ static inline void opal_atomic_rmb (void) #define opal_atomic_compare_exchange_strong_acq(addr, oldval, newval) atomic_compare_exchange_strong_explicit (addr, oldval, newval, memory_order_acquire, memory_order_relaxed) #define opal_atomic_compare_exchange_strong_rel(addr, oldval, newval) atomic_compare_exchange_strong_explicit (addr, oldval, newval, memory_order_release, memory_order_relaxed) +#if defined(__PGI) || defined(__ibmxl__) +#define opal_atomic_swap_32(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) +#define 
opal_atomic_swap_64(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) +#define opal_atomic_swap_ptr(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) +#else #define opal_atomic_swap_32(addr, value) atomic_exchange_explicit ((_Atomic unsigned int *)addr, value, memory_order_relaxed) #define opal_atomic_swap_64(addr, value) atomic_exchange_explicit ((_Atomic unsigned long *)addr, value, memory_order_relaxed) #define opal_atomic_swap_ptr(addr, value) atomic_exchange_explicit ((_Atomic unsigned long *)addr, value, memory_order_relaxed) +#endif +#if defined(__PGI) || defined(__ibmxl__) +#define OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ + static inline type opal_atomic_fetch_ ## op ##_## bits (opal_atomic_ ## type *addr, type value) \ + { \ + return atomic_fetch_ ## op ## _explicit ((type *)addr, value, memory_order_relaxed); \ + } \ + \ + static inline type opal_atomic_## op ## _fetch_ ## bits (opal_atomic_ ## type *addr, type value) \ + { \ + return atomic_fetch_ ## op ## _explicit ((type *)addr, value, memory_order_relaxed) operator value; \ + } +#else #define OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ static inline type opal_atomic_fetch_ ## op ##_## bits (opal_atomic_ ## type *addr, type value) \ { \ @@ -111,6 +129,7 @@ static inline void opal_atomic_rmb (void) { \ return atomic_fetch_ ## op ## _explicit (addr, value, memory_order_relaxed) operator value; \ } +#endif OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 32, int32_t, +) OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 64, int64_t, +) @@ -206,22 +225,39 @@ static inline int64_t opal_atomic_max_fetch_64 (opal_atomic_int64_t *addr, int64 #define OPAL_ATOMIC_LOCK_UNLOCKED false #define OPAL_ATOMIC_LOCK_LOCKED true -#define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT +#define OPAL_USE_C11_ATOMIC_LOCK 1 +#define OPAL_USE_ATOMIC_FLAG_OPAL_LOCK 1 +#if defined(__ibmxl__) || defined(__PGI) +#define OPAL_ATOMIC_LOCK_INIT 0 +typedef _Atomic bool 
opal_atomic_lock_t; +#else +#define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT typedef atomic_flag opal_atomic_lock_t; +#endif /* * Lock initialization function. It set the lock to UNLOCKED. */ static inline void opal_atomic_lock_init (opal_atomic_lock_t *lock, bool value) { +#if defined(__PGI) + atomic_flag_clear ((volatile void *) lock); +#else atomic_flag_clear (lock); +#endif } static inline int opal_atomic_trylock (opal_atomic_lock_t *lock) { + +#if defined(__PGI) + return (int) atomic_flag_test_and_set ((volatile void *) lock); +#else return (int) atomic_flag_test_and_set (lock); +#endif + } @@ -234,7 +270,11 @@ static inline void opal_atomic_lock(opal_atomic_lock_t *lock) static inline void opal_atomic_unlock (opal_atomic_lock_t *lock) { +#if defined(__PGI) + atomic_flag_clear ((volatile void *) lock); +#else atomic_flag_clear (lock); +#endif } diff --git a/opal/include/opal/sys/powerpc/Makefile.am b/opal/include/opal/sys/powerpc/Makefile.am index 612dd2e4d7f..cdff06b6bf0 100644 --- a/opal/include/opal/sys/powerpc/Makefile.am +++ b/opal/include/opal/sys/powerpc/Makefile.am @@ -19,5 +19,4 @@ # This makefile.am does not stand on its own - it is included from opal/include/Makefile.am headers += \ - opal/sys/powerpc/atomic.h \ opal/sys/powerpc/timer.h diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h deleted file mode 100644 index 393b92acba5..00000000000 --- a/opal/include/opal/sys/powerpc/atomic.h +++ /dev/null @@ -1,408 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010-2021 IBM Corporation. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_SYS_ARCH_ATOMIC_H -#define OPAL_SYS_ARCH_ATOMIC_H 1 - -/* - * On powerpc ... - */ - -#define MB() __asm__ __volatile__ ("sync" : : : "memory") -#define RMB() __asm__ __volatile__ ("lwsync" : : : "memory") -#define WMB() __asm__ __volatile__ ("lwsync" : : : "memory") -#define ISYNC() __asm__ __volatile__ ("isync" : : : "memory") - - -/********************************************************************** - * - * Define constants for PowerPC 32 - * - *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_SWAP_32 1 -#define OPAL_HAVE_ATOMIC_LLSC_32 1 - -#define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_ATOMIC_AND_32 1 -#define OPAL_HAVE_ATOMIC_OR_32 1 -#define OPAL_HAVE_ATOMIC_XOR_32 1 -#define OPAL_HAVE_ATOMIC_SUB_32 1 - - -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -#define OPAL_HAVE_ATOMIC_SWAP_64 1 -#define OPAL_HAVE_ATOMIC_LLSC_64 1 -#define OPAL_HAVE_ATOMIC_MATH_64 1 -#define OPAL_HAVE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_ATOMIC_AND_64 1 -#define OPAL_HAVE_ATOMIC_OR_64 1 -#define OPAL_HAVE_ATOMIC_XOR_64 1 -#define OPAL_HAVE_ATOMIC_SUB_64 1 -#endif - - -/********************************************************************** - * - * Memory Barriers - * - *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline -void opal_atomic_mb(void) -{ - MB(); -} - - -static inline -void opal_atomic_rmb(void) -{ - RMB(); -} - - -static 
inline -void opal_atomic_wmb(void) -{ - WMB(); -} - -static inline -void opal_atomic_isync(void) -{ - ISYNC(); -} - -#endif /* end OPAL_GCC_INLINE_ASSEMBLY */ - -/********************************************************************** - * - * Atomic math operations - * - *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -#if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) -/* work-around bizzare xlc bug in which it sign-extends - a pointer to a 32-bit signed integer */ -#define OPAL_ASM_ADDR(a) ((uintptr_t)a) -#else -#define OPAL_ASM_ADDR(a) (a) -#endif - -#if defined(__PGI) -/* work-around for bug in PGI 16.5-16.7 where the compiler fails to - * correctly emit load instructions for 64-bit operands. without this - * it will emit lwz instead of ld to load the 64-bit operand. */ -#define OPAL_ASM_VALUE64(x) (void *)(intptr_t) (x) -#else -#define OPAL_ASM_VALUE64(x) x -#endif - -static inline bool opal_atomic_compare_exchange_strong_32 (opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) -{ - int32_t prev; - bool ret; - - __asm__ __volatile__ ( - "1: lwarx %0, 0, %2 \n\t" - " cmpw 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stwcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (prev), "=m" (*addr) - : "r" OPAL_ASM_ADDR(addr), "r" (*oldval), "r" (newval), "m" (*addr) - : "cc", "memory"); - - ret = (prev == *oldval); - *oldval = prev; - return ret; -} - -/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason - * is that even with an always_inline attribute the compiler may still emit instructions to store then - * load the arguments to/from the stack. This sequence may cause the ll reservation to be cancelled. 
*/ -#define opal_atomic_ll_32(addr, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret; \ - __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" \ - : "=&r" (_ret) \ - : "r" (_addr) \ - ); \ - ret = (typeof(ret)) _ret; \ - } while (0) - -#define opal_atomic_sc_32(addr, value, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret, _foo, _newval = (int32_t) value; \ - \ - __asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r" (_ret), "=m" (*_addr), "=r" (_foo) \ - : "r" (_addr), "r" (_newval) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_32 nor - atomic_?mb can be inlined). Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ -static inline bool opal_atomic_compare_exchange_strong_acq_32 (opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) -{ - bool rc; - - rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline bool opal_atomic_compare_exchange_strong_rel_32 (opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); -} - -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t ret; - - __asm__ __volatile__ ("1: lwarx %0, 0, %2 \n\t" - " stwcx. 
%3, 0, %2 \n\t" - " bne- 1b \n\t" - : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (newval) - : "cc", "memory"); - - return ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - - -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) - -#if OPAL_GCC_INLINE_ASSEMBLY - -#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ -static inline int64_t opal_atomic_fetch_ ## type ## _64(opal_atomic_int64_t* v, int64_t val) \ -{ \ - int64_t t, old; \ - \ - __asm__ __volatile__( \ - "1: ldarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stdcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r" (t), "=&r" (old), "=m" (*v) \ - : "r" (OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m" (*v) \ - : "cc"); \ - \ - return old; \ -} - -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(and, and) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) - -static inline bool opal_atomic_compare_exchange_strong_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) -{ - int64_t prev; - bool ret; - - __asm__ __volatile__ ( - "1: ldarx %0, 0, %2 \n\t" - " cmpd 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stdcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (prev), "=m" (*addr) - : "r" (addr), "r" (OPAL_ASM_VALUE64(*oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) - : "cc", "memory"); - - ret = (prev == *oldval); - *oldval = prev; - return ret; -} - -#define opal_atomic_ll_64(addr, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _ret; \ - __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \ - : "=&r" (_ret) \ - : "r" (_addr) \ - ); \ - ret = (typeof(ret)) _ret; \ - } while (0) - -#define opal_atomic_sc_64(addr, value, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _newval = (int64_t) value; \ - int32_t _ret; \ - \ - __asm__ __volatile__ (" stdcx. 
%2, 0, %1 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r" (_ret) \ - : "r" (_addr), "r" (OPAL_ASM_VALUE64(_newval)) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ - int64_t ret; - - __asm__ __volatile__ ("1: ldarx %0, 0, %2 \n\t" - " stdcx. %3, 0, %2 \n\t" - " bne- 1b \n\t" - : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (OPAL_ASM_VALUE64(newval)) - : "cc", "memory"); - - return ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) && OPAL_ASM_SUPPORT_64BIT - -#ifndef ll_low /* GLIBC provides these somewhere, so protect */ -#define ll_low(x) *(((unsigned int*)&(x))+0) -#define ll_high(x) *(((unsigned int*)&(x))+1) -#endif - -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline bool opal_atomic_compare_exchange_strong_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) -{ - int64_t prev; - int ret; - - /* - * We force oldval and newval into memory because PPC doesn't - * appear to have a way to do a move register with offset. Since - * this is 32-bit code, a 64 bit integer will be loaded into two - * registers (assuming no inlining, addr will be in r3, oldval - * will be in r4 and r5, and newval will be r6 and r7. We need - * to load the whole thing into one register. So we have the - * compiler push the values into memory and load the double word - * into registers. We use r4,r5 so that the main block of code - * is very similar to the pure 64 bit version. - */ - __asm__ __volatile__ ( - "ld r4,%3 \n\t" - "ld r5,%4 \n\t" - "1: ldarx %1, 0, %2 \n\t" - " cmpd 0, %1, r4 \n\t" - " bne- 2f \n\t" - " stdcx. 
r5, 0, %2 \n\t" - " bne- 1b \n\t" - "2: \n\t" - "xor r5,r4,%1 \n\t" - "subfic r9,r5,0 \n\t" - "adde %0,r9,r5 \n\t" - : "=&r" (ret), "+r" (prev) - : "r"OPAL_ASM_ADDR(addr), - "m"(*oldval), "m"(newval) - : "r4", "r5", "r9", "cc", "memory"); - *oldval = prev; - return (bool) ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* OPAL_ASM_SUPPORT_64BIT */ - -#if OPAL_GCC_INLINE_ASSEMBLY - -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_64 nor - atomic_?mb can be inlined). Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ -static inline bool opal_atomic_compare_exchange_strong_acq_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) -{ - bool rc; - - rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline bool opal_atomic_compare_exchange_strong_rel_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); -} - - -#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ -static inline int32_t opal_atomic_fetch_ ## type ## _32(opal_atomic_int32_t* v, int val) \ -{ \ - int32_t t, old; \ - \ - __asm__ __volatile__( \ - "1: lwarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stwcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r" (t), "=&r" (old), "=m" (*v) \ - : "r" (val), "r" OPAL_ASM_ADDR(v), "m" (*v) \ - : "cc"); \ - \ - return old; \ -} - -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(add, add) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(and, and) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(or, or) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(xor, xor) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(sub, subf) - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/powerpc/update.sh b/opal/include/opal/sys/powerpc/update.sh index 095868d4fb5..7e8d9ee1997 100644 --- a/opal/include/opal/sys/powerpc/update.sh +++ b/opal/include/opal/sys/powerpc/update.sh @@ -21,7 +21,7 @@ CFILE=/tmp/opal_asm_$$.c trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 -echo Updating asm.s from atomic.h and timer.h using gcc +echo Updating asm.s from timer.h using gcc cat > $CFILE< @@ -30,7 +30,6 @@ cat > $CFILE<