diff --git a/config/opal_setup_cc.m4 b/config/opal_setup_cc.m4 index c8f3bb7b8a8..699be65b2cd 100644 --- a/config/opal_setup_cc.m4 +++ b/config/opal_setup_cc.m4 @@ -47,10 +47,46 @@ AC_DEFUN([OPAL_PROG_CC_C11_HELPER],[ opal_prog_cc_c11_helper_CFLAGS_save=$CFLAGS CFLAGS="$CFLAGS $1" + OPAL_C_COMPILER_VENDOR([opal_c_vendor]) OPAL_CC_HELPER([if $CC $1 supports C11 _Thread_local], [opal_prog_cc_c11_helper__Thread_local_available], [],[[static _Thread_local int foo = 1;++foo;]]) + + OPAL_CC_HELPER([if $CC $1 has stdatomic.h], [opal_prog_cc_c11_helper_atomic_has_stdatomic_h], + [[#include <stdatomic.h>]], []) + if test $opal_prog_cc_c11_helper_atomic_has_stdatomic_h -eq 0; then + if test "$opal_cv_c_compiler_vendor" = "ibm"; then + AC_REQUIRE([AC_PROG_GREP]) + tmp=`which gcc` + if test $? -eq 0; then + stdatomic_include_path="" + AC_MSG_CHECKING([if gcc has stdatomic.h]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([ + #include <stdatomic.h> + int main() { } + ] + )]) + stdatomic_include_path=`gcc -M conftest.c | $GREP stdatomic.h` + if test -z "$stdatomic_include_path"; then + AC_MSG_WARN([stdatomic.h cannot be found. Fallback to C99 atomics.]) + else + stdatomic_include_path="${stdatomic_include_path#"${stdatomic_include_path%%[![:space:]]*}"}" + stdatomic_include_path=${stdatomic_include_path%stdatomic.h} + CFLAGS="$CFLAGS -I$stdatomic_include_path" + opal_prog_cc_c11_helper_CFLAGS_save="$opal_prog_cc_c11_helper_CFLAGS_save -I$stdatomic_include_path" + opal_prog_cc_c11_helper_atomic_has_stdatomic_h=1 + AC_MSG_RESULT([stdatomic.h is available]) + AC_MSG_RESULT([Adding $stdatomic_include_path to include path. Using C11 atomics.]) + fi + else + AC_MSG_WARN([No gcc found. Not checking for stdatomic.h include. Fallback to C99 atomics.]) + fi + else + AC_MSG_WARN([Not checking for gcc stdatomic.h include. 
Fallback to C99 atomics.]) + fi + fi + OPAL_CC_HELPER([if $CC $1 supports C11 atomic variables], [opal_prog_cc_c11_helper_atomic_var_available], [[#include <stdatomic.h>]], [[static atomic_long foo = 1;++foo;]]) @@ -65,9 +101,9 @@ AC_DEFUN([OPAL_PROG_CC_C11_HELPER],[ OPAL_CC_HELPER([if $CC $1 supports C11 atomic_fetch_xor_explicit], [opal_prog_cc_c11_helper_atomic_fetch_xor_explicit_available], [[#include <stdatomic.h> -#include <stdint.h>]],[[_Atomic uint32_t a; uint32_t b; atomic_fetch_xor_explicit(&a, b, memory_order_relaxed);]]) + #include <stdint.h>]],[[_Atomic uint32_t a; uint32_t b; atomic_fetch_xor_explicit(&a, b, memory_order_relaxed);]]) - AS_IF([test $opal_prog_cc_c11_helper__Thread_local_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_var_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_fetch_xor_explicit_available -eq 1], + AS_IF([test $opal_prog_cc_c11_helper__Thread_local_available -eq 1 && test $opal_prog_cc_c11_helper_atomic_var_available -eq 1], [$2], [$3]) @@ -166,7 +202,7 @@ AC_DEFUN([OPAL_SETUP_CC],[ if test $opal_cv_c11_supported = no ; then # It is not currently an error if C11 support is not available. Uncomment the # following lines and update the warning when we require a C11 compiler. 
- # AC_MSG_WARNING([Open MPI requires a C11 (or newer) compiler]) + # AC_MSG_WARN([Open MPI requires a C11 (or newer) compiler]) # AC_MSG_ERROR([Aborting.]) # From Open MPI 1.7 on we require a C99 compiant compiler AC_PROG_CC_C99 @@ -287,16 +323,20 @@ AC_DEFUN([OPAL_SETUP_CC],[ OPAL_CFLAGS_BEFORE_PICKY="$CFLAGS" if test $WANT_PICKY_COMPILER -eq 1; then - _OPAL_CHECK_SPECIFIC_CFLAGS(-Wundef, Wundef) + if test "$opal_cv_c_compiler_vendor" != "portland group"; then + _OPAL_CHECK_SPECIFIC_CFLAGS(-Wundef, Wundef) + _OPAL_CHECK_SPECIFIC_CFLAGS(-Wmissing-prototypes, Wmissing_prototypes) + _OPAL_CHECK_SPECIFIC_CFLAGS(-Wstrict-prototypes, Wstrict_prototypes) + fi _OPAL_CHECK_SPECIFIC_CFLAGS(-Wno-long-long, Wno_long_long, int main() { long long x; }) _OPAL_CHECK_SPECIFIC_CFLAGS(-Wsign-compare, Wsign_compare) - _OPAL_CHECK_SPECIFIC_CFLAGS(-Wmissing-prototypes, Wmissing_prototypes) - _OPAL_CHECK_SPECIFIC_CFLAGS(-Wstrict-prototypes, Wstrict_prototypes) _OPAL_CHECK_SPECIFIC_CFLAGS(-Wcomment, Wcomment) _OPAL_CHECK_SPECIFIC_CFLAGS(-Werror-implicit-function-declaration, Werror_implicit_function_declaration) _OPAL_CHECK_SPECIFIC_CFLAGS(-Wno-long-double, Wno_long_double, int main() { long double x; }) _OPAL_CHECK_SPECIFIC_CFLAGS(-fno-strict-aliasing, fno_strict_aliasing, int main() { long double x; }) - _OPAL_CHECK_SPECIFIC_CFLAGS(-pedantic, pedantic) + if test "$opal_cv_c_compiler_vendor" != "ibm" && test "$opal_cv_c_compiler_vendor" != "portland group"; then + _OPAL_CHECK_SPECIFIC_CFLAGS(-pedantic, pedantic) + fi _OPAL_CHECK_SPECIFIC_CFLAGS(-Wall, Wall) fi diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index f6cf6bb2820..031ce31208c 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -240,6 +240,20 @@ static void ompi_osc_rdma_check_posts (ompi_osc_rdma_module_t *module) } } +#if defined(__ibmxl__) +// Work around an xl optimization bug that would cause the compiler to 
segv. +// +// xl doesn't like something about caching and checking the return value of +// ompi_osc_rdma_lock_compare_exchange() below when the opt-level is high. +// For now work around this bug by lowering the optimization on this function +// with xl compilers. +// +// Found on: +// $. xlc --version +// IBM XL C/C++ for Linux, V16.1.1 (5725-C73, 5765-J13) +// Version: 16.01.0001.0008 +#pragma option_override(ompi_osc_rdma_post_peer, "opt(level,0)") +#endif static int ompi_osc_rdma_post_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer) { uint64_t target = (uint64_t) (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, post_index); diff --git a/opal/include/opal/opal_portable_platform.h b/opal/include/opal/opal_portable_platform.h index 5eaa28998b3..0820b4c5f7e 100644 --- a/opal/include/opal/opal_portable_platform.h +++ b/opal/include/opal/opal_portable_platform.h @@ -180,7 +180,7 @@ # else # define PLATFORM_COMPILER_XLC_C 1 # endif -# define PLATFORM_COMPILER_VERSION __xlC__ +# define PLATFORM_COMPILER_VERSION __ibmxl_version__ # define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ ( ((maj) << 8) | ((min) << 4) | (pat) ) diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index e17212e9fee..4fdc2c150fa 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -97,6 +97,7 @@ BEGIN_C_DECLS * to use an int or unsigned char as the lock value - the user is not * informed either way. 
*/ +#ifndef OPAL_USE_ATOMIC_FLAG_OPAL_LOCK struct opal_atomic_lock_t { union { opal_atomic_int32_t lock; /**< The lock address (an integer) */ @@ -105,7 +106,7 @@ struct opal_atomic_lock_t { } u; }; typedef struct opal_atomic_lock_t opal_atomic_lock_t; - +#endif /********************************************************************** * * Set or unset these macros in the architecture-specific atomic.h @@ -164,7 +165,7 @@ enum { *********************************************************************/ #if defined(DOXYGEN) /* don't include system-level gorp when generating doxygen files */ -#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC +#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC || OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 || OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 #include "opal/sys/gcc_builtin/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 #include "opal/sys/x86_64/atomic.h" @@ -174,10 +175,6 @@ enum { #include "opal/sys/arm64/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 #include "opal/sys/ia32/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 -#include "opal/sys/powerpc/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 -#include "opal/sys/powerpc/atomic.h" #endif #ifndef DOXYGEN diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 1c31df8b7b2..ed2a75ef157 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -97,10 +97,28 @@ static inline void opal_atomic_rmb (void) #define opal_atomic_compare_exchange_strong_acq(addr, oldval, newval) atomic_compare_exchange_strong_explicit (addr, oldval, newval, memory_order_acquire, memory_order_relaxed) #define opal_atomic_compare_exchange_strong_rel(addr, oldval, newval) atomic_compare_exchange_strong_explicit (addr, oldval, newval, memory_order_release, memory_order_relaxed) +#if defined(__PGI) || defined(__ibmxl__) +#define opal_atomic_swap_32(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) +#define 
opal_atomic_swap_64(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) +#define opal_atomic_swap_ptr(addr, value) atomic_exchange_explicit (addr, value, memory_order_relaxed) +#else #define opal_atomic_swap_32(addr, value) atomic_exchange_explicit ((_Atomic unsigned int *)addr, value, memory_order_relaxed) #define opal_atomic_swap_64(addr, value) atomic_exchange_explicit ((_Atomic unsigned long *)addr, value, memory_order_relaxed) #define opal_atomic_swap_ptr(addr, value) atomic_exchange_explicit ((_Atomic unsigned long *)addr, value, memory_order_relaxed) +#endif +#if defined(__PGI) || defined(__ibmxl__) +#define OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ + static inline type opal_atomic_fetch_ ## op ##_## bits (opal_atomic_ ## type *addr, type value) \ + { \ + return atomic_fetch_ ## op ## _explicit ((type *)addr, value, memory_order_relaxed); \ + } \ + \ + static inline type opal_atomic_## op ## _fetch_ ## bits (opal_atomic_ ## type *addr, type value) \ + { \ + return atomic_fetch_ ## op ## _explicit ((type *)addr, value, memory_order_relaxed) operator value; \ + } +#else #define OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ static inline type opal_atomic_fetch_ ## op ##_## bits (opal_atomic_ ## type *addr, type value) \ { \ @@ -111,6 +129,7 @@ static inline void opal_atomic_rmb (void) { \ return atomic_fetch_ ## op ## _explicit (addr, value, memory_order_relaxed) operator value; \ } +#endif OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 32, int32_t, +) OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 64, int64_t, +) @@ -206,22 +225,39 @@ static inline int64_t opal_atomic_max_fetch_64 (opal_atomic_int64_t *addr, int64 #define OPAL_ATOMIC_LOCK_UNLOCKED false #define OPAL_ATOMIC_LOCK_LOCKED true -#define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT +#define OPAL_USE_C11_ATOMIC_LOCK 1 +#define OPAL_USE_ATOMIC_FLAG_OPAL_LOCK 1 +#if defined(__ibmxl__) || defined(__PGI) +#define OPAL_ATOMIC_LOCK_INIT 0 +typedef _Atomic bool 
opal_atomic_lock_t; +#else +#define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT typedef atomic_flag opal_atomic_lock_t; +#endif /* * Lock initialization function. It set the lock to UNLOCKED. */ static inline void opal_atomic_lock_init (opal_atomic_lock_t *lock, bool value) { +#if defined(__PGI) + atomic_flag_clear ((volatile void *) lock); +#else atomic_flag_clear (lock); +#endif } static inline int opal_atomic_trylock (opal_atomic_lock_t *lock) { + +#if defined(__PGI) + return (int) atomic_flag_test_and_set ((volatile void *) lock); +#else return (int) atomic_flag_test_and_set (lock); +#endif + } @@ -234,7 +270,11 @@ static inline void opal_atomic_lock(opal_atomic_lock_t *lock) static inline void opal_atomic_unlock (opal_atomic_lock_t *lock) { +#if defined(__PGI) + atomic_flag_clear ((volatile void *) lock); +#else atomic_flag_clear (lock); +#endif } diff --git a/opal/include/opal/sys/powerpc/Makefile.am b/opal/include/opal/sys/powerpc/Makefile.am index 612dd2e4d7f..cdff06b6bf0 100644 --- a/opal/include/opal/sys/powerpc/Makefile.am +++ b/opal/include/opal/sys/powerpc/Makefile.am @@ -19,5 +19,4 @@ # This makefile.am does not stand on its own - it is included from opal/include/Makefile.am headers += \ - opal/sys/powerpc/atomic.h \ opal/sys/powerpc/timer.h diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h deleted file mode 100644 index 393b92acba5..00000000000 --- a/opal/include/opal/sys/powerpc/atomic.h +++ /dev/null @@ -1,408 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010-2021 IBM Corporation. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_SYS_ARCH_ATOMIC_H -#define OPAL_SYS_ARCH_ATOMIC_H 1 - -/* - * On powerpc ... - */ - -#define MB() __asm__ __volatile__ ("sync" : : : "memory") -#define RMB() __asm__ __volatile__ ("lwsync" : : : "memory") -#define WMB() __asm__ __volatile__ ("lwsync" : : : "memory") -#define ISYNC() __asm__ __volatile__ ("isync" : : : "memory") - - -/********************************************************************** - * - * Define constants for PowerPC 32 - * - *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_SWAP_32 1 -#define OPAL_HAVE_ATOMIC_LLSC_32 1 - -#define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_ATOMIC_AND_32 1 -#define OPAL_HAVE_ATOMIC_OR_32 1 -#define OPAL_HAVE_ATOMIC_XOR_32 1 -#define OPAL_HAVE_ATOMIC_SUB_32 1 - - -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -#define OPAL_HAVE_ATOMIC_SWAP_64 1 -#define OPAL_HAVE_ATOMIC_LLSC_64 1 -#define OPAL_HAVE_ATOMIC_MATH_64 1 -#define OPAL_HAVE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_ATOMIC_AND_64 1 -#define OPAL_HAVE_ATOMIC_OR_64 1 -#define OPAL_HAVE_ATOMIC_XOR_64 1 -#define OPAL_HAVE_ATOMIC_SUB_64 1 -#endif - - -/********************************************************************** - * - * Memory Barriers - * - *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline -void opal_atomic_mb(void) -{ - MB(); -} - - -static inline -void opal_atomic_rmb(void) -{ - RMB(); -} - - -static 
inline -void opal_atomic_wmb(void) -{ - WMB(); -} - -static inline -void opal_atomic_isync(void) -{ - ISYNC(); -} - -#endif /* end OPAL_GCC_INLINE_ASSEMBLY */ - -/********************************************************************** - * - * Atomic math operations - * - *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - -#if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) -/* work-around bizzare xlc bug in which it sign-extends - a pointer to a 32-bit signed integer */ -#define OPAL_ASM_ADDR(a) ((uintptr_t)a) -#else -#define OPAL_ASM_ADDR(a) (a) -#endif - -#if defined(__PGI) -/* work-around for bug in PGI 16.5-16.7 where the compiler fails to - * correctly emit load instructions for 64-bit operands. without this - * it will emit lwz instead of ld to load the 64-bit operand. */ -#define OPAL_ASM_VALUE64(x) (void *)(intptr_t) (x) -#else -#define OPAL_ASM_VALUE64(x) x -#endif - -static inline bool opal_atomic_compare_exchange_strong_32 (opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) -{ - int32_t prev; - bool ret; - - __asm__ __volatile__ ( - "1: lwarx %0, 0, %2 \n\t" - " cmpw 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stwcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (prev), "=m" (*addr) - : "r" OPAL_ASM_ADDR(addr), "r" (*oldval), "r" (newval), "m" (*addr) - : "cc", "memory"); - - ret = (prev == *oldval); - *oldval = prev; - return ret; -} - -/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason - * is that even with an always_inline attribute the compiler may still emit instructions to store then - * load the arguments to/from the stack. This sequence may cause the ll reservation to be cancelled. 
*/ -#define opal_atomic_ll_32(addr, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret; \ - __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" \ - : "=&r" (_ret) \ - : "r" (_addr) \ - ); \ - ret = (typeof(ret)) _ret; \ - } while (0) - -#define opal_atomic_sc_32(addr, value, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret, _foo, _newval = (int32_t) value; \ - \ - __asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r" (_ret), "=m" (*_addr), "=r" (_foo) \ - : "r" (_addr), "r" (_newval) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_32 nor - atomic_?mb can be inlined). Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ -static inline bool opal_atomic_compare_exchange_strong_acq_32 (opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) -{ - bool rc; - - rc = opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline bool opal_atomic_compare_exchange_strong_rel_32 (opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_compare_exchange_strong_32 (addr, oldval, newval); -} - -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t ret; - - __asm__ __volatile__ ("1: lwarx %0, 0, %2 \n\t" - " stwcx. 
%3, 0, %2 \n\t" - " bne- 1b \n\t" - : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (newval) - : "cc", "memory"); - - return ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - - -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) - -#if OPAL_GCC_INLINE_ASSEMBLY - -#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ -static inline int64_t opal_atomic_fetch_ ## type ## _64(opal_atomic_int64_t* v, int64_t val) \ -{ \ - int64_t t, old; \ - \ - __asm__ __volatile__( \ - "1: ldarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stdcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r" (t), "=&r" (old), "=m" (*v) \ - : "r" (OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m" (*v) \ - : "cc"); \ - \ - return old; \ -} - -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(and, and) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) - -static inline bool opal_atomic_compare_exchange_strong_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) -{ - int64_t prev; - bool ret; - - __asm__ __volatile__ ( - "1: ldarx %0, 0, %2 \n\t" - " cmpd 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stdcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r" (prev), "=m" (*addr) - : "r" (addr), "r" (OPAL_ASM_VALUE64(*oldval)), "r" (OPAL_ASM_VALUE64(newval)), "m" (*addr) - : "cc", "memory"); - - ret = (prev == *oldval); - *oldval = prev; - return ret; -} - -#define opal_atomic_ll_64(addr, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _ret; \ - __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \ - : "=&r" (_ret) \ - : "r" (_addr) \ - ); \ - ret = (typeof(ret)) _ret; \ - } while (0) - -#define opal_atomic_sc_64(addr, value, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _newval = (int64_t) value; \ - int32_t _ret; \ - \ - __asm__ __volatile__ (" stdcx. 
%2, 0, %1 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r" (_ret) \ - : "r" (_addr), "r" (OPAL_ASM_VALUE64(_newval)) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ - int64_t ret; - - __asm__ __volatile__ ("1: ldarx %0, 0, %2 \n\t" - " stdcx. %3, 0, %2 \n\t" - " bne- 1b \n\t" - : "=&r" (ret), "=m" (*addr) - : "r" (addr), "r" (OPAL_ASM_VALUE64(newval)) - : "cc", "memory"); - - return ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) && OPAL_ASM_SUPPORT_64BIT - -#ifndef ll_low /* GLIBC provides these somewhere, so protect */ -#define ll_low(x) *(((unsigned int*)&(x))+0) -#define ll_high(x) *(((unsigned int*)&(x))+1) -#endif - -#if OPAL_GCC_INLINE_ASSEMBLY - -static inline bool opal_atomic_compare_exchange_strong_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) -{ - int64_t prev; - int ret; - - /* - * We force oldval and newval into memory because PPC doesn't - * appear to have a way to do a move register with offset. Since - * this is 32-bit code, a 64 bit integer will be loaded into two - * registers (assuming no inlining, addr will be in r3, oldval - * will be in r4 and r5, and newval will be r6 and r7. We need - * to load the whole thing into one register. So we have the - * compiler push the values into memory and load the double word - * into registers. We use r4,r5 so that the main block of code - * is very similar to the pure 64 bit version. - */ - __asm__ __volatile__ ( - "ld r4,%3 \n\t" - "ld r5,%4 \n\t" - "1: ldarx %1, 0, %2 \n\t" - " cmpd 0, %1, r4 \n\t" - " bne- 2f \n\t" - " stdcx. 
r5, 0, %2 \n\t" - " bne- 1b \n\t" - "2: \n\t" - "xor r5,r4,%1 \n\t" - "subfic r9,r5,0 \n\t" - "adde %0,r9,r5 \n\t" - : "=&r" (ret), "+r" (prev) - : "r"OPAL_ASM_ADDR(addr), - "m"(*oldval), "m"(newval) - : "r4", "r5", "r9", "cc", "memory"); - *oldval = prev; - return (bool) ret; -} - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* OPAL_ASM_SUPPORT_64BIT */ - -#if OPAL_GCC_INLINE_ASSEMBLY - -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_64 nor - atomic_?mb can be inlined). Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ -static inline bool opal_atomic_compare_exchange_strong_acq_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) -{ - bool rc; - - rc = opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - - -static inline bool opal_atomic_compare_exchange_strong_rel_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_compare_exchange_strong_64 (addr, oldval, newval); -} - - -#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ -static inline int32_t opal_atomic_fetch_ ## type ## _32(opal_atomic_int32_t* v, int val) \ -{ \ - int32_t t, old; \ - \ - __asm__ __volatile__( \ - "1: lwarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stwcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r" (t), "=&r" (old), "=m" (*v) \ - : "r" (val), "r" OPAL_ASM_ADDR(v), "m" (*v) \ - : "cc"); \ - \ - return old; \ -} - -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(add, add) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(and, and) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(or, or) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(xor, xor) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(sub, subf) - -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/powerpc/update.sh b/opal/include/opal/sys/powerpc/update.sh index 095868d4fb5..7e8d9ee1997 100644 --- a/opal/include/opal/sys/powerpc/update.sh +++ b/opal/include/opal/sys/powerpc/update.sh @@ -21,7 +21,7 @@ CFILE=/tmp/opal_asm_$$.c trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 -echo Updating asm.s from atomic.h and timer.h using gcc +echo Updating asm.s from timer.h using gcc cat > $CFILE< @@ -30,7 +30,6 @@ cat > $CFILE<