diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index 89b90329383..3cc88393cd1 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -14,6 +14,8 @@ * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reseved. + * Copyright (c) 2021 Triad National Security, LLC. All rights reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -76,7 +78,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo ) return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost; } -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR /* Add one element to the FIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index 99b90be10a2..4c34fbe5f5c 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -16,6 +16,8 @@ * reseved. * Copyright (c) 2016-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,7 +52,7 @@ union opal_counted_pointer_t { /** list item pointer */ volatile opal_atomic_intptr_t item; } data; -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T && !OPAL_HAVE_ATOMIC_LLSC_PTR /** used for atomics when there is a cmpset that can operate on * two 64-bit values */ opal_atomic_int128_t atomic_value; @@ -60,7 +62,7 @@ union opal_counted_pointer_t { typedef union opal_counted_pointer_t opal_counted_pointer_t; -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR /* Add one element to the FIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the @@ -136,7 +138,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo ) } -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR /* Add one element to the LIFO. We will return the last head of the list * to allow the upper level to detect if this element is the first one in the diff --git a/opal/include/opal/sys/arm64/Makefile.am b/opal/include/opal/sys/arm64/Makefile.am index 33db6ecb014..33468ac1887 100644 --- a/opal/include/opal/sys/arm64/Makefile.am +++ b/opal/include/opal/sys/arm64/Makefile.am @@ -9,6 +9,8 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2021 Triad National Security, LLC. All rights reserved. +# Copyright (c) 2021 Google, LLC. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -20,5 +22,6 @@ headers += \ opal/sys/arm64/atomic.h \ + opal/sys/arm64/atomic_llsc.h \ opal/sys/arm64/timer.h diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 76409623b16..8495901ace1 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -14,6 +14,8 @@ * Copyright (c) 2010 ARM ltd. All rights reserved. * Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,6 +23,8 @@ * $HEADER$ */ +#include "atomic_llsc.h" + #if !defined(OPAL_SYS_ARCH_ATOMIC_H) #define OPAL_SYS_ARCH_ATOMIC_H 1 @@ -28,13 +32,11 @@ #if OPAL_GCC_INLINE_ASSEMBLY #define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_LLSC_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_MATH_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 -#define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1 @@ -162,32 +164,6 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32 (opal_atomic_int32 return ret; } -#define opal_atomic_ll_32(addr, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret; \ - \ - __asm__ __volatile__ ("ldaxr %w0, [%1] \n" \ - : "=&r" (_ret) \ - : "r" (_addr)); \ - \ - ret = (typeof(ret)) _ret; \ - } while (0) - -#define opal_atomic_sc_32(addr, newval, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _newval = (int32_t) newval; \ - int _ret; \ - \ - __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \ - : "=&r" (_ret) \ - : "r" (_addr), "r" (_newval) \ - : "cc", "memory"); \ - \ - ret = (_ret == 0); \ - } while (0) - static 
inline bool opal_atomic_compare_exchange_strong_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { int64_t prev; @@ -272,32 +248,6 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (opal_atomic_int64 return ret; } -#define opal_atomic_ll_64(addr, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _ret; \ - \ - __asm__ __volatile__ ("ldaxr %0, [%1] \n" \ - : "=&r" (_ret) \ - : "r" (_addr)); \ - \ - ret = (typeof(ret)) _ret; \ - } while (0) - -#define opal_atomic_sc_64(addr, newval, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _newval = (int64_t) newval; \ - int _ret; \ - \ - __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \ - : "=&r" (_ret) \ - : "r" (_addr), "r" (_newval) \ - : "cc", "memory"); \ - \ - ret = (_ret == 0); \ - } while (0) - #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ static inline type opal_atomic_fetch_ ## name ## _ ## bits (opal_atomic_ ## type *addr, type value) \ { \ diff --git a/opal/include/opal/sys/arm64/atomic_llsc.h b/opal/include/opal/sys/arm64/atomic_llsc.h new file mode 100644 index 00000000000..56dff4934c3 --- /dev/null +++ b/opal/include/opal/sys/arm64/atomic_llsc.h @@ -0,0 +1,92 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010 ARM ltd. All rights reserved. + * Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights + * reserved. 
+ * Copyright (c) 2021 Triad National Security, LLC. All rights reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OPAL_SYS_ARCH_ATOMIC_LLSC_H) + +#define OPAL_SYS_ARCH_ATOMIC_LLSC_H + +#if OPAL_C_GCC_INLINE_ASSEMBLY + +#undef OPAL_HAVE_ATOMIC_LLSC_32 +#undef OPAL_HAVE_ATOMIC_LLSC_64 + +#define OPAL_HAVE_ATOMIC_LLSC_32 1 +#define OPAL_HAVE_ATOMIC_LLSC_64 1 + +#define opal_atomic_ll_32(addr, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + int32_t _ret; \ + \ + __asm__ __volatile__ ("ldaxr %w0, [%1] \n" \ + : "=&r" (_ret) \ + : "r" (_addr)); \ + \ + ret = (typeof(ret)) _ret; \ + } while (0) + +#define opal_atomic_sc_32(addr, newval, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + int32_t _newval = (int32_t) newval; \ + int _ret; \ + \ + __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \ + : "=&r" (_ret) \ + : "r" (_addr), "r" (_newval) \ + : "cc", "memory"); \ + \ + ret = (_ret == 0); \ + } while (0) + +#define opal_atomic_ll_64(addr, ret) \ + do { \ + opal_atomic_int64_t *_addr = (addr); \ + int64_t _ret; \ + \ + __asm__ __volatile__ ("ldaxr %0, [%1] \n" \ + : "=&r" (_ret) \ + : "r" (_addr)); \ + \ + ret = (typeof(ret)) _ret; \ + } while (0) + +#define opal_atomic_sc_64(addr, newval, ret) \ + do { \ + opal_atomic_int64_t *_addr = (addr); \ + int64_t _newval = (int64_t) newval; \ + int _ret; \ + \ + __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \ + : "=&r" (_ret) \ + : "r" (_addr), "r" (_newval) \ + : "cc", "memory"); \ + \ + ret = (_ret == 0); \ + } while (0) + +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ + +#endif /* ! 
OPAL_SYS_ARCH_ATOMIC_LLSC_H */ diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 8e396384e6e..e17212e9fee 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -59,12 +59,6 @@ #include "opal/sys/architecture.h" #include "opal_stdatomic.h" -#if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER) - -#include "atomic_stdc.h" - -#else /* !OPAL_C_HAVE__ATOMIC */ - /* do some quick #define cleanup in cases where we are doing testing... */ #ifdef OPAL_DISABLE_INLINE_ASM @@ -72,6 +66,12 @@ #define OPAL_C_GCC_INLINE_ASSEMBLY 0 #endif +#if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER) + +#include "atomic_stdc.h" + +#else /* !OPAL_C_HAVE__ATOMIC */ + /* define OPAL_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the OPAL_C_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we are in C or C++ */ @@ -642,6 +642,48 @@ static inline intptr_t opal_atomic_fetch_sub_ptr( opal_atomic_intptr_t* addr, vo #endif /* !OPAL_C_HAVE__ATOMIC */ +/****** load-linked, store-conditional atomic implementations ******/ + +/* C11 atomics do not expose the low-level load-linked, store-conditional + * instructions. Open MPI can use these instructions to implement a more + * efficient version of the lock-free lifo and fifo. On Apple Silicon the + * LL/SC fifo and lifo are ~ 2-20x faster than the CAS128 implementation. 
*/ +#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#include "opal/sys/arm64/atomic_llsc.h" +#endif + +#if !defined(OPAL_HAVE_ATOMIC_LLSC_32) +#define OPAL_HAVE_ATOMIC_LLSC_32 0 +#endif + +#if !defined(OPAL_HAVE_ATOMIC_LLSC_64) +#define OPAL_HAVE_ATOMIC_LLSC_64 0 +#endif + +#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64) + +#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32 + +#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret) +#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t) (value), ret) + +#define OPAL_HAVE_ATOMIC_LLSC_PTR 1 + +#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 + +#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret) +#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t) (value), ret) + +#define OPAL_HAVE_ATOMIC_LLSC_PTR 1 + +#endif + +#else + +#define OPAL_HAVE_ATOMIC_LLSC_PTR 0 + +#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/ + END_C_DECLS #endif /* OPAL_SYS_ATOMIC_H */ diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 4b1565cae83..e1ddcd64d3a 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -304,26 +304,6 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) #endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */ -#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64) - -#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32 - -#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret) -#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t) (value), ret) - -#define OPAL_HAVE_ATOMIC_LLSC_PTR 1 - -#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 - -#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret) -#define 
opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t) (value), ret) - -#define OPAL_HAVE_ATOMIC_LLSC_PTR 1 - -#endif - -#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/ - #if !defined(OPAL_HAVE_ATOMIC_LLSC_PTR) #define OPAL_HAVE_ATOMIC_LLSC_PTR 0 #endif diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 9b98a131cd4..1c31df8b7b2 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -52,10 +52,6 @@ #define OPAL_HAVE_ATOMIC_XOR_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1 -#define OPAL_HAVE_ATOMIC_LLSC_32 0 -#define OPAL_HAVE_ATOMIC_LLSC_64 0 -#define OPAL_HAVE_ATOMIC_LLSC_PTR 0 - #define OPAL_HAVE_ATOMIC_MIN_32 1 #define OPAL_HAVE_ATOMIC_MAX_32 1 diff --git a/test/class/opal_fifo.c b/test/class/opal_fifo.c index 196c84a704d..9a2ea797866 100644 --- a/test/class/opal_fifo.c +++ b/test/class/opal_fifo.c @@ -107,10 +107,10 @@ static void *thread_test_exhaust (opal_object_t *arg) { static bool check_fifo_consistency (opal_fifo_t *fifo, int expected_count) { - volatile opal_list_item_t *volatile item; + opal_list_item_t * item; int count; - for (count = 0, item = fifo->opal_fifo_head.data.item ; item != &fifo->opal_fifo_ghost ; + for (count = 0, item = (opal_list_item_t *) fifo->opal_fifo_head.data.item ; item != &fifo->opal_fifo_ghost ; item = opal_list_get_next(item), count++); return count == expected_count; diff --git a/test/class/opal_lifo.c b/test/class/opal_lifo.c index 8e031b352cf..3f4d9b7d92d 100644 --- a/test/class/opal_lifo.c +++ b/test/class/opal_lifo.c @@ -70,7 +70,7 @@ static bool check_lifo_consistency (opal_lifo_t *lifo, int expected_count) opal_list_item_t *item; int count; - for (count = 0, item = lifo->opal_lifo_head.data.item ; item != &lifo->opal_lifo_ghost ; + for (count = 0, item = (opal_list_item_t *) lifo->opal_lifo_head.data.item ; item != &lifo->opal_lifo_ghost ; item = opal_list_get_next(item), count++); 
return count == expected_count;