Skip to content

Commit 3598c82

Browse files
authored
Merge pull request #8412 from hjelmn/enable_the_load_linked_store_conditional_lock_free_structures_when_using_c11_or_other_builtins
opal: enable load-linked, store-conditional atomics for AArch64
2 parents 7e0c8cb + 5e13f02 commit 3598c82

File tree

10 files changed

+158
-91
lines changed

10 files changed

+158
-91
lines changed

opal/class/opal_fifo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
* Copyright (c) 2010 IBM Corporation. All rights reserved.
1515
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
1616
* reserved.
17+
* Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
18+
* Copyright (c) 2021 Google, LLC. All rights reserved.
1719
* $COPYRIGHT$
1820
*
1921
* Additional copyrights may follow
@@ -76,7 +78,7 @@ static inline bool opal_fifo_is_empty( opal_fifo_t* fifo )
7678
return opal_fifo_head (fifo) == &fifo->opal_fifo_ghost;
7779
}
7880

79-
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
81+
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR
8082

8183
/* Add one element to the FIFO. We will return the last head of the list
8284
* to allow the upper level to detect if this element is the first one in the

opal/class/opal_lifo.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
* reserved.
1717
* Copyright (c) 2016-2018 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
19+
* Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
20+
* Copyright (c) 2021 Google, LLC. All rights reserved.
1921
* $COPYRIGHT$
2022
*
2123
* Additional copyrights may follow
@@ -50,7 +52,7 @@ union opal_counted_pointer_t {
5052
/** list item pointer */
5153
volatile opal_atomic_intptr_t item;
5254
} data;
53-
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T
55+
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && HAVE_OPAL_INT128_T && !OPAL_HAVE_ATOMIC_LLSC_PTR
5456
/** used for atomics when there is a cmpset that can operate on
5557
* two 64-bit values */
5658
opal_atomic_int128_t atomic_value;
@@ -60,7 +62,7 @@ union opal_counted_pointer_t {
6062
typedef union opal_counted_pointer_t opal_counted_pointer_t;
6163

6264

63-
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
65+
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR
6466

6567
/* Add one element to the FIFO. We will return the last head of the list
6668
* to allow the upper level to detect if this element is the first one in the
@@ -136,7 +138,7 @@ static inline bool opal_lifo_is_empty( opal_lifo_t* lifo )
136138
}
137139

138140

139-
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128
141+
#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 && !OPAL_HAVE_ATOMIC_LLSC_PTR
140142

141143
/* Add one element to the LIFO. We will return the last head of the list
142144
* to allow the upper level to detect if this element is the first one in the

opal/include/opal/sys/arm64/Makefile.am

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
# University of Stuttgart. All rights reserved.
1010
# Copyright (c) 2004-2005 The Regents of the University of California.
1111
# All rights reserved.
12+
# Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
13+
# Copyright (c) 2021 Google, LLC. All rights reserved.
1214
# $COPYRIGHT$
1315
#
1416
# Additional copyrights may follow
@@ -20,5 +22,6 @@
2022

2123
headers += \
2224
opal/sys/arm64/atomic.h \
25+
opal/sys/arm64/atomic_llsc.h \
2326
opal/sys/arm64/timer.h
2427

opal/include/opal/sys/arm64/atomic.h

Lines changed: 4 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,29 @@
1414
* Copyright (c) 2010 ARM ltd. All rights reserved.
1515
* Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights
1616
* reserved.
17+
* Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
18+
* Copyright (c) 2021 Google, LLC. All rights reserved.
1719
* $COPYRIGHT$
1820
*
1921
* Additional copyrights may follow
2022
*
2123
* $HEADER$
2224
*/
2325

26+
#include "atomic_llsc.h"
27+
2428
#if !defined(OPAL_SYS_ARCH_ATOMIC_H)
2529

2630
#define OPAL_SYS_ARCH_ATOMIC_H 1
2731

2832
#if OPAL_GCC_INLINE_ASSEMBLY
2933

3034
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1
31-
#define OPAL_HAVE_ATOMIC_LLSC_32 1
3235
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1
3336
#define OPAL_HAVE_ATOMIC_SWAP_32 1
3437
#define OPAL_HAVE_ATOMIC_MATH_32 1
3538
#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1
3639
#define OPAL_HAVE_ATOMIC_SWAP_64 1
37-
#define OPAL_HAVE_ATOMIC_LLSC_64 1
3840
#define OPAL_HAVE_ATOMIC_ADD_32 1
3941
#define OPAL_HAVE_ATOMIC_AND_32 1
4042
#define OPAL_HAVE_ATOMIC_OR_32 1
@@ -162,32 +164,6 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32 (opal_atomic_int32
162164
return ret;
163165
}
164166

165-
#define opal_atomic_ll_32(addr, ret) \
166-
do { \
167-
opal_atomic_int32_t *_addr = (addr); \
168-
int32_t _ret; \
169-
\
170-
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
171-
: "=&r" (_ret) \
172-
: "r" (_addr)); \
173-
\
174-
ret = (typeof(ret)) _ret; \
175-
} while (0)
176-
177-
#define opal_atomic_sc_32(addr, newval, ret) \
178-
do { \
179-
opal_atomic_int32_t *_addr = (addr); \
180-
int32_t _newval = (int32_t) newval; \
181-
int _ret; \
182-
\
183-
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
184-
: "=&r" (_ret) \
185-
: "r" (_addr), "r" (_newval) \
186-
: "cc", "memory"); \
187-
\
188-
ret = (_ret == 0); \
189-
} while (0)
190-
191167
static inline bool opal_atomic_compare_exchange_strong_64 (opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval)
192168
{
193169
int64_t prev;
@@ -272,32 +248,6 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (opal_atomic_int64
272248
return ret;
273249
}
274250

275-
#define opal_atomic_ll_64(addr, ret) \
276-
do { \
277-
opal_atomic_int64_t *_addr = (addr); \
278-
int64_t _ret; \
279-
\
280-
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
281-
: "=&r" (_ret) \
282-
: "r" (_addr)); \
283-
\
284-
ret = (typeof(ret)) _ret; \
285-
} while (0)
286-
287-
#define opal_atomic_sc_64(addr, newval, ret) \
288-
do { \
289-
opal_atomic_int64_t *_addr = (addr); \
290-
int64_t _newval = (int64_t) newval; \
291-
int _ret; \
292-
\
293-
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
294-
: "=&r" (_ret) \
295-
: "r" (_addr), "r" (_newval) \
296-
: "cc", "memory"); \
297-
\
298-
ret = (_ret == 0); \
299-
} while (0)
300-
301251
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
302252
static inline type opal_atomic_fetch_ ## name ## _ ## bits (opal_atomic_ ## type *addr, type value) \
303253
{ \
opal/include/opal/sys/arm64/atomic_llsc.h

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4+
* University Research and Technology
5+
* Corporation. All rights reserved.
6+
* Copyright (c) 2004-2005 The University of Tennessee and The University
7+
* of Tennessee Research Foundation. All rights
8+
* reserved.
9+
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10+
* University of Stuttgart. All rights reserved.
11+
* Copyright (c) 2004-2005 The Regents of the University of California.
12+
* All rights reserved.
13+
* Copyright (c) 2010 IBM Corporation. All rights reserved.
14+
* Copyright (c) 2010 ARM ltd. All rights reserved.
15+
* Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights
16+
* reserved.
17+
* Copyright (c) 2021 Triad National Security, LLC. All rights reserved.
18+
* Copyright (c) 2021 Google, LLC. All rights reserved.
19+
* $COPYRIGHT$
20+
*
21+
* Additional copyrights may follow
22+
*
23+
* $HEADER$
24+
*/
25+
26+
#if !defined(OPAL_SYS_ARCH_ATOMIC_LLSC_H)
27+
28+
#define OPAL_SYS_ARCH_ATOMIC_LLSC_H
29+
30+
#if OPAL_C_GCC_INLINE_ASSEMBLY
31+
32+
#undef OPAL_HAVE_ATOMIC_LLSC_32
33+
#undef OPAL_HAVE_ATOMIC_LLSC_64
34+
35+
#define OPAL_HAVE_ATOMIC_LLSC_32 1
36+
#define OPAL_HAVE_ATOMIC_LLSC_64 1
37+
38+
#define opal_atomic_ll_32(addr, ret) \
39+
do { \
40+
opal_atomic_int32_t *_addr = (addr); \
41+
int32_t _ret; \
42+
\
43+
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
44+
: "=&r" (_ret) \
45+
: "r" (_addr)); \
46+
\
47+
ret = (typeof(ret)) _ret; \
48+
} while (0)
49+
50+
#define opal_atomic_sc_32(addr, newval, ret) \
51+
do { \
52+
opal_atomic_int32_t *_addr = (addr); \
53+
int32_t _newval = (int32_t) newval; \
54+
int _ret; \
55+
\
56+
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
57+
: "=&r" (_ret) \
58+
: "r" (_addr), "r" (_newval) \
59+
: "cc", "memory"); \
60+
\
61+
ret = (_ret == 0); \
62+
} while (0)
63+
64+
#define opal_atomic_ll_64(addr, ret) \
65+
do { \
66+
opal_atomic_int64_t *_addr = (addr); \
67+
int64_t _ret; \
68+
\
69+
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
70+
: "=&r" (_ret) \
71+
: "r" (_addr)); \
72+
\
73+
ret = (typeof(ret)) _ret; \
74+
} while (0)
75+
76+
#define opal_atomic_sc_64(addr, newval, ret) \
77+
do { \
78+
opal_atomic_int64_t *_addr = (addr); \
79+
int64_t _newval = (int64_t) newval; \
80+
int _ret; \
81+
\
82+
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
83+
: "=&r" (_ret) \
84+
: "r" (_addr), "r" (_newval) \
85+
: "cc", "memory"); \
86+
\
87+
ret = (_ret == 0); \
88+
} while (0)
89+
90+
#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */
91+
92+
#endif /* ! OPAL_SYS_ARCH_ATOMIC_LLSC_H */

opal/include/opal/sys/atomic.h

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,19 +59,19 @@
5959
#include "opal/sys/architecture.h"
6060
#include "opal_stdatomic.h"
6161

62-
#if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER)
63-
64-
#include "atomic_stdc.h"
65-
66-
#else /* !OPAL_C_HAVE__ATOMIC */
67-
6862
/* do some quick #define cleanup in cases where we are doing
6963
testing... */
7064
#ifdef OPAL_DISABLE_INLINE_ASM
7165
#undef OPAL_C_GCC_INLINE_ASSEMBLY
7266
#define OPAL_C_GCC_INLINE_ASSEMBLY 0
7367
#endif
7468

69+
#if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER)
70+
71+
#include "atomic_stdc.h"
72+
73+
#else /* !OPAL_C_HAVE__ATOMIC */
74+
7575
/* define OPAL_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the
7676
OPAL_C_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we
7777
are in C or C++ */
@@ -642,6 +642,48 @@ static inline intptr_t opal_atomic_fetch_sub_ptr( opal_atomic_intptr_t* addr, vo
642642

643643
#endif /* !OPAL_C_HAVE__ATOMIC */
644644

645+
/****** load-linked, store-conditional atomic implementations ******/
646+
647+
/* C11 atomics do not expose the low-level load-linked, store-conditional
648+
* instructions. Open MPI can use these instructions to implement a more
649+
* efficient version of the lock-free lifo and fifo. On Apple Silicon the
650+
* LL/SC fifo and lifo are ~ 2-20x faster than the CAS128 implementation. */
651+
#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64
652+
#include "opal/sys/arm64/atomic_llsc.h"
653+
#endif
654+
655+
#if !defined(OPAL_HAVE_ATOMIC_LLSC_32)
656+
#define OPAL_HAVE_ATOMIC_LLSC_32 0
657+
#endif
658+
659+
#if !defined(OPAL_HAVE_ATOMIC_LLSC_64)
660+
#define OPAL_HAVE_ATOMIC_LLSC_64 0
661+
#endif
662+
663+
#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)
664+
665+
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32
666+
667+
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret)
668+
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t) (value), ret)
669+
670+
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
671+
672+
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64
673+
674+
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret)
675+
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t) (value), ret)
676+
677+
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
678+
679+
#endif
680+
681+
#else
682+
683+
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0
684+
685+
#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/
686+
645687
END_C_DECLS
646688

647689
#endif /* OPAL_SYS_ATOMIC_H */

opal/include/opal/sys/atomic_impl.h

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -304,26 +304,6 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_)
304304

305305
#endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */
306306

307-
#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)
308-
309-
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32
310-
311-
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret)
312-
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t) (value), ret)
313-
314-
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
315-
316-
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64
317-
318-
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret)
319-
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t) (value), ret)
320-
321-
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
322-
323-
#endif
324-
325-
#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/
326-
327307
#if !defined(OPAL_HAVE_ATOMIC_LLSC_PTR)
328308
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0
329309
#endif

opal/include/opal/sys/atomic_stdc.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,6 @@
5252
#define OPAL_HAVE_ATOMIC_XOR_64 1
5353
#define OPAL_HAVE_ATOMIC_SUB_64 1
5454

55-
#define OPAL_HAVE_ATOMIC_LLSC_32 0
56-
#define OPAL_HAVE_ATOMIC_LLSC_64 0
57-
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0
58-
5955
#define OPAL_HAVE_ATOMIC_MIN_32 1
6056
#define OPAL_HAVE_ATOMIC_MAX_32 1
6157

test/class/opal_fifo.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,10 @@ static void *thread_test_exhaust (opal_object_t *arg) {
107107

108108
static bool check_fifo_consistency (opal_fifo_t *fifo, int expected_count)
109109
{
110-
volatile opal_list_item_t *volatile item;
110+
opal_list_item_t * item;
111111
int count;
112112

113-
for (count = 0, item = fifo->opal_fifo_head.data.item ; item != &fifo->opal_fifo_ghost ;
113+
for (count = 0, item = (opal_list_item_t *) fifo->opal_fifo_head.data.item ; item != &fifo->opal_fifo_ghost ;
114114
item = opal_list_get_next(item), count++);
115115

116116
return count == expected_count;

test/class/opal_lifo.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ static bool check_lifo_consistency (opal_lifo_t *lifo, int expected_count)
7070
opal_list_item_t *item;
7171
int count;
7272

73-
for (count = 0, item = lifo->opal_lifo_head.data.item ; item != &lifo->opal_lifo_ghost ;
73+
for (count = 0, item = (opal_list_item_t *) lifo->opal_lifo_head.data.item ; item != &lifo->opal_lifo_ghost ;
7474
item = opal_list_get_next(item), count++);
7575

7676
return count == expected_count;

0 commit comments

Comments
 (0)