Skip to content

Commit d440c13

Browse files
authored
Merge pull request #8649 from awlauria/force_ppc_assembly_atomics
Powerpc atomics: Force usage of powerpc assembly.
2 parents cc32d74 + 136213d commit d440c13

File tree

4 files changed

+266
-2
lines changed

4 files changed

+266
-2
lines changed

config/opal_config_asm.m4

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,18 @@ AC_DEFUN([OPAL_CONFIG_ASM],[
11921192
AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long])
11931193
fi
11941194
OPAL_GCC_INLINE_ASSIGN='"1: li %0,0" : "=&r"(ret)'
1195+
1196+
# See the following github PR and some performance numbers/discussion:
1197+
# https://github.com/open-mpi/ompi/pull/8649
1198+
AC_MSG_CHECKING([$opal_cv_asm_arch: Checking if force gcc atomics requested])
1199+
if test $force_gcc_atomics_ppc = 0 ; then
1200+
AC_MSG_RESULT([no])
1201+
opal_cv_asm_builtin="BUILTIN_NO"
1202+
else
1203+
AC_MSG_RESULT([Yes])
1204+
AC_MSG_WARN([$opal_cv_asm_arch: gcc atomics have been known to perform poorly on powerpc.])
1205+
fi
1206+
11951207
;;
11961208
*)
11971209
if test $opal_cv_have___atomic = "yes" ; then

config/opal_configure_options.m4

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,13 @@ else
8484
WANT_BRANCH_PROBABILITIES=0
8585
fi
8686

87+
AC_ARG_ENABLE([builtin-atomics-for-ppc],[AS_HELP_STRING([--enable-builtin-atomics-for-ppc],
88+
[POWER architectures only: Force use of builtin atomics if available. This could either be gcc builtins or C11 atomics, depending on what is available on your system. Enabling this is known to cause poor performance in atomic operations on Power machines. (default: disabled)])])
89+
if test "x$enable_builtin_atomics_for_ppc" = "xyes" ; then
90+
force_gcc_atomics_ppc=1
91+
else
92+
force_gcc_atomics_ppc=0
93+
fi
8794

8895
#
8996
# Memory debugging

test/threads/Makefile.am

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
# University of Stuttgart. All rights reserved.
1111
# Copyright (c) 2004-2005 The Regents of the University of California.
1212
# All rights reserved.
13-
# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
13+
# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
14+
# Copyright (c) 2021 IBM Corporation. All rights reserved.
1415
# $COPYRIGHT$
1516
#
1617
# Additional copyrights may follow
@@ -24,7 +25,8 @@ AM_LDFLAGS = -lpthread
2425

2526
check_PROGRAMS = \
2627
opal_thread \
27-
opal_condition
28+
opal_condition \
29+
opal_atomic_thread_bench
2830

2931
# JMS possibly to be re-added when #1232 is fixed
3032
#TESTS = $(check_PROGRAMS)
@@ -42,5 +44,11 @@ opal_condition_LDADD = \
4244
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
4345
opal_condition_DEPENDENCIES = $(opal_condition_LDADD)
4446

47+
opal_atomic_thread_bench_SOURCES = opal_atomic_thread_bench.c
48+
opal_atomic_thread_bench_LDADD = \
49+
$(top_builddir)/test/support/libsupport.a \
50+
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
51+
opal_atomic_thread_bench_DEPENDENCIES = $(opal_atomic_thread_bench_LDADD)
52+
4553
distclean:
4654
rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/*
2+
* Copyright (c) 2021 IBM Corporation. All rights reserved.
3+
* Additional copyrights may follow
4+
*
5+
* $HEADER$
6+
*/
7+
8+
#include "opal_config.h"
9+
10+
#include <stdio.h>
11+
#include <time.h>
12+
13+
#include "support.h"
14+
#include "opal/runtime/opal.h"
15+
#include "opal/constants.h"
16+
#include "opal/mca/threads/threads.h"
17+
#include "opal/mca/threads/condition.h"
18+
#include "opal/sys/atomic.h"
19+
20+
21+
/* Number of threads started by main(); also the participant count passed to pthread_barrier_init(). */
#define OPAL_TEST_THREAD_COUNT 8
22+
/* Upper bound of the loop counter in every timed section, and the divisor OPAL_TEST_DONE uses
 * to turn the elapsed time into a per-operation figure. */
#define ITERATIONS 1000000
23+
/* Not referenced anywhere else in this file. */
#define ITEM_COUNT 100
24+
25+
/* Shared targets of the 64-bit and 32-bit atomic operations exercised by thread_test(). */
static opal_atomic_int64_t var_64 = 0;
26+
static opal_atomic_int32_t var_32 = 0;
27+
/* Barrier waited on by OPAL_TEST_DONE and OPAL_RESET_VAR between timed sections. */
static pthread_barrier_t barrier;
28+
29+
#if !defined(timersub)
/*
 * Fallback for platforms whose <sys/time.h> does not provide timersub().
 *
 * Computes *r = *a - *b for timeval-like structures (tv_sec / tv_usec).
 * The operands a and b are only read; the borrow from the seconds field is
 * applied to the result, so a negative microsecond difference is normalised
 * in *r without touching the inputs.  tv_usec must be a signed type for the
 * "< 0" test to work (suseconds_t is signed).
 */
#define timersub(a, b, r)                               \
    do {                                                \
        (r)->tv_sec = (a)->tv_sec - (b)->tv_sec;        \
        (r)->tv_usec = (a)->tv_usec - (b)->tv_usec;     \
        if ((r)->tv_usec < 0) {                         \
            (r)->tv_sec--;                              \
            (r)->tv_usec += 1000000;                    \
        }                                               \
    } while (0)
#endif
40+
41+
42+
#if !defined(OPAL_TEST_DONE)
/*
 * Close one timed section of thread_test().
 *
 * Expects the enclosing scope to provide `struct timeval start, stop, total`
 * and `double timing`, with `start` already filled in by gettimeofday().
 * Takes the stop time, prints the elapsed time and the average time per
 * iteration (elapsed / ITERATIONS, in nanoseconds), flushes stdout and then
 * waits on the global barrier so that all threads finish the section before
 * any of them moves on.
 *
 *   func - name of the benchmarked operation, printed as "%s()".
 *   val  - currently unused; kept so existing call sites stay valid.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in un-braced if/else).  start/stop/total are fully rewritten before
 * their next use, so they are not cleared here.
 */
#define OPAL_TEST_DONE(func, val)                                                   \
    do {                                                                            \
        gettimeofday(&stop, NULL);                                                  \
        timersub(&stop, &start, &total);                                            \
        timing = ((double) total.tv_sec + (double) total.tv_usec * 1e-6)            \
                 / (double) ITERATIONS;                                             \
        printf("%s() thread finished. Time: %d s %d us %d nsec/per\n", (func),      \
               (int) total.tv_sec, (int) total.tv_usec, (int) (timing / 1e-9));     \
        fflush(stdout);                                                             \
        pthread_barrier_wait(&barrier);                                             \
    } while (0)
#endif
57+
58+
#if !defined(OPAL_RESET_VAR)
/*
 * Reset a shared benchmark variable between timed sections.
 *
 * Every thread stores 0 into `var` and then waits on the global barrier, so
 * no thread starts the next section before all threads have passed the reset.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement.
 */
#define OPAL_RESET_VAR(var)                 \
    do {                                    \
        (var) = 0;                          \
        pthread_barrier_wait(&barrier);     \
    } while (0)
#endif
64+
65+
static void *thread_test (void *arg) {
66+
struct timeval start, stop, total;
67+
double timing;
68+
69+
gettimeofday (&start, NULL);
70+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
71+
opal_atomic_compare_exchange_strong_64(&var_64, &i, i+1);
72+
}
73+
OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_64", var_64);
74+
75+
OPAL_RESET_VAR(var_64);
76+
77+
gettimeofday (&start, NULL);
78+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
79+
opal_atomic_compare_exchange_strong_rel_64(&var_64, &i, i+1);
80+
}
81+
OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_rel_64", var_64);
82+
83+
OPAL_RESET_VAR(var_64);
84+
85+
gettimeofday (&start, NULL);
86+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
87+
opal_atomic_compare_exchange_strong_acq_64(&var_64, &i, i+1);
88+
}
89+
OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_acq_64", var_64);
90+
91+
OPAL_RESET_VAR(var_64);
92+
93+
gettimeofday (&start, NULL);
94+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
95+
opal_atomic_fetch_add_64(&var_64, 1);
96+
}
97+
OPAL_TEST_DONE("opal_atomic_fetch_add_64", var_64);
98+
99+
OPAL_RESET_VAR(var_64);
100+
101+
gettimeofday (&start, NULL);
102+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
103+
opal_atomic_fetch_sub_64(&var_64, 1);
104+
}
105+
OPAL_TEST_DONE("opal_atomic_fetch_sub_64", var_64);
106+
107+
OPAL_RESET_VAR(var_64);
108+
109+
gettimeofday (&start, NULL);
110+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
111+
opal_atomic_fetch_xor_64(&var_64, i);
112+
}
113+
OPAL_TEST_DONE("opal_atomic_fetch_xor_64", var_64);
114+
115+
OPAL_RESET_VAR(var_64);
116+
117+
gettimeofday (&start, NULL);
118+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
119+
opal_atomic_swap_64(&var_64, i);
120+
}
121+
OPAL_TEST_DONE("opal_atomic_swap_64", var_64);
122+
123+
OPAL_RESET_VAR(var_64);
124+
125+
#if OPAL_HAVE_ATOMIC_LLSC_64
126+
gettimeofday (&start, NULL);
127+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
128+
int ret;
129+
opal_atomic_sc_64(&var_64, i, ret);
130+
}
131+
OPAL_TEST_DONE("opal_atomic_sc_64", var_64);
132+
133+
OPAL_RESET_VAR(var_64);
134+
135+
gettimeofday (&start, NULL);
136+
for (int64_t i = 0 ; i < ITERATIONS ; ++i) {
137+
int ret;
138+
opal_atomic_sc_64(&var_64, i, ret);
139+
}
140+
OPAL_TEST_DONE("opal_atomic_ll_64", var_64);
141+
142+
OPAL_RESET_VAR(var_64);
143+
#endif
144+
145+
gettimeofday (&start, NULL);
146+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
147+
opal_atomic_compare_exchange_strong_32(&var_32, &i, i+1);
148+
}
149+
OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_32", var_32);
150+
151+
OPAL_RESET_VAR(var_32);
152+
153+
gettimeofday (&start, NULL);
154+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
155+
opal_atomic_compare_exchange_strong_rel_32(&var_32, &i, i+1);
156+
}
157+
OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_rel_32", var_32);
158+
159+
OPAL_RESET_VAR(var_32);
160+
161+
gettimeofday (&start, NULL);
162+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
163+
opal_atomic_compare_exchange_strong_acq_32(&var_32, &i, i+1);
164+
}
165+
OPAL_TEST_DONE("opal_atomic_compare_exchange_strong_acq_32", var_32);
166+
167+
OPAL_RESET_VAR(var_32);
168+
169+
gettimeofday (&start, NULL);
170+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
171+
opal_atomic_fetch_add_32(&var_32, 1);
172+
}
173+
OPAL_TEST_DONE("opal_atomic_fetch_add_32", var_32);
174+
175+
OPAL_RESET_VAR(var_32);
176+
177+
gettimeofday (&start, NULL);
178+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
179+
opal_atomic_fetch_sub_32(&var_32, 1);
180+
}
181+
OPAL_TEST_DONE("opal_atomic_fetch_sub_32", var_32);
182+
183+
OPAL_RESET_VAR(var_32);
184+
185+
gettimeofday (&start, NULL);
186+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
187+
opal_atomic_fetch_xor_32(&var_32, i);
188+
}
189+
OPAL_TEST_DONE("opal_atomic_fetch_xor_32", var_32);
190+
191+
OPAL_RESET_VAR(var_32);
192+
193+
gettimeofday (&start, NULL);
194+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
195+
opal_atomic_swap_32(&var_32, i);
196+
}
197+
OPAL_TEST_DONE("opal_atomic_swap_32", var_32);
198+
199+
OPAL_RESET_VAR(var_32);
200+
201+
#if OPAL_HAVE_ATOMIC_LLSC_32
202+
gettimeofday (&start, NULL);
203+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
204+
int ret;
205+
opal_atomic_sc_32(&var_32, i, ret);
206+
}
207+
OPAL_TEST_DONE("opal_atomic_sc_32", var_32);
208+
209+
OPAL_RESET_VAR(var_32);
210+
211+
gettimeofday (&start, NULL);
212+
for (int32_t i = 0 ; i < ITERATIONS ; ++i) {
213+
int ret;
214+
opal_atomic_sc_32(&var_32, i, ret);
215+
}
216+
OPAL_TEST_DONE("opal_atomic_ll_32", var_32);
217+
218+
OPAL_RESET_VAR(var_32);
219+
#endif
220+
221+
return NULL;
222+
}
223+
224+
int main(void) {
225+
226+
pthread_barrier_init (&barrier, NULL, OPAL_TEST_THREAD_COUNT);
227+
228+
pthread_t ts[OPAL_TEST_THREAD_COUNT];
229+
for(int i = 0; i < OPAL_TEST_THREAD_COUNT; i++) {
230+
pthread_create(&ts[i], NULL, &thread_test, NULL);
231+
}
232+
233+
for(int i = 0; i < OPAL_TEST_THREAD_COUNT; i++) {
234+
pthread_join(ts[i], NULL);
235+
}
236+
return 0;
237+
}

0 commit comments

Comments
 (0)