
Commit ed24de1

Vendor in an implementation of lock-free 128-bit atomics from progress64 as a fallback
for when the standard implementation isn't actually lock-free. Also test carefully whether the standard implementation will actually be lock-free and use it at least in those cases.
1 parent 619afe9 commit ed24de1
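As a rough illustration of the "test carefully" part of the commit message, the probe below is a minimal sketch (not taken from this commit) of how one can ask the toolchain whether its 16-byte atomics are genuinely lock-free; when the answer is no, the vendored progress64 code is the fallback.

//Minimal sketch (not part of this commit): probe whether the standard
//16-byte atomics are genuinely lock-free before trusting them.
//May need to link with -latomic if the compiler defers the answer to the runtime library.
#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
    _Atomic __int128 probe = 0;
    //atomic_is_lock_free reports whether operations on this object are
    //implemented without an internal lock (e.g. via cmpxchg16b).
    if (atomic_is_lock_free(&probe))
    {
        puts("standard 128-bit atomics are lock-free");
        return 0;
    }
    puts("not lock-free: fall back to the vendored progress64 implementation");
    return 1;
}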

File tree

10 files changed: +1090 -0 lines changed

configure.ac

Lines changed: 8 additions & 0 deletions
@@ -50,6 +50,10 @@ AC_ARG_ENABLE([picky],
   [AS_HELP_STRING([--enable-picky],
     [turns on extra compiler warnings (for developers of qthreads)])])
 
+AC_ARG_ENABLE([avx],
+  [AS_HELP_STRING([--disable-avx],
+    [turns off avx for x86-64 and uses a fallback implementation to get lockfree 128 bit atomics instead of relying on the consistency guarantees of avx processors])])
+
 AC_ARG_ENABLE([debugging],
   [AS_HELP_STRING([--enable-debugging],
     [turns off optimization and turns on debug flags])])
@@ -534,6 +538,10 @@ AS_IF([test "x$qthread_cv_asm_arch" = "xPOWERPC32"],
    align_aligned_t=4],
   [sizeof_aligned_t=8
    align_aligned_t=8])
+AS_IF([test "x$enable_avx" != "xno"],
+  AS_IF([test "x$qthread_cv_asm_arch" = xAMD64],
+    [CFLAGS="-mavx $CFLAGS"
+     CXXFLAGS="-mavx $CXXFLAGS"]))
 AS_IF([test "x$enable_64bit_aligned_t" = "xyes"], [sizeof_aligned_t="8"
   align_aligned_t="8"])
 AS_IF([test "x$enable_64bit_aligned_t" = "xno"], [sizeof_aligned_t="$ac_cv_sizeof_int"
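For context on the help text above: on x86-64 processors that support AVX, aligned 16-byte vector loads and stores are performed atomically, which is the "consistency guarantee" the -mavx path relies on. The snippet below is an illustrative sketch only; the u128 type and load_u128 helper are made up for the example and are not how qthreads itself uses the flag (that code is not part of this diff).

//Illustrative sketch only: what an AVX-based lock-free 128-bit load can look like.
//The u128 type and load_u128 helper are hypothetical, not taken from qthreads.
//Compile with -mavx so the intrinsics emit vmovdqa.
#include <immintrin.h>
#include <stdint.h>

typedef struct
{
    uint64_t lo;
    uint64_t hi;
} __attribute__((aligned(16))) u128;

static inline u128 load_u128(const u128 *src)
{
    //A single aligned 16-byte load; atomic on AVX-capable processors.
    __m128i v = _mm_load_si128((const __m128i *)src);
    u128 out;
    //Copy the register out to an ordinary (non-shared) struct.
    _mm_store_si128((__m128i *)&out, v);
    return out;
}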

include/progress64/.clang-format

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+DisableFormat: true
+SortIncludes: Never

include/progress64/LICENSE

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+Copyright 2018 ARM Limited.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

include/progress64/common.h

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
+//Copyright (c) 2018, ARM Limited. All rights reserved.
+//
+//SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef _COMMON_H
+#define _COMMON_H
+
+//Compiler hints
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#ifdef __clang__
+#define UNROLL_LOOPS __attribute__((opencl_unroll_hint(8)))
+#else
+#define UNROLL_LOOPS __attribute__((optimize("unroll-loops")))
+#endif
+#define INIT_FUNCTION __attribute__((constructor))
+#define LIKELY(x) __builtin_expect(!!(x), 1)
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#define COMPILER_MEMORY_FENCE() __asm __volatile("" ::: "memory")
+#define UNREACHABLE() __builtin_unreachable()
+
+#ifdef NDEBUG
+#if defined __GNUC__ && __GNUC__ >= 8
+#define ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
+#else
+#define ASSUME(cond) (void)(cond)
+#endif
+#else //Assertions enabled, check that assumptions are true
+#define ASSUME(cond) assert(cond)
+#endif
+
+//Hardware hints
+#define PREFETCH_FOR_READ(ptr) __builtin_prefetch((ptr), 0, 3)
+#define PREFETCH_FOR_WRITE(ptr) __builtin_prefetch((ptr), 1, 3)
+
+//Use GNUC syntax for ALIGNED
+#define ALIGNED(x) __attribute__((__aligned__(x)))
+#if __STDC_VERSION__ >= 201112L
+//Use C11 syntax
+#define THREAD_LOCAL _Thread_local
+#else
+//Use GNUC syntax
+#define THREAD_LOCAL __thread
+#endif
+
+#define ROUNDUP_POW2(x) \
+    ({ \
+        unsigned long _x = (x); \
+        _x > 1 ? (1UL << (__SIZEOF_LONG__ * __CHAR_BIT__ - __builtin_clzl(_x - 1UL))) : 1; \
+    })
+
+/*
+ * By Hallvard B Furuseth
+ * https://groups.google.com/forum/?hl=en#!msg/comp.lang.c/attFnqwhvGk/sGBKXvIkY3AJ
+ * Return (v ? floor(log2(v)) : 0) when 0 <= v < 1<<[8, 16, 32, 64].
+ * Inefficient algorithm, intended for compile-time constants.
+ */
+#define LOG2_8BIT(v) (8 - 90/(((v)/4+14)|1) - 2/((v)/2+1))
+#define LOG2_16BIT(v) (8*((v)>255) + LOG2_8BIT((v) >>8*((v)>255)))
+#define LOG2_32BIT(v) \
+    (16*((v)>65535L) + LOG2_16BIT((v)*1L >>16*((v)>65535L)))
+#define LOG2_64BIT(v) \
+    (32*((v)/2L>>31 > 0) \
+     + LOG2_32BIT((v)*1L >>16*((v)/2L>>31 > 0) \
+                         >>16*((v)/2L>>31 > 0)))
+
+#define ROUNDUP(a, b) \
+    ({ \
+        __typeof__ (a) tmp_a = (a); \
+        __typeof__ (b) tmp_b = (b); \
+        ((tmp_a + tmp_b - 1) / tmp_b) * tmp_b; \
+    })
+
+#define MIN(a, b) \
+    ({ \
+        __typeof__ (a) tmp_a = (a); \
+        __typeof__ (b) tmp_b = (b); \
+        tmp_a < tmp_b ? tmp_a : tmp_b; \
+    })
+
+#define MAX(a, b) \
+    ({ \
+        __typeof__ (a) tmp_a = (a); \
+        __typeof__ (b) tmp_b = (b); \
+        tmp_a > tmp_b ? tmp_a : tmp_b; \
+    })
+
+#define IS_POWER_OF_TWO(n) \
+    ({ \
+        __typeof__ (n) tmp_n = (n); \
+        tmp_n != 0 && (tmp_n & (tmp_n - 1)) == 0; \
+    })
+
+#define SWAP(_a, _b) \
+    { \
+        __typeof__ (_a) _t; \
+        _t = _a; \
+        _a = _b; \
+        _b = _t; \
+    }
+
+#if __SIZEOF_POINTER__ == 4
+typedef unsigned long long ptrpair_t;//assume 64 bits
+#else //__SIZEOF_POINTER__ == 8
+typedef __int128 ptrpair_t;
+#endif
+
+#endif
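
For orientation, here is a small usage sketch of the helper macros above. It is illustrative only and assumes the vendored header is reachable as progress64/common.h on the include path.

//Illustrative usage of the vendored helpers (not part of this commit).
//Assumes include/ is on the include path so progress64/common.h resolves.
#include <assert.h>
#include <stdio.h>
#include "progress64/common.h"

int main(void)
{
    assert(ROUNDUP_POW2(37) == 64);      //next power of two at or above 37
    assert(ROUNDUP(10, 8) == 16);        //round 10 up to a multiple of 8
    assert(IS_POWER_OF_TWO(64));
    assert(!IS_POWER_OF_TWO(48));
    assert(LOG2_32BIT(4096) == 12);      //usable with compile-time constants
    int a = 3, b = 7;
    SWAP(a, b);
    assert(a == 7 && b == 3);
    //ptrpair_t is wide enough to hold two pointers (e.g. pointer + tag).
    printf("sizeof(ptrpair_t) = %zu\n", sizeof(ptrpair_t));
    return 0;
}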

include/progress64/lockfree.h

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+//Copyright (c) 2018, ARM Limited. All rights reserved.
+//
+//SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef _LOCKFREE_H
+#define _LOCKFREE_H
+
+#include "common.h"
+
+#define HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
+#define HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || (mo) == __ATOMIC_ACQ_REL || (mo) == __ATOMIC_SEQ_CST)
+
+#define MO_LOAD(mo) (HAS_ACQ((mo)) ? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED)
+#define MO_STORE(mo) (HAS_RLS((mo)) ? __ATOMIC_RELEASE : __ATOMIC_RELAXED)
+
+#if defined __aarch64__
+
+#include "lockfree/aarch64.h"
+#define lockfree_compare_exchange_pp_frail lockfree_compare_exchange_16_frail
+#define lockfree_compare_exchange_pp lockfree_compare_exchange_16
+
+#elif defined __arm__
+
+#define lockfree_compare_exchange_pp_frail __atomic_compare_exchange_8
+#define lockfree_compare_exchange_pp __atomic_compare_exchange_8
+
+#elif defined __x86_64__
+
+#include "lockfree/x86-64.h"
+#define lockfree_compare_exchange_pp_frail lockfree_compare_exchange_16
+#define lockfree_compare_exchange_pp lockfree_compare_exchange_16
+
+#else
+
+#error Unsupported architecture
+
+#endif
+
+#if (__ATOMIC_RELAXED | __ATOMIC_ACQUIRE) != __ATOMIC_ACQUIRE
+#error __ATOMIC bit-wise OR hack failed (see XXX)
+#endif
+#if (__ATOMIC_RELEASE | __ATOMIC_ACQUIRE) != __ATOMIC_RELEASE
+#error __ATOMIC bit-wise OR hack failed (see XXX)
+#endif
+
+#ifndef _ATOMIC_UMAX_4_DEFINED
+#define _ATOMIC_UMAX_4_DEFINED
+
+ALWAYS_INLINE
+static inline uint32_t
+lockfree_fetch_umax_4(uint32_t *var, uint32_t val, int mo)
+{
+    uint32_t old = __atomic_load_n(var, __ATOMIC_RELAXED);
+    do
+    {
+        if (val <= old)
+        {
+            return old;
+        }
+    }
+    while (!__atomic_compare_exchange_n(var,
+                                        &old,
+                                        val,
+                                        /*weak=*/true,
+                                        MO_LOAD(mo) | MO_STORE(mo),//XXX
+                                        MO_LOAD(mo)));
+    return old;
+}
+#endif
+
+#ifndef _ATOMIC_UMAX_8_DEFINED
+#define _ATOMIC_UMAX_8_DEFINED
+ALWAYS_INLINE
+static inline uint64_t
+lockfree_fetch_umax_8(uint64_t *var, uint64_t val, int mo)
+{
+    uint64_t old = __atomic_load_n(var, __ATOMIC_RELAXED);
+    do
+    {
+        if (val <= old)
+        {
+            return old;
+        }
+    }
+    while (!__atomic_compare_exchange_n(var,
+                                        &old,
+                                        val,
+                                        /*weak=*/true,
+                                        MO_LOAD(mo) | MO_STORE(mo),//XXX
+                                        MO_LOAD(mo)));
+    return old;
+}
+#endif
+
+#endif
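
And a similar usage sketch for the interface above, again illustrative only. It exercises lockfree_fetch_umax_4, whose definition is shown in full in this diff, and assumes a supported target (AArch64, Arm, or x86-64) so the header's architecture dispatch succeeds.

//Illustrative usage of lockfree_fetch_umax_4 (not part of this commit).
//stdbool.h and stdint.h are included first because the vendored header
//uses bool and uint32_t/uint64_t without including them itself.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include "progress64/lockfree.h"

int main(void)
{
    //A typical use: atomically raise a shared high-water mark, never lower it.
    uint32_t watermark = 10;
    uint32_t prev;

    prev = lockfree_fetch_umax_4(&watermark, 42, __ATOMIC_RELAXED);
    printf("prev=%u now=%u\n", prev, watermark);   //prev=10 now=42

    prev = lockfree_fetch_umax_4(&watermark, 7, __ATOMIC_RELAXED);
    printf("prev=%u now=%u\n", prev, watermark);   //7 <= 42, so nothing changes
    return 0;
}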
