Skip to content

128 Bit Atomics Fallback Implementation #215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions config/qthread_check_128_bit_atomics_fallback.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
AC_DEFUN([QT_NEEDS_128_BIT_ATOMIC_FALLBACK],
[
dnl Decide whether Qthreads needs its own fallback implementation of 128 bit
dnl atomics because the toolchain's libatomic may not provide lock-free ones.
dnl Result is cached in qt_cv_128b_atomic_fallback; "yes" means the probe
dnl program failed to compile (or no C++ compiler exists), so we could not
dnl confirm the underlying libatomic is safe and must use the fallback.
AC_CACHE_CHECK([whether Qthreads may need to use the fallback implementation for 128 bit atomics],
[qt_cv_128b_atomic_fallback],
[
AC_LANG_PUSH([C++])
dnl AC_COMPILE_IFELSE with AC_LANG_PROGRAM replaces the obsolete AC_TRY_COMPILE.
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM(
[[
// This file compiles only if clang or similar can safely use 128 bit atomics at runtime even in debug builds.
// For that to be the case it needs to be using a sufficiently recent libatomic from gcc or it needs
// to be using compiler_rt for its atomics.
#if __cplusplus >= 202002L
#include <version>
#else
#include <ciso646>
#endif

// Only have a fallback implementation of 128 bit atomics for these architectures,
// so let the others use the standard implementation and hope for the best.
// On OSX and FreeBSD, compiler-rt is used by default instead of libatomic so we don't need to worry about this.
#if defined(__aarch64__) || defined(__arm__) || !(defined(__clang__) && (defined(__APPLE__) || defined(__FreeBSD__)))
// gcc version info as obtained from libstdc++ since it's surprisingly difficult to figure out which gcc clang is using for its support libraries.
// Note: if no C++ compiler is available, this compilation will fail and the fallback will be used since we couldn't confirm
// that the underlying libatomic is safe to use.
// If we're seeing libstdc++ here, that means there's an underlying gcc install also providing libatomic and that libatomic is actually
// what we're using because libstdc++ depends on it.
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 7
#error Too early of a version to have _GLIBCXX_RELEASE, let alone 128 bit atomics.
#endif
#if !defined(_LIBCPP_VERSION) && !defined(__GLIBCXX__)
#error Too early of a libstdc++ version to even include its version info in <ciso646>.
#endif
#if defined(__GLIBCXX__)
#ifdef __amd64__
#if !(_GLIBCXX_RELEASE == 13 || (_GLIBCXX_RELEASE == 12 && __GLIBCXX__ >= 20230508) || (_GLIBCXX_RELEASE == 11 && __GLIBCXX__ >= 20230529))
#error Only more recent versions of gcc have lock-free 128 bit atomics.
#endif
#elif defined(__aarch64__)
#if !(_GLIBCXX_RELEASE == 13)
#error Only more recent versions of gcc have lock-free 128 bit atomics.
#endif
#endif
#endif

// If we're using libc++ we can't get the underlying gcc version from it here so we can't guarantee that
// 128 bit atomics will actually be lock-free. We only have clang use the fallback implementation for
// 128 bit atomics in debug builds anyway and it's extremely unusual to build qthreads with libc++,
// so this case is not super important.
// It's also technically possible for libc++ and clang to just get their atomics from
// compiler-rt instead, but again we don't have an easy way to check that here.
#if defined(_LIBCPP_VERSION)
#error can't guarantee lock-free 128 bit atomics without gcc version info.
#endif

#endif
]],
[[return 0;]])],
[qt_cv_128b_atomic_fallback=no],
[qt_cv_128b_atomic_fallback=yes])
AC_LANG_POP([C++])
])
dnl Quote the shell variable so an empty/unset cache value cannot break the test.
AS_IF([test "x$qt_cv_128b_atomic_fallback" = "xyes"],
[AC_DEFINE([QTHREADS_NEEDS_128_BIT_ATOMIC_FALLBACK], [1],
[Whether a fallback is needed because the underlying libatomic may not provide lock-free 128 bit atomics])])
])
10 changes: 10 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ AC_ARG_ENABLE([picky],
[AS_HELP_STRING([--enable-picky],
[turns on extra compiler warnings (for developers of qthreads)])])

AC_ARG_ENABLE([avx],
[AS_HELP_STRING([--disable-avx],
[turns off avx for x86-64 and uses a fallback implementation to get lockfree 128 bit atomics instead of relying on the consistency guarantees of avx processors])])

AC_ARG_ENABLE([debugging],
[AS_HELP_STRING([--enable-debugging],
[turns off optimization and turns on debug flags])])
Expand Down Expand Up @@ -534,6 +538,10 @@ AS_IF([test "x$qthread_cv_asm_arch" = "xPOWERPC32"],
align_aligned_t=4],
[sizeof_aligned_t=8
align_aligned_t=8])
AS_IF([test "x$enable_avx" != "xno"],
AS_IF([test "x$qthread_cv_asm_arch" = xAMD64],
[CFLAGS="-mavx $CFLAGS"
CXXFLAGS="-mavx $CXXFLAGS"]))
AS_IF([test "x$enable_64bit_aligned_t" = "xyes"], [sizeof_aligned_t="8"
align_aligned_t="8"])
AS_IF([test "x$enable_64bit_aligned_t" = "xno"], [sizeof_aligned_t="$ac_cv_sizeof_int"
Expand Down Expand Up @@ -580,6 +588,8 @@ AS_IF([test "x$enable_eurekas" = "xyes"],

AC_CACHE_SAVE

QT_NEEDS_128_BIT_ATOMIC_FALLBACK

## ----------------------- ##
## Checks for header files ##
## ----------------------- ##
Expand Down
2 changes: 2 additions & 0 deletions include/progress64/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
DisableFormat: true
SortIncludes: Never
11 changes: 11 additions & 0 deletions include/progress64/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Copyright 2018 ARM Limited.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
108 changes: 108 additions & 0 deletions include/progress64/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
//Copyright (c) 2018, ARM Limited. All rights reserved.
//
//SPDX-License-Identifier: BSD-3-Clause

// Vendored from the ARM progress64 library: compiler/hardware hint macros and
// small utility macros shared by the lock-free atomic helpers.
// NOTE(review): guard name _COMMON_H uses a reserved identifier (leading
// underscore + uppercase); kept as-is since sibling vendored files may rely on it.
#ifndef _COMMON_H
#define _COMMON_H

//Needed because ASSUME() expands to assert() when NDEBUG is not defined;
//previously this header relied on the includer to provide <assert.h>.
#include <assert.h>

//Compiler hints
#define ALWAYS_INLINE __attribute__((always_inline))
#define NO_INLINE __attribute__((noinline))
#ifdef __clang__
#define UNROLL_LOOPS __attribute__((opencl_unroll_hint(8)))
#else
#define UNROLL_LOOPS __attribute__((optimize("unroll-loops")))
#endif
#define INIT_FUNCTION __attribute__((constructor))
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#define COMPILER_MEMORY_FENCE() __asm __volatile("" ::: "memory")
#define UNREACHABLE() __builtin_unreachable()

#ifdef NDEBUG
#if defined __GNUC__ && __GNUC__ >= 8
//Release builds: let the optimizer exploit the assumption.
#define ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
#else
#define ASSUME(cond) (void)(cond)
#endif
#else //Assertions enabled, check that assumptions are true
#define ASSUME(cond) assert(cond)
#endif

//Hardware hints
#define PREFETCH_FOR_READ(ptr) __builtin_prefetch((ptr), 0, 3)
#define PREFETCH_FOR_WRITE(ptr) __builtin_prefetch((ptr), 1, 3)

//Use GNUC syntax for ALIGNED
#define ALIGNED(x) __attribute__((__aligned__(x)))
#if __STDC_VERSION__ >= 201112L
//Use C11 syntax
#define THREAD_LOCAL _Thread_local
#else
//Use GNUC syntax
#define THREAD_LOCAL __thread
#endif

//Round up to the next power of two (returns 1 for inputs <= 1).
//Uses a GNU statement expression to evaluate the argument only once.
#define ROUNDUP_POW2(x) \
({ \
    unsigned long _x = (x); \
    _x > 1 ? (1UL << (__SIZEOF_LONG__ * __CHAR_BIT__ - __builtin_clzl(_x - 1UL))) : 1; \
})

/*
 * By Hallvard B Furuseth
 * https://groups.google.com/forum/?hl=en#!msg/comp.lang.c/attFnqwhvGk/sGBKXvIkY3AJ
 * Return (v ? floor(log2(v)) : 0) when 0 <= v < 1<<[8, 16, 32, 64].
 * Inefficient algorithm, intended for compile-time constants.
 */
#define LOG2_8BIT(v) (8 - 90/(((v)/4+14)|1) - 2/((v)/2+1))
#define LOG2_16BIT(v) (8*((v)>255) + LOG2_8BIT((v) >>8*((v)>255)))
#define LOG2_32BIT(v) \
    (16*((v)>65535L) + LOG2_16BIT((v)*1L >>16*((v)>65535L)))
#define LOG2_64BIT(v)\
    (32*((v)/2L>>31 > 0) \
     + LOG2_32BIT((v)*1L >>16*((v)/2L>>31 > 0) \
                         >>16*((v)/2L>>31 > 0)))

//Round a up to the next multiple of b (single evaluation of each argument).
#define ROUNDUP(a, b) \
({ \
    __typeof__ (a) tmp_a = (a); \
    __typeof__ (b) tmp_b = (b); \
    ((tmp_a + tmp_b - 1) / tmp_b) * tmp_b; \
})

#define MIN(a, b) \
({ \
    __typeof__ (a) tmp_a = (a); \
    __typeof__ (b) tmp_b = (b); \
    tmp_a < tmp_b ? tmp_a : tmp_b; \
})

#define MAX(a, b) \
({ \
    __typeof__ (a) tmp_a = (a); \
    __typeof__ (b) tmp_b = (b); \
    tmp_a > tmp_b ? tmp_a : tmp_b; \
})

#define IS_POWER_OF_TWO(n) \
({ \
    __typeof__ (n) tmp_n = (n); \
    tmp_n != 0 && (tmp_n & (tmp_n - 1)) == 0; \
})

//do/while(0) wrapper so SWAP(a, b); behaves as a single statement in
//if/else chains (the bare brace-block form broke `if (c) SWAP(a,b); else ...`).
#define SWAP(_a, _b) \
do { \
    __typeof__ (_a) _t; \
    _t = _a; \
    _a = _b; \
    _b = _t; \
} while (0)

//A pair of pointers, used as the operand type for double-width CAS.
#if __SIZEOF_POINTER__ == 4
typedef unsigned long long ptrpair_t;//assume 64 bits
#else //__SIZEOF_POINTER__ == 8
typedef __int128 ptrpair_t;
#endif

#endif
95 changes: 95 additions & 0 deletions include/progress64/lockfree.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//Copyright (c) 2018, ARM Limited. All rights reserved.
//
//SPDX-License-Identifier: BSD-3-Clause

// Vendored from the ARM progress64 library: lock-free 128-bit (or
// pointer-pair) compare-exchange selection per architecture, plus
// fetch-umax helpers built on the GCC __atomic builtins.
#ifndef _LOCKFREE_H
#define _LOCKFREE_H

#include "common.h"
//Needed for uint32_t/uint64_t used below; previously this header relied on
//the includer to have pulled in <stdint.h> already.
#include <stdint.h>

//Memory-order helpers: split a combined order into its load/store components.
#define HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
#define HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || (mo) == __ATOMIC_ACQ_REL || (mo) == __ATOMIC_SEQ_CST)

#define MO_LOAD(mo) (HAS_ACQ((mo)) ? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED)
#define MO_STORE(mo) (HAS_RLS((mo)) ? __ATOMIC_RELEASE : __ATOMIC_RELAXED)

#if defined __aarch64__

#include "lockfree/aarch64.h"
#define lockfree_compare_exchange_pp_frail lockfree_compare_exchange_16_frail
#define lockfree_compare_exchange_pp lockfree_compare_exchange_16

#elif defined __arm__

//32-bit ARM: a pointer pair fits in 8 bytes, so the builtin suffices.
#define lockfree_compare_exchange_pp_frail __atomic_compare_exchange_8
#define lockfree_compare_exchange_pp __atomic_compare_exchange_8

#elif defined __x86_64__

#include "lockfree/x86-64.h"
#define lockfree_compare_exchange_pp_frail lockfree_compare_exchange_16
#define lockfree_compare_exchange_pp lockfree_compare_exchange_16

#else

#error Unsupported architecture

#endif

//The code below forms success memory orders by OR-ing load/store components;
//verify at compile time that the __ATOMIC_* constants permit that trick.
#if (__ATOMIC_RELAXED | __ATOMIC_ACQUIRE) != __ATOMIC_ACQUIRE
#error __ATOMIC bit-wise OR hack failed (see XXX)
#endif
#if (__ATOMIC_RELEASE | __ATOMIC_ACQUIRE) != __ATOMIC_RELEASE
#error __ATOMIC bit-wise OR hack failed (see XXX)
#endif

#ifndef _ATOMIC_UMAX_4_DEFINED
#define _ATOMIC_UMAX_4_DEFINED

//Atomically set *var to max(*var, val); returns the previous value.
//mo is the memory order to use on a successful update.
ALWAYS_INLINE
static inline uint32_t
lockfree_fetch_umax_4(uint32_t *var, uint32_t val, int mo)
{
    uint32_t old = __atomic_load_n(var, __ATOMIC_RELAXED);
    do
    {
        if (val <= old)
        {
            return old;
        }
    }
    while (!__atomic_compare_exchange_n(var,
                                        &old,
                                        val,
                                        /*weak=*/true,
                                        MO_LOAD(mo) | MO_STORE(mo),//XXX
                                        MO_LOAD(mo)));
    return old;
}
#endif

#ifndef _ATOMIC_UMAX_8_DEFINED
#define _ATOMIC_UMAX_8_DEFINED

//64-bit variant of lockfree_fetch_umax_4; same contract.
ALWAYS_INLINE
static inline uint64_t
lockfree_fetch_umax_8(uint64_t *var, uint64_t val, int mo)
{
    uint64_t old = __atomic_load_n(var, __ATOMIC_RELAXED);
    do
    {
        if (val <= old)
        {
            return old;
        }
    }
    while (!__atomic_compare_exchange_n(var,
                                        &old,
                                        val,
                                        /*weak=*/true,
                                        MO_LOAD(mo) | MO_STORE(mo),//XXX
                                        MO_LOAD(mo)));
    return old;
}
#endif

#endif
Loading