Skip to content

Update opal to use C11 atomics if available #5445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 137 additions & 10 deletions config/opal_config_asm.m4
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dnl Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
dnl Copyright (c) 2015-2017 Research Organization for Information Science
dnl and Technology (RIST). All rights reserved.
dnl Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
dnl Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
dnl reserved.
dnl Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights
dnl reserved.
Expand Down Expand Up @@ -122,6 +122,58 @@ int main(int argc, char** argv)
}
]])

dnl This is a C test to see if the 128-bit C11
dnl atomic_compare_exchange_strong family actually works, i.e. gives
dnl correct answers and does not merely compile and link (the
dnl equivalent __atomic_compare_exchange_n() test compiles and links
dnl successfully on ARM64+clang, but returns incorrect answers as of
dnl August 2018).
dnl
dnl The program exits with 0 when both checks pass, 1 when the
dnl "compare fails" case misbehaves and 2 when the "compare succeeds"
dnl case misbehaves.
AC_DEFUN([OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_TEST_SOURCE],[[
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdatomic.h>

typedef union {
    uint64_t fake@<:@2@:>@;
    _Atomic __int128 real;
} ompi128;

/* Compare must fail: the stored value differs from the expected one.
   On failure the current value is written back into "expected". */
static void test1(void)
{
    // As of Aug 2018, we could not figure out a way to assign 128-bit
    // constants -- the compilers would not accept it.  So use a fake
    // union to assign 2 uint64_t's to make a single __int128.
    ompi128 ptr           = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }};
    ompi128 expected_init = { .fake = { 0x11EEDDCCBBAA0099, 0x88776655443322FF }};
    ompi128 desired       = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }};

    // C11 requires the "expected" argument to point to the plain
    // (non-atomic) type, so load it out of the union first.
    __int128 expected = expected_init.real;

    bool r = atomic_compare_exchange_strong_explicit (&ptr.real, &expected,
                                                      desired.real,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed);
    if ( !(r == false && ptr.real == expected)) {
        exit(1);
    }
}

/* Compare must succeed: the stored value equals the expected one, so
   the desired value has to be stored. */
static void test2(void)
{
    ompi128 ptr     = { .fake = { 0xFFEEDDCCBBAA0099, 0x8877665544332211 }};
    ompi128 desired = { .fake = { 0x1122DDCCBBAA0099, 0x887766554433EEFF }};
    __int128 expected = ptr.real;

    bool r = atomic_compare_exchange_strong_explicit (&ptr.real, &expected,
                                                      desired.real,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed);
    if (!(r == true && ptr.real == desired.real)) {
        exit(2);
    }
}

int main(int argc, char** argv)
{
    test1();
    test2();
    return 0;
}
]])

dnl ------------------------------------------------------------------

dnl
Expand Down Expand Up @@ -329,6 +381,71 @@ __atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);],
OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128
])

dnl OPAL_CHECK_C11_CSWAP_INT128
dnl
dnl Decide whether C11 atomic compare-and-swap on 128-bit integers is
dnl usable and AC_DEFINE OPAL_HAVE_C11_CSWAP_INT128 to 1 or 0.
dnl
dnl Steps, in order:
dnl  1. Save CFLAGS and LIBS so they can be restored later.
dnl  2. Hand the OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_TEST_SOURCE program
dnl     to OPAL_ASM_CHECK_ATOMIC_FUNC, which sets the result to 1 or 0
dnl     (and, per the comments below, may change CFLAGS/LIBS).
dnl  3. If that passed, run a second program that calls
dnl     atomic_is_lock_free on an _Atomic __int128_t.  A failure sets
dnl     the result back to 0 and restores the saved CFLAGS/LIBS; when
dnl     cross compiling the program cannot be run and the result is
dnl     left at 1.
dnl  4. If the final result is 0, fall back to
dnl     OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128.
AC_DEFUN([OPAL_CHECK_C11_CSWAP_INT128], [
OPAL_VAR_SCOPE_PUSH([atomic_compare_exchange_result atomic_compare_exchange_CFLAGS_save atomic_compare_exchange_LIBS_save])

atomic_compare_exchange_CFLAGS_save=$CFLAGS
atomic_compare_exchange_LIBS_save=$LIBS

# Do we have C11 atomics on 128-bit integers?
# Use a special macro because we need to check with a few different
# CFLAGS/LIBS.
OPAL_ASM_CHECK_ATOMIC_FUNC([atomic_compare_exchange_strong_16],
[AC_LANG_SOURCE(OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_TEST_SOURCE)],
[atomic_compare_exchange_result=1],
[atomic_compare_exchange_result=0])

# If we have it and it works, check to make sure it is always lock
# free.
AS_IF([test $atomic_compare_exchange_result -eq 1],
[AC_MSG_CHECKING([if C11 __int128 atomic compare-and-swap is always lock-free])
AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdatomic.h>], [_Atomic __int128_t x; if (!atomic_is_lock_free(&x)) { return 1; }])],
[AC_MSG_RESULT([yes])],
[atomic_compare_exchange_result=0
# If this test fails, need to reset CFLAGS/LIBS (the
# above tests atomically set CFLAGS/LIBS or not; this
# test is running after the fact, so we have to undo
# the side-effects of setting CFLAGS/LIBS if the above
# tests passed).
CFLAGS=$atomic_compare_exchange_CFLAGS_save
LIBS=$atomic_compare_exchange_LIBS_save
AC_MSG_RESULT([no])],
[AC_MSG_RESULT([cannot test -- assume yes (cross compiling)])])
])

AC_DEFINE_UNQUOTED([OPAL_HAVE_C11_CSWAP_INT128],
[$atomic_compare_exchange_result],
[Whether C11 atomic compare swap is both supported and lock-free on 128-bit values])

dnl If we could not find decent support for 128-bit C11 atomics, fall
dnl back to checking the GCC __sync builtin compare-and-swap.
AS_IF([test $atomic_compare_exchange_result -eq 0],
[OPAL_CHECK_SYNC_BUILTIN_CSWAP_INT128])

OPAL_VAR_SCOPE_POP
])

dnl OPAL_CHECK_GCC_ATOMIC_BUILTINS(action-if-found, action-if-not-found)
dnl
dnl Try to link a small program that uses the __atomic builtins: a
dnl thread fence plus compare-exchange and add-fetch on both 32-bit and
dnl 64-bit integers.  The first argument is run when the link succeeds,
dnl the second when it fails.  The 128-bit compare-and-swap check is
dnl performed afterwards in either case.
AC_DEFUN([OPAL_CHECK_GCC_ATOMIC_BUILTINS], [
  AC_MSG_CHECKING([for __atomic builtin atomics])

  AC_LINK_IFELSE(
    [AC_LANG_PROGRAM([[
#include <stdint.h>
uint32_t tmp, old = 0;
uint64_t tmp64, old64 = 0;]],
                     [[
__atomic_thread_fence(__ATOMIC_SEQ_CST);
__atomic_compare_exchange_n(&tmp, &old, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
__atomic_add_fetch(&tmp, 1, __ATOMIC_RELAXED);
__atomic_compare_exchange_n(&tmp64, &old64, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
__atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);]])],
    [AC_MSG_RESULT([yes])
     $1],
    [AC_MSG_RESULT([no])
     $2])

  # Check for 128-bit support
  OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128
])


dnl #################################################################
dnl
Expand Down Expand Up @@ -1020,17 +1137,27 @@ AC_DEFUN([OPAL_CONFIG_ASM],[
AC_REQUIRE([OPAL_SETUP_CC])
AC_REQUIRE([AM_PROG_AS])

AC_ARG_ENABLE([c11-atomics],[AC_HELP_STRING([--enable-c11-atomics],
[Enable use of C11 atomics if available (default: enabled)])])

AC_ARG_ENABLE([builtin-atomics],
[AC_HELP_STRING([--enable-builtin-atomics],
[Enable use of __sync builtin atomics (default: enabled)])])

opal_cv_asm_builtin="BUILTIN_NO"
AS_IF([test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" != "no"],
[OPAL_CHECK_GCC_ATOMIC_BUILTINS([opal_cv_asm_builtin="BUILTIN_GCC"], [])])
AS_IF([test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" != "no"],
[OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], [])])
AS_IF([test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"],
[AC_MSG_ERROR([__sync builtin atomics requested but not found.])])
[Enable use of __sync builtin atomics (default: disabled)])])

OPAL_CHECK_C11_CSWAP_INT128

if test "x$enable_c11_atomics" != "xno" && test "$opal_cv_c11_supported" = "yes" ; then
opal_cv_asm_builtin="BUILTIN_C11"
OPAL_CHECK_C11_CSWAP_INT128
else
opal_cv_asm_builtin="BUILTIN_NO"
AS_IF([test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"],
[OPAL_CHECK_GCC_ATOMIC_BUILTINS([opal_cv_asm_builtin="BUILTIN_GCC"], [])])
AS_IF([test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"],
[OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], [])])
AS_IF([test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"],
[AC_MSG_ERROR([__sync builtin atomics requested but not found.])])
fi

OPAL_CHECK_ASM_PROC
OPAL_CHECK_ASM_TEXT
Expand Down
4 changes: 2 additions & 2 deletions ompi/communicator/comm_request.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
Expand Down Expand Up @@ -99,7 +99,7 @@ int ompi_comm_request_schedule_append (ompi_comm_request_t *request, ompi_comm_r
static int ompi_comm_request_progress (void)
{
ompi_comm_request_t *request, *next;
static int32_t progressing = 0;
static opal_atomic_int32_t progressing = 0;

/* don't allow re-entry */
if (opal_atomic_swap_32 (&progressing, 1)) {
Expand Down
2 changes: 1 addition & 1 deletion ompi/datatype/ompi_datatype.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct ompi_datatype_t {
struct opal_hash_table_t *d_keyhash; /**< Attribute fields */

void* args; /**< Data description for the user */
void* packed_description; /**< Packed description of the datatype */
opal_atomic_intptr_t packed_description; /**< Packed description of the datatype */
uint64_t pml_data; /**< PML-specific information */
/* --- cacheline 6 boundary (384 bytes) --- */
char name[MPI_MAX_OBJECT_NAME];/**< Externally visible name */
Expand Down
18 changes: 9 additions & 9 deletions ompi/datatype/ompi_datatype_args.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ __ompi_datatype_create_from_args( int32_t* i, ptrdiff_t * a,
ompi_datatype_t** d, int32_t type );

typedef struct __dt_args {
int32_t ref_count;
opal_atomic_int32_t ref_count;
int32_t create_type;
size_t total_pack_size;
int32_t ci;
Expand Down Expand Up @@ -104,7 +104,7 @@ typedef struct __dt_args {
pArgs->total_pack_size = (4 + (IC) + (DC)) * sizeof(int) + \
(AC) * sizeof(ptrdiff_t); \
(PDATA)->args = (void*)pArgs; \
(PDATA)->packed_description = NULL; \
(PDATA)->packed_description = 0; \
} while(0)


Expand Down Expand Up @@ -483,12 +483,12 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype,
{
ompi_datatype_args_t* args = (ompi_datatype_args_t*)datatype->args;
int next_index = OMPI_DATATYPE_MAX_PREDEFINED;
void *packed_description = datatype->packed_description;
void *packed_description = (void *) datatype->packed_description;
void* recursive_buffer;

if (NULL == packed_description) {
void *_tmp_ptr = NULL;
if (opal_atomic_compare_exchange_strong_ptr (&datatype->packed_description, (void *) &_tmp_ptr, (void *) 1)) {
if (opal_atomic_compare_exchange_strong_ptr (&datatype->packed_description, (intptr_t *) &_tmp_ptr, 1)) {
if( ompi_datatype_is_predefined(datatype) ) {
packed_description = malloc(2 * sizeof(int));
} else if( NULL == args ) {
Expand All @@ -510,22 +510,22 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype,
}

opal_atomic_wmb ();
datatype->packed_description = packed_description;
datatype->packed_description = (intptr_t) packed_description;
} else {
/* another thread beat us to it */
packed_description = datatype->packed_description;
packed_description = (void *) datatype->packed_description;
}
}

if ((void *) 1 == packed_description) {
struct timespec interval = {.tv_sec = 0, .tv_nsec = 1000};

/* wait until the packed description is updated */
while ((void *) 1 == datatype->packed_description) {
while (1 == datatype->packed_description) {
nanosleep (&interval, NULL);
}

packed_description = datatype->packed_description;
packed_description = (void *) datatype->packed_description;
}

*packed_buffer = (const void *) packed_description;
Expand All @@ -534,7 +534,7 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype,

size_t ompi_datatype_pack_description_length( ompi_datatype_t* datatype )
{
void *packed_description = datatype->packed_description;
void *packed_description = (void *) datatype->packed_description;

if( ompi_datatype_is_predefined(datatype) ) {
return 2 * sizeof(int);
Expand Down
10 changes: 5 additions & 5 deletions ompi/datatype/ompi_datatype_create.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static void __ompi_datatype_allocate( ompi_datatype_t* datatype )
datatype->id = -1;
datatype->d_keyhash = NULL;
datatype->name[0] = '\0';
datatype->packed_description = NULL;
datatype->packed_description = 0;
datatype->pml_data = 0;
}

Expand All @@ -46,10 +46,10 @@ static void __ompi_datatype_release(ompi_datatype_t * datatype)
ompi_datatype_release_args( datatype );
datatype->args = NULL;
}
if( NULL != datatype->packed_description ) {
free( datatype->packed_description );
datatype->packed_description = NULL;
}

free ((void *) datatype->packed_description );
datatype->packed_description = 0;

if( datatype->d_f_to_c_index >= 0 ) {
opal_pointer_array_set_item( &ompi_datatype_f_to_c_table, datatype->d_f_to_c_index, NULL );
datatype->d_f_to_c_index = -1;
Expand Down
2 changes: 1 addition & 1 deletion ompi/datatype/ompi_datatype_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
.d_f_to_c_index = -1, \
.d_keyhash = NULL, \
.args = NULL, \
.packed_description = NULL, \
.packed_description = 0, \
.name = "MPI_" # NAME

#define OMPI_DATATYPE_INITIALIZER_UNAVAILABLE(FLAGS) \
Expand Down
2 changes: 1 addition & 1 deletion ompi/datatype/ompi_datatype_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}};
(PDST)->super.desc = (PSRC)->super.desc; \
(PDST)->super.opt_desc = (PSRC)->super.opt_desc; \
(PDST)->packed_description = (PSRC)->packed_description; \
(PSRC)->packed_description = NULL; \
(PSRC)->packed_description = 0; \
/* transfer the ptypes */ \
(PDST)->super.ptypes = (PSRC)->super.ptypes; \
(PSRC)->super.ptypes = NULL; \
Expand Down
3 changes: 2 additions & 1 deletion ompi/group/group.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,8 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group,
ompi_proc_t *real_proc =
(ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc));

if (opal_atomic_compare_exchange_strong_ptr (group->grp_proc_pointers + peer_id, &proc, real_proc)) {
if (opal_atomic_compare_exchange_strong_ptr ((opal_atomic_intptr_t *)(group->grp_proc_pointers + peer_id),
(intptr_t *) &proc, (intptr_t) real_proc)) {
OBJ_RETAIN(real_proc);
}

Expand Down
2 changes: 1 addition & 1 deletion ompi/mca/coll/libnbc/coll_libnbc.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct ompi_coll_libnbc_component_t {
mca_coll_base_component_2_0_0_t super;
opal_free_list_t requests;
opal_list_t active_requests;
int32_t active_comms;
opal_atomic_int32_t active_comms;
opal_mutex_t lock; /* protect access to the active_requests list */
};
typedef struct ompi_coll_libnbc_component_t ompi_coll_libnbc_component_t;
Expand Down
2 changes: 1 addition & 1 deletion ompi/mca/coll/monitoring/coll_monitoring.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct mca_coll_monitoring_module_t {
mca_coll_base_module_t super;
mca_coll_base_comm_coll_t real;
mca_monitoring_coll_data_t*data;
int32_t is_initialized;
opal_atomic_int32_t is_initialized;
};
typedef struct mca_coll_monitoring_module_t mca_coll_monitoring_module_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_monitoring_module_t);
Expand Down
5 changes: 3 additions & 2 deletions ompi/mca/coll/portals4/coll_portals4.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2015 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
Expand Down Expand Up @@ -91,7 +92,7 @@ typedef struct ompi_coll_portals4_tree_t {

struct mca_coll_portals4_module_t {
mca_coll_base_module_t super;
size_t coll_count;
opal_atomic_size_t coll_count;

/* record handlers dedicated to fallback if offloaded operations are not supported */
mca_coll_base_module_reduce_fn_t previous_reduce;
Expand Down
4 changes: 2 additions & 2 deletions ompi/mca/coll/sm/coll_sm.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ BEGIN_C_DECLS
typedef struct mca_coll_sm_in_use_flag_t {
/** Number of processes currently using this set of
segments */
volatile uint32_t mcsiuf_num_procs_using;
opal_atomic_uint32_t mcsiuf_num_procs_using;
/** Must match data->mcb_count */
volatile uint32_t mcsiuf_operation_count;
} mca_coll_sm_in_use_flag_t;
Expand Down Expand Up @@ -152,7 +152,7 @@ BEGIN_C_DECLS
/** Pointer to my parent's barrier control pages (will be NULL
for communicator rank 0; odd index pages are "in", even
index pages are "out") */
uint32_t *mcb_barrier_control_parent;
opal_atomic_uint32_t *mcb_barrier_control_parent;

/** Pointers to my childrens' barrier control pages (they're
contiguous in memory, so we only point to the base -- the
Expand Down
3 changes: 2 additions & 1 deletion ompi/mca/coll/sm/coll_sm_barrier.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm,
int rank, buffer_set;
mca_coll_sm_comm_t *data;
uint32_t i, num_children;
volatile uint32_t *me_in, *me_out, *parent, *children = NULL;
volatile uint32_t *me_in, *me_out, *children = NULL;
opal_atomic_uint32_t *parent;
int uint_control_size;
mca_coll_sm_module_t *sm_module = (mca_coll_sm_module_t*) module;

Expand Down
2 changes: 1 addition & 1 deletion ompi/mca/coll/sm/coll_sm_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
data->mcb_barrier_control_me = (uint32_t*)
(base + (rank * control_size * num_barrier_buffers * 2));
if (data->mcb_tree[rank].mcstn_parent) {
data->mcb_barrier_control_parent = (uint32_t*)
data->mcb_barrier_control_parent = (opal_atomic_uint32_t*)
(base +
(data->mcb_tree[rank].mcstn_parent->mcstn_id * control_size *
num_barrier_buffers * 2));
Expand Down
Loading