Skip to content

SHMEM_LOCKS: MCS implementation of SHMEM LOCKS #12003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions oshmem/runtime/oshmem_shmem_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
#include "oshmem/constants.h"


/*
 * File-scope runtime tunables. Each is defined exactly once here; a second
 * initialized definition of the same object in this translation unit would
 * be a redefinition error.
 */

/* Lock-recursion tunable; registered as an MCA variable further down. */
int oshmem_shmem_lock_recursive = 0;

/* Verbosity tunable for SHMEM API logging. */
int oshmem_shmem_api_verbose = 0;

/*
 * Non-zero routes shmem_set_lock()/shmem_test_lock()/shmem_clear_lock() to
 * the MCS lock implementation; zero selects the ticket-lock implementation.
 * Defaults to 1 and is registered as the "enable_mcs_lock" MCA variable.
 */
int oshmem_shmem_enable_mcs_locks = 1;

/* Preconnect tunable. */
int oshmem_preconnect_all = 0;

int oshmem_shmem_register_params(void)
{
Expand All @@ -38,6 +39,19 @@ int oshmem_shmem_register_params(void)
MCA_BASE_VAR_SCOPE_READONLY,
&oshmem_shmem_lock_recursive);

(void) mca_base_var_register("oshmem",
"oshmem",
NULL,
"enable_mcs_lock",
"enable mcs locks",
MCA_BASE_VAR_TYPE_INT,
NULL,
1,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&oshmem_shmem_enable_mcs_locks);

(void) mca_base_var_register("oshmem",
"oshmem",
NULL,
Expand Down
7 changes: 7 additions & 0 deletions oshmem/runtime/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ OSHMEM_DECLSPEC extern int oshmem_shmem_api_verbose;
*/
OSHMEM_DECLSPEC extern int oshmem_preconnect_all;


/**
 * Selects the lock algorithm used by shmem_set_lock(), shmem_test_lock()
 * and shmem_clear_lock(): non-zero dispatches to the MCS lock routines
 * (_shmem_mcs_*), zero dispatches to the ticket-lock routines.
 * Defined with a default of 1 and registered as the "enable_mcs_lock"
 * MCA variable in oshmem_shmem_params.c.
 */
OSHMEM_DECLSPEC extern int oshmem_shmem_enable_mcs_locks;

END_C_DECLS

#endif /* OSHMEM_RUNTIME_PARAMS_H */
3 changes: 2 additions & 1 deletion oshmem/shmem/c/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@


# Auxiliary lock sources; shmem_mcs_lock.c provides the _shmem_mcs_* routines.
# Every entry except the last must end in a line continuation.
OSHMEM_AUX_SOURCES = \
	shmem_lock.c \
	shmem_mcs_lock.c

OSHMEM_API_SOURCES = \
shmem_init.c \
Expand Down
11 changes: 10 additions & 1 deletion oshmem/shmem/c/shmem_clear_lock.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
/*
* Copyright (c) 2023 NVIDIA Corporation.
* All rights reserved.
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2019 Research Organization for Information Science
Expand All @@ -18,6 +20,7 @@
#include "oshmem/shmem/shmem_api_logger.h"
#include "oshmem/runtime/runtime.h"
#include "oshmem/shmem/shmem_lock.h"
#include "oshmem/runtime/params.h"

#if OSHMEM_PROFILING
#include "oshmem/include/pshmem.h"
Expand All @@ -27,5 +30,11 @@

/**
 * Release a SHMEM lock.
 *
 * Dispatches to exactly one lock implementation, chosen by the
 * oshmem_shmem_enable_mcs_locks runtime parameter: the MCS implementation
 * when it is non-zero, the ticket-lock implementation otherwise. The lock
 * must be released by the same algorithm that acquired it, so no
 * implementation is invoked outside this selection.
 *
 * @param lock  Lock word to release; the volatile qualifier is cast away
 *              before it is handed to the implementation.
 */
void shmem_clear_lock(volatile long *lock)
{
    if (oshmem_shmem_enable_mcs_locks) {
        SHMEM_API_VERBOSE(10, "Clear Lock with MCS Lock implementation");
        _shmem_mcs_clear_lock((long *)lock);
    } else {
        SHMEM_API_VERBOSE(10, "Clear Lock with Ticket Lock implementation");
        _shmem_clear_lock((void *)lock, sizeof(long));
    }
}
239 changes: 239 additions & 0 deletions oshmem/shmem/c/shmem_mcs_lock.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
/*
* Copyright (c) 2023 NVIDIA Corporation.
* All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#include "oshmem_config.h"

#include "oshmem/constants.h"
#include "oshmem/include/shmem.h"
#include "oshmem/runtime/params.h"
#include "oshmem/runtime/runtime.h"
#include <stdlib.h>
#include <memory.h>

#include "oshmem/shmem/shmem_api_logger.h"
#include "oshmem/shmem/shmem_lock.h"
#include "oshmem/mca/memheap/memheap.h"
#include "oshmem/mca/memheap/base/base.h"
#include "oshmem/mca/atomic/atomic.h"

/**
 * SIZEOF_LONG multiplied by 8, i.e. the width of a long in bits, presuming
 * SIZEOF_LONG is the build-time byte size of long (TODO confirm).
 * NOTE(review): not referenced by the code visible in this file.
 */
#define OPAL_BITWISE_SIZEOF_LONG (SIZEOF_LONG * 8)


/**
 * State of one MCS (queue-based) distributed lock. The lock routines in
 * this file obtain it by casting the caller's `long *` lock word.
 *
 * NOTE(review): the overlay needs room for two ints inside one long
 * (sizeof(long) >= 2 * sizeof(int)) -- confirm for every supported ABI.
 */
typedef struct shmem_mcs_lock {
    /**
     * Queue tail. Only the copy on the tail-owner PE
     * (SHMEM_MCSL_TAIL_OWNER) is meaningful: it holds the last PE that
     * queued, OR-ed with the signal bit, and is reset to 0 when the last
     * holder releases the lock.
     */
    int tail;
    /**
     * Meaningful on every PE. The low 31 bits name the successor PE
     * (SHMEM_MCSL_NEXT_MASK when there is none); bit 31 is the wait signal.
     */
    int next;
} shmem_mcs_lock_t;

#define SHMEM_MCSL_TAIL_OWNER(lock_ptr)\
(((uintptr_t)(lock_ptr) / sizeof(long)) % shmem_n_pes())

#define SHMEM_MCSL_NEXT_MASK 0x7FFFFFFFU
#define SHMEM_MCSL_SIGNAL_MASK 0x80000000U /** Wait signal mask */
#define SHMEM_MCSL_NEXT(lock_val) ((lock_val) & SHMEM_MCSL_NEXT_MASK)
/** Improve readability */
#define SHMEM_MCSL_GET_PE(tail_val) ((tail_val) & SHMEM_MCSL_NEXT_MASK)
#define SHMEM_MCSL_SIGNAL(lock_val) ((lock_val) & SHMEM_MCSL_SIGNAL_MASK)
#define SHMEM_MCSL_SET_SIGNAL(lock_val) ((lock_val) | SHMEM_MCSL_SIGNAL_MASK)

/**
 * Acquire the MCS queue lock overlaid on the lock word lockp, blocking
 * until this PE owns it.
 *
 * Steps, in the order performed below:
 *  1. Reset this PE's own `next` word to SHMEM_MCSL_NEXT_MASK
 *     (no successor, wait bit clear).
 *  2. Atomically swap (my_pe | signal bit) into `tail` on the tail-owner PE
 *     and fetch the previous tail.
 *  3. If the previous tail has the signal bit set, record this PE in the
 *     previous tail PE's `next`, raise the wait bit in the local `next`,
 *     and wait until the local wait bit reads clear.
 *     Otherwise the lock was free and is now held by this PE.
 *
 * Return codes of the atomic and wait calls are handed to RUNTIME_CHECK_RC;
 * the function itself returns nothing.
 *
 * @param lockp  Lock word, reinterpreted as a shmem_mcs_lock_t.
 *               NOTE(review): assumes a long can hold two ints -- confirm.
 *
 * NOTE(review): the initializers below already call shmem_n_pes() (through
 * SHMEM_MCSL_TAIL_OWNER) and shmem_my_pe() before RUNTIME_CHECK_INIT() runs
 * -- confirm that ordering is intended.
 */
void
_shmem_mcs_set_lock(long *lockp)
{
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
int new_tail_req = 0;
int *tail = &(lock->tail);
int *next = &(lock->next);
int my_pe = shmem_my_pe();
int curr = 0;
int out_value = 0;
int prev_tail = 0;
int prev_tailpe = 0;
/* tval stays 0: fadd(..., tval, ...) is used purely as an atomic fetch. */
int tval = 0;
int tmp_val = 0;
int retv = 0;
uint64_t value_tmp = 0;

RUNTIME_CHECK_INIT();
/**
 * Step 1: reset the local next word to SHMEM_MCSL_NEXT_MASK.
 * Done with an atomic swap targeting this PE itself because other PEs
 * update this word with remote atomics while acquiring or releasing.
 * The value fetched into out_value is not used afterwards.
 */
/**
 * Original author's note: this could be a shmem_atomic_set to be safe on
 * non-cache-coherent architectures, at a performance cost.
 */
value_tmp = SHMEM_MCSL_NEXT_MASK;
out_value = SHMEM_MCSL_NEXT_MASK;
retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void*)next,
(void*)&out_value, value_tmp,
sizeof(int), my_pe));
RUNTIME_CHECK_RC(retv);
MCA_SPML_CALL(quiet(oshmem_ctx_default));

/** Step 2: tail value announcing this PE, with the signal bit set. */
new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe);
/**
 * Swap the request into the tail on the tail-owner PE, making this PE the
 * new tail; prev_tail receives the value that was there before.
 */
retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void *)tail,
(void*)&prev_tail,
OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req,
sizeof(new_tail_req)),
sizeof(int), mcs_tail_owner));
RUNTIME_CHECK_RC(retv);

prev_tailpe = SHMEM_MCSL_GET_PE(prev_tail);
if (SHMEM_MCSL_SIGNAL(prev_tail)) {
/**
 * Step 3: another PE holds or is queued for the lock.
 * Register this PE as the previous tail PE's successor. Adding
 * (my_pe - SHMEM_MCSL_NEXT_MASK) to a next word whose low 31 bits are
 * SHMEM_MCSL_NEXT_MASK leaves my_pe in those bits; subtracting the mask
 * keeps the add from changing the signal bit.
 */
tmp_val = my_pe - SHMEM_MCSL_NEXT_MASK;
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void*)next, tmp_val,
sizeof(int), prev_tailpe));
RUNTIME_CHECK_RC(retv);
/**
 * Raise the wait bit in the local next word by adding
 * SHMEM_MCSL_SIGNAL_MASK atomically. The predecessor releases the lock by
 * adding the same mask to this word (see _shmem_mcs_clear_lock); with a
 * 32-bit int the two adds cancel in bit 31 whichever lands first
 * (presumably why add is used instead of a store -- TODO confirm).
 */
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void *)next,
SHMEM_MCSL_SIGNAL_MASK, sizeof(int),
my_pe));
RUNTIME_CHECK_RC(retv);
MCA_SPML_CALL(quiet(oshmem_ctx_default));
/** Atomically fetch the local next word (fadd of 0) into curr. */
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
(void*)&curr, tval, sizeof(int), my_pe));
RUNTIME_CHECK_RC(retv);

/**
 * Wait while the wait bit is still set: block until the word differs from
 * the last value seen, then fetch it again and re-test.
 */
while (SHMEM_MCSL_SIGNAL(curr)) {
retv = MCA_SPML_CALL(wait((void*)next, SHMEM_CMP_NE,
(void*)&curr, SHMEM_INT));
RUNTIME_CHECK_RC(retv);
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
(void*)&curr, tval, sizeof(int),
my_pe));
RUNTIME_CHECK_RC(retv);
}
}
/** else: the previous tail had no signal bit, so this PE holds the lock. */
}

/**
 * Release the MCS queue lock overlaid on the lock word lockp.
 *
 * Steps, in the order performed below:
 *  1. Atomically fetch this PE's own `next` word.
 *  2. If no successor is recorded (value equals SHMEM_MCSL_NEXT_MASK), try
 *     to compare-and-swap the tail on the tail-owner PE from
 *     (my_pe | signal bit) to 0. If the fetched tail matches, this PE was
 *     the tail and the lock is now free. Otherwise another PE has already
 *     swapped itself into the tail, so wait until it shows up in `next`.
 *  3. Hand the lock over by adding SHMEM_MCSL_SIGNAL_MASK to the
 *     successor's `next` word.
 *
 * Return codes of the atomic and wait calls are handed to RUNTIME_CHECK_RC;
 * the function itself returns nothing.
 *
 * @param lockp  Lock word, reinterpreted as a shmem_mcs_lock_t.
 *
 * NOTE(review): unlike _shmem_mcs_set_lock there is no RUNTIME_CHECK_INIT()
 * here, and nothing verifies that the calling PE holds the lock -- confirm
 * both are intended.
 */
void
_shmem_mcs_clear_lock(long *lockp)
{
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
int *tail = &(lock->tail);
int *next = &(lock->next);
int my_pe = shmem_my_pe();
int next_value = 0;
int swap_cond = 0;
int prev_value = 0;
/* tval stays 0: fadd(..., tval, ...) is used purely as an atomic fetch. */
int tval = 0;
/* val_tmp stays 0: the value written to the tail when the lock becomes free. */
int val_tmp = 0;
int nmask = 0;
int a_val = 0;
int retv = 0;

/**
 * Step 1: atomic fetch of the local next word.
 * Original author's note: an atomic fetch would be safer on
 * non-cache-coherent architectures, at a performance cost.
 */
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
(void*)&next_value, tval, sizeof(int),
my_pe));
RUNTIME_CHECK_RC(retv);
MCA_SPML_CALL(quiet(oshmem_ctx_default));

if (next_value == SHMEM_MCSL_NEXT_MASK) {
/**
 * Step 2: no successor recorded yet. Compare-and-swap the tail using
 * swap_cond and val_tmp (argument order presumably compare value, then
 * new value -- inferred from the variable names, TODO confirm);
 * prev_value receives the tail as it was.
 */
swap_cond = SHMEM_MCSL_SET_SIGNAL(my_pe);
retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default,
(void *)tail, (uint64_t *)&(prev_value),
OSHMEM_ATOMIC_PTR_2_INT(&swap_cond,
sizeof(swap_cond)),
OSHMEM_ATOMIC_PTR_2_INT(&val_tmp,
sizeof(val_tmp)), sizeof(int),
mcs_tail_owner));
RUNTIME_CHECK_RC(retv);

/** The tail still named this PE: nobody is queued, the lock is released. */
if (prev_value == swap_cond) {
return;
}
/**
 * The tail names another PE that is still in the middle of acquiring:
 * wait until it has written itself into the local next word, i.e. until
 * that word no longer equals SHMEM_MCSL_NEXT_MASK.
 */
nmask = SHMEM_MCSL_NEXT_MASK;
while(next_value == nmask) {
retv = MCA_SPML_CALL(wait((void*)next, SHMEM_CMP_NE,
(void*)&nmask, SHMEM_INT));
RUNTIME_CHECK_RC(retv);
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next,
(void*)&next_value, tval,
sizeof(int), my_pe));
RUNTIME_CHECK_RC(retv);
}
}
/**
 * Step 3: a successor exists. Add SHMEM_MCSL_SIGNAL_MASK to the next word
 * on the successor PE (low 31 bits of next_value); this is the counterpart
 * of the add the successor performs on its own word in
 * _shmem_mcs_set_lock before it starts waiting.
 */
a_val = SHMEM_MCSL_SIGNAL_MASK;
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default,
(void *)next, a_val, sizeof(a_val),
SHMEM_MCSL_NEXT(next_value)));
RUNTIME_CHECK_RC(retv);
MCA_SPML_CALL(quiet(oshmem_ctx_default));
}

/**
 * Try to acquire the MCS queue lock overlaid on lockp without waiting.
 *
 * Resets the local `next` word, then issues one compare-and-swap on the
 * tail (on the tail-owner PE) using 0 and (my_pe | signal bit).
 *
 * @param lockp  Lock word, reinterpreted as a shmem_mcs_lock_t.
 * @return 0 when the fetched previous tail was 0, 1 otherwise. Presumably
 *         0 means the lock was free and is now held by this PE (cswap
 *         argument order inferred from variable names -- TODO confirm).
 *
 * NOTE(review): the local `next` word is written with a plain store, and
 * before the outcome of the compare-and-swap is known, whereas
 * _shmem_mcs_set_lock resets it with an atomic swap. If this PE already
 * holds or is queued for the lock, this store would overwrite any
 * successor recorded in `next` -- confirm callers cannot hit that case.
 * There is also no RUNTIME_CHECK_INIT() here.
 */
int
_shmem_mcs_test_lock(long *lockp)
{
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp;
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock);
int new_tail_req = 0;
int prev_tail = 0;
/* tmp_cond stays 0: the tail value that denotes a free lock. */
int tmp_cond = 0;
int *tail = &(lock->tail);
int *next = &(lock->next);
int my_pe = shmem_my_pe();
int retv = 0;

/** Reset the local next word: no successor, wait bit clear. */
*next = SHMEM_MCSL_NEXT_MASK;

/** Tail value announcing this PE, with the signal bit set. */
new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe);

/** Single compare-and-swap attempt; prev_tail receives the tail as it was. */
retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default,
(void *)tail, (uint64_t *)&(prev_tail),
OSHMEM_ATOMIC_PTR_2_INT(&tmp_cond,
sizeof(tmp_cond)),
OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req,
sizeof(new_tail_req)),
sizeof(int), mcs_tail_owner));
RUNTIME_CHECK_RC(retv);

/** 0 if the tail was 0 before the attempt, 1 otherwise. */
return (0 != prev_tail);
}
11 changes: 10 additions & 1 deletion oshmem/shmem/c/shmem_set_lock.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
/*
* Copyright (c) 2023 NVIDIA Corporation.
* All rights reserved.
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2019 Research Organization for Information Science
Expand All @@ -18,6 +20,7 @@
#include "oshmem/shmem/shmem_api_logger.h"
#include "oshmem/runtime/runtime.h"
#include "oshmem/shmem/shmem_lock.h"
#include "oshmem/runtime/params.h"

#if OSHMEM_PROFILING
#include "oshmem/include/pshmem.h"
Expand All @@ -27,5 +30,11 @@

/**
 * Acquire a SHMEM lock.
 *
 * Dispatches to exactly one lock implementation, chosen by the
 * oshmem_shmem_enable_mcs_locks runtime parameter: the MCS implementation
 * when it is non-zero, the ticket-lock implementation otherwise. The two
 * algorithms interpret the lock word differently, so only the selected one
 * may touch it.
 *
 * @param lock  Lock word to acquire; the volatile qualifier is cast away
 *              before it is handed to the implementation.
 */
void shmem_set_lock(volatile long *lock)
{
    if (oshmem_shmem_enable_mcs_locks) {
        SHMEM_API_VERBOSE(10, "Set Lock with MCS Lock implementation");
        _shmem_mcs_set_lock((long *)lock);
    } else {
        SHMEM_API_VERBOSE(10, "Set Lock with Ticket Lock implementation");
        _shmem_set_lock((void *)lock, sizeof(long));
    }
}
11 changes: 10 additions & 1 deletion oshmem/shmem/c/shmem_test_lock.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
/*
* Copyright (c) 2023 NVIDIA Corporation.
* All rights reserved.
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2019 Research Organization for Information Science
Expand All @@ -18,6 +20,7 @@
#include "oshmem/include/shmem.h"
#include "oshmem/shmem/shmem_api_logger.h"
#include "oshmem/runtime/runtime.h"
#include "oshmem/runtime/params.h"
#include "oshmem/shmem/shmem_lock.h"

#if OSHMEM_PROFILING
Expand All @@ -28,5 +31,11 @@

/**
 * Try to acquire a SHMEM lock without waiting.
 *
 * Dispatches to exactly one lock implementation, chosen by the
 * oshmem_shmem_enable_mcs_locks runtime parameter: the MCS implementation
 * when it is non-zero, the ticket-lock implementation otherwise.
 *
 * @param lock  Lock word to test; the volatile qualifier is cast away
 *              before it is handed to the implementation.
 * @return The value returned by the selected implementation.
 */
int shmem_test_lock(volatile long *lock)
{
    if (oshmem_shmem_enable_mcs_locks) {
        SHMEM_API_VERBOSE(10, "Test lock using MCS Lock implementation");
        return _shmem_mcs_test_lock((long *)lock);
    } else {
        SHMEM_API_VERBOSE(10, "Test_lock using Ticket Lock implementation");
        return _shmem_test_lock((void *)lock, sizeof(long));
    }
}
3 changes: 3 additions & 0 deletions oshmem/shmem/shmem_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,8 @@ void _shmem_set_lock(void *lock, int lock_size);
int _shmem_test_lock(void *lock, int lock_size);
void _shmem_clear_lock(void *lock, int lock_size);

/**
 * MCS queue-lock counterparts of the routines above, implemented in
 * oshmem/shmem/c/shmem_mcs_lock.c. Each takes the lock word directly and
 * reinterprets it as the MCS lock state.
 */
/** Acquire the lock, waiting until the calling PE owns it. */
void _shmem_mcs_set_lock(long *lock);
/** Release the lock, handing it to the queued successor PE if there is one. */
void _shmem_mcs_clear_lock(long *lock);
/** Single acquire attempt; returns 0 if the lock's tail was 0 beforehand, 1 otherwise. */
int _shmem_mcs_test_lock(long *lock);

#endif /*SHMEM_LOCK_H*/