Skip to content

Commit 87d41da

Browse files
committed
btl/vader: add support for atomics and emulated rdma
This commit adds support for atomic operations as well as emulated RDMA for systems without RDMA support. Both are implemented using an internal send tag. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 08f9ae9 commit 87d41da

9 files changed

+482
-25
lines changed

opal/mca/btl/btl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ typedef uint8_t mca_btl_base_tag_t;
189189
#define MCA_BTL_TAG_IB (MCA_BTL_TAG_BTL + 0)
190190
#define MCA_BTL_TAG_UDAPL (MCA_BTL_TAG_BTL + 1)
191191
#define MCA_BTL_TAG_SMCUDA (MCA_BTL_TAG_BTL + 2)
192+
#define MCA_BTL_TAG_VADER (MCA_BTL_TAG_BTL + 3)
192193

193194
/* preferred protocol */
194195
#define MCA_BTL_FLAGS_SEND 0x0001

opal/mca/btl/vader/Makefile.am

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ libmca_btl_vader_la_sources = \
4040
btl_vader_xpmem.c \
4141
btl_vader_xpmem.h \
4242
btl_vader_knem.c \
43-
btl_vader_knem.h
43+
btl_vader_knem.h \
44+
btl_vader_sc_emu.c \
45+
btl_vader_atomic.c
4446

4547
# Make the output library in this directory, and name it either
4648
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

opal/mca/btl/vader/btl_vader.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
1414
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
15-
* Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
15+
* Copyright (c) 2010-2018 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
1818
*
@@ -92,6 +92,7 @@ enum {
9292
MCA_BTL_VADER_CMA = 1,
9393
MCA_BTL_VADER_KNEM = 2,
9494
MCA_BTL_VADER_NONE = 3,
95+
MCA_BTL_VADER_EMUL = 4,
9596
};
9697

9798
/**
@@ -233,6 +234,11 @@ int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
233234
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
234235
#endif
235236

237+
int mca_btl_vader_put_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
238+
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
239+
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
240+
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
241+
236242
/**
237243
* Initiate a synchronous get.
238244
*
@@ -261,6 +267,29 @@ int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
261267
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
262268
#endif
263269

270+
int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
271+
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
272+
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
273+
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
274+
275+
int mca_btl_vader_emu_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
276+
uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
277+
mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
278+
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
279+
280+
int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
281+
void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
282+
mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
283+
uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
284+
void *cbcontext, void *cbdata);
285+
286+
int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
287+
void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
288+
mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
289+
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
290+
291+
void mca_btl_vader_sc_emu_init (void);
292+
264293
/**
265294
* Allocate a segment.
266295
*

opal/mca/btl/vader/btl_vader_atomic.c

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
4+
* reserved.
5+
* $COPYRIGHT$
6+
*
7+
* Additional copyrights may follow
8+
*
9+
* $HEADER$
10+
*/
11+
12+
#include "opal_config.h"
13+
14+
#include "btl_vader.h"
15+
#include "btl_vader_frag.h"
16+
#include "btl_vader_endpoint.h"
17+
#include "btl_vader_xpmem.h"
18+
19+
static void mca_btl_vader_sc_emu_aop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
20+
mca_btl_base_descriptor_t *desc, int status)
21+
{
22+
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
23+
void *local_address = frag->rdma.local_address;
24+
25+
frag->rdma.cbfunc (btl, endpoint, local_address, NULL, frag->rdma.context, frag->rdma.cbdata, status);
26+
}
27+
28+
int mca_btl_vader_emu_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
29+
uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
30+
mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
31+
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
32+
{
33+
mca_btl_vader_frag_t *frag;
34+
35+
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags, NULL,
36+
remote_address, cbfunc, cbcontext, cbdata, mca_btl_vader_sc_emu_aop_complete);
37+
if (OPAL_UNLIKELY(NULL == frag)) {
38+
return OPAL_ERR_OUT_OF_RESOURCE;
39+
}
40+
41+
/* send is always successful */
42+
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
43+
44+
return OPAL_SUCCESS;
45+
}
46+
47+
static void mca_btl_vader_sc_emu_afop_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
48+
mca_btl_base_descriptor_t *desc, int status)
49+
{
50+
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) desc;
51+
mca_btl_vader_sc_emu_hdr_t *hdr;
52+
void *local_address = frag->rdma.local_address;
53+
54+
hdr = (mca_btl_vader_sc_emu_hdr_t *) frag->segments[0].seg_addr.pval;
55+
56+
*((int64_t *) frag->rdma.local_address) = hdr->operand[0];
57+
58+
frag->rdma.cbfunc (btl, endpoint, local_address, NULL, frag->rdma.context, frag->rdma.cbdata, status);
59+
}
60+
61+
int mca_btl_vader_emu_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
62+
void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
63+
mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
64+
uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
65+
void *cbcontext, void *cbdata)
66+
{
67+
mca_btl_vader_frag_t *frag;
68+
69+
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_ATOMIC, operand, 0, op, 0, order, flags,
70+
local_address, remote_address, cbfunc, cbcontext, cbdata,
71+
mca_btl_vader_sc_emu_afop_complete);
72+
if (OPAL_UNLIKELY(NULL == frag)) {
73+
return OPAL_ERR_OUT_OF_RESOURCE;
74+
}
75+
76+
/* send is always successful */
77+
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
78+
79+
return OPAL_SUCCESS;
80+
}
81+
82+
int mca_btl_vader_emu_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
83+
void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
84+
mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
85+
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
86+
{
87+
mca_btl_vader_frag_t *frag;
88+
89+
frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_CSWAP, compare, value, 0, 0, order,
90+
flags, local_address, remote_address, cbfunc, cbcontext, cbdata,
91+
mca_btl_vader_sc_emu_afop_complete);
92+
if (OPAL_UNLIKELY(NULL == frag)) {
93+
return OPAL_ERR_OUT_OF_RESOURCE;
94+
}
95+
96+
/* send is always successful */
97+
(void) mca_btl_vader_send (btl, endpoint, &frag->base, MCA_BTL_TAG_VADER);
98+
99+
return OPAL_SUCCESS;
100+
}

opal/mca/btl/vader/btl_vader_component.c

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
1414
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
15-
* Copyright (c) 2010-2017 Los Alamos National Security, LLC.
15+
* Copyright (c) 2010-2018 Los Alamos National Security, LLC.
1616
* All rights reserved.
1717
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
1818
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
@@ -68,6 +68,7 @@ static mca_base_var_enum_value_t single_copy_mechanisms[] = {
6868
#if OPAL_BTL_VADER_HAVE_KNEM
6969
{.value = MCA_BTL_VADER_KNEM, .string = "knem"},
7070
#endif
71+
{.value = MCA_BTL_VADER_EMUL, .string = "emulated"},
7172
{.value = MCA_BTL_VADER_NONE, .string = "none"},
7273
{.value = 0, .string = NULL}
7374
};
@@ -95,14 +96,6 @@ mca_btl_vader_component_t mca_btl_vader_component = {
9596
} /* end super */
9697
};
9798

98-
static void mca_btl_vader_dummy_rdma (void)
99-
{
100-
/* If a backtrace ends at this function something has gone wrong with
101-
* the btl bootstrapping. Check that the btl_get function was set to
102-
* something reasonable. */
103-
abort ();
104-
}
105-
10699
static int mca_btl_vader_component_register (void)
107100
{
108101
mca_base_var_enum_t *new_enum;
@@ -119,7 +112,7 @@ static int mca_btl_vader_component_register (void)
119112
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
120113
MCA_BASE_VAR_SCOPE_LOCAL,
121114
&mca_btl_vader_component.vader_free_list_num);
122-
mca_btl_vader_component.vader_free_list_max = 4096;
115+
mca_btl_vader_component.vader_free_list_max = 512;
123116
(void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
124117
"free_list_max", "Maximum number of fragments "
125118
"to allocate for shared memory communication.",
@@ -252,20 +245,42 @@ static int mca_btl_vader_component_register (void)
252245
mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
253246
mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit;
254247

255-
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
248+
#if OPAL_HAVE_ATOMIC_MATH_64
249+
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA |
250+
MCA_BTL_FLAGS_ATOMIC_OPS | MCA_BTL_FLAGS_ATOMIC_FOPS;
251+
252+
mca_btl_vader.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_AND |
253+
MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | MCA_BTL_ATOMIC_SUPPORTS_CSWAP |
254+
MCA_BTL_ATOMIC_SUPPORTS_GLOB | MCA_BTL_ATOMIC_SUPPORTS_SWAP;
255+
#if OPAL_HAVE_ATOMIC_MATH_32
256+
mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_32BIT;
257+
#endif /* OPAL_HAVE_ATOMIC_MATH_32 */
258+
259+
#if OPAL_HAVE_ATOMIC_MIN_64
260+
mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN;
261+
#endif /* OPAL_HAVE_ATOMIC_MIN_64 */
262+
263+
#if OPAL_HAVE_ATOMIC_MAX_64
264+
mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MAX;
265+
#endif /* OPAL_HAVE_ATOMIC_MAX_64 */
266+
267+
#else
268+
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA;
269+
#endif /* OPAL_HAVE_ATOMIC_MATH_64 */
256270

257271
if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
258-
mca_btl_vader.super.btl_flags |= MCA_BTL_FLAGS_RDMA;
259-
/* Single copy mechanisms should provide better bandwidth */
272+
/* True single copy mechanisms should provide better bandwidth */
260273
mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */
261-
262-
/* Set dummy values so the RDMA flag doesn't get unset by mca_btl_base_param_verify() */
263-
mca_btl_vader.super.btl_get = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma;
264-
mca_btl_vader.super.btl_put = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma;
265274
} else {
266275
mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */
267276
}
268277

278+
mca_btl_vader.super.btl_get = mca_btl_vader_get_sc_emu;
279+
mca_btl_vader.super.btl_put = mca_btl_vader_put_sc_emu;
280+
mca_btl_vader.super.btl_atomic_op = mca_btl_vader_emu_aop;
281+
mca_btl_vader.super.btl_atomic_fop = mca_btl_vader_emu_afop;
282+
mca_btl_vader.super.btl_atomic_cswap = mca_btl_vader_emu_acswap;
283+
269284
mca_btl_vader.super.btl_latency = 1; /* Microsecs */
270285

271286
/* Call the BTL base to register its MCA params */
@@ -350,7 +365,6 @@ static int mca_btl_base_vader_modex_send (void)
350365
return rc;
351366
}
352367

353-
#if OPAL_BTL_VADER_HAVE_XPMEM || OPAL_BTL_VADER_HAVE_CMA || OPAL_BTL_VADER_HAVE_KNEM
354368
static void mca_btl_vader_select_next_single_copy_mechanism (void)
355369
{
356370
for (int i = 0 ; single_copy_mechanisms[i].value != MCA_BTL_VADER_NONE ; ++i) {
@@ -446,8 +460,14 @@ static void mca_btl_vader_check_single_copy (void)
446460
mca_btl_vader.super.btl_get = NULL;
447461
mca_btl_vader.super.btl_put = NULL;
448462
}
463+
464+
if (MCA_BTL_VADER_EMUL == mca_btl_vader_component.single_copy_mechanism) {
465+
mca_btl_vader_sc_emu_init ();
466+
/* limit to the maximum fragment size */
467+
mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
468+
mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
469+
}
449470
}
450-
#endif
451471

452472
/*
453473
* VADER component initialization
@@ -497,11 +517,10 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
497517
component->num_fbox_in_endpoints = 0;
498518
component->fbox_count = 0;
499519

500-
#if OPAL_BTL_VADER_HAVE_XPMEM || OPAL_BTL_VADER_HAVE_CMA || OPAL_BTL_VADER_HAVE_KNEM
501520
mca_btl_vader_check_single_copy ();
502-
#endif
503521

504522
if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
523+
const char *base_dir = opal_process_info.proc_session_dir;
505524
char *sm_file;
506525

507526
rc = asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%x.%d", mca_btl_vader_component.backing_directory,

0 commit comments

Comments
 (0)