Skip to content

Commit a454341

Browse files
committed
add support for mxm 2.0
This commit was SVN r27661.
1 parent 4db2c69 commit a454341

File tree

2 files changed

+64
-4
lines changed

2 files changed

+64
-4
lines changed

ompi/mca/mtl/mxm/mtl_mxm.c

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,22 +84,45 @@ static uint32_t ompi_mtl_mxm_get_job_id(void)
8484

8585
int ompi_mtl_mxm_progress(void);
8686

87+
#if MXM_API < MXM_VERSION(2, 0)
8788
/*
 * Query MXM for the local endpoint address of one PTL (MXM API < 2.0).
 *
 * The address is written into ep_info->ptl_addr[ptlid].
 *
 * Returns OMPI_SUCCESS when mxm_ep_address() reports MXM_OK. On any other
 * status the failure is reported through orte_show_help() and OMPI_ERROR
 * is returned.
 */
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
{
    struct sockaddr *slot = (struct sockaddr *) &ep_info->ptl_addr[ptlid];
    size_t slot_len = sizeof(ep_info->ptl_addr[ptlid]);
    mxm_error_t status = mxm_ep_address(ompi_mtl_mxm.ep, ptlid, slot, &slot_len);

    if (MXM_OK == status) {
        return OMPI_SUCCESS;
    }

    orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
                   true, (int)ptlid, mxm_error_string(status));
    return OMPI_ERROR;
}
104+
#else
105+
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info,
106+
mxm_domain_id_t domain)
107+
{
108+
size_t addrlen;
109+
mxm_error_t err;
110+
111+
addrlen = sizeof(ep_info->dest_addr[domain]);
112+
err = mxm_ep_address(ompi_mtl_mxm.ep, domain,
113+
(struct sockaddr *) &ep_info->dest_addr[domain], &addrlen);
114+
if (MXM_OK == err) {
115+
ep_info->domain_bitmap |= MXM_BIT(domain);
116+
return OMPI_SUCCESS;
117+
} else if (MXM_ERR_UNREACHABLE == err) {
118+
return OMPI_SUCCESS;
119+
} else {
120+
orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
121+
true, (int)domain, mxm_error_string(err));
122+
return OMPI_ERROR;
123+
}
124+
}
125+
#endif
103126

104127
#define max(a,b) ((a)>(b)?(a):(b))
105128

@@ -159,6 +182,17 @@ static mxm_error_t ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_
159182
return err;
160183
}
161184

185+
#if MXM_API >= MXM_VERSION(2,0)
186+
static void ompi_mtl_mxm_set_conn_req(mxm_conn_req_t *conn_req, ompi_mtl_mxm_ep_conn_info_t *ep_info,
187+
mxm_domain_id_t domain)
188+
{
189+
if (ep_info->domain_bitmap & MXM_BIT(domain)) {
190+
conn_req->addr[domain] = (struct sockaddr *)&(ep_info->dest_addr[domain]);
191+
} else {
192+
conn_req->addr[domain] = NULL;
193+
}
194+
}
195+
#endif
162196

163197
int ompi_mtl_mxm_module_init(void)
164198
{
@@ -168,7 +202,7 @@ int ompi_mtl_mxm_module_init(void)
168202
uint32_t jobid;
169203
uint64_t mxlr;
170204
ompi_proc_t *mp, **procs;
171-
unsigned ptl_bitmap;
205+
unsigned ptl_bitmap;
172206
size_t totps, proc;
173207
int lr, nlps;
174208

@@ -210,8 +244,10 @@ int ompi_mtl_mxm_module_init(void)
210244
/* Setup the endpoint options and local addresses to bind to. */
211245
#if MXM_API < MXM_VERSION(1,5)
212246
ptl_bitmap = ompi_mtl_mxm.mxm_opts.ptl_bitmap;
213-
#else
247+
#elif MXM_API < MXM_VERSION(2,0)
214248
ptl_bitmap = ompi_mtl_mxm.mxm_opts->ptl_bitmap;
249+
#else
250+
ptl_bitmap = 0;
215251
#endif
216252

217253
/* Open MXM endpoint */
@@ -227,6 +263,7 @@ int ompi_mtl_mxm_module_init(void)
227263
/*
228264
* Get the address for each PTL (MXM < 2.0) or domain (MXM >= 2.0) on this endpoint, and share it with other ranks.
229265
*/
266+
#if MXM_API < MXM_VERSION(2,0)
230267
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) &&
231268
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
232269
return OMPI_ERROR;
@@ -239,6 +276,18 @@ int ompi_mtl_mxm_module_init(void)
239276
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
240277
return OMPI_ERROR;
241278
}
279+
#else
280+
ep_info.domain_bitmap = 0;
281+
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SELF)) {
282+
return OMPI_ERROR;
283+
}
284+
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SHM)) {
285+
return OMPI_ERROR;
286+
}
287+
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_IB)) {
288+
return OMPI_ERROR;
289+
}
290+
#endif
242291

243292
/*
244293
* Send information using modex (in some cases there is a limitation on data size, for example with ess/pmi)
@@ -350,9 +399,15 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
350399
free(modex_name);
351400
}
352401

402+
#if MXM_API < MXM_VERSION(2,0)
353403
conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
354404
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
355405
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
406+
#else
407+
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_SELF);
408+
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_SHM);
409+
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_IB);
410+
#endif
356411
}
357412

358413
/* Connect to remote peers */

ompi/mca/mtl/mxm/mtl_mxm_types.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,12 @@ typedef struct mca_mtl_mxm_module_t {
3939

4040

4141
typedef struct ompi_mtl_mxm_ep_conn_info_t {
42+
#if MXM_API < MXM_VERSION(2,0)
4243
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
44+
#else
45+
unsigned domain_bitmap;
46+
struct sockaddr_storage dest_addr[MXM_DOMAIN_LAST];
47+
#endif
4348
} ompi_mtl_mxm_ep_conn_info_t;
4449

4550
extern mca_mtl_mxm_module_t ompi_mtl_mxm;

0 commit comments

Comments
 (0)