Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit a8fcaa8

Browse files
committed
Merge pull request #980 from yburette/topic/merge_v2.x
mtl/ofi: merge commits from master to v2.x
2 parents b43afc0 + d2016f9 commit a8fcaa8

File tree

6 files changed

+46
-19
lines changed

6 files changed

+46
-19
lines changed

ompi/mca/mtl/ofi/mtl_ofi.c

+9
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,15 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
110110
*/
111111
for (i = 0; i < nprocs; ++i) {
112112
endpoint = OBJ_NEW(mca_mtl_ofi_endpoint_t);
113+
if (NULL == endpoint) {
114+
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
115+
"%s:%d: mtl/ofi: could not allocate endpoint"
116+
" structure\n",
117+
__FILE__, __LINE__);
118+
ret = OMPI_ERROR;
119+
goto bail;
120+
}
121+
113122
endpoint->mtl_ofi_module = &ompi_mtl_ofi;
114123
endpoint->peer_fiaddr = fi_addrs[i];
115124

ompi/mca/mtl/ofi/mtl_ofi.h

+14-11
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
2+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
33
*
44
* $COPYRIGHT$
55
*
@@ -51,10 +51,6 @@ BEGIN_C_DECLS
5151
extern mca_mtl_ofi_module_t ompi_mtl_ofi;
5252
extern mca_base_framework_t ompi_mtl_base_framework;
5353

54-
extern int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
55-
size_t nprocs,
56-
struct ompi_proc_t **procs);
57-
5854
extern int ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl,
5955
size_t nprocs,
6056
struct ompi_proc_t **procs);
@@ -236,7 +232,7 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
236232
ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */
237233

238234
ompi_proc = ompi_comm_peer_lookup(comm, dest);
239-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
235+
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
240236

241237
ompi_ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
242238
if (OMPI_SUCCESS != ompi_ret) return ompi_ret;
@@ -267,6 +263,7 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
267263
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
268264
"%s:%d: fi_trecv failed: %s(%zd)",
269265
__FILE__, __LINE__, fi_strerror(-ret), ret);
266+
free(ack_req);
270267
return ompi_mtl_ofi_get_error(ret);
271268
}
272269
} else {
@@ -285,6 +282,10 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
285282
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
286283
"%s:%d: fi_tinject failed: %s(%zd)",
287284
__FILE__, __LINE__, fi_strerror(-ret), ret);
285+
if (ack_req) {
286+
fi_cancel((fid_t)ompi_mtl_ofi.ep, &ack_req->ctx);
287+
free(ack_req);
288+
}
288289
return ompi_mtl_ofi_get_error(ret);
289290
}
290291

@@ -461,7 +462,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
461462
if (ompi_mtl_ofi.any_addr == ofi_req->remote_addr) {
462463
src = MTL_OFI_GET_SOURCE(wc->tag);
463464
ompi_proc = ompi_comm_peer_lookup(ofi_req->comm, src);
464-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
465+
endpoint = ompi_mtl_ofi_get_endpoint(ofi_req->mtl, ompi_proc);
465466
ofi_req->remote_addr = endpoint->peer_fiaddr;
466467
}
467468
MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep,
@@ -533,7 +534,7 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl,
533534

534535
if (MPI_ANY_SOURCE != src) {
535536
ompi_proc = ompi_comm_peer_lookup(comm, src);
536-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
537+
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
537538
remote_addr = endpoint->peer_fiaddr;
538539
} else {
539540
remote_addr = ompi_mtl_ofi.any_addr;
@@ -745,7 +746,7 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl,
745746
*/
746747
if (MPI_ANY_SOURCE != src) {
747748
ompi_proc = ompi_comm_peer_lookup( comm, src );
748-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
749+
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
749750
remote_proc = endpoint->peer_fiaddr;
750751
}
751752

@@ -830,7 +831,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
830831
*/
831832
if (MPI_ANY_SOURCE != src) {
832833
ompi_proc = ompi_comm_peer_lookup( comm, src );
833-
endpoint = ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
834+
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
834835
remote_proc = endpoint->peer_fiaddr;
835836
}
836837

@@ -865,11 +866,13 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
865866
* The search request completed but no matching message was found.
866867
*/
867868
*matched = 0;
869+
free(ofi_req);
868870
return OMPI_SUCCESS;
869871
} else if (OPAL_UNLIKELY(0 > ret)) {
870872
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
871873
"%s:%d: fi_trecvmsg failed: %s(%zd)",
872874
__FILE__, __LINE__, fi_strerror(-ret), ret);
875+
free(ofi_req);
873876
return ompi_mtl_ofi_get_error(ret);
874877
}
875878

@@ -895,6 +898,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
895898

896899
} else {
897900
(*message) = MPI_MESSAGE_NULL;
901+
free(ofi_req);
898902
}
899903

900904
return OMPI_SUCCESS;
@@ -962,7 +966,6 @@ ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl,
962966
return OMPI_SUCCESS;
963967
}
964968

965-
966969
END_C_DECLS
967970

968971
#endif /* MTL_OFI_H_HAS_BEEN_INCLUDED */

ompi/mca/mtl/ofi/mtl_ofi_component.c

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
3+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
44
*
55
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
66
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
@@ -241,6 +241,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
241241
hints->domain_attr->threading = FI_THREAD_UNSPEC;
242242
hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
243243
hints->domain_attr->resource_mgmt = FI_RM_ENABLED;
244+
hints->domain_attr->av_type = FI_AV_MAP;
244245

245246
/**
246247
* FI_VERSION provides binary backward and forward compatibility support

ompi/mca/mtl/ofi/mtl_ofi_endpoint.h

+14-4
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
2+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
33
*
44
* $COPYRIGHT$
55
*
@@ -11,10 +11,12 @@
1111
#ifndef OMPI_MTL_OFI_ENDPOINT_H
1212
#define OMPI_MTL_OFI_ENDPOINT_H
1313

14-
#include "mtl_ofi.h"
15-
1614
BEGIN_C_DECLS
1715

16+
extern int ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl,
17+
size_t nprocs,
18+
struct ompi_proc_t **procs);
19+
1820
OBJ_CLASS_DECLARATION(mca_mtl_ofi_endpoint_t);
1921

2022
/**
@@ -35,7 +37,15 @@ struct mca_mtl_ofi_endpoint_t {
3537
};
3638

3739
typedef struct mca_mtl_ofi_endpoint_t mca_mtl_ofi_endpoint_t;
38-
OBJ_CLASS_DECLARATION(mca_mtl_ofi_endpoint);
40+
41+
/**
 * Return the MTL endpoint stored on a proc, creating it on first use.
 *
 * If the proc's OMPI_PROC_ENDPOINT_TAG_MTL slot is still NULL, this calls
 * ompi_mtl_ofi_add_procs() for that single proc and then returns whatever
 * the slot holds afterwards.
 *
 * @param mtl        MTL module handle, forwarded to ompi_mtl_ofi_add_procs()
 * @param ompi_proc  proc whose endpoint is requested
 *
 * @return the value of ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]
 *
 * NOTE(review): the return code of ompi_mtl_ofi_add_procs() is discarded
 * here. If that call fails, the slot presumably stays NULL and this
 * function returns NULL; the call sites visible in this commit read
 * endpoint->peer_fiaddr without a NULL check -- confirm whether a failure
 * path needs handling.
 */
static inline mca_mtl_ofi_endpoint_t *ompi_mtl_ofi_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
42+
{
43+
/* Slow path: no endpoint has been attached to this proc yet. */
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
44+
/* Add just this one proc (nprocs == 1); result is not checked. */
ompi_mtl_ofi_add_procs(mtl, 1, &ompi_proc);
45+
}
46+
47+
return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
48+
}
3949

4050
END_C_DECLS
4151
#endif

ompi/mca/mtl/ofi/mtl_ofi_request.h

+5-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
2+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
33
*
44
* $COPYRIGHT$
55
*
@@ -55,6 +55,10 @@ struct ompi_mtl_ofi_request_t {
5555
/* lookup source of an ANY_SOURCE Recv */
5656
struct ompi_communicator_t *comm;
5757

58+
/** Reference to the MTL used to lookup */
59+
/* source of an ANY_SOURCE Recv */
60+
struct mca_mtl_base_module_t* mtl;
61+
5862
/** Pack buffer */
5963
void *buffer;
6064

ompi/mca/mtl/ofi/mtl_ofi_types.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ typedef struct mca_mtl_ofi_component_t {
8484
{ \
8585
match_bits = contextid; \
8686
match_bits = (match_bits << 16); \
87-
match_bits |= source; \
87+
match_bits |= (uint64_t)source; \
8888
match_bits = (match_bits << 32); \
8989
match_bits |= (MTL_OFI_TAG_MASK & tag) | type; \
9090
}
@@ -106,7 +106,7 @@ typedef struct mca_mtl_ofi_component_t {
106106
match_bits = (match_bits << 32); \
107107
mask_bits |= MTL_OFI_SOURCE_MASK; \
108108
} else { \
109-
match_bits |= source; \
109+
match_bits |= (uint64_t)source; \
110110
match_bits = (match_bits << 32); \
111111
} \
112112
\

0 commit comments

Comments
 (0)