Skip to content

Commit 981b885

Browse files
authored
Merge pull request #7951 from devreal/v4.1.x
(v4.1.x) osc/rdma: fail query_btls if no endpoint for non-local peer is found
2 parents bd16024 + 3d08d79 commit 981b885

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -803,6 +803,7 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
803803
{
804804
struct mca_btl_base_module_t **possible_btls = NULL;
805805
int comm_size = ompi_comm_size (comm);
806+
int comm_rank = ompi_comm_rank (comm);
806807
int rc = OMPI_SUCCESS, max_btls = 0;
807808
unsigned int selected_latency = INT_MAX;
808809
struct mca_btl_base_module_t *selected_btl = NULL;
@@ -842,10 +843,11 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
842843
return OMPI_SUCCESS;
843844
}
844845

845-
for (int i = 0 ; i < comm_size ; ++i) {
846-
ompi_proc_t *proc = ompi_comm_peer_lookup (comm, i);
846+
for (int rank = 0 ; rank < comm_size ; ++rank) {
847+
ompi_proc_t *proc = ompi_comm_peer_lookup (comm, rank);
847848
mca_bml_base_endpoint_t *endpoint;
848849
int num_btls, prev_max;
850+
bool found_btl = false;
849851

850852
endpoint = mca_bml_base_get_endpoint (proc);
851853
if (NULL == endpoint) {
@@ -891,23 +893,30 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
891893
for (int j = 0 ; j < max_btls ; ++j) {
892894
if (endpoint->btl_rdma.bml_btls[i_btl].btl == possible_btls[j]) {
893895
++btl_counts[j];
896+
found_btl = true;
894897
break;
895898
} else if (NULL == possible_btls[j]) {
896899
possible_btls[j] = endpoint->btl_rdma.bml_btls[i_btl].btl;
897900
btl_counts[j] = 1;
901+
found_btl = true;
898902
break;
899903
}
900904
}
901905
}
902906
}
907+
908+
/* any non-local rank must have a usable btl */
909+
if (!found_btl && comm_rank != rank) {
910+
/* no btl = no rdma/atomics */
911+
rc = OMPI_ERR_UNREACH;
912+
break;
913+
}
903914
}
904915

905916
if (OMPI_SUCCESS != rc) {
906917
free (possible_btls);
907918
free (btl_counts);
908-
909-
/* no btl = no rdma/atomics */
910-
return OMPI_ERR_NOT_AVAILABLE;
919+
return rc;
911920
}
912921

913922
for (int i = 0 ; i < max_btls ; ++i) {

0 commit comments

Comments
 (0)