@@ -803,6 +803,7 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
803
803
{
804
804
struct mca_btl_base_module_t * * possible_btls = NULL ;
805
805
int comm_size = ompi_comm_size (comm );
806
+ int comm_rank = ompi_comm_rank (comm );
806
807
int rc = OMPI_SUCCESS , max_btls = 0 ;
807
808
unsigned int selected_latency = INT_MAX ;
808
809
struct mca_btl_base_module_t * selected_btl = NULL ;
@@ -842,10 +843,11 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
842
843
return OMPI_SUCCESS ;
843
844
}
844
845
845
- for (int i = 0 ; i < comm_size ; ++ i ) {
846
- ompi_proc_t * proc = ompi_comm_peer_lookup (comm , i );
846
+ for (int rank = 0 ; rank < comm_size ; ++ rank ) {
847
+ ompi_proc_t * proc = ompi_comm_peer_lookup (comm , rank );
847
848
mca_bml_base_endpoint_t * endpoint ;
848
849
int num_btls , prev_max ;
850
+ bool found_btl = false;
849
851
850
852
endpoint = mca_bml_base_get_endpoint (proc );
851
853
if (NULL == endpoint ) {
@@ -891,23 +893,30 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
891
893
for (int j = 0 ; j < max_btls ; ++ j ) {
892
894
if (endpoint -> btl_rdma .bml_btls [i_btl ].btl == possible_btls [j ]) {
893
895
++ btl_counts [j ];
896
+ found_btl = true;
894
897
break ;
895
898
} else if (NULL == possible_btls [j ]) {
896
899
possible_btls [j ] = endpoint -> btl_rdma .bml_btls [i_btl ].btl ;
897
900
btl_counts [j ] = 1 ;
901
+ found_btl = true;
898
902
break ;
899
903
}
900
904
}
901
905
}
902
906
}
907
+
908
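+ /* any non-local rank must have a usable btl. NOTE(review): the check below tests comm_rank == rank, i.e. the local rank -- confirm whether != was intended */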
909
+ if (!found_btl && comm_rank == rank ) {
910
+ /* no btl = no rdma/atomics */
911
+ rc = OMPI_ERR_UNREACH ;
912
+ break ;
913
+ }
903
914
}
904
915
905
916
if (OMPI_SUCCESS != rc ) {
906
917
free (possible_btls );
907
918
free (btl_counts );
908
-
909
- /* no btl = no rdma/atomics */
910
- return OMPI_ERR_NOT_AVAILABLE ;
919
+ return rc ;
911
920
}
912
921
913
922
for (int i = 0 ; i < max_btls ; ++ i ) {
0 commit comments