Skip to content

Commit e3e9e1f

Browse files
committed
coll/han/alltoallv: correct loop condition on empty sends
This change makes the loop condition more straightforward and corrects a logic error that was previously seen when a rank had 0 bytes to receive. With the new logic, the first pass through the receive-posting section on such a rank will walk through all datatypes without ever posting a recv buffer. Sender logic remains unchanged.
1 parent 577968e commit e3e9e1f

File tree

1 file changed

+18
-3
lines changed

1 file changed

+18
-3
lines changed

ompi/mca/coll/han/coll_han_alltoallv.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ static int alltoallv_sendrecv_w(
270270
size_t nbytes_pack;
271271

272272
int nsend_req_pending = 0;
273+
int nrecv_req_pending = 0;
273274
opal_convertor_t send_convertor;
274275
opal_convertor_t recv_convertor;
275276
OBJ_CONSTRUCT( &send_convertor, opal_convertor_t );
@@ -323,7 +324,7 @@ static int alltoallv_sendrecv_w(
323324
int ii_more_sends_to_complete = nsend_req_pending > 0;
324325

325326
int ii_more_recvs_to_post = recv_post_remaining_bytes > 0;
326-
int ii_more_recvs_to_complete = recv_convertor_bytes_remaining > 0 || jtype_recv < ntypes_recv;
327+
int ii_more_recvs_to_complete = nrecv_req_pending > 0;
327328

328329

329330
if ( !( ii_more_sends_to_post || ii_more_sends_to_complete ||
@@ -336,10 +337,22 @@ static int alltoallv_sendrecv_w(
336337
/* Common Case: */
337338
/* wait for any send or recv to complete */
338339
rc = ompi_request_wait_any(nreqs, requests, &jreq, MPI_STATUS_IGNORE);
339-
if (rc != 0 || jreq == MPI_UNDEFINED) {
340-
return 1;
340+
if (rc != 0) {
341+
opal_output_verbose(1, mca_coll_han_component.han_output,
342+
"ompi_request_wait_any returned error code %d in alltoallv_sendrecv_w (loop=%ld)\n",rc,jloop);
343+
return rc;
341344
}
342345
have_completion = 1;
346+
if (jreq == MPI_UNDEFINED) {
347+
opal_output_verbose(1, mca_coll_han_component.han_output,
348+
"ERROR: no active requests to wait on! Loop=%ld: %d %d %d %d\n",
349+
jloop,
350+
ii_more_sends_to_post, ii_more_sends_to_complete,
351+
ii_more_recvs_to_post, ii_more_recvs_to_complete );
352+
have_completion = 0;
353+
jreq = jloop % nbufs;
354+
return MPI_ERR_INTERN;
355+
}
343356
} else {
344357
/* priming the loop: post sends or recvs while have_completion=0.
345358
@@ -401,6 +414,7 @@ static int alltoallv_sendrecv_w(
401414

402415
} else { /* recv request */
403416
if (have_completion) {
417+
nrecv_req_pending--;
404418
/* unpack data */
405419
ssize_t buf_remain = buf_len;
406420
size_t buf_converted = 0;
@@ -449,6 +463,7 @@ static int alltoallv_sendrecv_w(
449463

450464
/* update posted_recv_bytes */
451465
recv_post_remaining_bytes -= bytes_to_post;
466+
nrecv_req_pending++;
452467
} else {
453468
requests[jreq] = MPI_REQUEST_NULL;
454469
}

0 commit comments

Comments
 (0)