Skip to content

Commit 92bba55

Browse files
committed
btl/base_am_rdma: fix handling of btl_send returning 1
Active message RDMA uses btl_send to send the initial request and the RDMA response. btl_send returns 0 when the descriptor has been successfully queued for sending, and returns 1 when the descriptor has been successfully sent. Currently, active message RDMA treats the return value 1 as an error, and thus either returns that value to the caller or retries the send. This patch addresses the issue by correctly handling the return value 1. Signed-off-by: Wei Zhang <[email protected]>
1 parent b277aba commit 92bba55

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

opal/mca/btl/base/btl_base_am_rdma.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ static inline int mca_btl_base_am_rdma_advance(mca_btl_base_module_t *btl,
372372
mca_btl_base_rdma_context_t *context,
373373
bool send_descriptor)
374374
{
375+
int ret;
375376
const size_t remaining = context->total_size - context->sent;
376377

377378
if (0 == remaining) {
@@ -409,7 +410,13 @@ static inline int mca_btl_base_am_rdma_advance(mca_btl_base_module_t *btl,
409410
}
410411

411412
if (send_descriptor) {
412-
return btl->btl_send(btl, endpoint, descriptor, mca_btl_base_rdma_tag(hdr->type));
413+
/* This descriptor was created with MCA_BTL_DES_SEND_ALWAYS_CALLBACK set, so safe to treat
414+
* OPAL_SUCCESS and 1 return codes the same */
415+
assert(0 != (descriptor->des_flags && MCA_BTL_DES_SEND_ALWAYS_CALLBACK));
416+
ret = btl->btl_send(btl, endpoint, descriptor, mca_btl_base_rdma_tag(hdr->type));
417+
if (ret == 1)
418+
ret = OPAL_SUCCESS;
419+
return ret;
413420
}
414421

415422
/* queue for later to avoid btl_send in callback */
@@ -614,7 +621,13 @@ static int mca_btl_base_am_rdma_respond(mca_btl_base_module_t *btl,
614621

615622
send_descriptor->des_cbfunc = NULL;
616623

624+
/* There is no callback for the response descriptor, therefore it is
625+
* safe to treat 0 and 1 return codes the same
626+
*/
617627
int ret = btl->btl_send(btl, endpoint, send_descriptor, mca_btl_base_rdma_resp_tag());
628+
if (ret == 1)
629+
ret = OPAL_SUCCESS;
630+
618631
if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
619632
*descriptor = send_descriptor;
620633
}
@@ -779,6 +792,8 @@ static int mca_btl_base_am_rdma_progress(void)
779792

780793
OPAL_THREAD_SCOPED_LOCK(&default_module.mutex, ACTION1);
781794

795+
/* The initiator descriptor was created with MCA_BTL_DES_SEND_ALWAYS_CALLBACK set, so safe to treat
796+
* 0 and 1 return codes of btl_send the same */
782797
#define ACTION2 \
783798
mca_btl_base_am_rdma_queued_descriptor_t *descriptor, *next; \
784799
OPAL_LIST_FOREACH_SAFE (descriptor, next, \
@@ -787,11 +802,12 @@ static int mca_btl_base_am_rdma_progress(void)
787802
mca_btl_base_rdma_context_t *context = \
788803
(mca_btl_base_rdma_context_t *) \
789804
descriptor->descriptor->des_context; \
805+
assert(0 != (descriptor->descriptor->des_flags && MCA_BTL_DES_SEND_ALWAYS_CALLBACK)); \
790806
int ret = descriptor->btl->btl_send(descriptor->btl, \
791807
descriptor->endpoint, \
792808
descriptor->descriptor, \
793809
mca_btl_base_rdma_tag(context->type)); \
794-
if (OPAL_SUCCESS == ret) { \
810+
if (OPAL_SUCCESS == ret || 1 == ret) { \
795811
opal_list_remove_item(&default_module.queued_initiator_descriptors, \
796812
&descriptor->super); \
797813
} \

0 commit comments

Comments
 (0)