@@ -190,15 +190,15 @@ static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t r
190190 mca_pml_ob1_recv_request_t * recvreq = (mca_pml_ob1_recv_request_t * ) frag -> rdma_req ;
191191 mca_bml_base_btl_t * bml_btl = frag -> rdma_bml ;
192192
193- OPAL_THREAD_SUB_SIZE_T (& recvreq -> req_pipeline_depth , 1 );
193+ OPAL_THREAD_ADD32 (& recvreq -> req_pipeline_depth , - 1 );
194194
195195 MCA_PML_OB1_RDMA_FRAG_RETURN (frag );
196196
197197 if (OPAL_LIKELY (0 < rdma_size )) {
198198 assert ((uint64_t ) rdma_size == frag -> rdma_length );
199199
200200 /* check completion status */
201- OPAL_THREAD_ADD_SIZE_T (& recvreq -> req_bytes_received , ( size_t ) rdma_size );
201+ OPAL_THREAD_ADD_SIZE_T (& recvreq -> req_bytes_received , rdma_size );
202202 if (recv_request_pml_complete_check (recvreq ) == false &&
203203 recvreq -> req_rdma_offset < recvreq -> req_send_offset ) {
204204 /* schedule additional rdma operations */
@@ -951,7 +951,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
951951 }
952952
953953 while (bytes_remaining > 0 &&
954- recvreq -> req_pipeline_depth < mca_pml_ob1 .recv_pipeline_depth ) {
954+ recvreq -> req_pipeline_depth < mca_pml_ob1 .recv_pipeline_depth ) {
955955 mca_pml_ob1_rdma_frag_t * frag = NULL ;
956956 mca_btl_base_module_t * btl ;
957957 int rc , rdma_idx ;
@@ -983,14 +983,10 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
983983 } while (!size );
984984 btl = bml_btl -> btl ;
985985
986- /* NTH: This conditional used to check if there was a registration in
987- * recvreq->req_rdma[rdma_idx].btl_reg. If once existed it was due to
988- * the btl not needed registration (equivalent to btl->btl_register_mem
989- * != NULL. This new check is equivalent. Note: I feel this protocol
990- * needs work to better improve resource usage when running with a
991- * leave pinned protocol. */
992- if (btl -> btl_register_mem && (btl -> btl_rdma_pipeline_frag_size != 0 ) &&
993- (size > btl -> btl_rdma_pipeline_frag_size )) {
986+ /* NTH: Note: I feel this protocol needs work to better improve resource
987+ * usage when running with a leave pinned protocol. */
988+ /* GB: We should always abide by the BTL RDMA pipeline fragment limit (if one is set) */
989+ if ((btl -> btl_rdma_pipeline_frag_size != 0 ) && (size > btl -> btl_rdma_pipeline_frag_size )) {
994990 size = btl -> btl_rdma_pipeline_frag_size ;
995991 }
996992
@@ -1028,7 +1024,7 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
10281024 if (OPAL_LIKELY (OMPI_SUCCESS == rc )) {
10291025 /* update request state */
10301026 recvreq -> req_rdma_offset += size ;
1031- OPAL_THREAD_ADD_SIZE_T (& recvreq -> req_pipeline_depth , 1 );
1027+ OPAL_THREAD_ADD32 (& recvreq -> req_pipeline_depth , 1 );
10321028 recvreq -> req_rdma [rdma_idx ].length -= size ;
10331029 bytes_remaining -= size ;
10341030 } else {
0 commit comments