Skip to content

Commit 0eb6940

Browse files
committed
osc/rdma: Fix MPI_Win_start()/complete() with MPI_GROUP_EMPTY.
- Make sure the epoch type is set before returning from MPI_Win_start().
- Make sure the group is only freed if it is valid in MPI_Win_complete().
- Fix a possible double free() of the group.

Signed-off-by: Austen Lauria <[email protected]>
1 parent 3d67c65 commit 0eb6940

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

ompi/mca/osc/rdma/osc_rdma_active_target.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,8 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int mpi_assert, ompi_win_t
385385

386386
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "start group size %d", sync->num_peers);
387387

388+
sync->type = OMPI_OSC_RDMA_SYNC_TYPE_PSCW;
389+
388390
if (0 == ompi_group_size (group)) {
389391
/* nothing more to do. this is an empty start epoch */
390392
OPAL_THREAD_UNLOCK(&module->lock);
@@ -393,8 +395,6 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int mpi_assert, ompi_win_t
393395

394396
opal_atomic_wmb ();
395397

396-
sync->type = OMPI_OSC_RDMA_SYNC_TYPE_PSCW;
397-
398398
/* prevent us from entering a passive-target, fence, or another pscw access epoch until
399399
* the matching complete is called */
400400
sync->epoch_active = true;
@@ -466,17 +466,21 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
466466
sync->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE;
467467
sync->epoch_active = false;
468468

469-
/* phase 2 cleanup group */
470-
OBJ_RELEASE(group);
471-
472469
peers = sync->peer_list.peers;
473470
if (NULL == peers) {
474471
/* empty peer list */
472+
if(MPI_GROUP_EMPTY != group) {
473+
OBJ_RELEASE(group);
474+
}
475475
OPAL_THREAD_UNLOCK(&(module->lock));
476-
OBJ_RELEASE(group);
477476
return OMPI_SUCCESS;
478477
}
479478

479+
/* phase 2 cleanup group */
480+
if(MPI_GROUP_EMPTY != group) {
481+
OBJ_RELEASE(group);
482+
}
483+
480484
sync->peer_list.peers = NULL;
481485

482486
OPAL_THREAD_UNLOCK(&(module->lock));
@@ -508,7 +512,6 @@ int ompi_osc_rdma_wait_atomic (ompi_win_t *win)
508512
{
509513
ompi_osc_rdma_module_t *module = GET_MODULE(win);
510514
ompi_osc_rdma_state_t *state = module->state;
511-
ompi_group_t *group;
512515
int group_size;
513516

514517
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait: %s", win->w_name);
@@ -532,12 +535,12 @@ int ompi_osc_rdma_wait_atomic (ompi_win_t *win)
532535
}
533536

534537
OPAL_THREAD_LOCK(&module->lock);
535-
group = module->pw_group;
538+
if(MPI_GROUP_EMPTY != module->pw_group) {
539+
OBJ_RELEASE(module->pw_group);
540+
}
536541
module->pw_group = NULL;
537542
OPAL_THREAD_UNLOCK(&module->lock);
538543

539-
OBJ_RELEASE(group);
540-
541544
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait complete");
542545

543546
return OMPI_SUCCESS;

0 commit comments

Comments
 (0)