Skip to content

Commit d52a2d0

Browse files
committed
ompi/comm: refactor communicator cid code
This commit simplifies the communicator context ID generation by removing the blocking code. The high-level calls ompi_comm_nextcid and ompi_comm_activate remain but now call the non-blocking variants and wait on the resulting request. This was done to remove the parallel paths for context ID generation in preparation for further improvements of the CID generation code. Signed-off-by: Nathan Hjelm <[email protected]> (cherry picked from commit 035c2e2) Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 1f21f54 commit d52a2d0

File tree

9 files changed

+718
-1195
lines changed

9 files changed

+718
-1195
lines changed

ompi/communicator/comm.c

Lines changed: 36 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -358,13 +358,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group,
358358
}
359359

360360
/* Determine context id. It is identical to f_2_c_handle */
361-
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
362-
comm, /* old comm */
363-
NULL, /* bridge comm */
364-
NULL, /* local leader */
365-
NULL, /* remote_leader */
366-
mode, /* mode */
367-
-1 ); /* send first */
361+
rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode);
368362
if ( OMPI_SUCCESS != rc ) {
369363
goto exit;
370364
}
@@ -374,13 +368,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group,
374368
newcomp->c_contextid, comm->c_contextid );
375369

376370
/* Activate the communicator and init coll-component */
377-
rc = ompi_comm_activate( &newcomp, /* new communicator */
378-
comm,
379-
NULL,
380-
NULL,
381-
NULL,
382-
mode,
383-
-1 );
371+
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
384372
if ( OMPI_SUCCESS != rc ) {
385373
goto exit;
386374
}
@@ -609,13 +597,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
609597
}
610598

611599
/* Determine context id. It is identical to f_2_c_handle */
612-
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
613-
comm, /* old comm */
614-
NULL, /* bridge comm */
615-
NULL, /* local leader */
616-
NULL, /* remote_leader */
617-
mode, /* mode */
618-
-1 ); /* send first, doesn't matter */
600+
rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode);
619601
if ( OMPI_SUCCESS != rc ) {
620602
goto exit;
621603
}
@@ -634,36 +616,15 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
634616

635617

636618
/* Activate the communicator and init coll-component */
637-
rc = ompi_comm_activate( &newcomp, /* new communicator */
638-
comm,
639-
NULL,
640-
NULL,
641-
NULL,
642-
mode,
643-
-1 );
644-
if ( OMPI_SUCCESS != rc ) {
645-
goto exit;
646-
}
619+
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
647620

648621
exit:
649-
if ( NULL != results ) {
650-
free ( results );
651-
}
652-
if ( NULL != sorted ) {
653-
free ( sorted );
654-
}
655-
if ( NULL != rresults) {
656-
free ( rresults );
657-
}
658-
if ( NULL != rsorted ) {
659-
free ( rsorted );
660-
}
661-
if ( NULL != lranks ) {
662-
free ( lranks );
663-
}
664-
if ( NULL != rranks ) {
665-
free ( rranks );
666-
}
622+
free ( results );
623+
free ( sorted );
624+
free ( rresults );
625+
free ( rsorted );
626+
free ( lranks );
627+
free ( rranks );
667628

668629
/* Step 4: if we are not part of the comm, free the struct */
669630
/* --------------------------------------------------------- */
@@ -675,7 +636,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
675636
}
676637

677638
*newcomm = newcomp;
678-
return ( rc );
639+
return rc;
679640
}
680641

681642

@@ -925,13 +886,7 @@ ompi_comm_split_type(ompi_communicator_t *comm,
925886
}
926887

927888
/* Determine context id. It is identical to f_2_c_handle */
928-
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
929-
comm, /* old comm */
930-
NULL, /* bridge comm */
931-
NULL, /* local leader */
932-
NULL, /* remote_leader */
933-
mode, /* mode */
934-
-1 ); /* send first, doesn't matter */
889+
rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode);
935890
if ( OMPI_SUCCESS != rc ) {
936891
goto exit;
937892
}
@@ -950,13 +905,7 @@ ompi_comm_split_type(ompi_communicator_t *comm,
950905

951906

952907
/* Activate the communicator and init coll-component */
953-
rc = ompi_comm_activate( &newcomp, /* new communicator */
954-
comm,
955-
NULL,
956-
NULL,
957-
NULL,
958-
mode,
959-
-1 );
908+
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
960909
if ( OMPI_SUCCESS != rc ) {
961910
goto exit;
962911
}
@@ -1031,13 +980,7 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, omp
1031980
}
1032981

1033982
/* Determine context id. It is identical to f_2_c_handle */
1034-
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
1035-
comm, /* old comm */
1036-
NULL, /* bridge comm */
1037-
NULL, /* local leader */
1038-
NULL, /* remote_leader */
1039-
mode, /* mode */
1040-
-1 ); /* send_first */
983+
rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode);
1041984
if ( OMPI_SUCCESS != rc ) {
1042985
return rc;
1043986
}
@@ -1047,13 +990,7 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, omp
1047990
newcomp->c_contextid, comm->c_contextid );
1048991

1049992
/* activate communicator and init coll-module */
1050-
rc = ompi_comm_activate( &newcomp, /* new communicator */
1051-
comm,
1052-
NULL,
1053-
NULL,
1054-
NULL,
1055-
mode,
1056-
-1 );
993+
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
1057994
if ( OMPI_SUCCESS != rc ) {
1058995
return rc;
1059996
}
@@ -1062,11 +999,15 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, omp
1062999
return MPI_SUCCESS;
10631000
}
10641001

1065-
struct ompi_comm_idup_with_info_context {
1002+
struct ompi_comm_idup_with_info_context_t {
1003+
opal_object_t super;
10661004
ompi_communicator_t *comm;
10671005
ompi_communicator_t *newcomp;
10681006
};
10691007

1008+
typedef struct ompi_comm_idup_with_info_context_t ompi_comm_idup_with_info_context_t;
1009+
OBJ_CLASS_INSTANCE(ompi_comm_idup_with_info_context_t, opal_object_t, NULL, NULL);
1010+
10701011
static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request);
10711012
static int ompi_comm_idup_with_info_finish (ompi_comm_request_t *request);
10721013
static int ompi_comm_idup_getcid (ompi_comm_request_t *request);
@@ -1085,7 +1026,7 @@ int ompi_comm_idup_with_info (ompi_communicator_t *comm, ompi_info_t *info, ompi
10851026
static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_group_t *remote_group,
10861027
ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req)
10871028
{
1088-
struct ompi_comm_idup_with_info_context *context;
1029+
ompi_comm_idup_with_info_context_t *context;
10891030
ompi_comm_request_t *request;
10901031
ompi_request_t *subreq[1];
10911032
int rc;
@@ -1101,15 +1042,15 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro
11011042
return OMPI_ERR_OUT_OF_RESOURCE;
11021043
}
11031044

1104-
context = calloc (1, sizeof (*context));
1045+
context = OBJ_NEW(ompi_comm_idup_with_info_context_t);
11051046
if (NULL == context) {
11061047
ompi_comm_request_return (request);
11071048
return OMPI_ERR_OUT_OF_RESOURCE;
11081049
}
11091050

11101051
context->comm = comm;
11111052

1112-
request->context = context;
1053+
request->context = &context->super;
11131054

11141055
rc = ompi_comm_set_nb (&context->newcomp, /* new comm */
11151056
comm, /* old comm */
@@ -1142,8 +1083,8 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro
11421083

11431084
static int ompi_comm_idup_getcid (ompi_comm_request_t *request)
11441085
{
1145-
struct ompi_comm_idup_with_info_context *context =
1146-
(struct ompi_comm_idup_with_info_context *) request->context;
1086+
ompi_comm_idup_with_info_context_t *context =
1087+
(ompi_comm_idup_with_info_context_t *) request->context;
11471088
ompi_request_t *subreq[1];
11481089
int rc, mode;
11491090

@@ -1154,11 +1095,8 @@ static int ompi_comm_idup_getcid (ompi_comm_request_t *request)
11541095
}
11551096

11561097
/* Determine context id. It is identical to f_2_c_handle */
1157-
rc = ompi_comm_nextcid_nb (context->newcomp, /* new communicator */
1158-
context->comm, /* old comm */
1159-
NULL, /* bridge comm */
1160-
mode, /* mode */
1161-
subreq); /* new subrequest */
1098+
rc = ompi_comm_nextcid_nb (context->newcomp, context->comm, NULL, NULL,
1099+
NULL, false, mode, subreq);
11621100
if (OMPI_SUCCESS != rc) {
11631101
ompi_comm_request_return (request);
11641102
return rc;
@@ -1171,8 +1109,8 @@ static int ompi_comm_idup_getcid (ompi_comm_request_t *request)
11711109

11721110
static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request)
11731111
{
1174-
struct ompi_comm_idup_with_info_context *context =
1175-
(struct ompi_comm_idup_with_info_context *) request->context;
1112+
ompi_comm_idup_with_info_context_t *context =
1113+
(ompi_comm_idup_with_info_context_t *) request->context;
11761114
ompi_request_t *subreq[1];
11771115
int rc, mode;
11781116

@@ -1187,7 +1125,7 @@ static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request)
11871125
context->newcomp->c_contextid, context->comm->c_contextid );
11881126

11891127
/* activate communicator and init coll-module */
1190-
rc = ompi_comm_activate_nb (&context->newcomp, context->comm, NULL, mode, subreq);
1128+
rc = ompi_comm_activate_nb (&context->newcomp, context->comm, NULL, NULL, NULL, false, mode, subreq);
11911129
if ( OMPI_SUCCESS != rc ) {
11921130
return rc;
11931131
}
@@ -1233,13 +1171,7 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int
12331171
}
12341172

12351173
/* Determine context id. It is identical to f_2_c_handle */
1236-
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
1237-
comm, /* old comm */
1238-
newcomp, /* bridge comm (used to pass the group into the group allreduce) */
1239-
&tag, /* user defined tag */
1240-
NULL, /* remote_leader */
1241-
mode, /* mode */
1242-
-1 ); /* send_first */
1174+
rc = ompi_comm_nextcid (newcomp, comm, NULL, &tag, NULL, false, mode);
12431175
if ( OMPI_SUCCESS != rc ) {
12441176
return rc;
12451177
}
@@ -1249,13 +1181,7 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int
12491181
newcomp->c_contextid, comm->c_contextid );
12501182

12511183
/* activate communicator and init coll-module */
1252-
rc = ompi_comm_activate( &newcomp, /* new communicator */
1253-
comm,
1254-
newcomp,
1255-
&tag,
1256-
NULL,
1257-
mode,
1258-
-1 );
1184+
rc = ompi_comm_activate (&newcomp, comm, NULL, &tag, NULL, false, mode);
12591185
if ( OMPI_SUCCESS != rc ) {
12601186
return rc;
12611187
}
@@ -1924,13 +1850,8 @@ int ompi_comm_enable(ompi_communicator_t *old_comm,
19241850
int ret = OMPI_SUCCESS;
19251851

19261852
/* Determine context id. It is identical to f_2_c_handle */
1927-
ret = ompi_comm_nextcid ( new_comm, /* new communicator */
1928-
old_comm, /* old comm */
1929-
NULL, /* bridge comm */
1930-
NULL, /* local leader */
1931-
NULL, /* remote_leader */
1932-
OMPI_COMM_CID_INTRA, /* mode */
1933-
-1 ); /* send first, doesn't matter */
1853+
ret = ompi_comm_nextcid (new_comm, old_comm, NULL, NULL, NULL, false,
1854+
OMPI_COMM_CID_INTRA);
19341855
if (OMPI_SUCCESS != ret) {
19351856
/* something wrong happened while setting the communicator */
19361857
goto complete_and_return;
@@ -1953,15 +1874,8 @@ int ompi_comm_enable(ompi_communicator_t *old_comm,
19531874
goto complete_and_return;
19541875
}
19551876

1956-
ret = ompi_comm_activate( &new_comm, /* new communicator */
1957-
old_comm, /* old comm */
1958-
NULL, /* bridge comm */
1959-
NULL, /* local leader */
1960-
NULL, /* remote_leader */
1961-
OMPI_COMM_CID_INTRA, /* mode */
1962-
-1 ); /* send first, doesn't matter */
1963-
1964-
1877+
ret = ompi_comm_activate (&new_comm, old_comm, NULL, NULL, NULL, false,
1878+
OMPI_COMM_CID_INTRA);
19651879
if (OMPI_SUCCESS != ret) {
19661880
/* something wrong happened while setting the communicator */
19671881
goto complete_and_return;

0 commit comments

Comments
 (0)