Skip to content

Commit f670364

Browse files
committed
COLL TUNED: Use per-rank data size instead of total size for decision
The total size depends on the number of ranks, so the usual message-size ranges don't work. Thus, use the average size across all ranks to make the decision. Signed-off-by: Joseph Schuchart <[email protected]>
1 parent 30831fb commit f670364

File tree

2 files changed

+34
-39
lines changed

2 files changed

+34
-39
lines changed

ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -528,15 +528,17 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(const void *sbuf, int scount,
528528
- calculate message size and other necessary information */
529529
int comsize, i;
530530
int alg, faninout, segsize, ignoreme;
531-
size_t dsize, total_size;
531+
size_t dsize, total_size, per_rank_size;
532532

533533
comsize = ompi_comm_size(comm);
534534
ompi_datatype_type_size (sdtype, &dsize);
535535
total_size = 0;
536536
for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; }
537537

538+
per_rank_size = total_size / comsize;
539+
538540
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHERV],
539-
total_size, &faninout, &segsize, &ignoreme);
541+
per_rank_size, &faninout, &segsize, &ignoreme);
540542
if (alg) {
541543
/* we have found a valid choice from the file based rules for
542544
this message size */

ompi/mca/coll/tuned/coll_tuned_decision_fixed.c

Lines changed: 30 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,7 +1245,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
12451245
mca_coll_base_module_t *module)
12461246
{
12471247
int communicator_size, alg, i;
1248-
size_t dsize, total_dsize;
1248+
size_t dsize, total_dsize, per_rank_dsize;
12491249

12501250
communicator_size = ompi_comm_size(comm);
12511251

@@ -1258,6 +1258,9 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
12581258
total_dsize = 0;
12591259
for (i = 0; i < communicator_size; i++) { total_dsize += dsize * rcounts[i]; }
12601260

1261+
/* use the per-rank data size as basis, similar to allgather */
1262+
per_rank_dsize = total_dsize / communicator_size;
1263+
12611264
/** Algorithms:
12621265
* {1, "default"},
12631266
* {2, "bruck"},
@@ -1266,97 +1269,87 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
12661269
* {5, "two_proc"},
12671270
*/
12681271
if (communicator_size == 2) {
1269-
if (total_dsize < 2048) {
1272+
if (per_rank_dsize < 2048) {
12701273
alg = 3;
1271-
} else if (total_dsize < 4096) {
1274+
} else if (per_rank_dsize < 4096) {
12721275
alg = 5;
1273-
} else if (total_dsize < 8192) {
1276+
} else if (per_rank_dsize < 8192) {
12741277
alg = 3;
12751278
} else {
12761279
alg = 5;
12771280
}
12781281
} else if (communicator_size < 8) {
1279-
if (total_dsize < 256) {
1282+
if (per_rank_dsize < 256) {
12801283
alg = 1;
1281-
} else if (total_dsize < 4096) {
1284+
} else if (per_rank_dsize < 4096) {
12821285
alg = 4;
1283-
} else if (total_dsize < 8192) {
1286+
} else if (per_rank_dsize < 8192) {
12841287
alg = 3;
1285-
} else if (total_dsize < 16384) {
1288+
} else if (per_rank_dsize < 16384) {
12861289
alg = 4;
1287-
} else if (total_dsize < 262144) {
1290+
} else if (per_rank_dsize < 262144) {
12881291
alg = 2;
12891292
} else {
12901293
alg = 4;
12911294
}
12921295
} else if (communicator_size < 16) {
1293-
if (total_dsize < 1024) {
1296+
if (per_rank_dsize < 1024) {
12941297
alg = 1;
12951298
} else {
12961299
alg = 2;
12971300
}
12981301
} else if (communicator_size < 32) {
1299-
if (total_dsize < 32) {
1302+
if (per_rank_dsize < 128) {
13001303
alg = 1;
1301-
} else {
1304+
} else if (per_rank_dsize < 262144) {
13021305
alg = 2;
1306+
} else {
1307+
alg = 3;
13031308
}
13041309
} else if (communicator_size < 64) {
1305-
if (total_dsize < 1024) {
1310+
if (per_rank_dsize < 256) {
1311+
alg = 1;
1312+
} else if (per_rank_dsize < 8192) {
13061313
alg = 2;
1307-
} else if (total_dsize < 2048) {
1308-
alg = 4;
1309-
} else if (total_dsize < 8192) {
1310-
alg = 3;
1311-
} else if (total_dsize < 16384) {
1312-
alg = 4;
1313-
} else if (total_dsize < 32768) {
1314-
alg = 3;
1315-
} else if (total_dsize < 65536) {
1316-
alg = 4;
13171314
} else {
13181315
alg = 3;
13191316
}
13201317
} else if (communicator_size < 128) {
1321-
if (total_dsize < 16) {
1318+
if (per_rank_dsize < 256) {
13221319
alg = 1;
1323-
} else if (total_dsize < 1024) {
1320+
} else if (per_rank_dsize < 4096) {
13241321
alg = 2;
1325-
} else if (total_dsize < 65536) {
1326-
alg = 4;
13271322
} else {
13281323
alg = 3;
13291324
}
13301325
} else if (communicator_size < 256) {
1331-
if (total_dsize < 1024) {
1326+
if (per_rank_dsize < 1024) {
13321327
alg = 2;
1333-
} else if (total_dsize < 65536) {
1328+
} else if (per_rank_dsize < 65536) {
13341329
alg = 4;
13351330
} else {
13361331
alg = 3;
13371332
}
13381333
} else if (communicator_size < 512) {
1339-
if (total_dsize < 1024) {
1334+
if (per_rank_dsize < 1024) {
13401335
alg = 2;
1341-
} else if (total_dsize < 131072) {
1342-
alg = 4;
13431336
} else {
13441337
alg = 3;
13451338
}
13461339
} else if (communicator_size < 1024) {
1347-
if (total_dsize < 512) {
1340+
if (per_rank_dsize < 512) {
13481341
alg = 2;
1349-
} else if (total_dsize < 1024) {
1342+
} else if (per_rank_dsize < 1024) {
13501343
alg = 1;
1351-
} else if (total_dsize < 4096) {
1344+
} else if (per_rank_dsize < 4096) {
13521345
alg = 2;
1353-
} else if (total_dsize < 1048576) {
1346+
} else if (per_rank_dsize < 1048576) {
13541347
alg = 4;
13551348
} else {
13561349
alg = 3;
13571350
}
13581351
} else {
1359-
if (total_dsize < 4096) {
1352+
if (per_rank_dsize < 4096) {
13601353
alg = 2;
13611354
} else {
13621355
alg = 4;

0 commit comments

Comments
 (0)