Skip to content

Commit 12796a4

Browse files
authored
Merge pull request #8209 from devreal/fix-tuned-allgatherv
COLL TUNED: Use per-rank data size instead of total size for decision in allgatherv
2 parents 282be20 + f670364 commit 12796a4

File tree

2 files changed

+34
-39
lines changed

2 files changed

+34
-39
lines changed

ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -528,15 +528,17 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(const void *sbuf, int scount,
528528
- calculate message size and other necessary information */
529529
int comsize, i;
530530
int alg, faninout, segsize, ignoreme;
531-
size_t dsize, total_size;
531+
size_t dsize, total_size, per_rank_size;
532532

533533
comsize = ompi_comm_size(comm);
534534
ompi_datatype_type_size (sdtype, &dsize);
535535
total_size = 0;
536536
for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; }
537537

538+
per_rank_size = total_size / comsize;
539+
538540
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHERV],
539-
total_size, &faninout, &segsize, &ignoreme);
541+
per_rank_size, &faninout, &segsize, &ignoreme);
540542
if (alg) {
541543
/* we have found a valid choice from the file based rules for
542544
this message size */

ompi/mca/coll/tuned/coll_tuned_decision_fixed.c

Lines changed: 30 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1245,7 +1245,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
12451245
mca_coll_base_module_t *module)
12461246
{
12471247
int communicator_size, alg, i;
1248-
size_t dsize, total_dsize;
1248+
size_t dsize, total_dsize, per_rank_dsize;
12491249

12501250
communicator_size = ompi_comm_size(comm);
12511251

@@ -1258,6 +1258,9 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
12581258
total_dsize = 0;
12591259
for (i = 0; i < communicator_size; i++) { total_dsize += dsize * rcounts[i]; }
12601260

1261+
/* use the per-rank data size as basis, similar to allgather */
1262+
per_rank_dsize = total_dsize / communicator_size;
1263+
12611264
/** Algorithms:
12621265
* {1, "default"},
12631266
* {2, "bruck"},
@@ -1266,97 +1269,87 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
12661269
* {5, "two_proc"},
12671270
*/
12681271
if (communicator_size == 2) {
1269-
if (total_dsize < 2048) {
1272+
if (per_rank_dsize < 2048) {
12701273
alg = 3;
1271-
} else if (total_dsize < 4096) {
1274+
} else if (per_rank_dsize < 4096) {
12721275
alg = 5;
1273-
} else if (total_dsize < 8192) {
1276+
} else if (per_rank_dsize < 8192) {
12741277
alg = 3;
12751278
} else {
12761279
alg = 5;
12771280
}
12781281
} else if (communicator_size < 8) {
1279-
if (total_dsize < 256) {
1282+
if (per_rank_dsize < 256) {
12801283
alg = 1;
1281-
} else if (total_dsize < 4096) {
1284+
} else if (per_rank_dsize < 4096) {
12821285
alg = 4;
1283-
} else if (total_dsize < 8192) {
1286+
} else if (per_rank_dsize < 8192) {
12841287
alg = 3;
1285-
} else if (total_dsize < 16384) {
1288+
} else if (per_rank_dsize < 16384) {
12861289
alg = 4;
1287-
} else if (total_dsize < 262144) {
1290+
} else if (per_rank_dsize < 262144) {
12881291
alg = 2;
12891292
} else {
12901293
alg = 4;
12911294
}
12921295
} else if (communicator_size < 16) {
1293-
if (total_dsize < 1024) {
1296+
if (per_rank_dsize < 1024) {
12941297
alg = 1;
12951298
} else {
12961299
alg = 2;
12971300
}
12981301
} else if (communicator_size < 32) {
1299-
if (total_dsize < 32) {
1302+
if (per_rank_dsize < 128) {
13001303
alg = 1;
1301-
} else {
1304+
} else if (per_rank_dsize < 262144) {
13021305
alg = 2;
1306+
} else {
1307+
alg = 3;
13031308
}
13041309
} else if (communicator_size < 64) {
1305-
if (total_dsize < 1024) {
1310+
if (per_rank_dsize < 256) {
1311+
alg = 1;
1312+
} else if (per_rank_dsize < 8192) {
13061313
alg = 2;
1307-
} else if (total_dsize < 2048) {
1308-
alg = 4;
1309-
} else if (total_dsize < 8192) {
1310-
alg = 3;
1311-
} else if (total_dsize < 16384) {
1312-
alg = 4;
1313-
} else if (total_dsize < 32768) {
1314-
alg = 3;
1315-
} else if (total_dsize < 65536) {
1316-
alg = 4;
13171314
} else {
13181315
alg = 3;
13191316
}
13201317
} else if (communicator_size < 128) {
1321-
if (total_dsize < 16) {
1318+
if (per_rank_dsize < 256) {
13221319
alg = 1;
1323-
} else if (total_dsize < 1024) {
1320+
} else if (per_rank_dsize < 4096) {
13241321
alg = 2;
1325-
} else if (total_dsize < 65536) {
1326-
alg = 4;
13271322
} else {
13281323
alg = 3;
13291324
}
13301325
} else if (communicator_size < 256) {
1331-
if (total_dsize < 1024) {
1326+
if (per_rank_dsize < 1024) {
13321327
alg = 2;
1333-
} else if (total_dsize < 65536) {
1328+
} else if (per_rank_dsize < 65536) {
13341329
alg = 4;
13351330
} else {
13361331
alg = 3;
13371332
}
13381333
} else if (communicator_size < 512) {
1339-
if (total_dsize < 1024) {
1334+
if (per_rank_dsize < 1024) {
13401335
alg = 2;
1341-
} else if (total_dsize < 131072) {
1342-
alg = 4;
13431336
} else {
13441337
alg = 3;
13451338
}
13461339
} else if (communicator_size < 1024) {
1347-
if (total_dsize < 512) {
1340+
if (per_rank_dsize < 512) {
13481341
alg = 2;
1349-
} else if (total_dsize < 1024) {
1342+
} else if (per_rank_dsize < 1024) {
13501343
alg = 1;
1351-
} else if (total_dsize < 4096) {
1344+
} else if (per_rank_dsize < 4096) {
13521345
alg = 2;
1353-
} else if (total_dsize < 1048576) {
1346+
} else if (per_rank_dsize < 1048576) {
13541347
alg = 4;
13551348
} else {
13561349
alg = 3;
13571350
}
13581351
} else {
1359-
if (total_dsize < 4096) {
1352+
if (per_rank_dsize < 4096) {
13601353
alg = 2;
13611354
} else {
13621355
alg = 4;

0 commit comments

Comments
 (0)