@@ -1245,7 +1245,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
1245
1245
mca_coll_base_module_t * module )
1246
1246
{
1247
1247
int communicator_size , alg , i ;
1248
- size_t dsize , total_dsize ;
1248
+ size_t dsize , total_dsize , per_rank_dsize ;
1249
1249
1250
1250
communicator_size = ompi_comm_size (comm );
1251
1251
@@ -1258,6 +1258,9 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
1258
1258
total_dsize = 0 ;
1259
1259
for (i = 0 ; i < communicator_size ; i ++ ) { total_dsize += dsize * rcounts [i ]; }
1260
1260
1261
+ /* use the per-rank data size as basis, similar to allgather */
1262
+ per_rank_dsize = total_dsize / communicator_size ;
1263
+
1261
1264
/** Algorithms:
1262
1265
* {1, "default"},
1263
1266
* {2, "bruck"},
@@ -1266,97 +1269,87 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
1266
1269
* {5, "two_proc"},
1267
1270
*/
1268
1271
if (communicator_size == 2 ) {
1269
- if (total_dsize < 2048 ) {
1272
+ if (per_rank_dsize < 2048 ) {
1270
1273
alg = 3 ;
1271
- } else if (total_dsize < 4096 ) {
1274
+ } else if (per_rank_dsize < 4096 ) {
1272
1275
alg = 5 ;
1273
- } else if (total_dsize < 8192 ) {
1276
+ } else if (per_rank_dsize < 8192 ) {
1274
1277
alg = 3 ;
1275
1278
} else {
1276
1279
alg = 5 ;
1277
1280
}
1278
1281
} else if (communicator_size < 8 ) {
1279
- if (total_dsize < 256 ) {
1282
+ if (per_rank_dsize < 256 ) {
1280
1283
alg = 1 ;
1281
- } else if (total_dsize < 4096 ) {
1284
+ } else if (per_rank_dsize < 4096 ) {
1282
1285
alg = 4 ;
1283
- } else if (total_dsize < 8192 ) {
1286
+ } else if (per_rank_dsize < 8192 ) {
1284
1287
alg = 3 ;
1285
- } else if (total_dsize < 16384 ) {
1288
+ } else if (per_rank_dsize < 16384 ) {
1286
1289
alg = 4 ;
1287
- } else if (total_dsize < 262144 ) {
1290
+ } else if (per_rank_dsize < 262144 ) {
1288
1291
alg = 2 ;
1289
1292
} else {
1290
1293
alg = 4 ;
1291
1294
}
1292
1295
} else if (communicator_size < 16 ) {
1293
- if (total_dsize < 1024 ) {
1296
+ if (per_rank_dsize < 1024 ) {
1294
1297
alg = 1 ;
1295
1298
} else {
1296
1299
alg = 2 ;
1297
1300
}
1298
1301
} else if (communicator_size < 32 ) {
1299
- if (total_dsize < 32 ) {
1302
+ if (per_rank_dsize < 128 ) {
1300
1303
alg = 1 ;
1301
- } else {
1304
+ } else if ( per_rank_dsize < 262144 ) {
1302
1305
alg = 2 ;
1306
+ } else {
1307
+ alg = 3 ;
1303
1308
}
1304
1309
} else if (communicator_size < 64 ) {
1305
- if (total_dsize < 1024 ) {
1310
+ if (per_rank_dsize < 256 ) {
1311
+ alg = 1 ;
1312
+ } else if (per_rank_dsize < 8192 ) {
1306
1313
alg = 2 ;
1307
- } else if (total_dsize < 2048 ) {
1308
- alg = 4 ;
1309
- } else if (total_dsize < 8192 ) {
1310
- alg = 3 ;
1311
- } else if (total_dsize < 16384 ) {
1312
- alg = 4 ;
1313
- } else if (total_dsize < 32768 ) {
1314
- alg = 3 ;
1315
- } else if (total_dsize < 65536 ) {
1316
- alg = 4 ;
1317
1314
} else {
1318
1315
alg = 3 ;
1319
1316
}
1320
1317
} else if (communicator_size < 128 ) {
1321
- if (total_dsize < 16 ) {
1318
+ if (per_rank_dsize < 256 ) {
1322
1319
alg = 1 ;
1323
- } else if (total_dsize < 1024 ) {
1320
+ } else if (per_rank_dsize < 4096 ) {
1324
1321
alg = 2 ;
1325
- } else if (total_dsize < 65536 ) {
1326
- alg = 4 ;
1327
1322
} else {
1328
1323
alg = 3 ;
1329
1324
}
1330
1325
} else if (communicator_size < 256 ) {
1331
- if (total_dsize < 1024 ) {
1326
+ if (per_rank_dsize < 1024 ) {
1332
1327
alg = 2 ;
1333
- } else if (total_dsize < 65536 ) {
1328
+ } else if (per_rank_dsize < 65536 ) {
1334
1329
alg = 4 ;
1335
1330
} else {
1336
1331
alg = 3 ;
1337
1332
}
1338
1333
} else if (communicator_size < 512 ) {
1339
- if (total_dsize < 1024 ) {
1334
+ if (per_rank_dsize < 1024 ) {
1340
1335
alg = 2 ;
1341
- } else if (total_dsize < 131072 ) {
1342
- alg = 4 ;
1343
1336
} else {
1344
1337
alg = 3 ;
1345
1338
}
1346
1339
} else if (communicator_size < 1024 ) {
1347
- if (total_dsize < 512 ) {
1340
+ if (per_rank_dsize < 512 ) {
1348
1341
alg = 2 ;
1349
- } else if (total_dsize < 1024 ) {
1342
+ } else if (per_rank_dsize < 1024 ) {
1350
1343
alg = 1 ;
1351
- } else if (total_dsize < 4096 ) {
1344
+ } else if (per_rank_dsize < 4096 ) {
1352
1345
alg = 2 ;
1353
- } else if (total_dsize < 1048576 ) {
1346
+ } else if (per_rank_dsize < 1048576 ) {
1354
1347
alg = 4 ;
1355
1348
} else {
1356
1349
alg = 3 ;
1357
1350
}
1358
1351
} else {
1359
- if (total_dsize < 4096 ) {
1352
+ if (per_rank_dsize < 4096 ) {
1360
1353
alg = 2 ;
1361
1354
} else {
1362
1355
alg = 4 ;
0 commit comments