Skip to content

Commit ef38ca5

Browse files
authored
Merge pull request #4644 from bosilca/topic/treematch
Fix treematch topology assert
2 parents 8b8aae3 + 3845584 commit ef38ca5

File tree

2 files changed

+110
-87
lines changed

2 files changed

+110
-87
lines changed

ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c

Lines changed: 108 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636

3737
#include "opal/mca/pmix/pmix.h"
3838

39-
/* #define __DEBUG__ 1 */
39+
/* #define __DEBUG__ 1 */
4040

4141
/**
4242
* This function is an allreduce between all processes to detect oversubscription.
@@ -72,23 +72,25 @@ static int check_oversubscribing(int rank,
7272
}
7373

7474
#ifdef __DEBUG__
75-
static void dump_int_array( char* prolog, char* line_prolog, int* array, size_t length )
75+
static void dump_int_array( int level, int output_id, char* prolog, char* line_prolog, int* array, size_t length )
7676
{
7777
size_t i;
78+
if( -1 == output_id ) return;
7879

79-
fprintf(stdout,"%s : ", prolog);
80+
opal_output_verbose(level, output_id, "%s : ", prolog);
8081
for(i = 0; i < length ; i++)
81-
fprintf(stdout,"%s [%lu:%i] ", line_prolog, i, array[i]);
82-
fprintf(stdout,"\n");
82+
opal_output_verbose(level, output_id, "%s [%lu:%i] ", line_prolog, i, array[i]);
83+
opal_output_verbose(level, output_id, "\n");
8384
}
84-
static void dump_double_array( char* prolog, char* line_prolog, double* array, size_t length )
85+
static void dump_double_array( int level, int output_id, char* prolog, char* line_prolog, double* array, size_t length )
8586
{
8687
size_t i;
8788

88-
fprintf(stdout,"%s : ", prolog);
89+
if( -1 == output_id ) return;
90+
opal_output_verbose(level, output_id, "%s : ", prolog);
8991
for(i = 0; i < length ; i++)
90-
fprintf(stdout,"%s [%lu:%lf] ", line_prolog, i, array[i]);
91-
fprintf(stdout,"\n");
92+
opal_output_verbose(level, output_id, "%s [%lu:%lf] ", line_prolog, i, array[i]);
93+
opal_output_verbose(level, output_id, "\n");
9294
}
9395
#endif
9496

@@ -152,9 +154,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
152154
rank = ompi_comm_rank(comm_old);
153155
size = ompi_comm_size(comm_old);
154156

155-
#ifdef __DEBUG__
156-
fprintf(stdout,"Process rank is : %i\n",rank);
157-
#endif
157+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
158+
"Process rank is : %i\n",rank));
158159
/**
159160
* In order to decrease the number of loops let's use a trick:
160161
* build the lindex_to_grank in the vpids array, and only allocate
@@ -184,8 +185,10 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
184185

185186
#ifdef __DEBUG__
186187
if ( 0 == rank ) {
187-
dump_int_array("lindex_to_grank : ", "", lindex_to_grank, num_procs_in_node);
188-
dump_int_array("Vpids : ", "", colors, size);
188+
dump_int_array(10, ompi_topo_base_framework.framework_output,
189+
"lindex_to_grank : ", "", lindex_to_grank, num_procs_in_node);
190+
dump_int_array(10, ompi_topo_base_framework.framework_output,
191+
"Vpids : ", "", colors, size);
189192
}
190193
#endif
191194
/* clean-up dupes in the array */
@@ -210,9 +213,11 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
210213
for(i = idx = 0; i < size; i++)
211214
if( vpids[i] != -1 )
212215
nodes_roots[idx++] = i;
216+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
217+
"num nodes is %i\n", num_nodes));
213218
#ifdef __DEBUG__
214-
fprintf(stdout, "num nodes is %i\n", num_nodes);
215-
dump_int_array("Root nodes are :\n", "root ", nodes_roots, num_nodes);
219+
dump_int_array(10, ompi_topo_base_framework.framework_output,
220+
"Root nodes are :\n", "root ", nodes_roots, num_nodes);
216221
#endif
217222
}
218223
free(vpids);
@@ -229,10 +234,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
229234
*/
230235

231236
if(hwloc_bitmap_isincluded(root_obj->cpuset,set)) { /* processes are not bound on the machine */
232-
#ifdef __DEBUG__
233237
if (0 == rank)
234-
fprintf(stdout,">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n");
235-
#endif /* __DEBUG__ */
238+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
239+
">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n"));
236240

237241
/* we try to bind to cores or above objects if enough are present */
238242
/* Not sure that cores are present in ALL nodes */
@@ -255,9 +259,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
255259

256260
if(oversubscribing_objs) {
257261
if(hwloc_bitmap_isincluded(root_obj->cpuset, set)) { /* processes are not bound on the machine */
258-
#ifdef __DEBUG__
259-
fprintf(stdout,"Oversubscribing OBJ/CORES resources => Trying to use PUs \n");
260-
#endif
262+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
263+
"Oversubscribing OBJ/CORES resources => Trying to use PUs \n"));
264+
261265
oversubscribed_pus = check_oversubscribing(rank, num_nodes,
262266
num_pus_in_node, num_procs_in_node,
263267
nodes_roots, lindex_to_grank, comm_old);
@@ -266,9 +270,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
266270
obj_rank = ompi_process_info.my_local_rank%num_pus_in_node;
267271
effective_depth = hwloc_topology_get_depth(opal_hwloc_topology) - 1;
268272
num_objs_in_node = num_pus_in_node;
269-
#ifdef __DEBUG__
270-
fprintf(stdout, "Process not bound : binding on PU#%i \n", obj_rank);
271-
#endif
273+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
274+
"Process %i not bound : binding on PU#%i \n", rank, obj_rank));
272275
}
273276
} else {
274277
/* Bound processes will participate with the same data as before */
@@ -293,23 +296,24 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
293296
hwloc_bitmap_singlify(set); /* we don't want the process to move */
294297
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology, set, 0);
295298
if( -1 == hwloc_err) {
296-
free(colors);
297-
hwloc_bitmap_free(set);
298-
goto fallback; /* return with success */
299-
}
300-
#ifdef __DEBUG__
301-
fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank);
302-
#endif
299+
/* This is a local issue. Either we agree with the rest of the processes to stop the
300+
* reordering or we have to complete the entire process. Let's complete.
301+
*/
302+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
303+
"Process %i failed to bind on OBJ#%i \n", rank, obj_rank));
304+
} else
305+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
306+
"Process %i not bound : binding on OBJ#%i \n",rank, obj_rank));
303307
} else {
304-
#ifdef __DEBUG__
305-
fprintf(stdout, "Process %i bound on OBJ #%i \n", rank, obj_rank);
306-
fprintf(stdout, "=====> Num obj in node : %i | num pus in node : %i\n", num_objs_in_node, num_pus_in_node);
307-
#endif
308+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
309+
"Process %i bound on OBJ #%i \n"
310+
"=====> Num obj in node : %i | num pus in node : %i\n",
311+
rank, obj_rank,
312+
num_objs_in_node, num_pus_in_node));
308313
}
309314
} else {
310-
#ifdef __DEBUG__
311-
fprintf(stdout, "Oversubscribing PUs resources => Rank Reordering Impossible \n");
312-
#endif
315+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
316+
"Oversubscribing PUs resources => Rank Reordering Impossible \n"));
313317
free(colors);
314318
hwloc_bitmap_free(set);
315319
goto fallback; /* return with success */
@@ -324,9 +328,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
324328
myhierarchy[0] = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, 0);
325329
for (i = 1; i < array_size ; i++) {
326330
myhierarchy[i] = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, i);
327-
#ifdef __DEBUG__
328-
fprintf(stdout,"hierarchy[%i] = %i\n", i, myhierarchy[i]);
329-
#endif
331+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
332+
"hierarchy[%i] = %i\n", i, myhierarchy[i]));
330333
if ((myhierarchy[i] != 0) && (myhierarchy[i] != myhierarchy[i-1]))
331334
numlevels++;
332335
}
@@ -339,12 +342,14 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
339342
tracker[idx] = hwloc_get_obj_by_depth(opal_hwloc_topology, effective_depth, 0);
340343
free(myhierarchy);
341344

342-
#ifdef __DEBUG__
343-
fprintf(stdout, ">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n",
344-
effective_depth, hwloc_topology_get_depth(opal_hwloc_topology), numlevels);
345-
for(i = 0 ; i < numlevels ; i++)
346-
fprintf(stdout, "tracker[%i] : arity %i | depth %i\n", i, tracker[i]->arity, tracker[i]->depth);
347-
#endif
345+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
346+
">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n",
347+
effective_depth, hwloc_topology_get_depth(opal_hwloc_topology), numlevels));
348+
for(i = 0 ; i < numlevels ; i++) {
349+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
350+
"tracker[%i] : arity %i | depth %i\n",
351+
i, tracker[i]->arity, tracker[i]->depth));
352+
}
348353
/* get the obj number */
349354
localrank_to_objnum = (int *)calloc(num_procs_in_node, sizeof(int));
350355
localrank_to_objnum[0] = obj_rank;
@@ -383,9 +388,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
383388
*/
384389
if( 0 == rank ) {
385390

386-
#ifdef __DEBUG__
387-
fprintf(stderr,"========== Centralized Reordering ========= \n");
388-
#endif
391+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
392+
"========== Centralized Reordering ========= \n"));
389393
local_pattern = (double *)calloc(size*size,sizeof(double));
390394
} else {
391395
local_pattern = (double *)calloc(size,sizeof(double));
@@ -474,7 +478,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
474478
memcpy(obj_mapping, obj_to_rank_in_comm, num_objs_total*sizeof(int));
475479
}
476480
#ifdef __DEBUG__
477-
dump_int_array( "Obj mapping : ", "", obj_mapping, num_objs_total );
481+
dump_int_array(10, ompi_topo_base_framework.framework_output,
482+
"Obj mapping : ", "", obj_mapping, num_objs_total );
478483
#endif
479484
} else {
480485
if ( num_nodes > 1 ) {
@@ -539,7 +544,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
539544
double **comm_pattern = NULL;
540545

541546
#ifdef __DEBUG__
542-
dump_int_array("hierarchies : ", "", hierarchies, num_nodes*(TM_MAX_LEVELS+1));
547+
dump_int_array(10, ompi_topo_base_framework.framework_output,
548+
"hierarchies : ", "", hierarchies, num_nodes*(TM_MAX_LEVELS+1));
543549
#endif
544550
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
545551
tm_topology->nb_levels = hierarchies[0];
@@ -574,10 +580,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
574580
tm_topology->arity[i] = hierarchies[i+1];
575581
}
576582
free(hierarchies);
577-
#ifdef __DEBUG__
578-
for(i = 0; i < tm_topology->nb_levels; i++)
579-
fprintf(stdout,"topo_arity[%i] = %i\n", i, tm_topology->arity[i]);
580-
#endif
583+
584+
for(i = 0; i < tm_topology->nb_levels; i++) {
585+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
586+
"topo_arity[%i] = %i\n", i, tm_topology->arity[i]));
587+
}
588+
581589
/* compute the number of processing elements */
582590
tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
583591
tm_topology->nb_nodes[0] = 1;
@@ -624,11 +632,13 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
624632
tm_topology->oversub_fact = 1;
625633

626634
#ifdef __DEBUG__
627-
assert(num_objs_total == tm_topology->nb_nodes[tm_topology->nb_levels-1]);
635+
assert(num_objs_total == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
628636

629637
for(i = 0; i < tm_topology->nb_levels ; i++) {
630-
fprintf(stdout,"tm topo node_id for level [%i] : ",i);
631-
dump_int_array("", "", obj_mapping, tm_topology->nb_nodes[i]);
638+
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
639+
"tm topo node_id for level [%i] : ",i);
640+
dump_int_array(10, ompi_topo_base_framework.framework_output,
641+
"", "", obj_mapping, tm_topology->nb_nodes[i]);
632642
}
633643
tm_display_topology(tm_topology);
634644
#endif
@@ -643,27 +653,30 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
643653
comm_pattern[j][i] = comm_pattern[i][j];
644654
}
645655
#ifdef __DEBUG__
646-
fprintf(stdout,"==== COMM PATTERN ====\n");
656+
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
657+
"==== COMM PATTERN ====\n");
647658
for( i = 0 ; i < size ; i++) {
648-
dump_double_array("", "", comm_pattern[i], size);
659+
dump_double_array(10, ompi_topo_base_framework.framework_output,
660+
"", "", comm_pattern[i], size);
649661
}
650662
#endif
651663
tm_optimize_topology(&tm_topology);
652664
aff_mat = tm_build_affinity_mat(comm_pattern,size);
653665
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
654666
sol = tm_compute_mapping(tm_topology, comm_tree);
655667

656-
assert((int)sol->k_length == size);
657-
658668
k = (int *)calloc(sol->k_length, sizeof(int));
659669
for(idx = 0 ; idx < (int)sol->k_length ; idx++)
660670
k[idx] = sol->k[idx][0];
661671

662672
#ifdef __DEBUG__
663-
fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels);
664-
dump_int_array("Rank permutation sigma/k : ", "", k, num_objs_total);
665-
assert(size == sol->sigma_length);
666-
dump_int_array("Matching : ", "",sol->sigma, sol->sigma_length);
673+
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
674+
"====> nb levels : %i\n",tm_topology->nb_levels);
675+
dump_int_array(10, ompi_topo_base_framework.framework_output,
676+
"Rank permutation sigma/k : ", "", k, num_objs_total);
677+
assert(size == (int)sol->sigma_length);
678+
dump_int_array(10, ompi_topo_base_framework.framework_output,
679+
"Matching : ", "",sol->sigma, sol->sigma_length);
667680
#endif
668681
free(obj_mapping);
669682
free(comm_pattern);
@@ -725,9 +738,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
725738

726739
/* Discover the local patterns */
727740
if (rank == lindex_to_grank[0]) {
728-
#ifdef __DEBUG__
729-
fprintf(stderr,"========== Partially Distributed Reordering ========= \n");
730-
#endif
741+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
742+
"========== Partially Distributed Reordering ========= \n"));
731743
local_pattern = (double *)calloc(num_procs_in_node * num_procs_in_node, sizeof(double));
732744
} else {
733745
local_pattern = (double *)calloc(num_procs_in_node, sizeof(double));
@@ -773,12 +785,15 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
773785
}
774786

775787
#ifdef __DEBUG__
776-
fprintf(stdout,"========== COMM PATTERN ============= \n");
788+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
789+
"========== COMM PATTERN ============= \n"));
777790
for(i = 0 ; i < num_procs_in_node ; i++){
778-
fprintf(stdout," %i : ",i);
779-
dump_double_array("", "", comm_pattern[i], num_procs_in_node);
791+
opal_output_verbose(10, ompi_topo_base_framework.framework_output," %i : ",i);
792+
dump_double_array(10, ompi_topo_base_framework.framework_output,
793+
"", "", comm_pattern[i], num_procs_in_node);
780794
}
781-
fprintf(stdout,"======================= \n");
795+
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
796+
"======================= \n");
782797
#endif
783798

784799
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
@@ -818,11 +833,16 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
818833
tm_topology->oversub_fact = 1;
819834

820835
#ifdef __DEBUG__
821-
assert(num_objs_in_node == tm_topology->nb_nodes[tm_topology->nb_levels-1]);
822-
fprintf(stdout,"Levels in topo : %i | num procs in node : %i\n",tm_topology->nb_levels,num_procs_in_node);
823-
for(i = 0; i < tm_topology->nb_levels ; i++){
824-
fprintf(stdout,"Nb objs for level %i : %i | arity %i\n ",i,tm_topology->nb_nodes[i],tm_topology->arity[i]);
825-
dump_int_array("", "Obj id ", tm_topology->node_id[i], tm_topology->nb_nodes[i]);
836+
assert(num_objs_in_node == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
837+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
838+
"Levels in topo : %i | num procs in node : %i\n",
839+
tm_topology->nb_levels,num_procs_in_node));
840+
for(i = 0; i < tm_topology->nb_levels ; i++) {
841+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
842+
"Nb objs for level %i : %lu | arity %i\n ",
843+
i, tm_topology->nb_nodes[i],tm_topology->arity[i]));
844+
dump_int_array(10, ompi_topo_base_framework.framework_output,
845+
"", "Obj id ", tm_topology->node_id[i], tm_topology->nb_nodes[i]);
826846
}
827847
tm_display_topology(tm_topology);
828848
#endif
@@ -831,17 +851,20 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
831851
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
832852
sol = tm_compute_mapping(tm_topology, comm_tree);
833853

834-
assert((int)sol->k_length == num_procs_in_node);
854+
assert((int)sol->k_length == num_objs_in_node);
835855

836856
k = (int *)calloc(sol->k_length, sizeof(int));
837857
for(idx = 0 ; idx < (int)sol->k_length ; idx++)
838858
k[idx] = sol->k[idx][0];
839859

840860
#ifdef __DEBUG__
841-
fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels);
842-
dump_int_array("Rank permutation sigma/k : ", "", k, num_procs_in_node);
843-
assert(num_procs_in_node == sol->sigma_length);
844-
dump_int_array("Matching : ", "", sol->sigma, sol->sigma_length);
861+
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
862+
"====> nb levels : %i\n",tm_topology->nb_levels));
863+
dump_int_array(10, ompi_topo_base_framework.framework_output,
864+
"Rank permutation sigma/k : ", "", k, num_procs_in_node);
865+
assert(num_procs_in_node == (int)sol->sigma_length);
866+
dump_int_array(10, ompi_topo_base_framework.framework_output,
867+
"Matching : ", "", sol->sigma, sol->sigma_length);
845868
#endif
846869

847870
free(aff_mat->sum_row);

0 commit comments

Comments
 (0)