36
36
37
37
#include "opal/mca/pmix/pmix.h"
38
38
39
- /* #define __DEBUG__ 1 */
39
+ /* #define __DEBUG__ 1 */
40
40
41
41
/**
42
42
* This function is an allreduce between all processes to detect oversubscription.
@@ -72,23 +72,25 @@ static int check_oversubscribing(int rank,
72
72
}
73
73
74
74
#ifdef __DEBUG__
75
- static void dump_int_array ( char * prolog , char * line_prolog , int * array , size_t length )
75
+ static void dump_int_array ( int level , int output_id , char * prolog , char * line_prolog , int * array , size_t length )
76
76
{
77
77
size_t i ;
78
+ if ( -1 == output_id ) return ;
78
79
79
- fprintf ( stdout , "%s : " , prolog );
80
+ opal_output_verbose ( level , output_id , "%s : " , prolog );
80
81
for (i = 0 ; i < length ; i ++ )
81
- fprintf ( stdout , "%s [%lu:%i] " , line_prolog , i , array [i ]);
82
- fprintf ( stdout , "\n" );
82
+ opal_output_verbose ( level , output_id , "%s [%lu:%i] " , line_prolog , i , array [i ]);
83
+ opal_output_verbose ( level , output_id , "\n" );
83
84
}
84
- static void dump_double_array ( char * prolog , char * line_prolog , double * array , size_t length )
85
+ static void dump_double_array ( int level , int output_id , char * prolog , char * line_prolog , double * array , size_t length )
85
86
{
86
87
size_t i ;
87
88
88
- fprintf (stdout ,"%s : " , prolog );
89
+ if ( -1 == output_id ) return ;
90
+ opal_output_verbose (level , output_id , "%s : " , prolog );
89
91
for (i = 0 ; i < length ; i ++ )
90
- fprintf ( stdout , "%s [%lu:%lf] " , line_prolog , i , array [i ]);
91
- fprintf ( stdout , "\n" );
92
+ opal_output_verbose ( level , output_id , "%s [%lu:%lf] " , line_prolog , i , array [i ]);
93
+ opal_output_verbose ( level , output_id , "\n" );
92
94
}
93
95
#endif
94
96
@@ -152,9 +154,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
152
154
rank = ompi_comm_rank (comm_old );
153
155
size = ompi_comm_size (comm_old );
154
156
155
- #ifdef __DEBUG__
156
- fprintf (stdout ,"Process rank is : %i\n" ,rank );
157
- #endif
157
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
158
+ "Process rank is : %i\n" ,rank ));
158
159
/**
159
160
* In order to decrease the number of loops let's use a trick:
160
161
* build the lindex_to_grank in the vpids array, and only allocate
@@ -184,8 +185,10 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
184
185
185
186
#ifdef __DEBUG__
186
187
if ( 0 == rank ) {
187
- dump_int_array ("lindex_to_grank : " , "" , lindex_to_grank , num_procs_in_node );
188
- dump_int_array ("Vpids : " , "" , colors , size );
188
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
189
+ "lindex_to_grank : " , "" , lindex_to_grank , num_procs_in_node );
190
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
191
+ "Vpids : " , "" , colors , size );
189
192
}
190
193
#endif
191
194
/* clean-up dupes in the array */
@@ -210,9 +213,11 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
210
213
for (i = idx = 0 ; i < size ; i ++ )
211
214
if ( vpids [i ] != -1 )
212
215
nodes_roots [idx ++ ] = i ;
216
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
217
+ "num nodes is %i\n" , num_nodes ));
213
218
#ifdef __DEBUG__
214
- fprintf ( stdout , "num nodes is %i\n" , num_nodes );
215
- dump_int_array ( "Root nodes are :\n" , "root " , nodes_roots , num_nodes );
219
+ dump_int_array ( 10 , ompi_topo_base_framework . framework_output ,
220
+ "Root nodes are :\n" , "root " , nodes_roots , num_nodes );
216
221
#endif
217
222
}
218
223
free (vpids );
@@ -229,10 +234,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
229
234
*/
230
235
231
236
if (hwloc_bitmap_isincluded (root_obj -> cpuset ,set )) { /* processes are not bound on the machine */
232
- #ifdef __DEBUG__
233
237
if (0 == rank )
234
- fprintf ( stdout , ">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n" );
235
- #endif /* __DEBUG__ */
238
+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_topo_base_framework . framework_output ,
239
+ ">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n" ));
236
240
237
241
/* we try to bind to cores or above objects if enough are present */
238
242
/* Not sure that cores are present in ALL nodes */
@@ -255,9 +259,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
255
259
256
260
if (oversubscribing_objs ) {
257
261
if (hwloc_bitmap_isincluded (root_obj -> cpuset , set )) { /* processes are not bound on the machine */
258
- #ifdef __DEBUG__
259
- fprintf ( stdout , "Oversubscribing OBJ/CORES resources => Trying to use PUs \n" );
260
- #endif
262
+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_topo_base_framework . framework_output ,
263
+ "Oversubscribing OBJ/CORES resources => Trying to use PUs \n" ) );
264
+
261
265
oversubscribed_pus = check_oversubscribing (rank , num_nodes ,
262
266
num_pus_in_node , num_procs_in_node ,
263
267
nodes_roots , lindex_to_grank , comm_old );
@@ -266,9 +270,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
266
270
obj_rank = ompi_process_info .my_local_rank %num_pus_in_node ;
267
271
effective_depth = hwloc_topology_get_depth (opal_hwloc_topology ) - 1 ;
268
272
num_objs_in_node = num_pus_in_node ;
269
- #ifdef __DEBUG__
270
- fprintf (stdout , "Process not bound : binding on PU#%i \n" , obj_rank );
271
- #endif
273
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
274
+ "Process %i not bound : binding on PU#%i \n" , rank , obj_rank ));
272
275
}
273
276
} else {
274
277
/* Bound processes will participate with the same data as before */
@@ -293,23 +296,24 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
293
296
hwloc_bitmap_singlify (set ); /* we don't want the process to move */
294
297
hwloc_err = hwloc_set_cpubind (opal_hwloc_topology , set , 0 );
295
298
if ( -1 == hwloc_err ) {
296
- free (colors );
297
- hwloc_bitmap_free (set );
298
- goto fallback ; /* return with success */
299
- }
300
- #ifdef __DEBUG__
301
- fprintf (stdout ,"Process not bound : binding on OBJ#%i \n" ,obj_rank );
302
- #endif
299
+ /* This is a local issue. Either we agree with the rest of the processes to stop the
300
+ * reordering or we have to complete the entire process. Let's complete.
301
+ */
302
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
303
+ "Process %i failed to bind on OBJ#%i \n" , rank , obj_rank ));
304
+ } else
305
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
306
+ "Process %i not bound : binding on OBJ#%i \n" ,rank , obj_rank ));
303
307
} else {
304
- #ifdef __DEBUG__
305
- fprintf (stdout , "Process %i bound on OBJ #%i \n" , rank , obj_rank );
306
- fprintf (stdout , "=====> Num obj in node : %i | num pus in node : %i\n" , num_objs_in_node , num_pus_in_node );
307
- #endif
308
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
309
+ "Process %i bound on OBJ #%i \n"
310
+ "=====> Num obj in node : %i | num pus in node : %i\n" ,
311
+ rank , obj_rank ,
312
+ num_objs_in_node , num_pus_in_node ));
308
313
}
309
314
} else {
310
- #ifdef __DEBUG__
311
- fprintf (stdout , "Oversubscribing PUs resources => Rank Reordering Impossible \n" );
312
- #endif
315
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
316
+ "Oversubscribing PUs resources => Rank Reordering Impossible \n" ));
313
317
free (colors );
314
318
hwloc_bitmap_free (set );
315
319
goto fallback ; /* return with success */
@@ -324,9 +328,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
324
328
myhierarchy [0 ] = hwloc_get_nbobjs_by_depth (opal_hwloc_topology , 0 );
325
329
for (i = 1 ; i < array_size ; i ++ ) {
326
330
myhierarchy [i ] = hwloc_get_nbobjs_by_depth (opal_hwloc_topology , i );
327
- #ifdef __DEBUG__
328
- fprintf (stdout ,"hierarchy[%i] = %i\n" , i , myhierarchy [i ]);
329
- #endif
331
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
332
+ "hierarchy[%i] = %i\n" , i , myhierarchy [i ]));
330
333
if ((myhierarchy [i ] != 0 ) && (myhierarchy [i ] != myhierarchy [i - 1 ]))
331
334
numlevels ++ ;
332
335
}
@@ -339,12 +342,14 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
339
342
tracker [idx ] = hwloc_get_obj_by_depth (opal_hwloc_topology , effective_depth , 0 );
340
343
free (myhierarchy );
341
344
342
- #ifdef __DEBUG__
343
- fprintf (stdout , ">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n" ,
344
- effective_depth , hwloc_topology_get_depth (opal_hwloc_topology ), numlevels );
345
- for (i = 0 ; i < numlevels ; i ++ )
346
- fprintf (stdout , "tracker[%i] : arity %i | depth %i\n" , i , tracker [i ]-> arity , tracker [i ]-> depth );
347
- #endif
345
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
346
+ ">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n" ,
347
+ effective_depth , hwloc_topology_get_depth (opal_hwloc_topology ), numlevels ));
348
+ for (i = 0 ; i < numlevels ; i ++ ) {
349
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
350
+ "tracker[%i] : arity %i | depth %i\n" ,
351
+ i , tracker [i ]-> arity , tracker [i ]-> depth ));
352
+ }
348
353
/* get the obj number */
349
354
localrank_to_objnum = (int * )calloc (num_procs_in_node , sizeof (int ));
350
355
localrank_to_objnum [0 ] = obj_rank ;
@@ -383,9 +388,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
383
388
*/
384
389
if ( 0 == rank ) {
385
390
386
- #ifdef __DEBUG__
387
- fprintf (stderr ,"========== Centralized Reordering ========= \n" );
388
- #endif
391
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
392
+ "========== Centralized Reordering ========= \n" ));
389
393
local_pattern = (double * )calloc (size * size ,sizeof (double ));
390
394
} else {
391
395
local_pattern = (double * )calloc (size ,sizeof (double ));
@@ -474,7 +478,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
474
478
memcpy (obj_mapping , obj_to_rank_in_comm , num_objs_total * sizeof (int ));
475
479
}
476
480
#ifdef __DEBUG__
477
- dump_int_array ( "Obj mapping : " , "" , obj_mapping , num_objs_total );
481
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
482
+ "Obj mapping : " , "" , obj_mapping , num_objs_total );
478
483
#endif
479
484
} else {
480
485
if ( num_nodes > 1 ) {
@@ -539,7 +544,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
539
544
double * * comm_pattern = NULL ;
540
545
541
546
#ifdef __DEBUG__
542
- dump_int_array ("hierarchies : " , "" , hierarchies , num_nodes * (TM_MAX_LEVELS + 1 ));
547
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
548
+ "hierarchies : " , "" , hierarchies , num_nodes * (TM_MAX_LEVELS + 1 ));
543
549
#endif
544
550
tm_topology = (tm_topology_t * )malloc (sizeof (tm_topology_t ));
545
551
tm_topology -> nb_levels = hierarchies [0 ];
@@ -574,10 +580,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
574
580
tm_topology -> arity [i ] = hierarchies [i + 1 ];
575
581
}
576
582
free (hierarchies );
577
- #ifdef __DEBUG__
578
- for (i = 0 ; i < tm_topology -> nb_levels ; i ++ )
579
- fprintf (stdout ,"topo_arity[%i] = %i\n" , i , tm_topology -> arity [i ]);
580
- #endif
583
+
584
+ for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ) {
585
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
586
+ "topo_arity[%i] = %i\n" , i , tm_topology -> arity [i ]));
587
+ }
588
+
581
589
/* compute the number of processing elements */
582
590
tm_topology -> nb_nodes = (size_t * )calloc (tm_topology -> nb_levels , sizeof (size_t ));
583
591
tm_topology -> nb_nodes [0 ] = 1 ;
@@ -624,11 +632,13 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
624
632
tm_topology -> oversub_fact = 1 ;
625
633
626
634
#ifdef __DEBUG__
627
- assert (num_objs_total == tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
635
+ assert (num_objs_total == ( int ) tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
628
636
629
637
for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ) {
630
- fprintf (stdout ,"tm topo node_id for level [%i] : " ,i );
631
- dump_int_array ("" , "" , obj_mapping , tm_topology -> nb_nodes [i ]);
638
+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
639
+ "tm topo node_id for level [%i] : " ,i );
640
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
641
+ "" , "" , obj_mapping , tm_topology -> nb_nodes [i ]);
632
642
}
633
643
tm_display_topology (tm_topology );
634
644
#endif
@@ -643,27 +653,30 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
643
653
comm_pattern [j ][i ] = comm_pattern [i ][j ];
644
654
}
645
655
#ifdef __DEBUG__
646
- fprintf (stdout ,"==== COMM PATTERN ====\n" );
656
+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
657
+ "==== COMM PATTERN ====\n" );
647
658
for ( i = 0 ; i < size ; i ++ ) {
648
- dump_double_array ("" , "" , comm_pattern [i ], size );
659
+ dump_double_array (10 , ompi_topo_base_framework .framework_output ,
660
+ "" , "" , comm_pattern [i ], size );
649
661
}
650
662
#endif
651
663
tm_optimize_topology (& tm_topology );
652
664
aff_mat = tm_build_affinity_mat (comm_pattern ,size );
653
665
comm_tree = tm_build_tree_from_topology (tm_topology ,aff_mat , NULL , NULL );
654
666
sol = tm_compute_mapping (tm_topology , comm_tree );
655
667
656
- assert ((int )sol -> k_length == size );
657
-
658
668
k = (int * )calloc (sol -> k_length , sizeof (int ));
659
669
for (idx = 0 ; idx < (int )sol -> k_length ; idx ++ )
660
670
k [idx ] = sol -> k [idx ][0 ];
661
671
662
672
#ifdef __DEBUG__
663
- fprintf (stdout ,"====> nb levels : %i\n" ,tm_topology -> nb_levels );
664
- dump_int_array ("Rank permutation sigma/k : " , "" , k , num_objs_total );
665
- assert (size == sol -> sigma_length );
666
- dump_int_array ("Matching : " , "" ,sol -> sigma , sol -> sigma_length );
673
+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
674
+ "====> nb levels : %i\n" ,tm_topology -> nb_levels );
675
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
676
+ "Rank permutation sigma/k : " , "" , k , num_objs_total );
677
+ assert (size == (int )sol -> sigma_length );
678
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
679
+ "Matching : " , "" ,sol -> sigma , sol -> sigma_length );
667
680
#endif
668
681
free (obj_mapping );
669
682
free (comm_pattern );
@@ -725,9 +738,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
725
738
726
739
/* Discover the local patterns */
727
740
if (rank == lindex_to_grank [0 ]) {
728
- #ifdef __DEBUG__
729
- fprintf (stderr ,"========== Partially Distributed Reordering ========= \n" );
730
- #endif
741
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
742
+ "========== Partially Distributed Reordering ========= \n" ));
731
743
local_pattern = (double * )calloc (num_procs_in_node * num_procs_in_node , sizeof (double ));
732
744
} else {
733
745
local_pattern = (double * )calloc (num_procs_in_node , sizeof (double ));
@@ -773,12 +785,15 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
773
785
}
774
786
775
787
#ifdef __DEBUG__
776
- fprintf (stdout ,"========== COMM PATTERN ============= \n" );
788
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
789
+ "========== COMM PATTERN ============= \n" ));
777
790
for (i = 0 ; i < num_procs_in_node ; i ++ ){
778
- fprintf (stdout ," %i : " ,i );
779
- dump_double_array ("" , "" , comm_pattern [i ], num_procs_in_node );
791
+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ," %i : " ,i );
792
+ dump_double_array (10 , ompi_topo_base_framework .framework_output ,
793
+ "" , "" , comm_pattern [i ], num_procs_in_node );
780
794
}
781
- fprintf (stdout ,"======================= \n" );
795
+ opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
796
+ "======================= \n" );
782
797
#endif
783
798
784
799
tm_topology = (tm_topology_t * )malloc (sizeof (tm_topology_t ));
@@ -818,11 +833,16 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
818
833
tm_topology -> oversub_fact = 1 ;
819
834
820
835
#ifdef __DEBUG__
821
- assert (num_objs_in_node == tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
822
- fprintf (stdout ,"Levels in topo : %i | num procs in node : %i\n" ,tm_topology -> nb_levels ,num_procs_in_node );
823
- for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ){
824
- fprintf (stdout ,"Nb objs for level %i : %i | arity %i\n " ,i ,tm_topology -> nb_nodes [i ],tm_topology -> arity [i ]);
825
- dump_int_array ("" , "Obj id " , tm_topology -> node_id [i ], tm_topology -> nb_nodes [i ]);
836
+ assert (num_objs_in_node == (int )tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
837
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
838
+ "Levels in topo : %i | num procs in node : %i\n" ,
839
+ tm_topology -> nb_levels ,num_procs_in_node ));
840
+ for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ) {
841
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
842
+ "Nb objs for level %i : %lu | arity %i\n " ,
843
+ i , tm_topology -> nb_nodes [i ],tm_topology -> arity [i ]));
844
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
845
+ "" , "Obj id " , tm_topology -> node_id [i ], tm_topology -> nb_nodes [i ]);
826
846
}
827
847
tm_display_topology (tm_topology );
828
848
#endif
@@ -831,17 +851,20 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
831
851
comm_tree = tm_build_tree_from_topology (tm_topology ,aff_mat , NULL , NULL );
832
852
sol = tm_compute_mapping (tm_topology , comm_tree );
833
853
834
- assert ((int )sol -> k_length == num_procs_in_node );
854
+ assert ((int )sol -> k_length == num_objs_in_node );
835
855
836
856
k = (int * )calloc (sol -> k_length , sizeof (int ));
837
857
for (idx = 0 ; idx < (int )sol -> k_length ; idx ++ )
838
858
k [idx ] = sol -> k [idx ][0 ];
839
859
840
860
#ifdef __DEBUG__
841
- fprintf (stdout ,"====> nb levels : %i\n" ,tm_topology -> nb_levels );
842
- dump_int_array ("Rank permutation sigma/k : " , "" , k , num_procs_in_node );
843
- assert (num_procs_in_node == sol -> sigma_length );
844
- dump_int_array ("Matching : " , "" , sol -> sigma , sol -> sigma_length );
861
+ OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
862
+ "====> nb levels : %i\n" ,tm_topology -> nb_levels ));
863
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
864
+ "Rank permutation sigma/k : " , "" , k , num_procs_in_node );
865
+ assert (num_procs_in_node == (int )sol -> sigma_length );
866
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
867
+ "Matching : " , "" , sol -> sigma , sol -> sigma_length );
845
868
#endif
846
869
847
870
free (aff_mat -> sum_row );
0 commit comments