Skip to content

Commit ebd1404

Browse files
committed
[COLL/LIBNBC] Upgrade dynamic rules support
Signed-off-by: Thomas Goncalves <[email protected]>
1 parent 8aca231 commit ebd1404

File tree

13 files changed

+928
-261
lines changed

13 files changed

+928
-261
lines changed

ompi/mca/coll/libnbc/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
# $HEADER$
2323
#
2424

25+
dist_ompidata_DATA = help-mpi-coll-libnbc.txt
26+
2527
sources = \
2628
coll_libnbc.h \
2729
coll_libnbc_component.c \

ompi/mca/coll/libnbc/coll_libnbc.h

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
* and Technology (RIST). All rights reserved.
1818
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
1919
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
20+
* Copyright (c) 2020 Bull SAS. All rights reserved.
2021
* $COPYRIGHT$
2122
*
2223
* Additional copyrights may follow
@@ -30,6 +31,8 @@
3031
#include "ompi/mca/coll/coll.h"
3132
#include "ompi/mca/coll/base/coll_base_util.h"
3233
#include "opal/sys/atomic.h"
34+
#include "ompi/mca/coll/base/coll_base_functions.h"
35+
#include "ompi/mca/coll/base/coll_base_dynamic_rules.h"
3336

3437
BEGIN_C_DECLS
3538

@@ -69,21 +72,29 @@ BEGIN_C_DECLS
6972
/* number of implemented collective functions */
7073
#define NBC_NUM_COLL 17
7174

72-
extern bool libnbc_ibcast_skip_dt_decision;
73-
extern int libnbc_iallgather_algorithm;
74-
extern int libnbc_iallreduce_algorithm;
75-
extern int libnbc_ibcast_algorithm;
76-
extern int libnbc_ibcast_knomial_radix;
77-
extern int libnbc_iexscan_algorithm;
78-
extern int libnbc_ireduce_algorithm;
79-
extern int libnbc_iscan_algorithm;
75+
/* forced algorithm choices */
76+
/* this structure is for storing the indexes to the forced algorithm mca params... */
77+
/* we get these at component query (so that registered values appear in ompi_info) */
78+
/*
 * Forced-algorithm settings for a single collective.
 * The component keeps one of these per collective in
 * ompi_coll_libnbc_component_t::forced_params[COLLCOUNT].
 */
struct coll_libnbc_force_algorithm_mca_param_indices_t {
79+
int algorithm; /* which algorithm you want to force */
80+
int segsize; /* NOTE(review): presumably the segment-size counterpart of 'algorithm' - confirm in the *_check_forced_init code */
81+
int topo; /* NOTE(review): presumably a topology-related setting for the forced algorithm - confirm in the *_check_forced_init code */
82+
};
83+
typedef struct coll_libnbc_force_algorithm_mca_param_indices_t coll_libnbc_force_algorithm_mca_param_indices_t;
8084

8185
/*
 * Component-wide state of the libnbc collective component.
 * Besides the request bookkeeping it carries the dynamic-rules
 * configuration (MCA parameters registered in libnbc_register()) and the
 * rule table read from the rules file in libnbc_open().
 */
struct ompi_coll_libnbc_component_t {
8286
mca_coll_base_component_2_0_0_t super;
8387
opal_free_list_t requests;
8488
opal_list_t active_requests;
8589
opal_atomic_int32_t active_comms;
8690
opal_mutex_t lock; /* protect access to the active_requests list */
91+
/* value of the "dynamic_rules_verbose" MCA parameter (0: no verbose output) */
int dynamic_rules_verbose;
92+
/* opal_output stream opened in libnbc_open() when dynamic_rules_verbose > 0,
 * -1 otherwise; closed in libnbc_close() when >= 0 */
int stream;
93+
/* forced-algorithm settings, one entry per collective */
coll_libnbc_force_algorithm_mca_param_indices_t forced_params[COLLCOUNT];
94+
/* rules read from dynamic_rules_filename in libnbc_open(); NULL when no
 * file was given or reading it failed; released in libnbc_close() */
95+
ompi_coll_base_alg_rule_t *all_base_rules;
96+
/* value of the "dynamic_rules_fileformat" MCA parameter:
 * 0 <comm_size, msg_size>, 1 <nodes_nb, comm_size, msg_size> */
int dynamic_rules_fileformat;
97+
/* value of the "dynamic_rules_filename" MCA parameter; NULL when unset */
char* dynamic_rules_filename;
8798
};
8899
typedef struct ompi_coll_libnbc_component_t ompi_coll_libnbc_component_t;
89100

@@ -94,6 +105,9 @@ struct ompi_coll_libnbc_module_t {
94105
mca_coll_base_module_t super;
95106
opal_mutex_t mutex;
96107
bool comm_registered;
108+
109+
/* the communicator rules for each MPI collective, for ONLY this communicator's size */
110+
ompi_coll_base_com_rule_t *com_rules[COLLCOUNT];
97111
#ifdef NBC_CACHE_SCHEDULE
98112
void *NBC_Dict[NBC_NUM_COLL]; /* this should point to a struct
99113
hb_tree, but since this is a
@@ -160,6 +174,27 @@ int ompi_coll_libnbc_progress(void);
160174
int NBC_Init_comm(MPI_Comm comm, ompi_coll_libnbc_module_t *module);
161175
int NBC_Progress(NBC_Handle *handle);
162176

177+
/*
 * Per-collective "check_forced_init" hooks, one per collective.
 * libnbc_register() calls the allgather, allreduce, alltoall, alltoallv,
 * alltoallw, bcast, exscan, reduce and scan variants.
 * NOTE(review): presumably each one registers the forced-algorithm MCA
 * parameters of its collective - confirm in the per-collective sources.
 */
int ompi_coll_libnbc_allgather_check_forced_init (void);
178+
int ompi_coll_libnbc_allreduce_check_forced_init (void);
179+
int ompi_coll_libnbc_alltoall_check_forced_init (void);
180+
int ompi_coll_libnbc_alltoallv_check_forced_init (void);
181+
int ompi_coll_libnbc_alltoallw_check_forced_init (void);
182+
int ompi_coll_libnbc_barrier_check_forced_init (void);
183+
int ompi_coll_libnbc_bcast_check_forced_init (void);
184+
int ompi_coll_libnbc_exscan_check_forced_init (void);
185+
int ompi_coll_libnbc_gather_check_forced_init (void);
186+
int ompi_coll_libnbc_gatherv_check_forced_init (void);
187+
int ompi_coll_libnbc_reduce_check_forced_init (void);
188+
int ompi_coll_libnbc_reduce_scatter_check_forced_init (void);
189+
int ompi_coll_libnbc_reduce_scatter_block_check_forced_init (void);
190+
int ompi_coll_libnbc_scan_check_forced_init (void);
191+
int ompi_coll_libnbc_scatter_check_forced_init (void);
192+
int ompi_coll_libnbc_scatterv_check_forced_init (void);
193+
int ompi_coll_libnbc_neighbor_allgather_check_forced_init (void);
194+
int ompi_coll_libnbc_neighbor_allgatherv_check_forced_init (void);
195+
int ompi_coll_libnbc_neighbor_alltoall_check_forced_init (void);
196+
int ompi_coll_libnbc_neighbor_alltoallv_check_forced_init (void);
197+
int ompi_coll_libnbc_neighbor_alltoallw_check_forced_init (void);
163198

164199
int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
165200
MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,

ompi/mca/coll/libnbc/coll_libnbc_component.c

Lines changed: 95 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
* Copyright (c) 2017 Ian Bradley Morgan and Anthony Skjellum. All
2020
* rights reserved.
2121
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
22+
* Copyright (c) 2020 Bull SAS. All rights reserved.
2223
* $COPYRIGHT$
2324
*
2425
* Additional copyrights may follow
@@ -34,6 +35,8 @@
3435
#include "mpi.h"
3536
#include "ompi/mca/coll/coll.h"
3637
#include "ompi/communicator/communicator.h"
38+
#include "ompi/mca/coll/base/coll_base_dynamic_file.h"
39+
#include "opal/util/show_help.h"
3740

3841
/*
3942
* Public string showing the coll ompi_libnbc component version number
@@ -44,61 +47,6 @@ const char *mca_coll_libnbc_component_version_string =
4447

4548
static int libnbc_priority = 10;
4649
static bool libnbc_in_progress = false; /* protect from recursive calls */
47-
bool libnbc_ibcast_skip_dt_decision = true;
48-
49-
int libnbc_iallgather_algorithm = 0; /* iallgather user forced algorithm */
50-
static mca_base_var_enum_value_t iallgather_algorithms[] = {
51-
{0, "ignore"},
52-
{1, "linear"},
53-
{2, "recursive_doubling"},
54-
{0, NULL}
55-
};
56-
57-
int libnbc_iallreduce_algorithm = 0; /* iallreduce user forced algorithm */
58-
static mca_base_var_enum_value_t iallreduce_algorithms[] = {
59-
{0, "ignore"},
60-
{1, "ring"},
61-
{2, "binomial"},
62-
{3, "rabenseifner"},
63-
{4, "recursive_doubling"},
64-
{0, NULL}
65-
};
66-
67-
int libnbc_ibcast_algorithm = 0; /* ibcast user forced algorithm */
68-
int libnbc_ibcast_knomial_radix = 4;
69-
static mca_base_var_enum_value_t ibcast_algorithms[] = {
70-
{0, "ignore"},
71-
{1, "linear"},
72-
{2, "binomial"},
73-
{3, "chain"},
74-
{4, "knomial"},
75-
{0, NULL}
76-
};
77-
78-
int libnbc_iexscan_algorithm = 0; /* iexscan user forced algorithm */
79-
static mca_base_var_enum_value_t iexscan_algorithms[] = {
80-
{0, "ignore"},
81-
{1, "linear"},
82-
{2, "recursive_doubling"},
83-
{0, NULL}
84-
};
85-
86-
int libnbc_ireduce_algorithm = 0; /* ireduce user forced algorithm */
87-
static mca_base_var_enum_value_t ireduce_algorithms[] = {
88-
{0, "ignore"},
89-
{1, "chain"},
90-
{2, "binomial"},
91-
{3, "rabenseifner"},
92-
{0, NULL}
93-
};
94-
95-
int libnbc_iscan_algorithm = 0; /* iscan user forced algorithm */
96-
static mca_base_var_enum_value_t iscan_algorithms[] = {
97-
{0, "ignore"},
98-
{1, "linear"},
99-
{2, "recursive_doubling"},
100-
{0, NULL}
101-
};
10250

10351
static int libnbc_open(void);
10452
static int libnbc_close(void);
@@ -145,6 +93,38 @@ static int
14593
libnbc_open(void)
14694
{
14795
int ret;
96+
if (mca_coll_libnbc_component.dynamic_rules_verbose > 0) {
97+
mca_coll_libnbc_component.stream = opal_output_open(NULL);
98+
opal_output_set_verbosity(mca_coll_libnbc_component.stream, mca_coll_libnbc_component.dynamic_rules_verbose);
99+
} else {
100+
mca_coll_libnbc_component.stream = -1;
101+
}
102+
if(mca_coll_libnbc_component.dynamic_rules_filename ) {
103+
int rc;
104+
opal_output_verbose(10, mca_coll_libnbc_component.stream ,
105+
"coll:libnbc:component_open Reading collective rules file [%s] which format is %d",
106+
mca_coll_libnbc_component.dynamic_rules_filename,
107+
mca_coll_libnbc_component.dynamic_rules_fileformat);
108+
rc = ompi_coll_base_read_rules_config_file( mca_coll_libnbc_component.dynamic_rules_filename,
109+
mca_coll_libnbc_component.dynamic_rules_fileformat,
110+
&(mca_coll_libnbc_component.all_base_rules), COLLCOUNT);
111+
if( rc >= 0 ) {
112+
opal_output_verbose(10, mca_coll_libnbc_component.stream ,"coll:libnbc:module_open Read %d valid rules\n", rc);
113+
if(ompi_coll_base_framework.framework_verbose >= 50) {
114+
ompi_coll_base_dump_all_rules (mca_coll_libnbc_component.all_base_rules, COLLCOUNT);
115+
}
116+
} else {
117+
opal_output_verbose(1, mca_coll_libnbc_component.stream ,"coll:libnbc:module_open Reading collective rules file failed\n");
118+
char error_name[12];
119+
sprintf(error_name,"file fail%1d", rc);
120+
error_name[11] = '\0';
121+
opal_show_help("help-mpi-coll-libnbc.txt", (const char*)error_name, true,
122+
mca_coll_libnbc_component.dynamic_rules_filename, mca_coll_libnbc_component.dynamic_rules_fileformat);
123+
mca_coll_libnbc_component.all_base_rules = NULL;
124+
}
125+
} else {
126+
mca_coll_libnbc_component.all_base_rules = NULL;
127+
}
148128

149129
OBJ_CONSTRUCT(&mca_coll_libnbc_component.requests, opal_free_list_t);
150130
OBJ_CONSTRUCT(&mca_coll_libnbc_component.active_requests, opal_list_t);
@@ -173,6 +153,14 @@ libnbc_close(void)
173153
OBJ_DESTRUCT(&mca_coll_libnbc_component.active_requests);
174154
OBJ_DESTRUCT(&mca_coll_libnbc_component.lock);
175155

156+
if( NULL != mca_coll_libnbc_component.all_base_rules ) {
157+
ompi_coll_base_free_all_rules(mca_coll_libnbc_component.all_base_rules, COLLCOUNT);
158+
mca_coll_libnbc_component.all_base_rules = NULL;
159+
}
160+
/* close stream */
161+
if(mca_coll_libnbc_component.stream >= 0) {
162+
opal_output_close(mca_coll_libnbc_component.stream);
163+
}
176164
return OMPI_SUCCESS;
177165
}
178166

@@ -191,94 +179,42 @@ libnbc_register(void)
191179
MCA_BASE_VAR_SCOPE_READONLY,
192180
&libnbc_priority);
193181

194-
/* ibcast decision function can make the wrong decision if a legal
195-
* non-uniform data type signature is used. This has resulted in the
196-
* collective operation failing, and possibly producing wrong answers.
197-
* We are investigating a fix for this problem, but it is taking a while.
198-
* https://github.com/open-mpi/ompi/issues/2256
199-
* https://github.com/open-mpi/ompi/issues/1763
200-
* As a result we are adding an MCA parameter to make a conservative
201-
* decision to avoid this issue. If the user knows that their application
202-
* does not use data types in this way, then they can set this parameter
203-
* to get the old behavior. Once the issue is truely fixed, then this
204-
* parameter can be removed.
205-
*/
206-
libnbc_ibcast_skip_dt_decision = true;
207-
(void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
208-
"ibcast_skip_dt_decision",
209-
"In ibcast only use size of communicator to choose algorithm, exclude data type signature. Set to 'false' to use data type signature in decision. WARNING: If you set this to 'false' then your application should not use non-uniform data type signatures in calls to ibcast.",
210-
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
182+
mca_coll_libnbc_component.dynamic_rules_verbose = 0;
183+
(void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version, "dynamic_rules_verbose",
184+
"Verbose level of the libnbc coll component regarding on dynamic rules."
185+
" Examples: 0: no verbose, 1: selection errors, 10: selection output",
186+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
211187
OPAL_INFO_LVL_9,
212188
MCA_BASE_VAR_SCOPE_READONLY,
213-
&libnbc_ibcast_skip_dt_decision);
214-
215-
libnbc_iallgather_algorithm = 0;
216-
(void) mca_base_var_enum_create("coll_libnbc_iallgather_algorithms", iallgather_algorithms, &new_enum);
217-
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
218-
"iallgather_algorithm",
219-
"Which iallgather algorithm is used: 0 ignore, 1 linear, 2 recursive_doubling",
220-
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
221-
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
222-
&libnbc_iallgather_algorithm);
223-
OBJ_RELEASE(new_enum);
224-
225-
libnbc_iallreduce_algorithm = 0;
226-
(void) mca_base_var_enum_create("coll_libnbc_iallreduce_algorithms", iallreduce_algorithms, &new_enum);
227-
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
228-
"iallreduce_algorithm",
229-
"Which iallreduce algorithm is used: 0 ignore, 1 ring, 2 binomial, 3 rabenseifner, 4 recursive_doubling",
230-
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
231-
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
232-
&libnbc_iallreduce_algorithm);
233-
OBJ_RELEASE(new_enum);
234-
235-
libnbc_ibcast_algorithm = 0;
236-
(void) mca_base_var_enum_create("coll_libnbc_ibcast_algorithms", ibcast_algorithms, &new_enum);
237-
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
238-
"ibcast_algorithm",
239-
"Which ibcast algorithm is used: 0 ignore, 1 linear, 2 binomial, 3 chain, 4 knomial",
240-
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
241-
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
242-
&libnbc_ibcast_algorithm);
243-
OBJ_RELEASE(new_enum);
244-
245-
libnbc_ibcast_knomial_radix = 4;
189+
&mca_coll_libnbc_component.dynamic_rules_verbose);
190+
191+
mca_coll_libnbc_component.dynamic_rules_filename = NULL;
246192
(void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
247-
"ibcast_knomial_radix", "k-nomial tree radix for the ibcast algorithm (radix > 1)",
193+
"dynamic_rules_filename",
194+
"Filename of configuration file that contains the dynamic (@runtime) decision function rules",
195+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
196+
OPAL_INFO_LVL_6,
197+
MCA_BASE_VAR_SCOPE_READONLY,
198+
&mca_coll_libnbc_component.dynamic_rules_filename);
199+
200+
mca_coll_libnbc_component.dynamic_rules_fileformat = 0;
201+
(void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
202+
"dynamic_rules_fileformat",
203+
"Format of configuration file that contains the dynamic (@runtime) decision function rules. Accepted values are: 0 <comm_size, msg_size>, 1 <nodes_nb, comm_size, msg_size>",
248204
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
249-
OPAL_INFO_LVL_9,
205+
OPAL_INFO_LVL_6,
250206
MCA_BASE_VAR_SCOPE_READONLY,
251-
&libnbc_ibcast_knomial_radix);
252-
253-
libnbc_iexscan_algorithm = 0;
254-
(void) mca_base_var_enum_create("coll_libnbc_iexscan_algorithms", iexscan_algorithms, &new_enum);
255-
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
256-
"iexscan_algorithm",
257-
"Which iexscan algorithm is used: 0 ignore, 1 linear, 2 recursive_doubling",
258-
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
259-
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
260-
&libnbc_iexscan_algorithm);
261-
OBJ_RELEASE(new_enum);
262-
263-
libnbc_ireduce_algorithm = 0;
264-
(void) mca_base_var_enum_create("coll_libnbc_ireduce_algorithms", ireduce_algorithms, &new_enum);
265-
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
266-
"ireduce_algorithm",
267-
"Which ireduce algorithm is used: 0 ignore, 1 chain, 2 binomial, 3 rabenseifner",
268-
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
269-
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
270-
&libnbc_ireduce_algorithm);
271-
OBJ_RELEASE(new_enum);
272-
273-
libnbc_iscan_algorithm = 0;
274-
(void) mca_base_var_enum_create("coll_libnbc_iscan_algorithms", iscan_algorithms, &new_enum);
275-
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
276-
"iscan_algorithm",
277-
"Which iscan algorithm is used: 0 ignore, 1 linear, 2 recursive_doubling",
278-
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
279-
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
280-
&libnbc_iscan_algorithm);
281-
OBJ_RELEASE(new_enum);
207+
&mca_coll_libnbc_component.dynamic_rules_fileformat);
208+
209+
ompi_coll_libnbc_allgather_check_forced_init ();
210+
ompi_coll_libnbc_allreduce_check_forced_init ();
211+
ompi_coll_libnbc_alltoall_check_forced_init ();
212+
ompi_coll_libnbc_alltoallv_check_forced_init ();
213+
ompi_coll_libnbc_alltoallw_check_forced_init ();
214+
ompi_coll_libnbc_bcast_check_forced_init ();
215+
ompi_coll_libnbc_exscan_check_forced_init ();
216+
ompi_coll_libnbc_reduce_check_forced_init ();
217+
ompi_coll_libnbc_scan_check_forced_init ();
282218

283219
return OMPI_SUCCESS;
284220
}
@@ -417,6 +353,27 @@ static int
417353
libnbc_module_enable(mca_coll_base_module_t *module,
418354
struct ompi_communicator_t *comm)
419355
{
/*
 * Fill the module's per-collective rule pointers (com_rules[]) for this
 * communicator.
 *
 * module: the libnbc module; it is cast to ompi_coll_libnbc_module_t.
 * comm:   the communicator whose size and node count select the rules.
 *
 * When the component holds a rule table (all_base_rules != NULL), every
 * com_rules[i] is looked up with ompi_coll_base_get_com_rule_ptr();
 * otherwise every entry is set to NULL.
 * Always returns OMPI_SUCCESS.
 */
356+
ompi_coll_libnbc_module_t* nbc_module = (ompi_coll_libnbc_module_t*) module;
357+
int i;
358+
if(mca_coll_libnbc_component.all_base_rules) {
359+
int size, nnodes;
360+
/* Communicator size used for the rule lookup: the remote group size for
 * an inter-communicator, the communicator size otherwise */
361+
if (OMPI_COMM_IS_INTER(comm)) {
362+
size = ompi_comm_remote_size(comm);
363+
} else {
364+
size = ompi_comm_size(comm);
365+
}
366+
/* Get the number of nodes in communicator */
367+
nnodes = ompi_coll_base_get_nnodes(comm);
/* One lookup per collective id, keyed by (collective, nnodes, size) */
368+
for(i=0;i<COLLCOUNT;i++) {
369+
nbc_module->com_rules[i] = ompi_coll_base_get_com_rule_ptr(mca_coll_libnbc_component.all_base_rules,
370+
i, nnodes, size );
371+
}
372+
} else {
/* No rule table loaded: clear every per-collective rule pointer */
373+
for(i=0;i<COLLCOUNT;i++) {
374+
nbc_module->com_rules[i] = NULL;
375+
}
376+
}
420377
/* All done */
421378
return OMPI_SUCCESS;
422379
}

0 commit comments

Comments
 (0)