Skip to content

Commit 94593ca

Browse files
committed
Adding ofi plugin to allow for opening a conduit to use ethernet/fabric.
modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/base/rml_base_stubs.c deleted: ../orte/mca/rml/ofi/.opal_ignore modified: ../orte/mca/rml/ofi/Makefile.am modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c modified: ../orte/test/system/ofi_conduit_stress.c Removed stale include directive modified: ../orte/mca/rml/ofi/Makefile.am The ofi plugin supports multiple providers, and identifies them by ofi_prov_id, changed the previous name conduit_id to ofi_prov_id modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_request.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c Adding ofi plugin to allow for opening a conduit to use ethernet/fabric. modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/base/rml_base_stubs.c deleted: ../orte/mca/rml/ofi/.opal_ignore modified: ../orte/mca/rml/ofi/Makefile.am modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c modified: ../orte/test/system/ofi_conduit_stress.c Removed stale include directive modified: ../orte/mca/rml/ofi/Makefile.am The ofi plugin supports multiple providers, and identifies them by ofi_prov_id, changed the previous name conduit_id to ofi_prov_id modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_request.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c Fixed merge issues, and minor pull-request comments modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c Adding ofi plugin to allow for opening a conduit to use ethernet/fabric. 
modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/base/rml_base_stubs.c deleted: ../orte/mca/rml/ofi/.opal_ignore modified: ../orte/mca/rml/ofi/Makefile.am modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c modified: ../orte/test/system/ofi_conduit_stress.c Removed stale include directive modified: ../orte/mca/rml/ofi/Makefile.am The ofi plugin supports multiple providers, and identifies them by ofi_prov_id, changed the previous name conduit_id to ofi_prov_id modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_request.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c Adding ofi plugin to allow for opening a conduit to use ethernet/fabric. modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/base/rml_base_stubs.c deleted: ../orte/mca/rml/ofi/.opal_ignore modified: ../orte/mca/rml/ofi/Makefile.am modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c modified: ../orte/test/system/ofi_conduit_stress.c Removed stale include directive modified: ../orte/mca/rml/ofi/Makefile.am Fixed merge issues, and minor pull-request comments modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c Removed trailing space modified: ../orte/mca/rml/ofi/rml_ofi_component.c Cleaned up test- ofi_conduit_stress.c modified: ../orte/test/system/ofi_conduit_stress.c cleaned up printing the provider info during initialisation modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c Signed-off-by: Anandhi S Jayakumar <[email protected]> Fixing warnings modified: ../orte/mca/rml/ofi/rml_ofi.h modified: 
../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi S Jayakumar <[email protected]> minor cleanup modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi S Jayakumar <[email protected]> more cleanup modified: ../orte/mca/rml/ofi/rml_ofi_component.c Signed-off-by: Anandhi S Jayakumar <[email protected]> Sending the ethernet address only in the get_contact_info, rest will be sent through modex modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c Signed-off-by: Anandhi S Jayakumar <[email protected]> Adding error logging on failures modified: ../orte/mca/rml/ofi/rml_ofi_component.c Signed-off-by: Anandhi S Jayakumar <[email protected]> Handling the OPAL_MODEX_SEND/RECV generically for all ofi providers. modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi S Jayakumar <[email protected]> Adding to build ofi for limited people new file: ../orte/mca/rml/ofi/.opal_ignore new file: ../orte/mca/rml/ofi/.opal_unignore Signed-off-by: Anandhi S Jayakumar <[email protected]> Removing the error logging for now modified: ../orte/mca/rml/ofi/rml_ofi_component.c
1 parent 8cc3f28 commit 94593ca

File tree

8 files changed

+757
-637
lines changed

8 files changed

+757
-637
lines changed

orte/mca/rml/base/base.h

-2
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,6 @@ typedef struct {
145145
opal_object_t super;
146146
opal_event_t ev;
147147
orte_rml_send_t send;
148-
/* conduit_id */
149-
orte_rml_conduit_t conduit_id;
150148
} orte_rml_send_request_t;
151149
OBJ_CLASS_DECLARATION(orte_rml_send_request_t);
152150

orte/mca/rml/base/rml_base_stubs.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,8 @@ orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes)
7474
"%s rml:base:open_conduit Component %s provided a conduit",
7575
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
7676
active->component->base.mca_component_name);
77-
/* retain this answer */
78-
if (NULL != ourmod) {
79-
free(ourmod);
80-
}
8177
ourmod = mod;
78+
break;
8279
}
8380
}
8481
}
@@ -140,6 +137,9 @@ char* orte_rml_API_get_contact_info(void)
140137
} else {
141138
tmp = NULL;
142139
}
140+
opal_output_verbose(10,orte_rml_base_framework.framework_output,
141+
"%s rml:base:get_contact_info() returning -> %s",
142+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),tmp);
143143
return tmp;
144144
}
145145

orte/mca/rml/ofi/.opal_unignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
anandhis
2+
rhc

orte/mca/rml/ofi/rml_ofi.h

+30-16
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,15 @@
2626

2727
#include "rml_ofi_request.h"
2828

29+
/** the maximum number of open OFI providers - assuming the system will have no more than 40 transports */
30+
#define MAX_OFI_PROVIDERS 40
31+
#define RML_OFI_PROV_ID_INVALID 0xFF
2932

3033
/** RML/OFI key values **/
3134
/* (char*) ofi socket address (type IN) of the node process is running on */
32-
#define OPAL_RML_OFI_FI_SOCKADDR_IN "rml.ofi.fisockaddrin"
35+
#define OPAL_RML_OFI_FI_SOCKADDR_IN "rml.ofi.fisockaddrin"
3336
/* (char*) ofi socket address (type PSM) of the node process is running on */
34-
#define OPAL_RML_OFI_FI_ADDR_PSMX "rml.ofi.fiaddrpsmx"
37+
#define OPAL_RML_OFI_FI_ADDR_PSMX "rml.ofi.fiaddrpsmx"
3538

3639
// MULTI_BUF_SIZE_FACTOR defines how large the multi recv buffer will be.
3740
// In order to use FI_MULTI_RECV feature efficiently, we need to have a
@@ -40,6 +43,8 @@
4043
#define MULTI_BUF_SIZE_FACTOR 128
4144
#define MIN_MULTI_BUF_SIZE (1024 * 1024)
4245

46+
#define OFIADDR "ofiaddr"
47+
4348
#define CLOSE_FID(fd) \
4449
do { \
4550
int _ret = 0; \
@@ -72,8 +77,8 @@ and also the corresponding fi_info
7277
**/
7378
typedef struct {
7479

75-
/** OFI conduit ID **/
76-
uint8_t conduit_id;
80+
/** ofi provider ID **/
81+
uint8_t ofi_prov_id;
7782

7883
/** fi_info for this transport */
7984
struct fi_info *fabric_info;
@@ -116,46 +121,52 @@ typedef struct {
116121

117122
struct fi_context rx_ctx1;
118123

119-
/* module associated with this conduit_id returned to rml
120-
from open_conduit call */
121-
struct orte_rml_ofi_module_t *ofi_module;
122-
123-
} ofi_transport_conduit_t;
124+
} ofi_transport_ofi_prov_t;
124125

125126

126127
struct orte_rml_ofi_module_t {
127128
orte_rml_base_module_t api;
128129

129130
/** current ofi transport id the component is using, this will be initialised
130-
** in the open_conduit() call **/
131+
** in the open_ofi_prov() call **/
131132
int cur_transport_id;
132133

133134
/** Fabric info structure of all supported transports in system **/
134135
struct fi_info *fi_info_list;
135136

136-
/** OFI ep and corr fi_info for all the transports (conduit) **/
137-
ofi_transport_conduit_t ofi_conduits[MAX_CONDUIT];
137+
/** OFI ep and corr fi_info for all the transports (ofi_providers) **/
138+
ofi_transport_ofi_prov_t ofi_prov[MAX_OFI_PROVIDERS];
138139

139140
size_t min_ofi_recv_buf_sz;
140141

141142
/** "Any source" address */
142143
fi_addr_t any_addr;
143144

144-
/** number of conduits currently opened **/
145-
uint8_t conduit_open_num;
145+
/** number of ofi providers currently opened **/
146+
uint8_t ofi_prov_open_num;
146147

147148
/** Unique message id for every message that is fragmented to be sent over OFI **/
148149
uint32_t cur_msgid;
149150

151+
/* hashtable stores the peer addresses */
152+
opal_hash_table_t peers;
153+
150154
opal_list_t recv_msg_queue_list;
151155
opal_list_t queued_routing_messages;
152156
opal_event_t *timer_event;
153157
struct timeval timeout;
154158
} ;
155159
typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t;
156160

161+
typedef struct {
162+
opal_object_t super;
163+
void* ofi_ep;
164+
size_t ofi_ep_len;
165+
} orte_rml_ofi_peer_t;
166+
OBJ_CLASS_DECLARATION(orte_rml_ofi_peer_t);
157167

158168
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ofi_component;
169+
extern orte_rml_ofi_module_t orte_rml_ofi;
159170

160171
int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod,
161172
orte_process_name_t* peer,
@@ -172,8 +183,11 @@ int orte_rml_ofi_send_nb(struct orte_rml_base_module_t *mod,
172183
void* cbdata);
173184

174185
/****************** INTERNAL OFI Functions*************/
175-
void free_conduit_resources( int conduit_id);
186+
void free_ofi_prov_resources( int ofi_prov_id);
176187
void print_provider_list_info (struct fi_info *fi );
188+
void print_provider_info (struct fi_info *cur_fi );
189+
int cq_progress_handler(int sd, short flags, void *cbdata);
190+
int get_ofi_prov_id( opal_list_t *attributes);
177191

178192
/** Send callback */
179193
int orte_rml_ofi_send_callback(struct fi_cq_data_entry *wc,
@@ -184,7 +198,7 @@ int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
184198
orte_rml_ofi_request_t*);
185199

186200
/* OFI Recv handler */
187-
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t conduit_id);
201+
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id);
188202

189203
END_C_DECLS
190204

0 commit comments

Comments
 (0)