26
26
27
27
#include "rml_ofi_request.h"
28
28
29
+ /** the maximum number of open OFI providers - assuming the system will have no more than 40 transports */
30
+ #define MAX_OFI_PROVIDERS 40
31
+ #define RML_OFI_PROV_ID_INVALID 0xFF
29
32
30
33
/** RML/OFI key values **/
31
34
/* (char*) OFI socket address (type IN) of the node the process is running on */
32
- #define OPAL_RML_OFI_FI_SOCKADDR_IN "rml.ofi.fisockaddrin"
35
+ #define OPAL_RML_OFI_FI_SOCKADDR_IN "rml.ofi.fisockaddrin"
33
36
/* (char*) OFI socket address (type PSM) of the node the process is running on */
34
- #define OPAL_RML_OFI_FI_ADDR_PSMX "rml.ofi.fiaddrpsmx"
37
+ #define OPAL_RML_OFI_FI_ADDR_PSMX "rml.ofi.fiaddrpsmx"
35
38
36
39
// MULTI_BUF_SIZE_FACTOR defines how large the multi recv buffer will be.
37
40
// In order to use FI_MULTI_RECV feature efficiently, we need to have a
40
43
#define MULTI_BUF_SIZE_FACTOR 128
41
44
#define MIN_MULTI_BUF_SIZE (1024 * 1024)
42
45
46
+ #define OFIADDR "ofiaddr"
47
+
43
48
#define CLOSE_FID (fd ) \
44
49
do { \
45
50
int _ret = 0; \
@@ -72,8 +77,8 @@ and also the corresponding fi_info
72
77
**/
73
78
typedef struct {
74
79
75
- /** OFI conduit ID **/
76
- uint8_t conduit_id ;
80
+ /** ofi provider ID **/
81
+ uint8_t ofi_prov_id ;
77
82
78
83
/** fi_info for this transport */
79
84
struct fi_info * fabric_info ;
@@ -116,46 +121,52 @@ typedef struct {
116
121
117
122
struct fi_context rx_ctx1 ;
118
123
119
- /* module associated with this conduit_id returned to rml
120
- from open_conduit call */
121
- struct orte_rml_ofi_module_t * ofi_module ;
122
-
123
- } ofi_transport_conduit_t ;
124
+ } ofi_transport_ofi_prov_t ;
124
125
125
126
126
127
struct orte_rml_ofi_module_t {
127
128
orte_rml_base_module_t api ;
128
129
129
130
/** current ofi transport id the component is using, this will be initialised
130
- ** in the open_conduit () call **/
131
+ ** in the open_ofi_prov () call **/
131
132
int cur_transport_id ;
132
133
133
134
/** Fabric info structure of all supported transports in system **/
134
135
struct fi_info * fi_info_list ;
135
136
136
- /** OFI ep and corr fi_info for all the transports (conduit ) **/
137
- ofi_transport_conduit_t ofi_conduits [ MAX_CONDUIT ];
137
+ /** OFI endpoint and corresponding fi_info for each of the transports (OFI providers) **/
138
+ ofi_transport_ofi_prov_t ofi_prov [ MAX_OFI_PROVIDERS ];
138
139
139
140
size_t min_ofi_recv_buf_sz ;
140
141
141
142
/** "Any source" address */
142
143
fi_addr_t any_addr ;
143
144
144
- /** number of conduits currently opened **/
145
- uint8_t conduit_open_num ;
145
+ /** number of ofi providers currently opened **/
146
+ uint8_t ofi_prov_open_num ;
146
147
147
148
/** Unique message id for every message that is fragmented to be sent over OFI **/
148
149
uint32_t cur_msgid ;
149
150
151
+ /* hash table that stores the peer addresses */
152
+ opal_hash_table_t peers ;
153
+
150
154
opal_list_t recv_msg_queue_list ;
151
155
opal_list_t queued_routing_messages ;
152
156
opal_event_t * timer_event ;
153
157
struct timeval timeout ;
154
158
} ;
155
159
typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t ;
156
160
161
+ typedef struct {
162
+ opal_object_t super ;
163
+ void * ofi_ep ;
164
+ size_t ofi_ep_len ;
165
+ } orte_rml_ofi_peer_t ;
166
+ OBJ_CLASS_DECLARATION (orte_rml_ofi_peer_t );
157
167
158
168
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ofi_component ;
169
+ extern orte_rml_ofi_module_t orte_rml_ofi ;
159
170
160
171
int orte_rml_ofi_send_buffer_nb (struct orte_rml_base_module_t * mod ,
161
172
orte_process_name_t * peer ,
@@ -172,8 +183,11 @@ int orte_rml_ofi_send_nb(struct orte_rml_base_module_t *mod,
172
183
void * cbdata );
173
184
174
185
/****************** INTERNAL OFI Functions*************/
175
- void free_conduit_resources ( int conduit_id );
186
+ void free_ofi_prov_resources ( int ofi_prov_id );
176
187
void print_provider_list_info (struct fi_info * fi );
188
+ void print_provider_info (struct fi_info * cur_fi );
189
+ int cq_progress_handler (int sd , short flags , void * cbdata );
190
+ int get_ofi_prov_id ( opal_list_t * attributes );
177
191
178
192
/** Send callback */
179
193
int orte_rml_ofi_send_callback (struct fi_cq_data_entry * wc ,
@@ -184,7 +198,7 @@ int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
184
198
orte_rml_ofi_request_t * );
185
199
186
200
/* OFI Recv handler */
187
- int orte_rml_ofi_recv_handler (struct fi_cq_data_entry * wc , uint8_t conduit_id );
201
+ int orte_rml_ofi_recv_handler (struct fi_cq_data_entry * wc , uint8_t ofi_prov_id );
188
202
189
203
END_C_DECLS
190
204
0 commit comments