@@ -22,12 +22,12 @@ typedef union mca_btl_vader_fbox_hdr_t {
22
22
* in multiple instructions. To ensure that seq is never loaded before tag
23
23
* and the tag is never read before seq put them in the same 32-bits of the
24
24
* header. */
25
+ /** message size */
26
+ uint32_t size ;
25
27
/** message tag */
26
28
uint16_t tag ;
27
29
/** sequence number */
28
30
uint16_t seq ;
29
- /** message size */
30
- uint32_t size ;
31
31
} data ;
32
32
uint64_t ival ;
33
33
} mca_btl_vader_fbox_hdr_t ;
@@ -52,20 +52,24 @@ static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr,
52
52
{
53
53
mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag , .seq = seq , .size = size }};
54
54
hdr -> ival = tmp .ival ;
55
+ opal_atomic_wmb ();
55
56
}
56
57
57
58
/* attempt to reserve a contiguous segment from the remote ep */
58
- static inline unsigned char * mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t * ep , size_t size )
59
+ static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t * ep , unsigned char tag ,
60
+ void * restrict header , const size_t header_size ,
61
+ void * restrict payload , const size_t payload_size )
59
62
{
60
63
const unsigned int fbox_size = mca_btl_vader_component .fbox_size ;
64
+ size_t size = header_size + payload_size ;
61
65
unsigned int start , end , buffer_free ;
62
66
size_t data_size = size ;
63
- unsigned char * dst ;
67
+ unsigned char * dst , * data ;
64
68
bool hbs , hbm ;
65
69
66
70
/* don't try to use the per-peer buffer for messages that will fill up more than 25% of the buffer */
67
71
if (OPAL_UNLIKELY (NULL == ep -> fbox_out .buffer || size > (fbox_size >> 2 ))) {
68
- return NULL ;
72
+ return false ;
69
73
}
70
74
71
75
OPAL_THREAD_LOCK (& ep -> lock );
@@ -119,15 +123,23 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
119
123
ep -> fbox_out .end = (hbs << 31 ) | end ;
120
124
opal_atomic_wmb ();
121
125
OPAL_THREAD_UNLOCK (& ep -> lock );
122
- return NULL ;
126
+ return false ;
123
127
}
124
128
}
125
129
126
130
BTL_VERBOSE (("writing fragment of size %u to offset %u {start: 0x%x, end: 0x%x (hbs: %d)} of peer's buffer. free = %u" ,
127
131
(unsigned int ) size , end , start , end , hbs , buffer_free ));
128
132
133
+ data = dst + sizeof (mca_btl_vader_fbox_hdr_t );
134
+
135
+ memcpy (data , header , header_size );
136
+ if (payload ) {
137
+ /* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
138
+ memcpy (data + header_size , payload , payload_size );
139
+ }
140
+
129
141
/* write out part of the header now. the tag will be written when the data is available */
130
- mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR (dst ), 0 , ep -> fbox_out .seq ++ , data_size );
142
+ mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR (dst ), tag , ep -> fbox_out .seq ++ , data_size );
131
143
132
144
end += size ;
133
145
@@ -145,40 +157,6 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
145
157
opal_atomic_wmb ();
146
158
OPAL_THREAD_UNLOCK (& ep -> lock );
147
159
148
- return dst + sizeof (mca_btl_vader_fbox_hdr_t );
149
- }
150
-
151
- static inline void mca_btl_vader_fbox_send (unsigned char * restrict fbox , unsigned char tag )
152
- {
153
- /* ensure data writes have completed before we mark the data as available */
154
- opal_atomic_wmb ();
155
-
156
- /* the header proceeds the fbox buffer */
157
- MCA_BTL_VADER_FBOX_HDR ((intptr_t ) fbox )[-1 ].data .tag = tag ;
158
- }
159
-
160
- static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t * ep , unsigned char tag ,
161
- void * restrict header , const size_t header_size ,
162
- void * restrict payload , const size_t payload_size )
163
- {
164
- const size_t total_size = header_size + payload_size ;
165
- unsigned char * restrict fbox ;
166
-
167
- fbox = mca_btl_vader_reserve_fbox (ep , total_size );
168
- if (OPAL_UNLIKELY (NULL == fbox )) {
169
- return false;
170
- }
171
-
172
- memcpy (fbox , header , header_size );
173
- if (payload ) {
174
- /* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
175
- memcpy (fbox + header_size , payload , payload_size );
176
- }
177
-
178
- /* mark the fbox as sent */
179
- mca_btl_vader_fbox_send (fbox , tag );
180
-
181
- /* send complete */
182
160
return true;
183
161
}
184
162
0 commit comments