Skip to content

Commit a69a84e

Browse files
authored
Merge pull request #4599 from hjelmn/v3.0.x_vader
v3.0.x: btl/vader: change the way fast boxes are used
2 parents f547296 + 2fa7168 commit a69a84e

File tree

5 files changed

+19
-66
lines changed

5 files changed

+19
-66
lines changed

opal/mca/btl/vader/btl_vader_fbox.h

Lines changed: 19 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ typedef union mca_btl_vader_fbox_hdr_t {
2222
* in multiple instructions. To ensure that seq is never loaded before tag
2323
* and the tag is never read before seq put them in the same 32-bits of the
2424
* header. */
25+
/** message size */
26+
uint32_t size;
2527
/** message tag */
2628
uint16_t tag;
2729
/** sequence number */
2830
uint16_t seq;
29-
/** message size */
30-
uint32_t size;
3131
} data;
3232
uint64_t ival;
3333
} mca_btl_vader_fbox_hdr_t;
@@ -52,20 +52,24 @@ static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr,
5252
{
5353
mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}};
5454
hdr->ival = tmp.ival;
55+
opal_atomic_wmb ();
5556
}
5657

5758
/* attempt to reserve a contiguous segment from the remote ep */
58-
static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t *ep, size_t size)
59+
static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsigned char tag,
60+
void * restrict header, const size_t header_size,
61+
void * restrict payload, const size_t payload_size)
5962
{
6063
const unsigned int fbox_size = mca_btl_vader_component.fbox_size;
64+
size_t size = header_size + payload_size;
6165
unsigned int start, end, buffer_free;
6266
size_t data_size = size;
63-
unsigned char *dst;
67+
unsigned char *dst, *data;
6468
bool hbs, hbm;
6569

6670
/* don't try to use the per-peer buffer for messages that will fill up more than 25% of the buffer */
6771
if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer || size > (fbox_size >> 2))) {
68-
return NULL;
72+
return false;
6973
}
7074

7175
OPAL_THREAD_LOCK(&ep->lock);
@@ -119,15 +123,23 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
119123
ep->fbox_out.end = (hbs << 31) | end;
120124
opal_atomic_wmb ();
121125
OPAL_THREAD_UNLOCK(&ep->lock);
122-
return NULL;
126+
return false;
123127
}
124128
}
125129

126130
BTL_VERBOSE(("writing fragment of size %u to offset %u {start: 0x%x, end: 0x%x (hbs: %d)} of peer's buffer. free = %u",
127131
(unsigned int) size, end, start, end, hbs, buffer_free));
128132

133+
data = dst + sizeof (mca_btl_vader_fbox_hdr_t);
134+
135+
memcpy (data, header, header_size);
136+
if (payload) {
137+
/* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
138+
memcpy (data + header_size, payload, payload_size);
139+
}
140+
129141
/* write out part of the header now. the tag will be written when the data is available */
130-
mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0, ep->fbox_out.seq++, data_size);
142+
mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), tag, ep->fbox_out.seq++, data_size);
131143

132144
end += size;
133145

@@ -145,40 +157,6 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
145157
opal_atomic_wmb ();
146158
OPAL_THREAD_UNLOCK(&ep->lock);
147159

148-
return dst + sizeof (mca_btl_vader_fbox_hdr_t);
149-
}
150-
151-
static inline void mca_btl_vader_fbox_send (unsigned char * restrict fbox, unsigned char tag)
152-
{
153-
/* ensure data writes have completed before we mark the data as available */
154-
opal_atomic_wmb ();
155-
156-
/* the header precedes the fbox buffer */
157-
MCA_BTL_VADER_FBOX_HDR ((intptr_t) fbox)[-1].data.tag = tag;
158-
}
159-
160-
static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsigned char tag,
161-
void * restrict header, const size_t header_size,
162-
void * restrict payload, const size_t payload_size)
163-
{
164-
const size_t total_size = header_size + payload_size;
165-
unsigned char * restrict fbox;
166-
167-
fbox = mca_btl_vader_reserve_fbox(ep, total_size);
168-
if (OPAL_UNLIKELY(NULL == fbox)) {
169-
return false;
170-
}
171-
172-
memcpy (fbox, header, header_size);
173-
if (payload) {
174-
/* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
175-
memcpy (fbox + header_size, payload, payload_size);
176-
}
177-
178-
/* mark the fbox as sent */
179-
mca_btl_vader_fbox_send (fbox, tag);
180-
181-
/* send complete */
182160
return true;
183161
}
184162

opal/mca/btl/vader/btl_vader_frag.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag)
3636

3737
frag->base.des_segments = frag->segments;
3838
frag->base.des_segment_count = 1;
39-
frag->fbox = NULL;
4039
}
4140

4241
int mca_btl_vader_frag_init (opal_free_list_item_t *item, void *ctx)

opal/mca/btl/vader/btl_vader_frag.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ struct mca_btl_vader_frag_t {
6767
mca_btl_base_segment_t segments[2];
6868
/** endpoint this fragment is active on */
6969
struct mca_btl_base_endpoint_t *endpoint;
70-
/** fast box in use (or NULL) */
71-
unsigned char * restrict fbox;
7270
/** fragment header (in the shared memory region) */
7371
mca_btl_vader_hdr_t *hdr;
7472
/** free list this fragment was allocated within */
@@ -95,7 +93,6 @@ static inline void mca_btl_vader_frag_return (mca_btl_vader_frag_t *frag)
9593

9694
frag->segments[0].seg_addr.pval = (char *)(frag->hdr + 1);
9795
frag->base.des_segment_count = 1;
98-
frag->fbox = NULL;
9996

10097
opal_free_list_return (frag->my_list, (opal_free_list_item_t *)frag);
10198
}

opal/mca/btl/vader/btl_vader_module.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,6 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
440440
{
441441
const size_t total_size = reserve + *size;
442442
mca_btl_vader_frag_t *frag;
443-
unsigned char *fbox;
444443
void *data_ptr;
445444
int rc;
446445

@@ -506,19 +505,6 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
506505
frag->base.des_segment_count = 2;
507506
} else {
508507
#endif
509-
510-
/* inline send */
511-
if (OPAL_LIKELY(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP & flags)) {
512-
/* try to reserve a fast box for this transfer only if the
513-
* fragment does not belong to the caller */
514-
fbox = mca_btl_vader_reserve_fbox (endpoint, total_size);
515-
if (OPAL_LIKELY(fbox)) {
516-
frag->segments[0].seg_addr.pval = fbox;
517-
}
518-
519-
frag->fbox = fbox;
520-
}
521-
522508
/* NTH: the convertor adds some latency so we bypass it here */
523509
memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve), data_ptr, *size);
524510
frag->segments[0].seg_len = total_size;

opal/mca/btl/vader/btl_vader_send.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,6 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
4242
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
4343
const size_t total_size = frag->segments[0].seg_len;
4444

45-
if (OPAL_LIKELY(frag->fbox)) {
46-
mca_btl_vader_fbox_send (frag->fbox, tag);
47-
mca_btl_vader_frag_complete (frag);
48-
49-
return 1;
50-
}
51-
5245
/* in order to work around a long standing ob1 bug (see #3845) we have to always
5346
* make the callback. once this is fixed in ob1 we can restore the code below. */
5447
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;

0 commit comments

Comments
 (0)