Skip to content

Commit e1d31c7

Browse files
committed
Provide a better fix for #6285.
The issue was a little complicated due to the internal stack used in the convertor. The main issue was that in the case where we run out of iov space to save the raw description of the data while hanbdling a repetition (loop), instead of saving the current position and bailing out directly we reading of the next predefined type element. It worked in most cases, except the one identified by the HDF5 test. However, the biggest issue here was the drop in performance for all ensuing calls to the convertor pack/unpack, as instead of handling contiguous loops as a whole (and minimizing the number of memory copies) we copied data description by data description. Signed-off-by: George Bosilca <[email protected]>
1 parent 29915fc commit e1d31c7

File tree

2 files changed

+54
-17
lines changed

2 files changed

+54
-17
lines changed

opal/datatype/opal_convertor_raw.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
102102
/* now here we have a basic datatype */
103103
OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf,
104104
pConvertor->pDesc, pConvertor->count );
105-
DO_DEBUG( opal_output( 0, "raw 1. iov[%d] = {base %p, length %lu}\n",
105+
DO_DEBUG( opal_output( 0, "raw 1. iov[%d] = {base %p, length %" PRIsize_t "}\n",
106106
index, (void*)source_base, (unsigned long)blength ); );
107107
iov[index].iov_base = (IOVBASE_TYPE *) source_base;
108108
iov[index].iov_len = blength;
@@ -115,7 +115,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
115115
for(size_t i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) {
116116
OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf,
117117
pConvertor->pDesc, pConvertor->count );
118-
DO_DEBUG( opal_output( 0, "raw 2. iov[%d] = {base %p, length %lu}\n",
118+
DO_DEBUG( opal_output( 0, "raw 2. iov[%d] = {base %p, length %" PRIsize_t "}\n",
119119
index, (void*)source_base, (unsigned long)blength ); );
120120
iov[index].iov_base = (IOVBASE_TYPE *) source_base;
121121
iov[index].iov_len = blength;
@@ -189,13 +189,20 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
189189
source_base += pElem->loop.extent;
190190
raw_data += end_loop->size;
191191
count_desc--;
192+
DO_DEBUG( opal_output( 0, "raw contig loop generate iov[%d] = {base %p, length %" PRIsize_t "}"
193+
"space %lu [pos_desc %d]\n",
194+
index, iov[index].iov_base, iov[index].iov_len,
195+
(unsigned long)raw_data, pos_desc ); );
192196
}
193197
source_base -= offset;
194198
if( 0 == count_desc ) { /* completed */
195199
pos_desc += pElem->loop.items + 1;
196200
goto update_loop_description;
197201
}
198202
}
203+
if( index == *iov_count ) { /* all iov have been filled, we need to bail out */
204+
goto complete_loop;
205+
}
199206
local_disp = (ptrdiff_t)source_base - local_disp;
200207
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc,
201208
pStack->disp + local_disp);

test/datatype/ddt_raw2.c

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2019 The University of Tennessee and The University
4+
* of Tennessee Research Foundation. All rights
5+
* reserved.
6+
* Copyright (c) 2019 Research Organization for Information Science
7+
* and Technology (RIST). All rights reserved.
8+
* $COPYRIGHT$
9+
*
10+
* Additional copyrights may follow
11+
*
12+
* $HEADER$
13+
*/
14+
115
#include "ompi_config.h"
216
#include "ddt_lib.h"
317
#include "opal/datatype/opal_convertor.h"
@@ -12,11 +26,12 @@
1226
#include <stdio.h>
1327

1428

15-
int mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
16-
int count,
17-
struct iovec **iov,
18-
uint32_t *iovec_count,
19-
int increment)
29+
static int
30+
mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
31+
int count,
32+
struct iovec **iov,
33+
uint32_t *iovec_count,
34+
int increment)
2035
{
2136

2237

@@ -310,20 +325,35 @@ int main (int argc, char *argv[]) {
310325
datatype->super.opt_desc.used = 184;
311326
datatype->super.opt_desc.desc = descs;
312327

313-
uint32_t iovec_count = 0;
314-
struct iovec * iov = NULL;
315-
mca_common_ompio_decode_datatype ( datatype, 1, &iov, &iovec_count, 300);
316-
uint32_t iovec_count2 = 0;
317-
struct iovec * iov2 = NULL;
318-
mca_common_ompio_decode_datatype ( datatype, 1, &iov2, &iovec_count2, 100);
328+
/* Get the entire raw description of the datatype in a single call */
329+
uint32_t iovec_count_300 = 0;
330+
struct iovec * iov_300 = NULL;
331+
mca_common_ompio_decode_datatype ( datatype, 1, &iov_300, &iovec_count_300, 300);
332+
/* Get the raw description of the datatype 10 elements at the time. This stresses some
333+
* of the execution paths in the convertor raw.
334+
*/
335+
uint32_t iovec_count_10 = 0;
336+
struct iovec * iov_10 = NULL;
337+
mca_common_ompio_decode_datatype ( datatype, 1, &iov_10, &iovec_count_10, 10);
338+
/* Get the raw description of the datatype one element at the time. This stresses all
339+
* execution paths in the convertor raw.
340+
*/
341+
uint32_t iovec_count_1 = 0;
342+
struct iovec * iov_1 = NULL;
343+
mca_common_ompio_decode_datatype ( datatype, 1, &iov_1, &iovec_count_1, 1);
344+
319345

320-
assert(iovec_count == iovec_count2);
346+
assert(iovec_count_300 == iovec_count_10);
347+
assert(iovec_count_300 == iovec_count_1);
321348
// assert(iov[100].iov_base == iov2[100].iov_base);
322349
// assert(iov[100].iov_len == iov2[100].iov_len);
323-
for (int i=0; i<iovec_count; i++) {
324-
assert(iov[i].iov_base == iov2[i].iov_base);
325-
assert(iov[i].iov_len == iov2[i].iov_len);
350+
for (uint32_t i = 0; i < iovec_count_300; i++) {
351+
assert(iov_300[i].iov_base == iov_10[i].iov_base);
352+
assert(iov_300[i].iov_len == iov_10[i].iov_len);
353+
assert(iov_300[i].iov_base == iov_1[i].iov_base);
354+
assert(iov_300[i].iov_len == iov_1[i].iov_len);
326355
}
327356

328357
return 0;
329358
}
359+

0 commit comments

Comments
 (0)