From fb0796020e90ebb068bfda39f8af98c7146ebc9c Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 25 Feb 2021 10:16:36 -0500 Subject: [PATCH] Fixing the partial pack unpack issue. When unpacking a partial predefined element check the boundaries of the description vector type, and adjust the memory pointer accordingly (to reflect not only when a single basic type was correctly unpacked, but also when an entire blocklen has been unpacked). Signed-off-by: George Bosilca --- opal/datatype/opal_datatype_unpack.c | 161 ++++++++++++------------- opal/datatype/opal_datatype_unpack.h | 89 +++++++------- test/datatype/Makefile.am | 8 +- test/datatype/partial.c | 171 +++++++++++++++++++++++++++ test/datatype/unpack_ooo.c | 81 +++++++++---- 5 files changed, 362 insertions(+), 148 deletions(-) create mode 100644 test/datatype/partial.c diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index 629c7762ec0..b97f2548b8a 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -153,7 +153,7 @@ int32_t opal_unpack_homogeneous_contig_function(opal_convertor_t *pConv, struct } } } - *out_size = iov_idx; /* we only reach this line after the for loop succesfully complete */ + *out_size = iov_idx; /* we only reach this line after the for loop successfully complete */ *max_data = pConv->bConverted - initial_bytes_converted; if (pConv->bConverted == pConv->local_size) { pConv->flags |= CONVERTOR_COMPLETED; @@ -173,63 +173,71 @@ int32_t opal_unpack_homogeneous_contig_function(opal_convertor_t *pConv, struct * change the content of the data (as in all conversions that require changing the size * of the exponent or mantissa). 
*/ -static inline void opal_unpack_partial_datatype(opal_convertor_t *pConvertor, dt_elem_desc_t *pElem, - unsigned char *partial_data, - ptrdiff_t start_position, size_t length, - unsigned char **user_buffer) +static inline void +opal_unpack_partial_predefined(opal_convertor_t *pConvertor, const dt_elem_desc_t *pElem, + size_t *COUNT, unsigned char **packed, + unsigned char **memory, size_t *SPACE) { char unused_byte = 0x7F, saved_data[16]; unsigned char temporary[16], *temporary_buffer = temporary; - unsigned char *user_data = *user_buffer + pElem->elem.disp; - size_t count_desc = 1; + unsigned char *user_data = *memory + pElem->elem.disp; size_t data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; + unsigned char *partial_data = *packed; + ptrdiff_t start_position = pConvertor->partial_length; + size_t length = data_length - start_position; + size_t count_desc = 1; + dt_elem_desc_t single_elem = { .elem = { .common = pElem->elem.common, .count = 1, .blocklen = 1, + .extent = data_length, /* advance by a full data element */ + .disp = 0 /* right where the pointer is */ } }; + if( *SPACE < length ) { + length = *SPACE; + } - DO_DEBUG(opal_output(0, - "unpack partial data start %lu end %lu data_length %lu user %p\n" - "\tbConverted %lu total_length %lu count %ld\n", - (unsigned long) start_position, (unsigned long) start_position + length, - (unsigned long) data_length, (void *) *user_buffer, - (unsigned long) pConvertor->bConverted, - (unsigned long) pConvertor->local_size, pConvertor->count);); - - /* Find a byte that is not used in the partial buffer */ -find_unused_byte: - for (size_t i = 0; i < length; i++) { - if (unused_byte == partial_data[i]) { + DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n" + "\tbConverted %lu total_length %lu count %ld\n", + (unsigned long)start_position, (unsigned long)start_position + length, + (unsigned long)data_length, (void*)*memory, + (unsigned 
long)pConvertor->bConverted, + (unsigned long)pConvertor->local_size, pConvertor->count ); ); + COMPUTE_CSUM( partial_data, length, pConvertor ); + + /* Find a byte value that is not used in the partial buffer. We use it as a marker + * to identify what has not been modified by the unpack call. */ + find_unused_byte: + for (size_t i = 0; i < length; i++ ) { + if( unused_byte == partial_data[i] ) { unused_byte--; goto find_unused_byte; } } - /* Copy and fill the rest of the buffer with the unused byte */ - memset(temporary, unused_byte, data_length); - MEMCPY(temporary + start_position, partial_data, length); + /* Prepare a full element of the predefined type, by populating an entire type + * with the unused byte and then putting the partial data at the right position. */ + memset( temporary, unused_byte, data_length ); + MEMCPY( temporary + start_position, partial_data, length ); + /* Save the original content of the user memory */ #if OPAL_CUDA_SUPPORT /* In the case where the data is being unpacked from device memory, need to - * use the special host to device memory copy. Note this code path was only - * seen on large receives of noncontiguous data via buffered sends. */ - pConvertor->cbmemcpy(saved_data, user_data, data_length, pConvertor); + * use the special host to device memory copy. 
*/ + pConvertor->cbmemcpy(saved_data, user_data, data_length, pConvertor ); #else - /* Save the content of the user memory */ - MEMCPY(saved_data, user_data, data_length); + MEMCPY( saved_data, user_data, data_length ); #endif /* Then unpack the data into the user memory */ - UNPACK_PREDEFINED_DATATYPE(pConvertor, pElem, count_desc, temporary_buffer, *user_buffer, + UNPACK_PREDEFINED_DATATYPE(pConvertor, &single_elem, count_desc, temporary_buffer, user_data, data_length); - /* reload the length as it is reset by the macro */ + /* reload the length and user buffer as they have been updated by the macro */ data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; + user_data = *memory + pElem->elem.disp; - /* For every occurrence of the unused byte move data from the saved - * buffer back into the user memory. - */ + /* Rebuild the data by pulling back the unmodified bytes from the original + * content in the user memory. */ #if OPAL_CUDA_SUPPORT /* Need to copy the modified user_data again so we can see which - * bytes need to be converted back to their original values. Note - * this code path was only seen on large receives of noncontiguous - * data via buffered sends. */ + * bytes need to be converted back to their original values. */ { char resaved_data[16]; pConvertor->cbmemcpy(resaved_data, user_data, data_length, pConvertor); @@ -245,6 +253,16 @@ static inline void opal_unpack_partial_datatype(opal_convertor_t *pConvertor, dt } } #endif + pConvertor->partial_length = (pConvertor->partial_length + length) % data_length; + *SPACE -= length; + *packed += length; + if (0 == pConvertor->partial_length) { + (*COUNT)--; /* we have enough to complete one full predefined type */ + *memory += data_length; + if (0 == (*COUNT % pElem->elem.blocklen)) { + *memory += pElem->elem.extent - (pElem->elem.blocklen * data_length); + } + } } /* The pack/unpack functions need a cleanup. 
I have to create a proper interface to access @@ -271,9 +289,8 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct size_t iov_len_local; uint32_t iov_count; - DO_DEBUG(opal_output(0, "opal_convertor_generic_simple_unpack( %p, {%p, %lu}, %u )\n", - (void *) pConvertor, (void *) iov[0].iov_base, - (unsigned long) iov[0].iov_len, *out_size);); + DO_DEBUG( opal_output( 0, "opal_convertor_generic_simple_unpack( %p, iov[%u] = {%p, %lu} )\n", + (void*)pConvertor, *out_size, (void*)iov[0].iov_base, (unsigned long)iov[0].iov_len ); ); description = pConvertor->use_desc->desc; @@ -300,26 +317,25 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct iov_ptr = (unsigned char *) iov[iov_count].iov_base; iov_len_local = iov[iov_count].iov_len; - if (0 != pConvertor->partial_length) { - size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; - size_t missing_length = element_length - pConvertor->partial_length; - - assert(pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA); - COMPUTE_CSUM(iov_ptr, missing_length, pConvertor); - opal_unpack_partial_datatype(pConvertor, pElem, iov_ptr, pConvertor->partial_length, - (size_t)(element_length - pConvertor->partial_length), - &conv_ptr); - --count_desc; - if (0 == count_desc) { - conv_ptr = pConvertor->pBaseBuf + pStack->disp; - pos_desc++; /* advance to the next data */ - UPDATE_INTERNAL_COUNTERS(description, pos_desc, pElem, count_desc); - } - iov_ptr += missing_length; - iov_len_local -= missing_length; - pConvertor->partial_length = 0; /* nothing more inside */ - } + /* Deal with all types of partial predefined datatype unpacking, including when + * unpacking a partial predefined element and when unpacking a part smaller than + * the blocklen. 
+ */ if (pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) { + if (0 != pConvertor->partial_length) { /* partial predefined element */ + assert( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ); + opal_unpack_partial_predefined( pConvertor, pElem, &count_desc, + &iov_ptr, &conv_ptr, &iov_len_local ); + if (0 == count_desc) { /* the end of the vector ? */ + assert( 0 == pConvertor->partial_length ); + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + pos_desc++; /* advance to the next data */ + UPDATE_INTERNAL_COUNTERS(description, pos_desc, pElem, count_desc); + goto next_vector; + } + if( 0 == iov_len_local ) + goto complete_loop; + } if (((size_t) pElem->elem.count * pElem->elem.blocklen) != count_desc) { /* we have a partial (less than blocklen) basic datatype */ int rc = UNPACK_PARTIAL_BLOCKLEN(pConvertor, pElem, count_desc, iov_ptr, conv_ptr, @@ -336,6 +352,7 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct } while (1) { + next_vector: while (pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) { /* we have a basic datatype (working on full blocks) */ UNPACK_PREDEFINED_DATATYPE(pConvertor, pElem, count_desc, iov_ptr, conv_ptr, @@ -401,19 +418,14 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct } } complete_loop: - assert(pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED); - if ((pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) && (0 != iov_len_local)) { - unsigned char *temp = conv_ptr; + assert( pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED ); + if( (pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) && (0 != iov_len_local) ) { + unsigned char* temp = conv_ptr; /* We have some partial data here. Let's copy it into the convertor * and keep it hot until the next round. 
*/ - assert(iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size); - COMPUTE_CSUM(iov_ptr, iov_len_local, pConvertor); - - opal_unpack_partial_datatype(pConvertor, pElem, iov_ptr, 0, iov_len_local, &temp); - - pConvertor->partial_length = iov_len_local; - iov_len_local = 0; + assert( iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size ); + opal_unpack_partial_predefined(pConvertor, pElem, &count_desc, &iov_ptr, &temp, &iov_len_local); } iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ @@ -543,11 +555,6 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec unsigned char *conv_ptr, *iov_ptr; uint32_t iov_count; size_t iov_len_local; -#if 0 - const opal_convertor_master_t *master = pConvertor->master; - ptrdiff_t advance; /* number of bytes that we should advance the buffer */ -#endif - size_t rc; DO_DEBUG(opal_output(0, "opal_convertor_general_unpack( %p, {%p, %lu}, %d )\n", (void *) pConvertor, (void *) iov[0].iov_base, @@ -609,13 +616,9 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec * and keep it hot until the next round. */ assert(iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size); - COMPUTE_CSUM(iov_ptr, iov_len_local, pConvertor); - - opal_unpack_partial_datatype(pConvertor, pElem, iov_ptr, 0, iov_len_local, - &temp); - - pConvertor->partial_length = iov_len_local; - iov_len_local = 0; + opal_unpack_partial_predefined(pConvertor, pElem, &count_desc, &iov_ptr, + &temp, &iov_len_local); + assert( 0 == iov_len_local ); } goto complete_loop; } diff --git a/opal/datatype/opal_datatype_unpack.h b/opal/datatype/opal_datatype_unpack.h index 9ec013d54a9..3601a51981a 100644 --- a/opal/datatype/opal_datatype_unpack.h +++ b/opal/datatype/opal_datatype_unpack.h @@ -29,9 +29,10 @@ #endif /** - * This function deals only with partial elements. 
The COUNT points however to the whole leftover - * count, but this function is only expected to operate on an amount less than blength, that would - * allow the rest of the pack process to handle only entire blength blocks (plus the left over). + * This function deals only with partial elements. The COUNT points however to + * the whole leftover count, but this function is only expected to operate on + * an amount less than blength, that would allow the rest of the pack process + * to handle only entire blength blocks (plus the left over). * * Return 1 if we are now aligned on a block, 0 otherwise. */ @@ -47,6 +48,8 @@ static inline int unpack_partial_blocklen(opal_convertor_t *CONVERTOR, const dt_ assert(*(COUNT) <= ((size_t)(_elem->count * _elem->blocklen))); + if( (*SPACE) < do_now_bytes ) /* Can we do anything ? */ + return 0; /** * First check if we already did something on this element ? The COUNT is the number * of remaining predefined types in the current elem, not how many predefined types @@ -63,19 +66,18 @@ static inline int unpack_partial_blocklen(opal_convertor_t *CONVERTOR, const dt_ do_now_bytes *= do_now; - OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count); - DO_DEBUG(opal_output(0, "unpack memcpy( %p, %p, %lu ) => space %lu [prolog]\n", - (void *) _memory, (void *) _packed, (unsigned long) do_now_bytes, - (unsigned long) (*(SPACE)));); - MEMCPY_CSUM(_memory, _packed, do_now_bytes, (CONVERTOR)); - *(memory) += (ptrdiff_t) do_now_bytes; - if (do_now == left_in_block) /* compensate if completed a blocklen */ - *(memory) += _elem->extent - - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); - - *(COUNT) -= do_now; - *(SPACE) -= do_now_bytes; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack memcpy( %p [%ld], %p, %lu ) => space %lu [prolog]\n", + 
(void*)_memory, _memory - CONVERTOR->pBaseBuf, + (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) ); + *(memory) += (ptrdiff_t)do_now_bytes; + if( do_now == left_in_block ) /* compensate if completed a blocklen */ + *(memory) += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + + *(COUNT) -= do_now; + *(SPACE) -= do_now_bytes; *(packed) += do_now_bytes; return (do_now == left_in_block); } @@ -96,7 +98,7 @@ static inline void unpack_predefined_data(opal_convertor_t *CONVERTOR, const dt_ if ((blocklen_bytes * cando_count) > *(SPACE)) cando_count = (*SPACE) / blocklen_bytes; - /* premptively update the number of COUNT we will return. */ + /* preemptively update the number of COUNT we will return. */ *(COUNT) -= cando_count; if (_elem->blocklen < 9) { @@ -109,16 +111,17 @@ static inline void unpack_predefined_data(opal_convertor_t *CONVERTOR, const dt_ /* else unrecognized _elem->common.type, use the memcpy path */ } - if (1 == _elem->blocklen) { /* Do as many full blocklen as possible */ + if (1 == _elem->blocklen) { /* Do as many full blocklen as possible */ for (; cando_count > 0; cando_count--) { - OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count); - DO_DEBUG(opal_output(0, "unpack memcpy( %p, %p, %lu ) => space %lu [blen = 1]\n", - (void *) _memory, (void *) _packed, (unsigned long) blocklen_bytes, - (unsigned long) (*(SPACE) - (_packed - *(packed))));); - MEMCPY_CSUM(_memory, _packed, blocklen_bytes, (CONVERTOR)); - _packed += blocklen_bytes; - _memory += _elem->extent; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack memcpy( %p [%ld], %p [%ld], %lu ) => space %lu [blen = 1]\n", + (void*)_memory, _memory - CONVERTOR->pBaseBuf, + (void*)_packed, _packed 
- *packed, + (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; } goto update_and_return; } @@ -127,14 +130,15 @@ static inline void unpack_predefined_data(opal_convertor_t *CONVERTOR, const dt_ blocklen_bytes *= _elem->blocklen; do { /* Do as many full blocklen as possible */ - OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count); - DO_DEBUG(opal_output(0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n", - (void *) _memory, (void *) _packed, (unsigned long) blocklen_bytes, - (unsigned long) (*(SPACE) - (_packed - *(packed))));); - MEMCPY_CSUM(_memory, _packed, blocklen_bytes, (CONVERTOR)); - _packed += blocklen_bytes; - _memory += _elem->extent; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p [%ld], %p [%ld], %lu ) => space %lu\n", + (void*)_memory, _memory - CONVERTOR->pBaseBuf, + (void*)_packed, _packed - *packed, + (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; cando_count -= _elem->blocklen; } while (_elem->blocklen <= cando_count); } @@ -146,14 +150,15 @@ static inline void unpack_predefined_data(opal_convertor_t *CONVERTOR, const dt_ assert((cando_count < _elem->blocklen) || ((1 == _elem->count) && (cando_count <= _elem->blocklen))); do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; - OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count); - DO_DEBUG(opal_output(0, "unpack 3. 
memcpy( %p, %p, %lu ) => space %lu [epilog]\n", - (void *) _memory, (void *) _packed, (unsigned long) do_now_bytes, - (unsigned long) (*(SPACE) - (_packed - *(packed))));); - MEMCPY_CSUM(_memory, _packed, do_now_bytes, (CONVERTOR)); - _memory += do_now_bytes; - _packed += do_now_bytes; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p [%ld], %p [%ld], %lu ) => space %lu [epilog]\n", + (void*)_memory, _memory - CONVERTOR->pBaseBuf, + (void*)_packed, _packed - *packed, + (unsigned long)do_now_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) ); + _memory += do_now_bytes; + _packed += do_now_bytes; } update_and_return: diff --git a/test/datatype/Makefile.am b/test/datatype/Makefile.am index 3da8fdffb73..6cd807e627f 100644 --- a/test/datatype/Makefile.am +++ b/test/datatype/Makefile.am @@ -15,7 +15,7 @@ # if PROJECT_OMPI - MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw ddt_raw2 unpack_ooo ddt_pack external32 large_data + MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw ddt_raw2 unpack_ooo ddt_pack external32 large_data partial MPI_CHECKS = to_self reduce_local endif TESTS = opal_datatype_test unpack_hetero $(MPI_TESTS) @@ -102,5 +102,11 @@ reduce_local_LDADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la +partial_local_SOURCES = partial.c +partial_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) +partial_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + distclean: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/datatype/partial.c b/test/datatype/partial.c new file mode 100644 index 00000000000..c064db7193d --- /dev/null +++ b/test/datatype/partial.c @@ -0,0 +1,171 @@ +/* -*- Mode: C; 
c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "opal/datatype/opal_convertor.h" +#include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_datatype_checksum.h" +#include "opal/runtime/opal.h" + +#include +#include +#include + +#define TYPE_COUNT 3 +#define TYPE_BLEN 2 +#define TYPE_STRIDE 4 + +#define CONT_COUNT 2 + +#define COUNT 3 + +#define CHUNK ((TYPE_BLEN*8)*2-4) + +/** + * Print how many elements on both sides of ptr. + */ +static void show_neighborhood(double* ptr, int how_many, bool show_hex) +{ + int i; + + printf("%12p: ", (void*)ptr); + for( i = -how_many; i < how_many; i++ ) { + if( 0 == i ) { + printf(" <%g> ", ptr[i]); + } else { + printf(" %g ", ptr[i]); + } + } + if( show_hex ) { + char* cptr = (char*)ptr; + printf("\n : "); + for( i = -how_many; i < how_many; i++ ) { + if( 0 == i ) printf(" <"); + for( int j = 0; j < sizeof(double); j++ ) { + printf("%02x", cptr[i * sizeof(double)+j]); + } + if( 0 == i ) printf("> "); + else printf(" "); + } + } + printf("\n\n"); +} + +/** + * -------G---[---][---] OPAL_LOOP_S 19 times the next 2 elements extent 18432 + * -cC---P-DB-[---][---] OPAL_FLOAT8 count 72 disp 0x80 (128) blen 16 extent 256 (size 9216) + * -------G---[---][---] OPAL_LOOP_E prev 2 elements first elem displacement 128 size of data 9216 + * -------G---[---][---] OPAL_LOOP_E prev 3 elements first elem displacement 128 size of data 175104 + */ + +int main( int argc, char* argv[] ) +{ + opal_datatype_t* vector; + ompi_datatype_t* base; + uint32_t iov_count; + size_t max_data, size, length; + struct iovec iov[2]; + opal_convertor_t* convertor; + ptrdiff_t extent, base_extent; + double *array, *packed; + 
char* bpacked; + int i, j; + + opal_init_util (NULL, NULL); + ompi_datatype_init(); + + ompi_datatype_create_vector(TYPE_COUNT, TYPE_BLEN, TYPE_STRIDE, MPI_DOUBLE, &base); + ompi_datatype_create_contiguous(CONT_COUNT, base, &vector); + + opal_datatype_commit( vector ); + + ompi_datatype_dump(vector); + + opal_datatype_type_size(vector, &size); + opal_datatype_type_extent(vector, &extent); + opal_datatype_type_extent(base, &base_extent); + + array = (double*)malloc( extent * COUNT ); + packed = (double*)malloc( size * COUNT ); + bpacked = (char*)packed; + + /** + * Initialize the packed data using the index. + */ + for( i = 0; i < (TYPE_BLEN * TYPE_COUNT * CONT_COUNT * COUNT); i++ ) { + packed[i] = (double)(i % TYPE_BLEN); + } + memset(array, extent * COUNT, TYPE_BLEN + 1); + + /** + * Unpack the packed data into the sparse array, CHUNK bytes at a time. This simulates the first step + * of the buffered operation. + */ + convertor = opal_convertor_create( opal_local_arch, 0 ); + opal_convertor_prepare_for_recv( convertor, vector, COUNT, array ); + + for( length = 0; length < (size * COUNT); ) { + iov[0].iov_base = bpacked + length; + iov[0].iov_len = CHUNK; + max_data = iov[0].iov_len; + + iov_count = 1; + opal_convertor_unpack( convertor, iov, &iov_count, &max_data ); + length += max_data; + + int idx = 0, checked = 0; + for( int m = 0; m < COUNT; m++ ) { + char* mptr = (char*)array + m * extent; + for( int k = 0; k < CONT_COUNT; k++ ) { + char* kptr = mptr + k * base_extent; + for( j = 0; j < TYPE_COUNT; j++ ) { + double* jarray = (double*)kptr + j * TYPE_STRIDE; + for( i = 0; i < TYPE_BLEN; i++ ) { + checked += sizeof(double); + if( checked > length ) + goto next_iteration; + if( jarray[i] != (double)(idx % TYPE_BLEN) ) { + fprintf(stderr, "\n\n\nError during check for the %d element, length %" PRIsize_t " (chunk %d)\n", + idx, length, CHUNK); + fprintf(stderr, "Error at position %d [%d:%d:%d:%d] found %g expected %g\n\n\n", + idx, m, k, j, i, jarray[i], (double)(idx % TYPE_BLEN)); + 
show_neighborhood(jarray + i, 4, true); + exit(-1); + } + idx++; + } + } + } + } +next_iteration: + /* nothing special to do here, just move to the next conversion */ + continue; + } + + OBJ_RELEASE(convertor); + + /** + * The datatype is not useful anymore + */ + OBJ_RELEASE(vector); + + free(array); + free(packed); + + /* clean-ups all data allocations */ + ompi_datatype_finalize(); + opal_finalize_util (); + + return 0; +} diff --git a/test/datatype/unpack_ooo.c b/test/datatype/unpack_ooo.c index 58ef8a95774..febc78bc924 100644 --- a/test/datatype/unpack_ooo.c +++ b/test/datatype/unpack_ooo.c @@ -27,6 +27,7 @@ #define N 331 uint32_t remote_arch = 0xffffffff; +bool report_all_errors = true; struct foo_t { int i[3]; @@ -38,26 +39,28 @@ struct pfoo_t { double d[2]; } pfoo = {0}, *pbar = NULL; -static void print_hex(void* ptr, int count, int space) +static void print_hex(void* ptr, int count, char* epilog, char* prolog) { - for( int i = 0; i < count; i++ ) { + if ( NULL != epilog) fprintf(stderr, "%s", epilog); + for ( int i = 0; i < count; i++ ) { fprintf(stderr, "%02x", (unsigned int)(((unsigned char*)ptr)[i])); } - if(space) fprintf(stderr, " "); + if (NULL != prolog) fprintf(stderr, "%s", prolog); } -static void print_bar_pbar(struct foo_t* bar, struct pfoo_t* pbar) +static void print_bar_pbar(struct foo_t* _bar, struct pfoo_t* _pbar) { - print_hex(&bar->i[0], sizeof(int), 1); - print_hex(&bar->i[2], sizeof(int), 1); - print_hex(&bar->d[0], sizeof(double), 1); - print_hex(&bar->d[2], sizeof(double), 1); - fprintf(stderr, "\n"); - print_hex(&pbar->i[0], sizeof(int), 1); - print_hex(&pbar->i[1], sizeof(int), 1); - print_hex(&pbar->d[0], sizeof(double), 1); - print_hex(&pbar->d[1], sizeof(double), 1); - fprintf(stderr, "\n"); + print_hex(&_bar->i[0], sizeof(int), NULL, " "); + print_hex(&_bar->i[1], sizeof(int), "[", "] "); + print_hex(&_bar->i[2], sizeof(int), NULL, " "); + print_hex(&_bar->d[0], sizeof(double), NULL, " "); + print_hex(&_bar->d[1], sizeof(double), 
"[", "] "); + print_hex(&_bar->d[2], sizeof(double), NULL, "\n"); + + print_hex(&_pbar->i[0], sizeof(int), NULL, " "); + print_hex(&_pbar->i[1], sizeof(int), NULL, " "); + print_hex(&_pbar->d[0], sizeof(double), NULL, " "); + print_hex(&_pbar->d[1], sizeof(double), NULL, "\n"); } static void print_stack(opal_convertor_t* conv) @@ -72,7 +75,7 @@ static void print_stack(opal_convertor_t* conv) printf("\n"); } -static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { +static int testcase(ompi_datatype_t * newtype, size_t arr[][2]) { int i, j, errors = 0; struct iovec a; unsigned int iov_count; @@ -99,7 +102,7 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { return OMPI_ERROR; } - for (i=0; arr[i][0] != 0; i++) { + for ( i = 0; 0 != arr[i][0]; i++) { /* add some garbage before and after the source data */ a.iov_base = malloc(arr[i][0]+2048); if (NULL == a.iov_base) { @@ -129,11 +132,36 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { bar[j].d[0] != pbar[j].d[0] || bar[j].d[1] != 0.0 || bar[j].d[2] != pbar[j].d[1]) { - if(0 == errors) { + if(0 == errors || report_all_errors) { + ptrdiff_t displ; + char* error_location = "in gaps"; + if (bar[j].i[0] != pbar[j].i[0]) { + displ = (char*)&bar[j].i[0] - (char*)&bar[0]; + error_location = "i[0]"; + } else if (bar[j].i[2] != pbar[j].i[1]) { + displ = (char*)&bar[j].i[1] - (char*)&bar[0]; + error_location = "i[2]"; + } else if (bar[j].d[0] != pbar[j].d[0]) { + displ = (char*)&bar[j].d[0] - (char*)&bar[0]; + error_location = "d[0]"; + } else if (bar[j].d[2] != pbar[j].d[1]) { + displ = (char*)&bar[j].d[1] - (char*)&bar[0]; + error_location = "d[2]"; + } else { + displ = (char*)&bar[j] - (char*)&bar[0]; + } + for (i = 0; 0 != arr[i][0]; i++) { + if( (displ >= arr[i][1]) && (displ <= (arr[i][1] + arr[i][0])) ) { + fprintf(stderr, "Problem encountered %li bytes into the %d unpack [%"PRIsize_t":%"PRIsize_t"]\n", + displ - arr[i][1], i, arr[i][1], arr[i][0]); + break; + } + } + 
(void)opal_datatype_dump(&newtype->super); - fprintf(stderr, "ERROR ! position=%d/%d, ptr = %p" + fprintf(stderr, "ERROR ! struct %d/%d in field %s, ptr = %p" " got (%d,%d,%d,%g,%g,%g) expected (%d,%d,%d,%g,%g,%g)\n", - j, N, (void*)&bar[j], + j, N, error_location, (void*)&bar[j], bar[j].i[0], bar[j].i[1], bar[j].i[2], @@ -147,6 +175,7 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { 0.0, pbar[j].d[1]); print_bar_pbar(&bar[j], &pbar[j]); + if( report_all_errors ) fprintf(stderr, "\n\n"); } errors++; } @@ -198,13 +227,13 @@ static int unpack_ooo(void) */ size_t test1[9][2] = { {992, 0}, - {1325, 992}, - {992, 2317}, - {992, 3309}, - {992, 4301}, - {992, 5293}, - {992, 6285}, - {667, 7277}, + {1325, 0 + 992}, + {992, 992 + 1325 /* = 2317 */}, + {992, 2317 + 992 /* = 3309 */}, + {992, 3309 + 992 /* = 4301 */}, + {992, 4301 + 992 /* = 5293 */}, + {992, 5293 + 992 /* = 6285 */}, + {667, 6285 + 992 /* = 7277 */}, {0, -1}, };