From 73d64cb97b48c4c6860aa0d49af9705349d6f64b Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 30 Mar 2021 01:09:55 -0400 Subject: [PATCH 1/3] Reenable the heterogeneous support. This commit fixes the support for heterogeneous environments and specifically for external32. The root cause was that during the datatype optimization process types that are contiguous in memory are collapsed together in order to decrease the number of conversion (or memcpy) function calls. The resulting type however, does not have the same conversion rules as the types it replaced, leading to an incorrect (or absent) conversion in some cases. This patch marks the datatypes where types have been collapsed during the optimization process with a flag, allowing the convertor to detect if the optimized type can be used in heterogeneous setups. Signed-off-by: George Bosilca --- opal/datatype/opal_convertor.c | 4 +- opal/datatype/opal_datatype.h | 9 +++ opal/datatype/opal_datatype_dump.c | 2 + opal/datatype/opal_datatype_optimize.c | 12 ++- opal/datatype/opal_datatype_pack.c | 105 +++++++++++++++++-------- opal/datatype/opal_datatype_unpack.c | 92 +++++++++++++++++++++- test/datatype/external32.c | 49 +++++++++--- 7 files changed, 220 insertions(+), 53 deletions(-) diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 34efaaa8f16..0682785ddd4 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -484,8 +484,8 @@ size_t opal_convertor_compute_remote_size(opal_convertor_t *pConvertor) pConvertor->remote_size = pConvertor->local_size; if (OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask)) { pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS); - if (!(pConvertor->flags & CONVERTOR_SEND - && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) { + /* Can we use the optimized description? 
*/ + if (pConvertor->flags & OPAL_DATATYPE_OPTIMIZED_RESTRICTED) { pConvertor->use_desc = &(datatype->desc); } if (0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE)) { diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 41da0f40d35..e2f2f72b0de 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -84,6 +84,15 @@ BEGIN_C_DECLS #define OPAL_DATATYPE_FLAG_BASIC \ (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS \ | OPAL_DATATYPE_FLAG_DATA | OPAL_DATATYPE_FLAG_COMMITTED) +/* + * If during the datatype optimization process we collapse contiguous elements with + * different types, we cannot use this optimized description for any communication + * in a heterogeneous setting, especially not for the exteranl32 support. + * + * A datatype with this flag cannot use the optimized description in heterogeneous + * setups. + */ +#define OPAL_DATATYPE_OPTIMIZED_RESTRICTED 0x1000 /** * The number of supported entries in the data-type definition and the diff --git a/opal/datatype/opal_datatype_dump.c b/opal/datatype/opal_datatype_dump.c index ca06a79307a..259dcc3152f 100644 --- a/opal/datatype/opal_datatype_dump.c +++ b/opal/datatype/opal_datatype_dump.c @@ -99,6 +99,8 @@ int opal_datatype_dump_data_flags(unsigned short usflags, char *ptr, size_t leng } if ((usflags & OPAL_DATATYPE_FLAG_BASIC) == OPAL_DATATYPE_FLAG_BASIC) { ptr[9] = 'B'; + } else if (usflags & OPAL_DATATYPE_OPTIMIZED_RESTRICTED) { + ptr[9] = 'H'; /* optimized description restricted to homogeneous cases */ } /* We know nothing about the upper level language or flags! */ /* ... 
*/ diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index de6c3eb6560..dbf7aeb86a3 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -94,7 +94,9 @@ static int32_t opal_datatype_optimize_short(opal_datatype_t *pData, size_t count assert(1 == current->count); if ((current->common.type == OPAL_DATATYPE_LOOP) || compress.common.type != current->common.type) { - compress.common.type = OPAL_DATATYPE_UINT1; + compress.common.type = OPAL_DATATYPE_UINT1; + compress.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED; + pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED; compress.blocklen = end_loop->size; break; } @@ -194,7 +196,9 @@ static int32_t opal_datatype_optimize_short(opal_datatype_t *pData, size_t count ddt_elem_desc_t save = last; /* safekeep the type and blocklen */ if (last.common.type != current->common.type) { last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size; - last.common.type = OPAL_DATATYPE_UINT1; + last.common.type = OPAL_DATATYPE_UINT1; + last.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED; + pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED; } if ((last.extent * (ptrdiff_t) last.count + last.disp) == current->disp) { @@ -248,7 +252,9 @@ static int32_t opal_datatype_optimize_short(opal_datatype_t *pData, size_t count + (current->blocklen * opal_datatype_basicDatatypes[current->common.type] ->size)); - last.common.type = OPAL_DATATYPE_UINT1; + last.common.type = OPAL_DATATYPE_UINT1; + last.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED; + pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED; } last.extent += current->extent; if (current->count != 1) { diff --git a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index 51bf0fecec4..acbbb1e1df5 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -402,43 +402,84 @@ int32_t opal_generic_simple_pack_function(opal_convertor_t 
*pConvertor, struct i * -1 something wrong occurs. */ -static inline void pack_predefined_heterogeneous(opal_convertor_t *CONVERTOR, - const dt_elem_desc_t *ELEM, size_t *COUNT, - unsigned char **SOURCE, - unsigned char **DESTINATION, size_t *SPACE) +static inline void +pack_predefined_heterogeneous(opal_convertor_t *CONVERTOR, + const dt_elem_desc_t *ELEM, size_t *COUNT, + unsigned char **memory, + unsigned char **packed, size_t *SPACE) { const opal_convertor_master_t *master = (CONVERTOR)->master; const ddt_elem_desc_t *_elem = &((ELEM)->elem); - unsigned char *_source = (*SOURCE) + _elem->disp; - ptrdiff_t advance; - size_t _count = *(COUNT); - size_t _r_blength; - - _r_blength = master->remote_sizes[_elem->common.type]; - if ((_count * _r_blength) > *(SPACE)) { - _count = (*(SPACE) / _r_blength); - if (0 == _count) { - return; /* nothing to do */ - } + size_t cando_count = *(COUNT), do_now_bytes; + size_t local_elem_size = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t remote_elem_size = master->remote_sizes[_elem->common.type]; + size_t blocklen_bytes = remote_elem_size; + unsigned char *_memory = (*memory) + _elem->disp; + unsigned char *_packed = *packed; + ptrdiff_t advance = 0; + + assert(0 == (cando_count % _elem->blocklen)); /* no partials here */ + assert(*(COUNT) <= ((size_t) _elem->count * _elem->blocklen)); + + if ((remote_elem_size * cando_count) > *(SPACE)) + cando_count = (*SPACE) / blocklen_bytes; + + /* premptively update the number of COUNT we will return. 
*/ + *(COUNT) -= cando_count; + + if (_elem->blocklen == 1) { + master->pFunctions[_elem->common.type](CONVERTOR, cando_count, + _memory, *SPACE, _elem->extent, + _packed, *SPACE, remote_elem_size, + &advance); + _memory += cando_count * _elem->extent; + _packed += cando_count * remote_elem_size; + goto update_and_return; + } + + if ((1 < _elem->count) && (_elem->blocklen <= cando_count)) { + blocklen_bytes = remote_elem_size * _elem->blocklen; + + do { /* Do as many full blocklen as possible */ + OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count); + DO_DEBUG(opal_output(0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", + (void *) _packed, (void *) _memory, (unsigned long) blocklen_bytes, + (unsigned long) (*(SPACE) - (_packed - *(packed))));); + master->pFunctions[_elem->common.type](CONVERTOR, _elem->blocklen, + _memory, *SPACE, local_elem_size, + _packed, *SPACE, remote_elem_size, + &advance); + _packed += blocklen_bytes; + _memory += _elem->extent; + cando_count -= _elem->blocklen; + } while (_elem->blocklen <= cando_count); + } + + /** + * As an epilog do anything left from the last blocklen. + */ + if (0 != cando_count) { + assert((cando_count < _elem->blocklen) + || ((1 == _elem->count) && (cando_count <= _elem->blocklen))); + do_now_bytes = cando_count * remote_elem_size; + OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count); + DO_DEBUG(opal_output(0, "pack 3. 
memcpy( %p, %p, %lu ) => space %lu [epilog]\n", + (void *) _packed, (void *) _memory, (unsigned long) do_now_bytes, + (unsigned long) (*(SPACE) - (_packed - *(packed))));); + master->pFunctions[_elem->common.type](CONVERTOR, cando_count, + _memory, *SPACE, local_elem_size, + _packed, *SPACE, remote_elem_size, + &advance); + _memory += do_now_bytes; + _packed += do_now_bytes; } - OPAL_DATATYPE_SAFEGUARD_POINTER(_source, (_count * _elem->extent), (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count); - DO_DEBUG(opal_output(0, "pack [l %s r %s] memcpy( %p, %p, %lu ) => space %lu\n", - ((ptrdiff_t)(opal_datatype_basicDatatypes[_elem->common.type]->size) - == _elem->extent) - ? "cont" - : "----", - ((ptrdiff_t) _r_blength == _elem->extent) ? "cont" : "----", - (void *) *(DESTINATION), (void *) _source, (unsigned long) _r_blength, - (unsigned long) (*(SPACE)));); - master->pFunctions[_elem->common.type](CONVERTOR, _count, _source, *SPACE, _elem->extent, - *DESTINATION, *SPACE, _r_blength, &advance); - _r_blength *= _count; /* update the remote length to encompass all the elements */ - *(SOURCE) += _count * _elem->extent; - *(DESTINATION) += _r_blength; - *(SPACE) -= _r_blength; - *(COUNT) -= _count; +update_and_return: + *(memory) = _memory - _elem->disp; + *(SPACE) -= (_packed - *packed); + *(packed) = _packed; } int32_t opal_pack_general_function(opal_convertor_t *pConvertor, struct iovec *iov, diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index 7ac93582d9d..af6ba62d6cd 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -449,6 +449,86 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct * 1 if everything went fine and the data was completly converted * -1 something wrong occurs. 
*/ +static inline void +unpack_predefined_heterogeneous(opal_convertor_t *CONVERTOR, + const dt_elem_desc_t *ELEM, size_t *COUNT, + unsigned char **memory, + unsigned char **packed, size_t *SPACE) +{ + const opal_convertor_master_t *master = (CONVERTOR)->master; + const ddt_elem_desc_t *_elem = &((ELEM)->elem); + size_t cando_count = *(COUNT), do_now_bytes; + size_t local_elem_size = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t remote_elem_size = master->remote_sizes[_elem->common.type]; + size_t blocklen_bytes = remote_elem_size; + unsigned char *_memory = (*memory) + _elem->disp; + unsigned char *_packed = *packed; + ptrdiff_t advance = 0; + + assert(0 == (cando_count % _elem->blocklen)); /* no partials here */ + assert(*(COUNT) <= ((size_t) _elem->count * _elem->blocklen)); + + if ((remote_elem_size * cando_count) > *(SPACE)) + cando_count = (*SPACE) / blocklen_bytes; + + /* premptively update the number of COUNT we will return. */ + *(COUNT) -= cando_count; + + if (_elem->blocklen == 1) { + master->pFunctions[_elem->common.type](CONVERTOR, cando_count, + _packed, *SPACE, remote_elem_size, + _memory, *SPACE, _elem->extent, + &advance); + _memory += cando_count * _elem->extent; + _packed += cando_count * local_elem_size; + goto update_and_return; + } + + if ((1 < _elem->count) && (_elem->blocklen <= cando_count)) { + blocklen_bytes = remote_elem_size * _elem->blocklen; + + do { /* Do as many full blocklen as possible */ + OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count); + DO_DEBUG(opal_output(0, "pack 2. 
memcpy( %p, %p, %lu ) => space %lu\n", + (void *) _packed, (void *) _memory, (unsigned long) blocklen_bytes, + (unsigned long) (*(SPACE) - (_packed - *(packed))));); + master->pFunctions[_elem->common.type](CONVERTOR, _elem->blocklen, + _packed, *SPACE, remote_elem_size, + _memory, *SPACE, local_elem_size, + &advance); + _packed += blocklen_bytes; + _memory += _elem->extent; + cando_count -= _elem->blocklen; + } while (_elem->blocklen <= cando_count); + } + + /** + * As an epilog do anything left from the last blocklen. + */ + if (0 != cando_count) { + assert((cando_count < _elem->blocklen) + || ((1 == _elem->count) && (cando_count <= _elem->blocklen))); + do_now_bytes = cando_count * remote_elem_size; + OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count); + DO_DEBUG(opal_output(0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n", + (void *) _packed, (void *) _memory, (unsigned long) do_now_bytes, + (unsigned long) (*(SPACE) - (_packed - *(packed))));); + master->pFunctions[_elem->common.type](CONVERTOR, cando_count, + _packed, *SPACE, remote_elem_size, + _memory, *SPACE, local_elem_size, + &advance); + _memory += do_now_bytes; + _packed += do_now_bytes; + } + +update_and_return: + *(memory) = _memory - _elem->disp; + *(SPACE) -= (_packed - *packed); + *(packed) = _packed; +} + int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec *iov, uint32_t *out_size, size_t *max_data) { @@ -463,9 +543,10 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec unsigned char *conv_ptr, *iov_ptr; uint32_t iov_count; size_t iov_len_local; - +#if 0 const opal_convertor_master_t *master = pConvertor->master; ptrdiff_t advance; /* number of bytes that we should advance the buffer */ +#endif size_t rc; DO_DEBUG(opal_output(0, "opal_convertor_general_unpack( %p, {%p, %lu}, %d )\n", @@ -509,6 +590,9 @@ int32_t opal_unpack_general_function(opal_convertor_t 
*pConvertor, struct iovec conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf, count_desc, description[pos_desc].elem.extent, opal_datatype_basicDatatypes[type]->name);); + unpack_predefined_heterogeneous(pConvertor, pElem, &count_desc, &conv_ptr, &iov_ptr, + &iov_len_local); +#if 0 rc = master->pFunctions[type](pConvertor, count_desc, iov_ptr, iov_len_local, opal_datatype_basicDatatypes[type]->size, conv_ptr + pElem->elem.disp, @@ -518,6 +602,7 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec iov_len_local -= advance; /* decrease the available space in the buffer */ iov_ptr += advance; /* increase the pointer to the buffer */ count_desc -= rc; /* compute leftovers */ +#endif if (0 == count_desc) { /* completed */ conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ @@ -527,7 +612,9 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec } continue; } +#if 0 conv_ptr += rc * description[pos_desc].elem.extent; +#endif assert(pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED); assert(0 == iov_len_local); if (0 != iov_len_local) { @@ -535,8 +622,7 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec /* We have some partial data here. Let's copy it into the convertor * and keep it hot until the next round. 
*/ - assert(iov_len_local - < opal_datatype_basicDatatypes[pElem->elem.common.type]->size); + assert(iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size); COMPUTE_CSUM(iov_ptr, iov_len_local, pConvertor); opal_unpack_partial_datatype(pConvertor, pElem, iov_ptr, 0, iov_len_local, diff --git a/test/datatype/external32.c b/test/datatype/external32.c index d09938510ba..9d47e60950a 100644 --- a/test/datatype/external32.c +++ b/test/datatype/external32.c @@ -33,13 +33,29 @@ int check_vector( void* send_buffer, void* packed, static int pack_unpack_datatype( void* send_data, ompi_datatype_t *datatype, int count, void* recv_data, checker_t validator, void *validator_arg ); -static void dump_hex(void* what, size_t length); - -static void dump_hex(void* what, size_t length) +static void +dump_hex(const char* msg, const void* vbuf, int nbytes, + int start_from, int stop_at, int vals_per_line) { - size_t i; - for( i = 0; i < length; i++ ) { - printf("%02x", (unsigned int)(((unsigned char*)what)[i])); + const char* buf = (const char*)vbuf; + + if( -1 == stop_at ) stop_at = nbytes; + + for (int i = (start_from / vals_per_line) * vals_per_line; i < nbytes; ++i) { + if( i >= stop_at ) return; + if (0 == (i % vals_per_line)) { + if( NULL == msg) printf("\n"); + else printf("\n%s", msg); + } else { + if (i % 4 == 0) { + printf(" "); + } + } + printf(" "); + if( i < start_from ) + printf(" "); + else + printf("%02x", *((unsigned char *)(buf + i))); } } @@ -131,7 +147,8 @@ static int pack_unpack_datatype( void* send_data, ompi_datatype_t *datatype, int return -1; } - printf("packed %ld bytes into a %ld bytes buffer ", position, buffer_size); dump_hex(buffer, position); printf("\n"); + printf("packed %ld bytes into a %ld bytes buffer ", position, buffer_size); + dump_hex(NULL, buffer, position, 0, -1, 24); printf("\n"); position = 0; error = ompi_datatype_unpack_external("external32", buffer, buffer_size, &position, @@ -155,12 +172,14 @@ int main(int argc, char 
*argv[]) if( verbose ) { printf("send data %08x %08x \n", send_data[0], send_data[1]); - printf("data "); dump_hex(&send_data, sizeof(int32_t) * 2); printf("\n"); + printf("data "); + dump_hex(NULL, &send_data, sizeof(int32_t) * 2, 0, -1, 24); printf("\n"); } (void)pack_unpack_datatype( send_data, &ompi_mpi_int32_t.dt, 2, recv_data, check_contiguous, (void*)&ompi_mpi_int32_t.dt ); if( verbose ) { - printf("recv "); dump_hex(&recv_data, sizeof(int32_t) * 2); printf("\n"); + printf("recv "); + dump_hex(NULL, &recv_data, sizeof(int32_t) * 2, 0, -1, 24); printf("\n"); printf("recv data %08x %08x \n", recv_data[0], recv_data[1]); } if( (send_data[0] != recv_data[0]) || (send_data[1] != recv_data[1]) ) { @@ -175,12 +194,14 @@ int main(int argc, char *argv[]) if( verbose ) { printf("send data %08x %08x \n", send_data[0], send_data[1]); - printf("data "); dump_hex(&send_data, sizeof(int16_t) * 2); printf("\n"); + printf("data "); + dump_hex(NULL, &send_data, sizeof(int16_t) * 2, 0, -1, 24); printf("\n"); } (void)pack_unpack_datatype( send_data, &ompi_mpi_int16_t.dt, 2, recv_data, check_contiguous, (void*)&ompi_mpi_int16_t.dt ); if( verbose ) { - printf("recv "); dump_hex(&recv_data, sizeof(int16_t) * 2); printf("\n"); + printf("recv "); + dump_hex(NULL, &recv_data, sizeof(int16_t) * 2, 0, -1, 24); printf("\n"); printf("recv data %08x %08x \n", recv_data[0], recv_data[1]); } if( (send_data[0] != recv_data[0]) || (send_data[1] != recv_data[1]) ) { @@ -208,16 +229,18 @@ int main(int argc, char *argv[]) if( verbose ) { printf("send data %08x %x08x %08x \n", send_data[0], send_data[1], send_data[2]); - printf("data "); dump_hex(&send_data, sizeof(int32_t) * 3); printf("\n"); + printf("data "); dump_hex(NULL, &send_data, sizeof(int32_t) * 3, 0, -1, 24); printf("\n"); } (void)pack_unpack_datatype( send_data, ddt, 1, recv_data, check_vector, (void*)&ompi_mpi_int32_t.dt ); if( verbose ) { - printf("recv "); dump_hex(&recv_data, sizeof(int32_t) * 3); printf("\n"); + printf("recv "); 
dump_hex(NULL, &recv_data, sizeof(int32_t) * 3, 0, -1, 24); printf("\n"); printf("recv data %08x %08x %08x \n", recv_data[0], recv_data[1], recv_data[2]); } ompi_datatype_destroy(&ddt); if( (send_data[0] != recv_data[0]) || (send_data[2] != recv_data[2]) ) { printf("Error during external32 pack/unack for vector types (MPI_INT32_T)\n"); + printf("[0]: %d ? %d | [2]: %d ? %d ([1]: %d ? %d)\n", send_data[0], recv_data[0], + send_data[2], recv_data[2], send_data[1], recv_data[1]); exit(-1); } } From 47273fb57e3d354731ebcbe2a69c562b1a662b8b Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 2 Apr 2021 02:19:35 -0400 Subject: [PATCH 2/3] Add the missing MPI 4.0 types and their external32 size. Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype_external32.c | 97 ++++++++++++++++-------- 1 file changed, 66 insertions(+), 31 deletions(-) diff --git a/ompi/datatype/ompi_datatype_external32.c b/ompi/datatype/ompi_datatype_external32.c index 108e14258b7..9f1e6242412 100644 --- a/ompi/datatype/ompi_datatype_external32.c +++ b/ompi/datatype/ompi_datatype_external32.c @@ -26,39 +26,74 @@ /* From the MPI standard. 
external32 use the following types: * Type Length - * MPI_PACKED 1 - * MPI_BYTE 1 - * MPI_CHAR 1 - * MPI_UNSIGNED_CHAR 1 - * MPI_SIGNED_CHAR 1 - * MPI_WCHAR 2 - * MPI_SHORT 2 - * MPI_UNSIGNED_SHORT 2 - * MPI_INT 4 - * MPI_UNSIGNED 4 - * MPI_LONG 4 - * MPI_UNSIGNED_LONG 4 - * MPI_FLOAT 4 - * MPI_DOUBLE 8 - * MPI_LONG_DOUBLE 16 + * MPI_PACKED 1 + * MPI_BYTE 1 + * MPI_CHAR 1 + * MPI_UNSIGNED_CHAR 1 + * MPI_SIGNED_CHAR 1 + * MPI_WCHAR 2 + * MPI_SHORT 2 + * MPI_UNSIGNED_SHORT 2 + * MPI_INT 4 + * MPI_LONG 4 + * MPI_UNSIGNED 4 + * MPI_UNSIGNED_LONG 4 + * MPI_LONG_LONG_INT 8 + * MPI_UNSIGNED_LONG_LONG 8 + * MPI_FLOAT 4 + * MPI_DOUBLE 8 + * MPI_LONG_DOUBLE 16 + * + * MPI_C_BOOL 1 + * MPI_INT8_T 1 + * MPI_INT16_T 2 + * MPI_INT32_T 4 + * MPI_INT64_T 8 + * MPI_UINT8_T 1 + * MPI_UINT16_T 2 + * MPI_UINT32_T 4 + * MPI_UINT64_T 8 + * MPI_AINT 8 + * MPI_COUNT 8 + * MPI_OFFSET 8 + * MPI_C_COMPLEX 2*4 + * MPI_C_FLOAT_COMPLEX 2*4 + * MPI_C_DOUBLE_COMPLEX 2*8 + * MPI_C_LONG_DOUBLE_COMPLEX 2*16 + * * Fortran types - * MPI_CHARACTER 1 - * MPI_LOGICAL 4 - * MPI_INTEGER 4 - * MPI_REAL 4 - * MPI_DOUBLE_PRECISION 8 - * MPI_COMPLEX 2*4 - * MPI_DOUBLE_COMPLEX 2*8 + * MPI_CHARACTER 1 + * MPI_LOGICAL 4 + * MPI_INTEGER 4 + * MPI_REAL 4 + * MPI_DOUBLE_PRECISION 8 + * MPI_COMPLEX 2*4 + * MPI_DOUBLE_COMPLEX 2*8 + * + * MPI_CXX_BOOL 1 + * MPI_CXX_FLOAT_COMPLEX 2*4 + * MPI_CXX_DOUBLE_COMPLEX 2*8 + * MPI_CXX_LONG_DOUBLE_COMPLEX 2*16 + * * Optional types - * MPI_INTEGER1 1 - * MPI_INTEGER2 2 - * MPI_INTEGER4 4 - * MPI_INTEGER8 8 - * MPI_LONG_LONG_INT 8 - * MPI_UNSIGNED_LONG_LONG 8 - * MPI_REAL4 4 - * MPI_REAL8 8 - * MPI_REAL16 16 + * MPI_INTEGER1 1 + * MPI_INTEGER2 2 + * MPI_INTEGER4 4 + * MPI_INTEGER8 8 + * MPI_INTEGER16 16 + * MPI_REAL2 2 + * MPI_REAL4 4 + * MPI_REAL8 8 + * MPI_REAL16 16 + * MPI_COMPLEX4 2*2 + * MPI_COMPLEX8 2*4 + * MPI_COMPLEX16 2*8 + * MPI_COMPLEX32 2*16 + * + * MPI_CXX_BOOL 1 + * MPI_CXX_FLOAT_COMPLEX 2*4 + * MPI_CXX_DOUBLE_COMPLEX 2*8 + * MPI_CXX_LONG_DOUBLE_COMPLEX 2*16 * * All 
floating point values are in big-endian IEEE format. Double extended use 16 bytes, with * 15 exponent bits (bias = 10383), 112 mantissa bits and the same encoding as double. All From 4e56e83d524ff7e425afb8b15e9aeb04235c3189 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 2 Apr 2021 02:34:38 -0400 Subject: [PATCH 3/3] Long live MPI_LONG and MPI_UNSIGNED_LONG MPI_LONG and MPI_UNSIGNED_LONG are the only two types whose external32 representation has a different size than on most current architectures, and as a result they are more challenging to handle. This patch also brings back the support for packing and unpacking to and from external32 for all datatypes and does a little cleaning in the datatype API and comments. Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype.h | 2 +- ompi/datatype/ompi_datatype_external.c | 2 +- ompi/datatype/ompi_datatype_internal.h | 34 +-- ompi/datatype/ompi_datatype_module.c | 2 + ompi/mca/pml/ob1/pml_ob1_recvreq.h | 4 +- opal/datatype/opal_convertor.c | 10 +- opal/datatype/opal_convertor.h | 23 +- opal/datatype/opal_copy_functions.c | 63 ++-- .../opal_copy_functions_heterogeneous.c | 279 ++++++++++++++++-- opal/datatype/opal_datatype.h | 11 +- opal/datatype/opal_datatype_internal.h | 17 +- opal/datatype/opal_datatype_module.c | 8 +- opal/datatype/opal_datatype_pack.c | 12 +- opal/datatype/opal_datatype_unpack.c | 30 +- opal/util/arch.h | 4 +- 15 files changed, 364 insertions(+), 137 deletions(-) diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index e2ee2a79c01..26978d0867e 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -59,7 +59,7 @@ BEGIN_C_DECLS #define OMPI_DATATYPE_FLAG_DATA_FORTRAN 0xC000 #define OMPI_DATATYPE_FLAG_DATA_LANGUAGE 0xC000 -#define OMPI_DATATYPE_MAX_PREDEFINED 50 +#define OMPI_DATATYPE_MAX_PREDEFINED 52 #if OMPI_DATATYPE_MAX_PREDEFINED > OPAL_DATATYPE_MAX_SUPPORTED #error Need to increase the number of supported dataypes by OPAL (value OPAL_DATATYPE_MAX_SUPPORTED). 
diff --git a/ompi/datatype/ompi_datatype_external.c b/ompi/datatype/ompi_datatype_external.c index 53b907218cf..75ca59354bc 100644 --- a/ompi/datatype/ompi_datatype_external.c +++ b/ompi/datatype/ompi_datatype_external.c @@ -126,7 +126,7 @@ int ompi_datatype_pack_external_size(const char datarep[], int incount, CONVERTOR_SEND_CONVERSION, &local_convertor ); - opal_convertor_get_unpacked_size( &local_convertor, &length ); + opal_convertor_get_packed_size( &local_convertor, &length ); *size = (MPI_Aint)length; OBJ_DESTRUCT( &local_convertor ); diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h index 1b137c1f947..e46f5137de1 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -109,8 +109,14 @@ #define OMPI_DATATYPE_MPI_SHORT_FLOAT 0x30 #define OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX 0x31 +/* + * Datatypes that have a different external32 length. + */ +#define OMPI_DATATYPE_MPI_LONG 0x32 +#define OMPI_DATATYPE_MPI_UNSIGNED_LONG 0x33 + /* This should __ALWAYS__ stay last */ -#define OMPI_DATATYPE_MPI_UNAVAILABLE 0x32 +#define OMPI_DATATYPE_MPI_UNAVAILABLE 0x34 #define OMPI_DATATYPE_MPI_MAX_PREDEFINED (OMPI_DATATYPE_MPI_UNAVAILABLE+1) @@ -177,20 +183,6 @@ #define OMPI_DATATYPE_MPI_UNSIGNED OMPI_DATATYPE_MPI_UINT64_T #endif -#if SIZEOF_LONG == 1 -#define OMPI_DATATYPE_MPI_LONG OMPI_DATATYPE_MPI_INT8_T -#define OMPI_DATATYPE_MPI_UNSIGNED_LONG OMPI_DATATYPE_MPI_UINT8_T -#elif SIZEOF_LONG == 2 -#define OMPI_DATATYPE_MPI_LONG OMPI_DATATYPE_MPI_INT16_T -#define OMPI_DATATYPE_MPI_UNSIGNED_LONG OMPI_DATATYPE_MPI_UINT16_T -#elif SIZEOF_LONG == 4 -#define OMPI_DATATYPE_MPI_LONG OMPI_DATATYPE_MPI_INT32_T -#define OMPI_DATATYPE_MPI_UNSIGNED_LONG OMPI_DATATYPE_MPI_UINT32_T -#elif SIZEOF_LONG == 8 -#define OMPI_DATATYPE_MPI_LONG OMPI_DATATYPE_MPI_INT64_T -#define OMPI_DATATYPE_MPI_UNSIGNED_LONG OMPI_DATATYPE_MPI_UINT64_T -#endif - #if SIZEOF_LONG_LONG == 1 #define OMPI_DATATYPE_MPI_LONG_LONG_INT 
OMPI_DATATYPE_MPI_INT8_T #define OMPI_DATATYPE_MPI_UNSIGNED_LONG_LONG OMPI_DATATYPE_MPI_UINT8_T @@ -571,16 +563,8 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX #define OMPI_DATATYPE_INITIALIZER_UNSIGNED OPAL_DATATYPE_INITIALIZER_UINT8 #endif -#if SIZEOF_LONG == 4 -#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT4 -#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT4 -#elif SIZEOF_LONG == 8 -#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT8 -#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT8 -#elif SIZEOF_LONG == 16 -#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT16 -#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT16 -#endif +#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_LONG +#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UNSIGNED_LONG #if SIZEOF_LONG_LONG == 4 #define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INITIALIZER_INT4 diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 232f0d90507..5a9a0aa9110 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -366,6 +366,8 @@ const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX_PREDEF [OMPI_DATATYPE_MPI_LB] = &ompi_mpi_lb.dt, [OMPI_DATATYPE_MPI_UB] = &ompi_mpi_ub.dt, + [OMPI_DATATYPE_MPI_LONG] = &ompi_mpi_long.dt, + [OMPI_DATATYPE_MPI_UNSIGNED_LONG] = &ompi_mpi_unsigned_long.dt, /* MPI 3.0 types */ [OMPI_DATATYPE_MPI_COUNT] = &ompi_mpi_count.dt, diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h index b0b89388dea..c9ffd2a9ba6 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h @@ -234,8 +234,8 @@ static inline void prepare_recv_req_converter(mca_pml_ob1_recv_request_t *req) req->req_recv.req_base.req_addr, 0, 
&req->req_recv.req_base.req_convertor); - opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor, - &req->req_bytes_expected); + opal_convertor_get_packed_size(&req->req_recv.req_base.req_convertor, + &req->req_bytes_expected); } } diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 0682785ddd4..e08265b42bc 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -142,7 +142,13 @@ opal_convertor_master_t *opal_convertor_find_or_create_master(uint32_t remote_ar } else { opal_output(0, "Unknown sizeof(bool) for the remote architecture\n"); } - + if (opal_arch_checkmask(&master->remote_arch, OPAL_ARCH_LONGIS64)) { + remote_sizes[OPAL_DATATYPE_LONG] = 8; + remote_sizes[OPAL_DATATYPE_UNSIGNED_LONG] = 8; + } else { + remote_sizes[OPAL_DATATYPE_LONG] = 4; + remote_sizes[OPAL_DATATYPE_UNSIGNED_LONG] = 4; + } /** * Now we can compute the conversion mask. For all sizes where the remote * and local architecture differ a conversion is needed. Moreover, if the @@ -434,7 +440,7 @@ int32_t opal_convertor_set_position_nocheck(opal_convertor_t *convertor, size_t } rc = opal_convertor_generic_simple_position(convertor, position); /** - * If we have a non-contigous send convertor don't allow it move in the middle + * If we have a non-contiguous send convertor don't allow it move in the middle * of a predefined datatype, it won't be able to copy out the left-overs * anyway. Instead force the position to stay on predefined datatypes * boundaries. 
As we allow partial predefined datatypes on the contiguous diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 05cc2b2ec67..396f959090d 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -199,22 +199,19 @@ static inline int32_t opal_convertor_need_buffers(const opal_convertor_t *pConve size_t opal_convertor_compute_remote_size(opal_convertor_t *pConv); /** - * Return the local size of the convertor (count times the size of the datatype). + * Return the packed size of the memory layout represented by this + * convertor. This is the size of the buffer that would be needed + * for the conversion (takes in account the type of the operation, + * aka pack or unpack, as well as which side is supposed to do the + * type conversion). */ -static inline void opal_convertor_get_packed_size(const opal_convertor_t *pConv, size_t *pSize) +static inline void +opal_convertor_get_packed_size(const opal_convertor_t *pConv, size_t *pSize) { *pSize = pConv->local_size; -} - -/** - * Return the remote size of the convertor (count times the remote size of the - * datatype). On homogeneous environments the local and remote sizes are - * identical. 
- */ -static inline void opal_convertor_get_unpacked_size(const opal_convertor_t *pConv, size_t *pSize) -{ - if (pConv->flags & CONVERTOR_HOMOGENEOUS) { - *pSize = pConv->local_size; + if ((pConv->flags & CONVERTOR_HOMOGENEOUS) || + ((pConv->flags & CONVERTOR_SEND) && !(pConv->flags & CONVERTOR_SEND_CONVERSION)) || + ((pConv->flags & CONVERTOR_RECV) && (pConv->flags & CONVERTOR_SEND_CONVERSION))) { return; } if (0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags)) { diff --git a/opal/datatype/opal_copy_functions.c b/opal/datatype/opal_copy_functions.c index 0cb8b50c274..fec19a2a70e 100644 --- a/opal/datatype/opal_copy_functions.c +++ b/opal/datatype/opal_copy_functions.c @@ -62,10 +62,10 @@ \ if ((from_extent == (ptrdiff_t) local_TYPE_size) \ && (to_extent == (ptrdiff_t) remote_TYPE_size)) { \ - /* copy of contigous data at both source and destination */ \ + /* copy of contiguous data at both source and destination */ \ MEMCPY(to, from, count *local_TYPE_size); \ } else { \ - /* source or destination are non-contigous */ \ + /* source or destination are non-contiguous */ \ for (size_t i = 0; i < count; i++) { \ MEMCPY(to, from, local_TYPE_size); \ to += to_extent; \ @@ -254,30 +254,37 @@ COPY_TYPE(wchar, wchar_t, 1) /* Table of predefined copy functions - one for each OPAL type */ /* NOTE: The order of this array *MUST* match the order in opal_datatype_basicDatatypes */ conversion_fct_t opal_datatype_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = { - (conversion_fct_t) NULL, /* OPAL_DATATYPE_LOOP */ - (conversion_fct_t) NULL, /* OPAL_DATATYPE_END_LOOP */ - (conversion_fct_t) NULL, /* OPAL_DATATYPE_LB */ - (conversion_fct_t) NULL, /* OPAL_DATATYPE_UB */ - (conversion_fct_t) copy_bytes_1, /* OPAL_DATATYPE_INT1 */ - (conversion_fct_t) copy_bytes_2, /* OPAL_DATATYPE_INT2 */ - (conversion_fct_t) copy_bytes_4, /* OPAL_DATATYPE_INT4 */ - (conversion_fct_t) copy_bytes_8, /* OPAL_DATATYPE_INT8 */ - (conversion_fct_t) copy_bytes_16, /* OPAL_DATATYPE_INT16 */ - (conversion_fct_t) 
copy_bytes_1, /* OPAL_DATATYPE_UINT1 */ - (conversion_fct_t) copy_bytes_2, /* OPAL_DATATYPE_UINT2 */ - (conversion_fct_t) copy_bytes_4, /* OPAL_DATATYPE_UINT4 */ - (conversion_fct_t) copy_bytes_8, /* OPAL_DATATYPE_UINT8 */ - (conversion_fct_t) copy_bytes_16, /* OPAL_DATATYPE_UINT16 */ - (conversion_fct_t) copy_float_2, /* OPAL_DATATYPE_FLOAT2 */ - (conversion_fct_t) copy_float_4, /* OPAL_DATATYPE_FLOAT4 */ - (conversion_fct_t) copy_float_8, /* OPAL_DATATYPE_FLOAT8 */ - (conversion_fct_t) copy_float_12, /* OPAL_DATATYPE_FLOAT12 */ - (conversion_fct_t) copy_float_16, /* OPAL_DATATYPE_FLOAT16 */ - (conversion_fct_t) copy_short_float_complex, /* OPAL_DATATYPE_SHORT_FLOAT_COMPLEX */ - (conversion_fct_t) copy_float_complex, /* OPAL_DATATYPE_FLOAT_COMPLEX */ - (conversion_fct_t) copy_double_complex, /* OPAL_DATATYPE_DOUBLE_COMPLEX */ - (conversion_fct_t) copy_long_double_complex, /* OPAL_DATATYPE_LONG_DOUBLE_COMPLEX */ - (conversion_fct_t) copy_bool, /* OPAL_DATATYPE_BOOL */ - (conversion_fct_t) copy_wchar, /* OPAL_DATATYPE_WCHAR */ - (conversion_fct_t) NULL /* OPAL_DATATYPE_UNAVAILABLE */ + [OPAL_DATATYPE_LOOP] = (conversion_fct_t) NULL, + [OPAL_DATATYPE_END_LOOP] = (conversion_fct_t) NULL, + [OPAL_DATATYPE_LB] = (conversion_fct_t) NULL, + [OPAL_DATATYPE_UB] = (conversion_fct_t) NULL, + [OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_bytes_1, + [OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_bytes_2, + [OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_bytes_4, + [OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_bytes_8, + [OPAL_DATATYPE_INT16] = (conversion_fct_t) copy_bytes_16, + [OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_bytes_1, + [OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_bytes_2, + [OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_bytes_4, + [OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_bytes_8, + [OPAL_DATATYPE_UINT16] = (conversion_fct_t) copy_bytes_16, + [OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float_2, + [OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) 
copy_float_4, + [OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float_8, + [OPAL_DATATYPE_FLOAT12] = (conversion_fct_t) copy_float_12, + [OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float_16, + [OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = (conversion_fct_t) copy_short_float_complex, + [OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex, + [OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex, + [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = (conversion_fct_t) copy_long_double_complex, + [OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_bool, + [OPAL_DATATYPE_WCHAR] = (conversion_fct_t) copy_wchar, +#if SIZEOF_LONG == 4 + [OPAL_DATATYPE_LONG] = (conversion_fct_t)copy_bytes_4, + [OPAL_DATATYPE_UNSIGNED_LONG] = (conversion_fct_t)copy_bytes_4, +#elif SIZEOF_LONG == 8 + [OPAL_DATATYPE_LONG] = (conversion_fct_t)copy_bytes_8, + [OPAL_DATATYPE_UNSIGNED_LONG] = (conversion_fct_t)copy_bytes_8, +#endif + [OPAL_DATATYPE_UNAVAILABLE] = NULL, }; diff --git a/opal/datatype/opal_copy_functions_heterogeneous.c b/opal/datatype/opal_copy_functions_heterogeneous.c index be2adf33bce..5529a7f5d53 100644 --- a/opal/datatype/opal_copy_functions_heterogeneous.c +++ b/opal/datatype/opal_copy_functions_heterogeneous.c @@ -176,7 +176,7 @@ static inline void opal_dt_swap_long_double(void *to_p, const void *from_p, cons && (ptrdiff_t) sizeof(TYPE) == from_extent) { \ MEMCPY(to, from, count * sizeof(TYPE)); \ } else { \ - /* source or destination are non-contigous */ \ + /* source or destination are non-contiguous */ \ for (i = 0; i < count; i++) { \ MEMCPY(to, from, sizeof(TYPE)); \ to += to_extent; \ @@ -225,7 +225,7 @@ static inline void opal_dt_swap_long_double(void *to_p, const void *from_p, cons && (ptrdiff_t) sizeof(TYPE) == from_extent) { \ MEMCPY(to, from, count * sizeof(TYPE)); \ } else { \ - /* source or destination are non-contigous */ \ + /* source or destination are non-contiguous */ \ for (i = 0; i < count; i++) { \ MEMCPY(to, from, sizeof(TYPE)); \ to += 
to_extent; \ @@ -265,10 +265,10 @@ static inline void opal_dt_swap_long_double(void *to_p, const void *from_p, cons } \ } else if ((ptrdiff_t)(sizeof(TYPE1) + sizeof(TYPE2)) == to_extent \ && (ptrdiff_t)(sizeof(TYPE1) + sizeof(TYPE2)) == from_extent) { \ - /* source and destination are contigous */ \ + /* source and destination are contiguous */ \ MEMCPY(to, from, count *(sizeof(TYPE1) + sizeof(TYPE2))); \ } else { \ - /* source or destination are non-contigous */ \ + /* source or destination are non-contiguous */ \ for (i = 0; i < count; i++) { \ MEMCPY(to, from, sizeof(TYPE1) + sizeof(TYPE2)); \ to += to_extent; \ @@ -457,32 +457,255 @@ COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL(long_double_complex, long double, 1) COPY_TYPE_HETEROGENEOUS(wchar, wchar_t) +#if SIZEOF_LONG == 8 +static int32_t +copy_long_heterogeneous(opal_convertor_t *pConvertor, size_t count, + const char* from, size_t from_len, ptrdiff_t from_extent, + char* to, size_t to_length, ptrdiff_t to_extent, + ptrdiff_t *advance) +{ + size_t i; + + datatype_check("long", sizeof(long), pConvertor->master->remote_sizes[OPAL_DATATYPE_LONG], &count, from, from_len, from_extent, to, + to_length, to_extent); + if (!((pConvertor->remoteArch ^ opal_local_arch) & OPAL_ARCH_LONGIS64)) { /* same sizeof(long) */ + if ((pConvertor->remoteArch ^ opal_local_arch) & OPAL_ARCH_ISBIGENDIAN) { /* different endianess */ + for (i = 0; i < count; i++) { + opal_dt_swap_bytes(to, from, sizeof(long), 1); + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { + *(long*)to = *(long*)from; + to += to_extent; + from += from_extent; + } + } + } else { + /* the two sides have different lengths for sizeof(long) */ + if( CONVERTOR_SEND & pConvertor->flags ) { /* we're doing a pack */ + assert(CONVERTOR_SEND_CONVERSION & pConvertor->flags); + if ((pConvertor->remoteArch ^ opal_local_arch) & OPAL_ARCH_ISBIGENDIAN) { + /* different sizeof, we need to convert */ + if (opal_local_arch & OPAL_ARCH_LONGIS64) { + 
for (i = 0; i < count; i++) { /* from 8 to 4 bytes */ + int64_t val = *(int64_t*)from; + int32_t i32 = (int32_t)val; + opal_dt_swap_bytes(to, &i32, sizeof(int32_t), 1); + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { /* from 4 to 8 bytes */ + int32_t val = *(int32_t*)from; + int64_t i64 = (int64_t)val; + opal_dt_swap_bytes(to, &i64, sizeof(int64_t), 1); + to += to_extent; + from += from_extent; + } + } + } else { /* both have the same endianness */ + if (opal_local_arch & OPAL_ARCH_LONGIS64) { + for (i = 0; i < count; i++) { /* from 8 to 4 bytes */ + long val = *(long*)from; + *(int32_t*)to = (int32_t)val; + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { /* from 4 to 8 bytes */ + long val = *(long*)from; + *(int64_t*)to = (int64_t)val; + to += to_extent; + from += from_extent; + } + } + } + } else { /* unpack */ + if ((pConvertor->remoteArch ^ opal_local_arch) & OPAL_ARCH_ISBIGENDIAN) { + /* different endianness */ + if (opal_local_arch & OPAL_ARCH_LONGIS64) { + for (i = 0; i < count; i++) { /* from 4 to 8 bytes */ + int32_t val; + opal_dt_swap_bytes(&val, from, sizeof(int32_t), 1); + *(long*)to = (long)val; + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { /* from 8 to 4 bytes */ + int64_t val; + opal_dt_swap_bytes(&val, from, sizeof(int64_t), 1); + *(long*)to = (long)val; + to += to_extent; + from += from_extent; + } + } + } else { /* both have the same endianness */ + if (opal_local_arch & OPAL_ARCH_LONGIS64) { + for (i = 0; i < count; i++) { /* from 4 to 8 bytes */ + int32_t val = *(int32_t*)from; + *(long*)to = (long)val; + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { /* from 8 to 4 bytes */ + int64_t val = *(int64_t*)from; + *(long*)to = (long)val; + to += to_extent; + from += from_extent; + } + } + } + } + } + *advance = count * from_extent; + return count; +} + +static int32_t
+copy_unsigned_long_heterogeneous(opal_convertor_t *pConvertor, size_t count, + const char* from, size_t from_len, ptrdiff_t from_extent, + char* to, size_t to_length, ptrdiff_t to_extent, + ptrdiff_t *advance) +{ + size_t i; + + datatype_check("unsigned long", sizeof(unsigned long), pConvertor->master->remote_sizes[OPAL_DATATYPE_UNSIGNED_LONG], + &count, from, from_len, from_extent, to, to_length, to_extent); + if (!((pConvertor->remoteArch ^ opal_local_arch) & OPAL_ARCH_LONGIS64)) { /* same sizeof(long) */ + if ((pConvertor->remoteArch ^ opal_local_arch) & OPAL_ARCH_ISBIGENDIAN) { /* different endianess */ + for (i = 0; i < count; i++) { + opal_dt_swap_bytes(to, from, sizeof(unsigned long), 1); + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { + *(unsigned long*)to = *(unsigned long*)from; + to += to_extent; + from += from_extent; + } + } + } else { + /* the two sides have different lengths for sizeof(long) */ + if( CONVERTOR_SEND & pConvertor->flags ) { /* we're doing a pack */ + assert(CONVERTOR_SEND_CONVERSION & pConvertor->flags); + if ((pConvertor->remoteArch ^ opal_local_arch) & OPAL_ARCH_ISBIGENDIAN) { + /* different sizeof, we need to convert */ + if (opal_local_arch & OPAL_ARCH_LONGIS64) { + for (i = 0; i < count; i++) { /* from 8 to 4 bytes */ + uint64_t val = *(uint64_t*)from; + uint32_t i32 = (uint32_t)val; + opal_dt_swap_bytes(to, &i32, sizeof(uint32_t), 1); + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { /* from 4 to 8 bytes */ + uint32_t val = *(uint32_t*)from; + uint64_t i64 = (uint64_t)val; + opal_dt_swap_bytes(to, &i64, sizeof(uint64_t), 1); + to += to_extent; + from += from_extent; + } + } + } else { /* both have the same endianess */ + if (opal_local_arch & OPAL_ARCH_LONGIS64) { + for (i = 0; i < count; i++) { /* from 8 to 4 bytes */ + unsigned long val = *(unsigned long*)from; + *(uint32_t*)to = (uint32_t)val; + to += to_extent; + from += from_extent; + } + } else 
{ + for (i = 0; i < count; i++) { /* from 4 to 8 bytes */ + unsigned long val = *(unsigned long*)from; + *(uint64_t*)to = (uint64_t)val; + to += to_extent; + from += from_extent; + } + } + } + } else { /* unpack */ + if ((pConvertor->remoteArch ^ opal_local_arch) & OPAL_ARCH_ISBIGENDIAN) { + /* different endianness */ + if (opal_local_arch & OPAL_ARCH_LONGIS64) { + for (i = 0; i < count; i++) { /* from 4 to 8 bytes */ + uint32_t val; + opal_dt_swap_bytes(&val, from, sizeof(uint32_t), 1); + *(unsigned long*)to = (unsigned long)val; + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { /* from 8 to 4 bytes */ + uint64_t val; + opal_dt_swap_bytes(&val, from, sizeof(uint64_t), 1); + *(unsigned long*)to = (unsigned long)val; + to += to_extent; + from += from_extent; + } + } + } else { /* both have the same endianness */ + if (opal_local_arch & OPAL_ARCH_LONGIS64) { + for (i = 0; i < count; i++) { /* from 4 to 8 bytes */ + uint32_t val = *(uint32_t*)from; + *(unsigned long*)to = (unsigned long)val; + to += to_extent; + from += from_extent; + } + } else { + for (i = 0; i < count; i++) { /* from 8 to 4 bytes */ + uint64_t val = *(uint64_t*)from; + *(unsigned long*)to = (unsigned long)val; + to += to_extent; + from += from_extent; + } + } + } + } + } + *advance = count * from_extent; + return count; +} +#endif /* SIZEOF_LONG == 8 */ + /* table of predefined copy functions - one for each MPI type */ conversion_fct_t opal_datatype_heterogeneous_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = { - [OPAL_DATATYPE_LOOP] = NULL, - [OPAL_DATATYPE_END_LOOP] = NULL, - [OPAL_DATATYPE_LB] = NULL, - [OPAL_DATATYPE_UB] = NULL, - [OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_int1_heterogeneous, - [OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_int2_heterogeneous, - [OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_int4_heterogeneous, - [OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_int8_heterogeneous, - [OPAL_DATATYPE_INT16] = (conversion_fct_t)
copy_int16_heterogeneous, - [OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_int1_heterogeneous, - [OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_int2_heterogeneous, - [OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_int4_heterogeneous, - [OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_int8_heterogeneous, - [OPAL_DATATYPE_UINT16] = (conversion_fct_t) copy_int16_heterogeneous, - [OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float2_heterogeneous, - [OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_float4_heterogeneous, - [OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float8_heterogeneous, - [OPAL_DATATYPE_FLOAT12] = (conversion_fct_t) copy_float12_heterogeneous, - [OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float16_heterogeneous, + [OPAL_DATATYPE_LOOP] = NULL, + [OPAL_DATATYPE_END_LOOP] = NULL, + [OPAL_DATATYPE_LB] = NULL, + [OPAL_DATATYPE_UB] = NULL, + [OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_int1_heterogeneous, + [OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_int2_heterogeneous, + [OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_int4_heterogeneous, + [OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_int8_heterogeneous, + [OPAL_DATATYPE_INT16] = (conversion_fct_t) copy_int16_heterogeneous, + [OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_int1_heterogeneous, + [OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_int2_heterogeneous, + [OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_int4_heterogeneous, + [OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_int8_heterogeneous, + [OPAL_DATATYPE_UINT16] = (conversion_fct_t) copy_int16_heterogeneous, + [OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float2_heterogeneous, + [OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_float4_heterogeneous, + [OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float8_heterogeneous, + [OPAL_DATATYPE_FLOAT12] = (conversion_fct_t) copy_float12_heterogeneous, + [OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float16_heterogeneous, [OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = (conversion_fct_t) 
copy_short_float_complex_heterogeneous, - [OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex_heterogeneous, - [OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex_heterogeneous, + [OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex_heterogeneous, + [OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex_heterogeneous, [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = (conversion_fct_t) copy_long_double_complex_heterogeneous, - [OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_cxx_bool_heterogeneous, - [OPAL_DATATYPE_WCHAR] = (conversion_fct_t) copy_wchar_heterogeneous, - [OPAL_DATATYPE_UNAVAILABLE] = NULL, + [OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_cxx_bool_heterogeneous, + [OPAL_DATATYPE_WCHAR] = (conversion_fct_t) copy_wchar_heterogeneous, +#if SIZEOF_LONG == 4 + [OPAL_DATATYPE_LONG] = (conversion_fct_t) copy_int4_heterogeneous, + [OPAL_DATATYPE_UNSIGNED_LONG] = (conversion_fct_t) copy_int4_heterogeneous, +#else + [OPAL_DATATYPE_LONG] = (conversion_fct_t) copy_long_heterogeneous, + [OPAL_DATATYPE_UNSIGNED_LONG] = (conversion_fct_t) copy_unsigned_long_heterogeneous, +#endif + [OPAL_DATATYPE_UNAVAILABLE] = NULL, }; diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index e2f2f72b0de..7dabd1742c0 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -52,16 +52,15 @@ BEGIN_C_DECLS * This must match the same definition as in opal_datatype_internal.h */ #if !defined(OPAL_DATATYPE_MAX_PREDEFINED) -# define OPAL_DATATYPE_MAX_PREDEFINED 26 +# define OPAL_DATATYPE_MAX_PREDEFINED 28 #endif /* - * No more than this number of _Basic_ datatypes in C/CPP or Fortran - * are supported (in order to not change setup and usage of the predefined - * datatypes). + * Upper limit of the number of _Basic_ datatypes supported (in order to + * not change setup and usage of the predefined datatypes). * * BEWARE: This constant should reflect whatever the OMPI-layer needs. 
*/ -#define OPAL_DATATYPE_MAX_SUPPORTED 50 +#define OPAL_DATATYPE_MAX_SUPPORTED 64 /* flags for the datatypes. */ #define OPAL_DATATYPE_FLAG_UNAVAILABLE \ @@ -187,6 +186,8 @@ OPAL_DECLSPEC extern const opal_datatype_t opal_datatype_double_complex; OPAL_DECLSPEC extern const opal_datatype_t opal_datatype_long_double_complex; OPAL_DECLSPEC extern const opal_datatype_t opal_datatype_bool; OPAL_DECLSPEC extern const opal_datatype_t opal_datatype_wchar; +OPAL_DECLSPEC extern const opal_datatype_t opal_datatype_long; +OPAL_DECLSPEC extern const opal_datatype_t opal_datatype_unsigned_long; /* * Functions exported externally diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index b3ec1de894b..c50f4501393 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -61,7 +61,9 @@ extern int opal_datatype_dfd; * * At the OPAL-level we do not care from which language the datatype came from * (C, C++ or FORTRAN), we only focus on their internal representation in - * the host memory. + * the host memory. There is one notable exception, the long predefined type + * which needs to be handled at the lowest level due to its variable size but + * fixed XDR representation. * * NOTE: This predefined datatype order should be matched by any upper-level * users of the OPAL datatype.
@@ -92,7 +94,9 @@ extern int opal_datatype_dfd; #define OPAL_DATATYPE_LONG_DOUBLE_COMPLEX 22 #define OPAL_DATATYPE_BOOL 23 #define OPAL_DATATYPE_WCHAR 24 -#define OPAL_DATATYPE_UNAVAILABLE 25 +#define OPAL_DATATYPE_LONG 25 +#define OPAL_DATATYPE_UNSIGNED_LONG 26 +#define OPAL_DATATYPE_UNAVAILABLE 27 #ifndef OPAL_DATATYPE_MAX_PREDEFINED # define OPAL_DATATYPE_MAX_PREDEFINED (OPAL_DATATYPE_UNAVAILABLE + 1) @@ -381,6 +385,12 @@ struct opal_datatype_t; # define OPAL_DATATYPE_HANDLE_UINT16(AV, NOTAV, FLAGS) NOTAV(INT16, FLAGS) #endif + +#define OPAL_DATATYPE_INITIALIZER_LONG(FLAGS) \ + OPAL_DATATYPE_INIT_BASIC_DATATYPE(long, OPAL_ALIGNMENT_LONG, LONG, FLAGS) +#define OPAL_DATATYPE_INITIALIZER_UNSIGNED_LONG(FLAGS) \ + OPAL_DATATYPE_INIT_BASIC_DATATYPE(unsigned long, OPAL_ALIGNMENT_LONG, UNSIGNED_LONG, FLAGS) + #if defined(HAVE_SHORT_FLOAT) && SIZEOF_SHORT_FLOAT == 2 # define OPAL_DATATYPE_HANDLE_FLOAT2(AV, NOTAV, FLAGS) \ AV(short float, OPAL_ALIGNMENT_SHORT_FLOAT, FLOAT2, FLAGS) @@ -497,7 +507,8 @@ struct opal_datatype_t; #define OPAL_DATATYPE_HANDLE_LONG_DOUBLE_COMPLEX(AV, NOTAV, FLAGS) \ AV(long double _Complex, OPAL_ALIGNMENT_LONG_DOUBLE_COMPLEX, LONG_DOUBLE_COMPLEX, FLAGS) -#define OPAL_DATATYPE_HANDLE_BOOL(AV, NOTAV, FLAGS) AV(_Bool, OPAL_ALIGNMENT_BOOL, BOOL, FLAGS) +#define OPAL_DATATYPE_HANDLE_BOOL(AV, NOTAV, FLAGS) \ + AV(_Bool, OPAL_ALIGNMENT_BOOL, BOOL, FLAGS) #if OPAL_ALIGNMENT_WCHAR != 0 # define OPAL_DATATYPE_HANDLE_WCHAR(AV, NOTAV, FLAGS) \ diff --git a/opal/datatype/opal_datatype_module.c b/opal/datatype/opal_datatype_module.c index d8bc8f9d47c..26059b28472 100644 --- a/opal/datatype/opal_datatype_module.c +++ b/opal/datatype/opal_datatype_module.c @@ -88,6 +88,8 @@ OPAL_DECLSPEC const opal_datatype_t opal_datatype_long_double_complex = OPAL_DATATYPE_INITIALIZER_LONG_DOUBLE_COMPLEX(0); OPAL_DECLSPEC const opal_datatype_t opal_datatype_bool = OPAL_DATATYPE_INITIALIZER_BOOL(0); OPAL_DECLSPEC const opal_datatype_t opal_datatype_wchar = 
OPAL_DATATYPE_INITIALIZER_WCHAR(0); +OPAL_DECLSPEC const opal_datatype_t opal_datatype_long = OPAL_DATATYPE_INITIALIZER_LONG(0); +OPAL_DECLSPEC const opal_datatype_t opal_datatype_unsigned_long = OPAL_DATATYPE_INITIALIZER_UNSIGNED_LONG(0); OPAL_DECLSPEC const opal_datatype_t opal_datatype_unavailable = OPAL_DATATYPE_INITIALIZER_UNAVAILABLE_NAMED(UNAVAILABLE, 0); @@ -126,6 +128,8 @@ OPAL_DECLSPEC const size_t opal_datatype_local_sizes[OPAL_DATATYPE_MAX_PREDEFINE [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = sizeof(long double _Complex), [OPAL_DATATYPE_BOOL] = sizeof(_Bool), [OPAL_DATATYPE_WCHAR] = sizeof(wchar_t), + [OPAL_DATATYPE_LONG] = sizeof(long), + [OPAL_DATATYPE_UNSIGNED_LONG] = sizeof(unsigned long), }; /* @@ -160,6 +164,8 @@ OPAL_DECLSPEC const opal_datatype_t *opal_datatype_basicDatatypes[OPAL_DATATYPE_ [OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = &opal_datatype_long_double_complex, [OPAL_DATATYPE_BOOL] = &opal_datatype_bool, [OPAL_DATATYPE_WCHAR] = &opal_datatype_wchar, + [OPAL_DATATYPE_LONG] = &opal_datatype_long, + [OPAL_DATATYPE_UNSIGNED_LONG] = &opal_datatype_unsigned_long, [OPAL_DATATYPE_UNAVAILABLE] = &opal_datatype_unavailable, }; @@ -258,7 +264,7 @@ int32_t opal_datatype_init(void) int32_t i; /** - * Force he initialization of the opal_datatype_t class. This will allow us to + * Force the initialization of the opal_datatype_t class. This will allow us to * call OBJ_DESTRUCT without going too deep in the initialization process. 
*/ opal_class_initialize(OBJ_CLASS(opal_datatype_t)); diff --git a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index acbbb1e1df5..d1b9e1a538a 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -375,7 +375,7 @@ int32_t opal_generic_simple_pack_function(opal_convertor_t *pConvertor, struct i *max_data = total_packed; pConvertor->bConverted += total_packed; /* update the already converted bytes */ *out_size = iov_count; - if (pConvertor->bConverted == pConvertor->local_size) { + if (pConvertor->bConverted == pConvertor->remote_size) { pConvertor->flags |= CONVERTOR_COMPLETED; return 1; } @@ -398,7 +398,7 @@ int32_t opal_generic_simple_pack_function(opal_convertor_t *pConvertor, struct i * to a contiguous output buffer with a predefined size. * return OPAL_SUCCESS if everything went OK and if there is still room before the complete * conversion of the data (need additional call with others input buffers ) - * 1 if everything went fine and the data was completly converted + * 1 if everything went fine and the data was completely converted * -1 something wrong occurs. */ @@ -424,7 +424,7 @@ pack_predefined_heterogeneous(opal_convertor_t *CONVERTOR, if ((remote_elem_size * cando_count) > *(SPACE)) cando_count = (*SPACE) / blocklen_bytes; - /* premptively update the number of COUNT we will return. */ + /* preemptively update the number of COUNT we will return. 
*/ *(COUNT) -= cando_count; if (_elem->blocklen == 1) { @@ -472,7 +472,7 @@ pack_predefined_heterogeneous(opal_convertor_t *CONVERTOR, _memory, *SPACE, local_elem_size, _packed, *SPACE, remote_elem_size, &advance); - _memory += do_now_bytes; + _memory += cando_count * local_elem_size; _packed += do_now_bytes; } @@ -615,7 +615,9 @@ int32_t opal_pack_general_function(opal_convertor_t *pConvertor, struct iovec *i *max_data = total_packed; pConvertor->bConverted += total_packed; /* update the already converted bytes */ *out_size = iov_count; - if (pConvertor->bConverted == pConvertor->local_size) { + size_t expected_packed_size; + opal_convertor_get_packed_size(pConvertor, &expected_packed_size); + if (pConvertor->bConverted == expected_packed_size) { pConvertor->flags |= CONVERTOR_COMPLETED; return 1; } diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index af6ba62d6cd..629c7762ec0 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -222,7 +222,7 @@ static inline void opal_unpack_partial_datatype(opal_convertor_t *pConvertor, dt /* reload the length as it is reset by the macro */ data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; - /* For every occurence of the unused byte move data from the saved + /* For every occurrence of the unused byte move data from the saved * buffer back into the user memory. 
*/ #if OPAL_CUDA_SUPPORT @@ -422,7 +422,7 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct *max_data = total_unpacked; pConvertor->bConverted += total_unpacked; /* update the already converted bytes */ *out_size = iov_count; - if (pConvertor->bConverted == pConvertor->remote_size) { + if (pConvertor->bConverted == pConvertor->local_size) { pConvertor->flags |= CONVERTOR_COMPLETED; return 1; } @@ -446,7 +446,7 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct * to a contiguous output buffer with a predefined size. * return OPAL_SUCCESS if everything went OK and if there is still room before the complete * conversion of the data (need additional call with others input buffers ) - * 1 if everything went fine and the data was completly converted + * 1 if everything went fine and the data was completely converted * -1 something wrong occurs. */ static inline void @@ -471,7 +471,7 @@ unpack_predefined_heterogeneous(opal_convertor_t *CONVERTOR, if ((remote_elem_size * cando_count) > *(SPACE)) cando_count = (*SPACE) / blocklen_bytes; - /* premptively update the number of COUNT we will return. */ + /* preemptively update the number of COUNT we will return. 
*/ *(COUNT) -= cando_count; if (_elem->blocklen == 1) { @@ -480,7 +480,7 @@ unpack_predefined_heterogeneous(opal_convertor_t *CONVERTOR, _memory, *SPACE, _elem->extent, &advance); _memory += cando_count * _elem->extent; - _packed += cando_count * local_elem_size; + _packed += cando_count * remote_elem_size; goto update_and_return; } @@ -519,7 +519,7 @@ unpack_predefined_heterogeneous(opal_convertor_t *CONVERTOR, _packed, *SPACE, remote_elem_size, _memory, *SPACE, local_elem_size, &advance); - _memory += do_now_bytes; + _memory += cando_count * local_elem_size; _packed += do_now_bytes; } @@ -592,17 +592,6 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec opal_datatype_basicDatatypes[type]->name);); unpack_predefined_heterogeneous(pConvertor, pElem, &count_desc, &conv_ptr, &iov_ptr, &iov_len_local); -#if 0 - rc = master->pFunctions[type](pConvertor, count_desc, iov_ptr, iov_len_local, - opal_datatype_basicDatatypes[type]->size, - conv_ptr + pElem->elem.disp, - (pConvertor->pDesc->ub - pConvertor->pDesc->lb) - * pConvertor->count, - description[pos_desc].elem.extent, &advance); - iov_len_local -= advance; /* decrease the available space in the buffer */ - iov_ptr += advance; /* increase the pointer to the buffer */ - count_desc -= rc; /* compute leftovers */ -#endif if (0 == count_desc) { /* completed */ conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ @@ -612,9 +601,6 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec } continue; } -#if 0 - conv_ptr += rc * description[pos_desc].elem.extent; -#endif assert(pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED); assert(0 == iov_len_local); if (0 != iov_len_local) { @@ -682,7 +668,9 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec *max_data = total_unpacked; pConvertor->bConverted += total_unpacked; /* update the already converted bytes */ *out_size = iov_count; - if 
(pConvertor->bConverted == pConvertor->remote_size) { + size_t expected_packed_size; + opal_convertor_get_packed_size(pConvertor, &expected_packed_size); + if (pConvertor->bConverted == expected_packed_size) { pConvertor->flags |= CONVERTOR_COMPLETED; return 1; } diff --git a/opal/util/arch.h b/opal/util/arch.h index 115cd65af69..78a394651b8 100644 --- a/opal/util/arch.h +++ b/opal/util/arch.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -51,7 +51,7 @@ ** The fortran integer is dismissed here, since there is no ** platform known to me, were fortran and C-integer do not match ** -** The following abbriviations are introduced: +** The following abbreviations are introduced: ** ** a) il32 (int long are 32 bits) (e.g. IA32 LINUX, SGI n32, SUN) **