From 500e110f5c66ad46d31ce4ab7af74c4fd45d4712 Mon Sep 17 00:00:00 2001 From: bosilca Date: Tue, 9 May 2017 09:31:40 -0400 Subject: [PATCH] Topic/datatype (#3441) * Don't overflow the internal datatype count. Change the type of the count to be a size_t (it does not alter the total size of the internal structures, so has no impact on the ABI). Signed-off-by: George Bosilca * Optimize the datatype creation. The internal array of counts of predefined types is now only created when needed, which is either in a heterogeneous environment, or when one calls get_elements. It saves space and makes the convertor creation a little faster in some cases. Rearrange the fields in the datatype description structs. The macro OPAL_DATATYPE_INIT_PTYPES_ARRAY had a bug, and the static array was only partially created. All predefined types should have the ptypes array created and initialized. Signed-off-by: George Bosilca * Fix the boundary computation. Signed-off-by: George Bosilca * test/datatype: add test for short unpack on heterogeneous cluster Signed-off-by: Gilles Gouaillardet Signed-off-by: George Bosilca * Trying to reduce the cost of creating a convertor. Signed-off-by: George Bosilca * Respect the unpack boundaries. As Gilles suggested on #2535 the opal_unpack_general_function was unpacking based on the requested count and not on the amount of packed data provided. Fixes #2535. 
Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype_get_elements.c | 8 +- ompi/datatype/ompi_datatype_internal.h | 4 +- ompi/datatype/ompi_datatype_module.c | 7 +- ompi/include/ompi/memchecker.h | 3 +- opal/datatype/opal_convertor.c | 91 +++++++++++--------- opal/datatype/opal_convertor.h | 29 +++++-- opal/datatype/opal_datatype.h | 21 +++-- opal/datatype/opal_datatype_add.c | 21 ++--- opal/datatype/opal_datatype_clone.c | 5 +- opal/datatype/opal_datatype_copy.h | 4 +- opal/datatype/opal_datatype_create.c | 38 +++++---- opal/datatype/opal_datatype_dump.c | 14 ++- opal/datatype/opal_datatype_fake_stack.c | 25 ++---- opal/datatype/opal_datatype_get_count.c | 75 +++++++++++++--- opal/datatype/opal_datatype_internal.h | 32 ++++--- opal/datatype/opal_datatype_optimize.c | 19 +++-- opal/datatype/opal_datatype_unpack.c | 3 +- test/datatype/Makefile.am | 7 +- test/datatype/position_noncontig.c | 4 +- test/datatype/unpack_hetero.c | 99 ++++++++++++++++++++++ 20 files changed, 358 insertions(+), 151 deletions(-) create mode 100644 test/datatype/unpack_hetero.c diff --git a/ompi/datatype/ompi_datatype_get_elements.c b/ompi/datatype/ompi_datatype_get_elements.c index 0c1f8a7b842..72ac87d6df7 100644 --- a/ompi/datatype/ompi_datatype_get_elements.c +++ b/ompi/datatype/ompi_datatype_get_elements.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -25,6 +25,7 @@ #include "ompi/runtime/params.h" #include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_datatype_internal.h" int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t *count) { @@ -48,9 +49,10 @@ int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t there are no leftover bytes */ if (!ompi_datatype_is_predefined(datatype)) { if (0 != internal_count) { + opal_datatype_compute_ptypes(&datatype->super); /* count the basic elements in the datatype */ - for (i = 4, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) { - total += datatype->super.btypes[i]; + for (i = OPAL_DATATYPE_FIRST_TYPE, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) { + total += datatype->super.ptypes[i]; } internal_count = total * internal_count; } diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h index 76485370dfa..4ab5cc14020 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2009-2013 The University of Tennessee and The University + * Copyright (c) 2009-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
@@ -465,7 +465,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX .name = OPAL_DATATYPE_INIT_NAME(TYPE ## SIZE), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(TYPE ## SIZE) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY(TYPE ## SIZE) \ } #define OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE_FORTRAN( TYPE, NAME, SIZE, ALIGN, FLAGS ) \ diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index fb5a09e9072..de14aa11188 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -384,8 +384,9 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; (PDST)->super.opt_desc = (PSRC)->super.opt_desc; \ (PDST)->packed_description = (PSRC)->packed_description; \ (PSRC)->packed_description = NULL; \ - memcpy( (PDST)->super.btypes, (PSRC)->super.btypes, \ - OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t) ); \ + /* transfer the ptypes */ \ + (PDST)->super.ptypes = (PSRC)->super.ptypes; \ + (PSRC)->super.ptypes = NULL; \ } while(0) #define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2, FLAGS) \ @@ -737,7 +738,7 @@ void ompi_datatype_dump( const ompi_datatype_t* pData ) (long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used, (long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb), (long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb), - (int)pData->super.nbElems, (int)pData->super.btypes[OPAL_DATATYPE_LOOP], (int)pData->super.flags ); + (int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags ); /* dump the flags */ if( ompi_datatype_is_predefined(pData) ) { index += snprintf( buffer + index, length - index, "predefined " ); diff --git 
a/ompi/include/ompi/memchecker.h b/ompi/include/ompi/memchecker.h index 90a89199353..bbc38afee91 100644 --- a/ompi/include/ompi/memchecker.h +++ b/ompi/include/ompi/memchecker.h @@ -366,7 +366,8 @@ static inline int memchecker_datatype(MPI_Datatype type) opal_memchecker_base_isdefined (&type->super.opt_desc.length, sizeof(opal_datatype_count_t)); opal_memchecker_base_isdefined (&type->super.opt_desc.used, sizeof(opal_datatype_count_t)); opal_memchecker_base_isdefined (&type->super.opt_desc.desc, sizeof(dt_elem_desc_t *)); - opal_memchecker_base_isdefined (&type->super.btypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t)); + if( NULL != type->super.ptypes ) + opal_memchecker_base_isdefined (&type->super.ptypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(size_t)); opal_memchecker_base_isdefined (&type->id, sizeof(int32_t)); opal_memchecker_base_isdefined (&type->d_f_to_c_index, sizeof(int32_t)); diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 46aff829723..18cb4434eb5 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -43,9 +43,6 @@ CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) ) #endif -extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor, - int starting_point, const int* sizes ); - static void opal_convertor_construct( opal_convertor_t* convertor ) { convertor->pStack = convertor->static_stack; @@ -226,7 +223,7 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv, if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) { /** * We are doing conversion on a contiguous datatype on a homogeneous - * environment. The convertor contain minimal informations, we only + * environment. The convertor contain minimal information, we only * use the bConverted to manage the conversion. */ uint32_t i; @@ -447,31 +444,49 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, return rc; } +static size_t +opal_datatype_compute_remote_size( const opal_datatype_t* pData, + const size_t* sizes ) +{ + uint32_t typeMask = pData->bdt_used; + size_t length = 0; + + if( OPAL_UNLIKELY(NULL == pData->ptypes) ) { + /* Allocate and fill the array of types used in the datatype description */ + opal_datatype_compute_ptypes( (opal_datatype_t*)pData ); + } + + for( int i = OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { + if( typeMask & ((uint32_t)1 << i) ) { + length += (pData->ptypes[i] * sizes[i]); + typeMask ^= ((uint32_t)1 << i); + } + } + return length; +} /** * Compute the remote size. If necessary remove the homogeneous flag * and redirect the convertor description toward the non-optimized * datatype representation. 
*/ -#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \ -{ \ - if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \ - opal_convertor_master_t* master; \ - int i; \ - uint32_t mask = datatype->bdt_used; \ - convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \ - master = convertor->master; \ - convertor->remote_size = 0; \ - for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \ - if( mask & ((uint32_t)1 << i) ) { \ - convertor->remote_size += (datatype->btypes[i] * \ - master->remote_sizes[i]); \ - mask ^= ((uint32_t)1 << i); \ - } \ - } \ - convertor->remote_size *= convertor->count; \ - convertor->use_desc = &(datatype->desc); \ - } \ +size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor ) +{ + opal_datatype_t* datatype = (opal_datatype_t*)pConvertor->pDesc; + + pConvertor->remote_size = pConvertor->local_size; + if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) { + pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS); + pConvertor->use_desc = &(datatype->desc); + if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) { + /* This is for a single datatype, we must update it with the count */ + pConvertor->remote_size = opal_datatype_compute_remote_size(datatype, + pConvertor->master->remote_sizes); + pConvertor->remote_size *= pConvertor->count; + } + } + pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE; + return pConvertor->remote_size; } /** @@ -483,29 +498,26 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, */ #define OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \ { \ - uint32_t bdt_mask; \ - \ + convertor->local_size = count * datatype->size; \ + convertor->pBaseBuf = (unsigned char*)pUserBuf; \ + convertor->count = count; \ + convertor->pDesc = (opal_datatype_t*)datatype; \ + convertor->bConverted = 0; \ + convertor->use_desc = &(datatype->opt_desc); \ /* If the data is empty we just mark the convertor as \ * completed. 
With this flag set the pack and unpack functions \ * will not do anything. \ */ \ if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \ - convertor->flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED; \ + convertor->flags |= (OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED | CONVERTOR_HAS_REMOTE_SIZE); \ convertor->local_size = convertor->remote_size = 0; \ return OPAL_SUCCESS; \ } \ - /* Compute the local in advance */ \ - convertor->local_size = count * datatype->size; \ - convertor->pBaseBuf = (unsigned char*)pUserBuf; \ - convertor->count = count; \ \ /* Grab the datatype part of the flags */ \ convertor->flags &= CONVERTOR_TYPE_MASK; \ convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \ convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \ - convertor->pDesc = (opal_datatype_t*)datatype; \ - convertor->bConverted = 0; \ - convertor->use_desc = &(datatype->opt_desc); \ \ convertor->remote_size = convertor->local_size; \ if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \ @@ -516,9 +528,8 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, } \ } \ \ - bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \ - OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype, \ - bdt_mask ); \ + assert( (convertor)->pDesc == (datatype) ); \ + opal_convertor_compute_remote_size( convertor ); \ assert( NULL != convertor->use_desc->desc ); \ /* For predefined datatypes (contiguous) do nothing more */ \ /* if checksum is enabled then always continue */ \ @@ -530,7 +541,7 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, } \ convertor->flags &= ~CONVERTOR_NO_OP; \ { \ - uint32_t required_stack_length = datatype->btypes[OPAL_DATATYPE_LOOP] + 1; \ + uint32_t required_stack_length = datatype->loops + 1; \ \ if( required_stack_length > convertor->stack_size ) { \ assert(convertor->pStack == convertor->static_stack); \ @@ -714,8 +725,8 @@ void opal_datatype_dump_stack( 
const dt_stack_t* pStack, int stack_pos, opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index, (int)pStack[stack_pos].count, (long)pStack[stack_pos].disp ); if( pStack->index != -1 ) - opal_output( 0, "\t[desc count %d disp %ld extent %ld]\n", - pDesc[pStack[stack_pos].index].elem.count, + opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n", + (unsigned long)pDesc[pStack[stack_pos].index].elem.count, (long)pDesc[pStack[stack_pos].index].elem.disp, (long)pDesc[pStack[stack_pos].index].elem.extent ); else diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 7c5de1af39b..9ceb5a64673 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -52,6 +52,7 @@ BEGIN_C_DECLS #define CONVERTOR_STATE_ALLOC 0x04000000 #define CONVERTOR_COMPLETED 0x08000000 #define CONVERTOR_CUDA_UNIFIED 0x10000000 +#define CONVERTOR_HAS_REMOTE_SIZE 0x20000000 union dt_elem_desc; typedef struct opal_convertor_t opal_convertor_t; @@ -184,9 +185,16 @@ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConv return 1; } +/** + * Update the size of the remote datatype representation. The size will + * depend on the configuration of the master convertor. In homogeneous + * environments, the local and remote sizes are identical. + */ +size_t +opal_convertor_compute_remote_size( opal_convertor_t* pConv ); -/* - * +/** + * Return the local size of the convertor (count times the size of the datatype). 
*/ static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv, size_t* pSize ) @@ -195,16 +203,24 @@ static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv } -/* - * +/** + * Return the remote size of the convertor (count times the remote size of the + * datatype). On homogeneous environments the local and remote sizes are + * identical. */ static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pConv, size_t* pSize ) { + if( pConv->flags & CONVERTOR_HOMOGENEOUS ) { + *pSize = pConv->local_size; + return; + } + if( 0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags) ) { + opal_convertor_compute_remote_size( (opal_convertor_t*)pConv); + } *pSize = pConv->remote_size; } - /** * Return the current absolute position of the next pack/unpack. This function is * mostly useful for contiguous datatypes, when we need to get the pointer to the @@ -277,6 +293,7 @@ opal_convertor_raw( opal_convertor_t* convertor, /* [IN/OUT] */ uint32_t* iov_count, /* [IN/OUT] */ size_t* length ); /* [OUT] */ + /* * Upper level does not need to call the _nocheck function directly. */ diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 34c7b4e1b66..7e9678bd466 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -53,9 +53,10 @@ BEGIN_C_DECLS #endif /* * No more than this number of _Basic_ datatypes in C/CPP or Fortran - * are supported (in order to not change setup and usage of btypes). 
+ * are supported (in order to not change setup and usage of the predefined + * datatypes). * - * XXX TODO Adapt to whatever the OMPI-layer needs + * BEWARE: This constant should reflect whatever the OMPI-layer needs. */ #define OPAL_DATATYPE_MAX_SUPPORTED 47 @@ -115,6 +116,7 @@ struct opal_datatype_t { /* --- cacheline 1 boundary (64 bytes) --- */ size_t nbElems; /**< total number of elements inside the datatype */ uint32_t align; /**< data should be aligned to */ + uint32_t loops; /**< number of loops on the iternal type stack */ /* Attribute fields */ char name[OPAL_MAX_OBJECT_NAME]; /**< name of the datatype */ @@ -123,11 +125,12 @@ struct opal_datatype_t { dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless or in the send case (without conversion) */ - uint32_t btypes[OPAL_DATATYPE_MAX_SUPPORTED]; - /**< basic elements count used to compute the size of the - datatype for remote nodes. The length of the array is dependent on - the maximum number of datatypes of all top layers. - Reason being is that Fortran is not at the OPAL layer. */ + size_t *ptypes; /**< array of basic predefined types that facilitate the computing + of the remote size in heterogeneous environments. The length of the + array is dependent on the maximum number of predefined datatypes of + all language interfaces (because Fortran is not known at the OPAL + layer). 
This field should never be initialized in homogeneous + environments */ /* --- cacheline 5 boundary (320 bytes) was 32-36 bytes ago --- */ /* size: 352, cachelines: 6, members: 15 */ @@ -281,6 +284,8 @@ OPAL_DECLSPEC int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* pData, int32_t count, char* pDestBuf, char* pSrcBuf ); +OPAL_DECLSPEC int opal_datatype_compute_ptypes( opal_datatype_t* datatype ); + OPAL_DECLSPEC const opal_datatype_t* opal_datatype_match_size( int size, uint16_t datakind, uint16_t datalang ); diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index 890f5503bbd..70c7466e839 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -277,7 +277,8 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA * predefined non contiguous datatypes (like MPI_SHORT_INT). 
*/ if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) { - pdtBase->btypes[pdtAdd->id] += count; + if( NULL != pdtBase->ptypes ) + pdtBase->ptypes[pdtAdd->id] += count; pLast->elem.common.type = pdtAdd->id; pLast->elem.count = count; pLast->elem.disp = disp; @@ -289,13 +290,13 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA } } else { /* keep trace of the total number of basic datatypes in the datatype definition */ - pdtBase->btypes[OPAL_DATATYPE_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_LOOP]; - pdtBase->btypes[OPAL_DATATYPE_END_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_END_LOOP]; - pdtBase->btypes[OPAL_DATATYPE_LB] |= pdtAdd->btypes[OPAL_DATATYPE_LB]; - pdtBase->btypes[OPAL_DATATYPE_UB] |= pdtAdd->btypes[OPAL_DATATYPE_UB]; - for( i = 4; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) - if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]); - + pdtBase->loops += pdtAdd->loops; + pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_LB); + pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_UB); + if( (NULL != pdtBase->ptypes) && (NULL != pdtAdd->ptypes) ) { + for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) + if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]); + } if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) && (extent == pdtAdd->desc.desc[0].elem.extent) ){ pLast->elem = pdtAdd->desc.desc[0].elem; @@ -310,7 +311,7 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA pLoop = pLast; CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent, (pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED)) ); - pdtBase->btypes[OPAL_DATATYPE_LOOP] += 2; + pdtBase->loops += 2; pdtBase->desc.used += 2; pLast++; } diff --git a/opal/datatype/opal_datatype_clone.c b/opal/datatype/opal_datatype_clone.c index 
05f57c88cd8..fa4479982d0 100644 --- a/opal/datatype/opal_datatype_clone.c +++ b/opal/datatype/opal_datatype_clone.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -61,6 +61,9 @@ int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * dest_type->opt_desc.used = src_type->opt_desc.used; memcpy( dest_type->opt_desc.desc, src_type->opt_desc.desc, desc_length * sizeof(dt_elem_desc_t) ); } + } else { + assert( NULL == dest_type->opt_desc.desc ); + assert( 0 == dest_type->opt_desc.length ); } } dest_type->id = src_type->id; /* preserve the default id. This allow us to diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index 5557142b1fd..ca035575444 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
@@ -179,7 +179,7 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i return 0; /* completed */ } - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 1) ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 1) ); pStack->count = count; pStack->index = -1; pStack->disp = 0; diff --git a/opal/datatype/opal_datatype_create.c b/opal/datatype/opal_datatype_create.c index e64e1f04190..0e6d49b9bd7 100644 --- a/opal/datatype/opal_datatype_create.c +++ b/opal/datatype/opal_datatype_create.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -30,8 +30,6 @@ static void opal_datatype_construct( opal_datatype_t* pData ) { - int i; - pData->size = 0; pData->flags = OPAL_DATATYPE_FLAG_CONTIGUOUS; pData->id = 0; @@ -53,32 +51,36 @@ static void opal_datatype_construct( opal_datatype_t* pData ) pData->opt_desc.length = 0; pData->opt_desc.used = 0; - for( i = 0; i < OPAL_DATATYPE_MAX_SUPPORTED; i++ ) - pData->btypes[i] = 0; + pData->ptypes = NULL; + pData->loops = 0; } static void opal_datatype_destruct( opal_datatype_t* datatype ) { + /** + * As the default description and the optimized description might point to the + * same data description we should start by cleaning the optimized description. 
+ */ + if( NULL != datatype->opt_desc.desc ) { + if( datatype->opt_desc.desc != datatype->desc.desc ) + free( datatype->opt_desc.desc ); + datatype->opt_desc.length = 0; + datatype->opt_desc.used = 0; + datatype->opt_desc.desc = NULL; + } if (!opal_datatype_is_predefined(datatype)) { - if( datatype->desc.desc != NULL ) { + if( NULL != datatype->desc.desc ) { free( datatype->desc.desc ); datatype->desc.length = 0; datatype->desc.used = 0; + datatype->desc.desc = NULL; } } - if( datatype->opt_desc.desc != NULL ) { - if( datatype->opt_desc.desc != datatype->desc.desc ) - free( datatype->opt_desc.desc ); - datatype->opt_desc.length = 0; - datatype->opt_desc.used = 0; - datatype->opt_desc.desc = NULL; + /* dont free the ptypes of predefined types (it was not dynamically allocated) */ + if( (NULL != datatype->ptypes) && (datatype->id >= OPAL_DATATYPE_MAX_PREDEFINED) ) { + free(datatype->ptypes); + datatype->ptypes = NULL; } - /** - * As the default description and the optimized description can point to the - * same memory location we should keep the default location pointer until we - * know what we should do with the optimized description. - */ - datatype->desc.desc = NULL; /* make sure the name is set to empty */ datatype->name[0] = '\0'; diff --git a/opal/datatype/opal_datatype_dump.c b/opal/datatype/opal_datatype_dump.c index 30575674196..8ec86ee63a8 100644 --- a/opal/datatype/opal_datatype_dump.c +++ b/opal/datatype/opal_datatype_dump.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -42,8 +42,14 @@ int opal_datatype_contain_basic_datatypes( const opal_datatype_t* pData, char* p if( pData->flags & OPAL_DATATYPE_FLAG_USER_LB ) index += snprintf( ptr, length - index, "lb " ); if( pData->flags & OPAL_DATATYPE_FLAG_USER_UB ) index += snprintf( ptr + index, length - index, "ub " ); for( i = 0; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) { - if( pData->bdt_used & mask ) - index += snprintf( ptr + index, length - index, "%s ", opal_datatype_basicDatatypes[i]->name ); + if( pData->bdt_used & mask ) { + if( NULL == pData->ptypes ) { + index += snprintf( ptr + index, length - index, "%s:* ", opal_datatype_basicDatatypes[i]->name ); + } else { + index += snprintf( ptr + index, length - index, "%s:%lu ", opal_datatype_basicDatatypes[i]->name, + pData->ptypes[i]); + } + } mask <<= 1; if( length <= (size_t)index ) break; } @@ -115,7 +121,7 @@ void opal_datatype_dump( const opal_datatype_t* pData ) (void*)pData, pData->name, (long)pData->size, (int)pData->align, pData->id, (int)pData->desc.length, (int)pData->desc.used, (long)pData->true_lb, (long)pData->true_ub, (long)(pData->true_ub - pData->true_lb), (long)pData->lb, (long)pData->ub, (long)(pData->ub - pData->lb), - (int)pData->nbElems, (int)pData->btypes[OPAL_DATATYPE_LOOP], (int)pData->flags ); + (int)pData->nbElems, (int)pData->loops, (int)pData->flags ); /* dump the flags */ if( pData->flags == OPAL_DATATYPE_FLAG_PREDEFINED ) index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/opal/datatype/opal_datatype_fake_stack.c b/opal/datatype/opal_datatype_fake_stack.c index 4f72b343672..1f8f115cc3f 100644 --- a/opal/datatype/opal_datatype_fake_stack.c +++ b/opal/datatype/opal_datatype_fake_stack.c @@ -3,10 +3,10 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2017 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -32,21 +32,8 @@ #include "opal/datatype/opal_datatype_internal.h" -int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, - size_t starting_point, - const size_t* sizes ); - -static inline size_t -opal_convertor_compute_remote_size( const opal_datatype_t* pData, const size_t* sizes ) -{ - uint32_t i; - size_t length = 0; - - for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) { - length += (pData->btypes[i] * sizes[i]); - } - return length; -} +extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor, + size_t starting_point, const size_t* sizes ); int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, size_t starting_point, const size_t* sizes ) @@ -102,7 +89,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, } /* remove from the main loop all the complete datatypes */ - remote_size = opal_convertor_compute_remote_size( pData, sizes ); + remote_size = opal_convertor_compute_remote_size( pConvertor ); count = (int32_t)(starting_point / remote_size); resting_place -= (remote_size * count); pStack->count = pConvertor->count - count; @@ -112,7 +99,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].elem.disp; pos_desc = 0; - remoteLength = (size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->btypes[OPAL_DATATYPE_LOOP] + 1)); + remoteLength = 
(size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->loops + 1)); remoteLength[0] = 0; /* initial value set to ZERO */ loop_length = 0; diff --git a/opal/datatype/opal_datatype_get_count.c b/opal/datatype/opal_datatype_get_count.c index 7b539fbec81..9f1b0ecf8e5 100644 --- a/opal/datatype/opal_datatype_get_count.c +++ b/opal/datatype/opal_datatype_get_count.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. @@ -39,9 +39,9 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t /* Normally the size should be less or equal to the size of the datatype. * This function does not support a iSize bigger than the size of the datatype. */ - assert( (uint32_t)iSize <= datatype->size ); - DUMP( "dt_count_elements( %p, %d )\n", (void*)datatype, iSize ); - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) ); + assert( iSize <= datatype->size ); + DUMP( "dt_count_elements( %p, %ul )\n", (void*)datatype, (unsigned long)iSize ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); pStack->count = 1; pStack->index = -1; pStack->disp = 0; @@ -53,8 +53,10 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return nbElems; /* completed */ + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the begining of the loop */ } - pos_desc = pStack->index + 1; continue; } if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { @@ -93,9 +95,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* 
datatype, size_t /** * Handle all complete multiple of the datatype. */ - for( pos_desc = 4; pos_desc < OPAL_DATATYPE_MAX_PREDEFINED; pos_desc++ ) { - local_length += datatype->btypes[pos_desc]; - } + local_length = datatype->nbElems; pos_desc = count / local_length; count = count % local_length; *length = datatype->size * pos_desc; @@ -104,7 +104,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t } DUMP( "dt_set_element_count( %p, %d )\n", (void*)datatype, count ); - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); pStack->count = 1; pStack->index = -1; pStack->disp = 0; @@ -116,8 +116,10 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return 0; + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the beginning of the loop */ } - pos_desc = pStack->index + 1; continue; } if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { @@ -143,3 +145,56 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t } } +/** + * Compute the array of counts of the predefined datatypes contained in + * the datatype. We have no simple way to create this array, as we only + * sporadically need it (when we deal with heterogeneous environments or + * when we use get_element_count). Thus, we will pay the cost once per + * datatype, but we will only update this array if/when needed.
+ */ +int opal_datatype_compute_ptypes( opal_datatype_t* datatype ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + uint32_t pos_desc; /* actual position in the description of the derived datatype */ + ssize_t nbElems = 0, stack_pos = 0; + dt_elem_desc_t* pElems; + + if( NULL != datatype->ptypes ) return 0; + datatype->ptypes = (size_t*)calloc(OPAL_DATATYPE_MAX_SUPPORTED, sizeof(size_t)); + + DUMP( "opal_datatype_compute_ptypes( %p )\n", (void*)datatype ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); + pStack->count = 1; + pStack->index = -1; + pStack->disp = 0; + pElems = datatype->desc.desc; + pos_desc = 0; + + while( 1 ) { /* loop forever the exit condition is on the last OPAL_DATATYPE_END_LOOP */ + if( OPAL_DATATYPE_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */ + if( --(pStack->count) == 0 ) { /* end of loop */ + stack_pos--; pStack--; + if( stack_pos == -1 ) return 0; /* completed */ + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the beginning of the loop */ + } + continue; + } + if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { + ddt_loop_desc_t* loop = &(pElems[pos_desc].loop); + do { + PUSH_STACK( pStack, stack_pos, pos_desc, OPAL_DATATYPE_LOOP, loop->loops, 0 ); + pos_desc++; + } while( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */ + DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" ); + } + while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + /* now here we have a basic datatype */ + datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count; + nbElems += pElems[pos_desc].elem.count; + + pos_desc++; /* advance to the next data */ + } + } +} diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index 5fdd2c59d96..754fee71497 100644 ---
a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -153,8 +153,8 @@ typedef struct ddt_elem_id_description ddt_elem_id_description; */ struct ddt_elem_desc { ddt_elem_id_description common; /**< basic data description and flags */ - uint32_t count; /**< number of blocks */ uint32_t blocklen; /**< number of elements on each block */ + size_t count; /**< number of blocks */ OPAL_PTRDIFF_TYPE extent; /**< extent of each block (in bytes) */ OPAL_PTRDIFF_TYPE disp; /**< displacement of the first block */ }; @@ -170,8 +170,8 @@ typedef struct ddt_elem_desc ddt_elem_desc_t; */ struct ddt_loop_desc { ddt_elem_id_description common; /**< basic data description and flags */ - uint32_t loops; /**< number of elements */ uint32_t items; /**< number of items in the loop */ + uint32_t loops; /**< number of elements */ size_t unused; /**< not used right now */ OPAL_PTRDIFF_TYPE extent; /**< extent of the whole loop */ }; @@ -212,13 +212,20 @@ union dt_elem_desc { (_place)->end_loop.unused = -1; \ } while(0) + +/** + * Create one or more elements depending on the value of _count. If the value + * is too large for the type of elem.count then use both the elem.count and + * elem.blocklen to create it. If the number is prime then create a second + * element to account for the difference.
+ */ #define CREATE_ELEM( _place, _type, _flags, _count, _disp, _extent ) \ do { \ (_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \ (_place)->elem.common.type = (_type); \ - (_place)->elem.count = (_count); \ (_place)->elem.disp = (_disp); \ (_place)->elem.extent = (_extent); \ + (_place)->elem.count = (_count); \ (_place)->elem.blocklen = 1; \ } while(0) /* @@ -236,8 +243,8 @@ struct opal_datatype_t; * OPAL_DATATYPE_INIT_BTYPES_ARRAY_[0-21], then order and naming would _not_ matter.... */ -#define OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE { 0 } -#define OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) { [OPAL_DATATYPE_ ## NAME] = 1 } +#define OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE NULL +#define OPAL_DATATYPE_INIT_PTYPES_ARRAY(NAME) (size_t[OPAL_DATATYPE_MAX_PREDEFINED]){ [OPAL_DATATYPE_ ## NAME] = 1, [OPAL_DATATYPE_MAX_PREDEFINED-1] = 0 } #define OPAL_DATATYPE_INIT_NAME(NAME) "OPAL_" #NAME @@ -266,7 +273,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INITIALIZER_UNAVAILABLE( FLAGS ) \ @@ -285,7 +292,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(EMPTY), \ .desc = OPAL_DATATYPE_INIT_DESC_NULL, \ .opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INIT_BASIC_TYPE( TYPE, NAME, FLAGS ) \ @@ -301,7 +308,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_NULL, \ .opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INIT_BASIC_DATATYPE( TYPE, ALIGN, NAME, FLAGS 
) \ @@ -317,7 +324,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INITIALIZER_LOOP(FLAGS) OPAL_DATATYPE_INIT_BASIC_TYPE( OPAL_DATATYPE_LOOP, LOOP, FLAGS ) @@ -474,7 +481,10 @@ static inline int GET_FIRST_NON_LOOP( const union dt_elem_desc* _pElem ) #define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \ do { \ (ELEMENT) = &((DESCRIPTION)[(POSITION)]); \ - (COUNTER) = (ELEMENT)->elem.count; \ + if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \ + (COUNTER) = (ELEMENT)->loop.loops; \ + else \ + (COUNTER) = (ELEMENT)->elem.count; \ } while (0) OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length ); diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index 5b66e4df595..3440c565538 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -42,21 +42,22 @@ static int32_t opal_datatype_optimize_short( opal_datatype_t* pData, - int32_t count, - dt_type_desc_t* pTypeDesc ) + int32_t count, + dt_type_desc_t* pTypeDesc ) { dt_elem_desc_t* pElemDesc; ddt_elem_desc_t opt_elem; dt_stack_t* pOrigStack; dt_stack_t* pStack; /* pointer to the position on the stack */ int32_t pos_desc = 0; /* actual position in the description of the derived datatype */ - int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0; + int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1; int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity; OPAL_PTRDIFF_TYPE total_disp = 0, last_extent = 1, last_disp = 0; uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */ uint32_t i; + size_t last_length = 0; - pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) ); + pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) ); SAVE_STACK( pStack, -1, 0, count, 0 ); pTypeDesc->length = 2 * pData->desc.used + 1 /* for the fake OPAL_DATATYPE_END_LOOP at the end */; @@ -85,7 +86,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, pElemDesc++; nbElems++; if( --stack_pos >= 0 ) { /* still something to do ? 
*/ ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop); - pStartLoop->items = (pElemDesc - 1)->elem.count; + pStartLoop->items = end_loop->items; total_disp = pStack->disp; /* update the displacement position */ } pStack--; /* go down one position on the stack */ @@ -98,8 +99,8 @@ opal_datatype_optimize_short( opal_datatype_t* pData, int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) ); OPAL_PTRDIFF_TYPE loop_disp = pData->desc.desc[pos_desc + index].elem.disp; - continuity = ((last_disp + last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size) - == (total_disp + loop_disp)); + continuity = ((last_disp + (OPAL_PTRDIFF_TYPE)last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size) + == (total_disp + loop_disp)); if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { /* the loop is contiguous or composed by contiguous elements with a gap */ if( loop->extent == (OPAL_PTRDIFF_TYPE)end_loop->size ) { @@ -206,7 +207,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */ /* now here we have a basic datatype */ type = pData->desc.desc[pos_desc].elem.common.type; - continuity = ((last_disp + last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size) + continuity = ((last_disp + (OPAL_PTRDIFF_TYPE)last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size) == (total_disp + pData->desc.desc[pos_desc].elem.disp)); if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity && diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index 195bca48f1e..86c974adbb7 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * 
University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -500,6 +500,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + if( 0 == iov_len_local ) goto complete_loop; /* escape if we're done */ continue; } conv_ptr += rc * description[pos_desc].elem.extent; diff --git a/test/datatype/Makefile.am b/test/datatype/Makefile.am index 9c9aaa4a1a0..cd867134a4f 100644 --- a/test/datatype/Makefile.am +++ b/test/datatype/Makefile.am @@ -18,7 +18,7 @@ if PROJECT_OMPI MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw unpack_ooo ddt_pack external32 MPI_CHECKS = to_self endif -TESTS = opal_datatype_test $(MPI_TESTS) +TESTS = opal_datatype_test unpack_hetero $(MPI_TESTS) check_PROGRAMS = $(TESTS) $(MPI_CHECKS) @@ -79,5 +79,10 @@ external32_LDADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la +unpack_hetero_SOURCES = unpack_hetero.c +unpack_hetero_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) +unpack_hetero_LDADD = \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + distclean: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/datatype/position_noncontig.c b/test/datatype/position_noncontig.c index 12a15fa47a7..0fb94c224ab 100644 --- a/test/datatype/position_noncontig.c +++ b/test/datatype/position_noncontig.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The 
University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. @@ -23,7 +23,7 @@ /** * The purpose of this test is to simulate the multi-network packing and * unpacking process. The pack operation will happens in-order while the - * will be done randomly. Therefore, before each unpack the correct + * unpack will be done randomly. Therefore, before each unpack the correct * position in the user buffer has to be set. */ diff --git a/test/datatype/unpack_hetero.c b/test/datatype/unpack_hetero.c new file mode 100644 index 00000000000..48c9c1c2746 --- /dev/null +++ b/test/datatype/unpack_hetero.c @@ -0,0 +1,99 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/runtime/opal.h" +#include "opal/datatype/opal_datatype.h" +#include "opal/datatype/opal_datatype_internal.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/datatype/opal_datatype_prototypes.h" +#include "opal/util/arch.h" +#include <time.h> +#include <stdlib.h> +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#include <stdio.h> +#include <string.h> + +/* Compile with: +gcc -DHAVE_CONFIG_H -I. -I../../include -I../.. -I../../include -I../../../ompi-trunk/opal -I../../../ompi-trunk/orte -g opal_datatype_test.c -o opal_datatype_test +*/ + +uint32_t remote_arch = 0xffffffff; + +/** + * Main function. Call several tests and print-out the results. It tries to stress the convertor + * using difficult data-type constructions as well as strange segment sizes for the conversion. + * Usually, it is able to detect most of the data-type and convertor problems. Any modifications + * on the data-type engine should first pass all the tests from this file, before going into other + * tests.
+ */ +int main( int argc, char* argv[] ) +{ + opal_datatype_init(); + + /** + * Simulate a heterogeneous peer: flip the OPAL_ARCH_ISBIGENDIAN bit of the local architecture. + */ + remote_arch = opal_local_arch ^ OPAL_ARCH_ISBIGENDIAN; + + opal_convertor_t * pConv; + int sbuf[2], rbuf[2]; + size_t max_data; + struct iovec a; + uint32_t iov_count; + + sbuf[0] = 0x01000000; sbuf[1] = 0x02000000; + + printf( "\n\n#\n * TEST UNPACKING 1 int out of 1\n#\n\n" ); + + pConv = opal_convertor_create( remote_arch, 0 ); + rbuf[0] = -1; rbuf[1] = -1; + if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, 1, rbuf ) ) { + printf( "Cannot attach the datatype to a convertor\n" ); + return OPAL_ERROR; + } + + a.iov_base = sbuf; + a.iov_len = 4; + iov_count = 1; + max_data = 4; + opal_unpack_general( pConv, &a, &iov_count, &max_data ); + + assert(1 == rbuf[0]); + assert(-1 == rbuf[1]); + OBJ_RELEASE(pConv); + + printf( "\n\n#\n * TEST UNPACKING 1 int out of 2\n#\n\n" ); + pConv = opal_convertor_create( remote_arch, 0 ); + rbuf[0] = -1; rbuf[1] = -1; + if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, 2, rbuf ) ) { + printf( "Cannot attach the datatype to a convertor\n" ); + return OPAL_ERROR; + } + + + a.iov_base = sbuf; + a.iov_len = 4; + iov_count = 1; + max_data = 4; + opal_unpack_general( pConv, &a, &iov_count, &max_data ); + + assert(1 == rbuf[0]); + assert(-1 == rbuf[1]); + OBJ_RELEASE(pConv); + + /* clean up all data allocations */ + opal_datatype_finalize(); + opal_finalize(); + return OPAL_SUCCESS; +}