Skip to content

Commit cbf03b3

Browse files
authored
Topic/datatype (#3441)
* Don't overflow the internal datatype count. Change the type of the count to be a size_t (it does not alter the total size of the internal structures, so has no impact on the ABI). Signed-off-by: George Bosilca <[email protected]> * Optimize the datatype creation. The internal array of counts of predefined types is now only created when needed, which is either in a heterogeneous environment, or when one call get_elements. It saves space and makes the convertor creation a little faster in some cases. Rearrange the fields in the datatype description structs. The macro OPAL_DATATYPE_INIT_PTYPES_ARRAY had a bug, and the static array was only partially created. All predefined types should have the ptypes array created and initialized. Signed-off-by: George Bosilca <[email protected]> * Fix the boundary computation. Signed-off-by: George Bosilca <[email protected]> * test/datatype: add test for short unpack on heteregeneous cluster Signed-off-by: Gilles Gouaillardet <[email protected]> Signed-off-by: George Bosilca <[email protected]> * Trying to reduce the cost of creating a convertor. Signed-off-by: George Bosilca <[email protected]> * Respect the unpack boundaries. As Gilles suggested on #2535 the opal_unpack_general_function was unpacking based on the requested count and not on the amount of packed data provided. Fixes #2535. Signed-off-by: George Bosilca <[email protected]>
1 parent a66909b commit cbf03b3

20 files changed

+373
-166
lines changed

ompi/datatype/ompi_datatype_get_elements.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2013 The University of Tennessee and The University
6+
* Copyright (c) 2004-2017 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -25,6 +25,7 @@
2525

2626
#include "ompi/runtime/params.h"
2727
#include "ompi/datatype/ompi_datatype.h"
28+
#include "opal/datatype/opal_datatype_internal.h"
2829

2930
int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t *count)
3031
{
@@ -48,9 +49,10 @@ int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t
4849
there are no leftover bytes */
4950
if (!ompi_datatype_is_predefined(datatype)) {
5051
if (0 != internal_count) {
52+
opal_datatype_compute_ptypes(&datatype->super);
5153
/* count the basic elements in the datatype */
52-
for (i = 4, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) {
53-
total += datatype->super.btypes[i];
54+
for (i = OPAL_DATATYPE_FIRST_TYPE, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) {
55+
total += datatype->super.ptypes[i];
5456
}
5557
internal_count = total * internal_count;
5658
}

ompi/datatype/ompi_datatype_internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2009-2013 The University of Tennessee and The University
3+
* Copyright (c) 2009-2017 The University of Tennessee and The University
44
* of Tennessee Research Foundation. All rights
55
* reserved.
66
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
@@ -467,7 +467,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
467467
.name = OPAL_DATATYPE_INIT_NAME(TYPE ## SIZE), \
468468
.desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \
469469
.opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \
470-
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(TYPE ## SIZE) \
470+
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY(TYPE ## SIZE) \
471471
}
472472

473473
#define OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE_FORTRAN( TYPE, NAME, SIZE, ALIGN, FLAGS ) \

ompi/datatype/ompi_datatype_module.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -384,29 +384,30 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}};
384384
(PDST)->super.opt_desc = (PSRC)->super.opt_desc; \
385385
(PDST)->packed_description = (PSRC)->packed_description; \
386386
(PSRC)->packed_description = NULL; \
387-
memcpy( (PDST)->super.btypes, (PSRC)->super.btypes, \
388-
OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t) ); \
387+
/* transfer the ptypes */ \
388+
(PDST)->super.ptypes = (PSRC)->super.ptypes; \
389+
(PSRC)->super.ptypes = NULL; \
389390
} while(0)
390391

391392
#define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2, FLAGS) \
392393
do { \
393394
struct { type1 v1; type2 v2; } s[2]; \
394395
ompi_datatype_t *types[2], *ptype; \
395396
int bLength[2] = {1, 1}; \
396-
ptrdiff_t base, displ[2]; \
397+
ptrdiff_t base, displ[2]; \
397398
\
398399
types[0] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType1]; \
399400
types[1] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType2]; \
400-
base = (ptrdiff_t)(&(s[0])); \
401-
displ[0] = (ptrdiff_t)(&(s[0].v1)); \
401+
base = (ptrdiff_t)(&(s[0])); \
402+
displ[0] = (ptrdiff_t)(&(s[0].v1)); \
402403
displ[0] -= base; \
403-
displ[1] = (ptrdiff_t)(&(s[0].v2)); \
404+
displ[1] = (ptrdiff_t)(&(s[0].v2)); \
404405
displ[1] -= base; \
405406
\
406407
ompi_datatype_create_struct( 2, bLength, displ, types, &ptype ); \
407-
displ[0] = (ptrdiff_t)(&(s[1])); \
408+
displ[0] = (ptrdiff_t)(&(s[1])); \
408409
displ[0] -= base; \
409-
if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \
410+
if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \
410411
ptype->super.ub = displ[0]; /* force a new extent for the datatype */ \
411412
ptype->super.flags |= (FLAGS); \
412413
ptype->id = MPIDDT; \
@@ -736,7 +737,7 @@ void ompi_datatype_dump( const ompi_datatype_t* pData )
736737
(long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used,
737738
(long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb),
738739
(long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb),
739-
(int)pData->super.nbElems, (int)pData->super.btypes[OPAL_DATATYPE_LOOP], (int)pData->super.flags );
740+
(int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags );
740741
/* dump the flags */
741742
if( ompi_datatype_is_predefined(pData) ) {
742743
index += snprintf( buffer + index, length - index, "predefined " );

ompi/include/ompi/memchecker.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ static inline int memchecker_datatype(MPI_Datatype type)
366366
opal_memchecker_base_isdefined (&type->super.opt_desc.length, sizeof(opal_datatype_count_t));
367367
opal_memchecker_base_isdefined (&type->super.opt_desc.used, sizeof(opal_datatype_count_t));
368368
opal_memchecker_base_isdefined (&type->super.opt_desc.desc, sizeof(dt_elem_desc_t *));
369-
opal_memchecker_base_isdefined (&type->super.btypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t));
369+
if( NULL != type->super.ptypes )
370+
opal_memchecker_base_isdefined (&type->super.ptypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(size_t));
370371

371372
opal_memchecker_base_isdefined (&type->id, sizeof(int32_t));
372373
opal_memchecker_base_isdefined (&type->d_f_to_c_index, sizeof(int32_t));

opal/datatype/opal_convertor.c

Lines changed: 51 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2016 The University of Tennessee and The University
6+
* Copyright (c) 2004-2017 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@@ -43,9 +43,6 @@
4343
CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
4444
#endif
4545

46-
extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor,
47-
int starting_point, const int* sizes );
48-
4946
static void opal_convertor_construct( opal_convertor_t* convertor )
5047
{
5148
convertor->pStack = convertor->static_stack;
@@ -226,7 +223,7 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv,
226223
if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) {
227224
/**
228225
* We are doing conversion on a contiguous datatype on a homogeneous
229-
* environment. The convertor contain minimal informations, we only
226+
* environment. The convertor contain minimal information, we only
230227
* use the bConverted to manage the conversion.
231228
*/
232229
uint32_t i;
@@ -447,31 +444,49 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
447444
return rc;
448445
}
449446

447+
static size_t
448+
opal_datatype_compute_remote_size( const opal_datatype_t* pData,
449+
const size_t* sizes )
450+
{
451+
uint32_t typeMask = pData->bdt_used;
452+
size_t length = 0;
453+
454+
if( OPAL_UNLIKELY(NULL == pData->ptypes) ) {
455+
/* Allocate and fill the array of types used in the datatype description */
456+
opal_datatype_compute_ptypes( (opal_datatype_t*)pData );
457+
}
458+
459+
for( int i = OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) {
460+
if( typeMask & ((uint32_t)1 << i) ) {
461+
length += (pData->ptypes[i] * sizes[i]);
462+
typeMask ^= ((uint32_t)1 << i);
463+
}
464+
}
465+
return length;
466+
}
450467

451468
/**
452469
* Compute the remote size. If necessary remove the homogeneous flag
453470
* and redirect the convertor description toward the non-optimized
454471
* datatype representation.
455472
*/
456-
#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \
457-
{ \
458-
if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \
459-
opal_convertor_master_t* master; \
460-
int i; \
461-
uint32_t mask = datatype->bdt_used; \
462-
convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \
463-
master = convertor->master; \
464-
convertor->remote_size = 0; \
465-
for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \
466-
if( mask & ((uint32_t)1 << i) ) { \
467-
convertor->remote_size += (datatype->btypes[i] * \
468-
master->remote_sizes[i]); \
469-
mask ^= ((uint32_t)1 << i); \
470-
} \
471-
} \
472-
convertor->remote_size *= convertor->count; \
473-
convertor->use_desc = &(datatype->desc); \
474-
} \
473+
size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
474+
{
475+
opal_datatype_t* datatype = (opal_datatype_t*)pConvertor->pDesc;
476+
477+
pConvertor->remote_size = pConvertor->local_size;
478+
if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
479+
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
480+
pConvertor->use_desc = &(datatype->desc);
481+
if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {
482+
/* This is for a single datatype, we must update it with the count */
483+
pConvertor->remote_size = opal_datatype_compute_remote_size(datatype,
484+
pConvertor->master->remote_sizes);
485+
pConvertor->remote_size *= pConvertor->count;
486+
}
487+
}
488+
pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE;
489+
return pConvertor->remote_size;
475490
}
476491

477492
/**
@@ -483,29 +498,26 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
483498
*/
484499
#define OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \
485500
{ \
486-
uint32_t bdt_mask; \
487-
\
501+
convertor->local_size = count * datatype->size; \
502+
convertor->pBaseBuf = (unsigned char*)pUserBuf; \
503+
convertor->count = count; \
504+
convertor->pDesc = (opal_datatype_t*)datatype; \
505+
convertor->bConverted = 0; \
506+
convertor->use_desc = &(datatype->opt_desc); \
488507
/* If the data is empty we just mark the convertor as \
489508
* completed. With this flag set the pack and unpack functions \
490509
* will not do anything. \
491510
*/ \
492511
if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \
493-
convertor->flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED; \
512+
convertor->flags |= (OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED | CONVERTOR_HAS_REMOTE_SIZE); \
494513
convertor->local_size = convertor->remote_size = 0; \
495514
return OPAL_SUCCESS; \
496515
} \
497-
/* Compute the local in advance */ \
498-
convertor->local_size = count * datatype->size; \
499-
convertor->pBaseBuf = (unsigned char*)pUserBuf; \
500-
convertor->count = count; \
501516
\
502517
/* Grab the datatype part of the flags */ \
503518
convertor->flags &= CONVERTOR_TYPE_MASK; \
504519
convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \
505520
convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \
506-
convertor->pDesc = (opal_datatype_t*)datatype; \
507-
convertor->bConverted = 0; \
508-
convertor->use_desc = &(datatype->opt_desc); \
509521
\
510522
convertor->remote_size = convertor->local_size; \
511523
if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \
@@ -516,9 +528,8 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
516528
} \
517529
} \
518530
\
519-
bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \
520-
OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype, \
521-
bdt_mask ); \
531+
assert( (convertor)->pDesc == (datatype) ); \
532+
opal_convertor_compute_remote_size( convertor ); \
522533
assert( NULL != convertor->use_desc->desc ); \
523534
/* For predefined datatypes (contiguous) do nothing more */ \
524535
/* if checksum is enabled then always continue */ \
@@ -530,7 +541,7 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
530541
} \
531542
convertor->flags &= ~CONVERTOR_NO_OP; \
532543
{ \
533-
uint32_t required_stack_length = datatype->btypes[OPAL_DATATYPE_LOOP] + 1; \
544+
uint32_t required_stack_length = datatype->loops + 1; \
534545
\
535546
if( required_stack_length > convertor->stack_size ) { \
536547
assert(convertor->pStack == convertor->static_stack); \
@@ -714,8 +725,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos,
714725
opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index,
715726
(int)pStack[stack_pos].count, (long)pStack[stack_pos].disp );
716727
if( pStack->index != -1 )
717-
opal_output( 0, "\t[desc count %d disp %ld extent %ld]\n",
718-
pDesc[pStack[stack_pos].index].elem.count,
728+
opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n",
729+
(unsigned long)pDesc[pStack[stack_pos].index].elem.count,
719730
(long)pDesc[pStack[stack_pos].index].elem.disp,
720731
(long)pDesc[pStack[stack_pos].index].elem.extent );
721732
else

0 commit comments

Comments
 (0)