Skip to content

Commit 4f754d0

Browse files
committed
Optimized datatype description.
Move toward a base type of vector (count, type, blocklen, extent, disp) with disp and extent applying toward the count repetition and blocklen being a contiguous block of memory of type `type`. Implement 2 optimizations on this description used during type_commit: - collapse: successive similar datatype descriptions are collapsed together with an increased count. - fusion: fuse successive datatype descriptions in order to minimize the number of resulting memcpy during pack/unpack. Fixes at the OMPI datatype level including: - Fix the create_hindexed and vector creation. - Fix the handling of [get|set]_elements and _count. - Correctly compute the displacement for block indexed types. - Support the MPI_LB and MPI_UB deprecation, aka. OMPI_ENABLE_MPI1_COMPAT. Signed-off-by: George Bosilca <[email protected]>
1 parent f68b06e commit 4f754d0

15 files changed

+548
-369
lines changed

ompi/datatype/ompi_datatype.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2009-2013 The University of Tennessee and The University
3+
* Copyright (c) 2009-2019 The University of Tennessee and The University
44
* of Tennessee Research Foundation. All rights
55
* reserved.
66
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.

ompi/datatype/ompi_datatype_create_indexed.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,10 @@ int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const
8787
return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType);
8888
}
8989

90+
ompi_datatype_type_extent( oldType, &extent );
9091
disp = pDisp[i];
9192
dLength = pBlockLength[i];
9293
endat = disp + dLength * extent;
93-
ompi_datatype_type_extent( oldType, &extent );
9494

9595
pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) );
9696
for( i += 1; i < count; i++ ) {
@@ -162,17 +162,17 @@ int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdi
162162
pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) );
163163
disp = pDisp[0];
164164
dLength = bLength;
165-
endat = disp + dLength;
165+
endat = disp + dLength * extent;
166166
for( i = 1; i < count; i++ ) {
167167
if( endat == pDisp[i] ) {
168168
/* contiguous with the previous */
169169
dLength += bLength;
170-
endat += bLength;
170+
endat += bLength * extent;
171171
} else {
172172
ompi_datatype_add( pdt, oldType, dLength, disp, extent );
173173
disp = pDisp[i];
174174
dLength = bLength;
175-
endat = disp + bLength;
175+
endat = disp + bLength * extent;
176176
}
177177
}
178178
ompi_datatype_add( pdt, oldType, dLength, disp, extent );

ompi/datatype/ompi_datatype_external.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2016 The University of Tennessee and The University
6+
* Copyright (c) 2004-2019 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -26,7 +26,6 @@
2626
#include <stdio.h>
2727

2828
#include "ompi/runtime/params.h"
29-
#include "ompi/communicator/communicator.h"
3029
#include "ompi/datatype/ompi_datatype.h"
3130
#include "opal/datatype/opal_convertor.h"
3231

opal/datatype/opal_convertor.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2017 The University of Tennessee and The University
6+
* Copyright (c) 2004-2019 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@@ -324,8 +324,9 @@ int32_t opal_convertor_unpack( opal_convertor_t* pConv,
324324
return pConv->fAdvance( pConv, iov, out_size, max_data );
325325
}
326326

327-
static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
328-
size_t starting_point, const size_t* sizes )
327+
static inline int
328+
opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
329+
size_t starting_point, const size_t* sizes )
329330
{
330331
dt_stack_t* pStack; /* pointer to the position on the stack */
331332
const opal_datatype_t* pData = pConvertor->pDesc;
@@ -349,7 +350,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
349350
pStack[0].disp = count * extent;
350351

351352
/* now compute the number of pending bytes */
352-
count = starting_point - count * pData->size;
353+
count = starting_point % pData->size;
353354
/**
354355
* We save the current displacement starting from the beginning
355356
* of this data.
@@ -370,9 +371,9 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
370371
return OPAL_SUCCESS;
371372
}
372373

373-
static inline
374-
int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
375-
const size_t* sizes )
374+
static inline int
375+
opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
376+
const size_t* sizes )
376377
{
377378
dt_stack_t* pStack = convertor->pStack;
378379
dt_elem_desc_t* pElems;
@@ -402,7 +403,7 @@ int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
402403
pStack[1].count = pElems[0].loop.loops;
403404
pStack[1].type = OPAL_DATATYPE_LOOP;
404405
} else {
405-
pStack[1].count = pElems[0].elem.count;
406+
pStack[1].count = pElems[0].elem.count * pElems[0].elem.blocklen;
406407
pStack[1].type = pElems[0].elem.common.type;
407408
}
408409
return OPAL_SUCCESS;

opal/datatype/opal_datatype.h

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,41 @@ opal_datatype_is_contiguous_memory_layout( const opal_datatype_t* datatype, int3
224224
}
225225

226226

227-
OPAL_DECLSPEC void opal_datatype_dump( const opal_datatype_t* pData );
227+
OPAL_DECLSPEC void
228+
opal_datatype_dump( const opal_datatype_t* pData );
229+
228230
/* data creation functions */
229-
OPAL_DECLSPEC int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * dest_type );
230-
OPAL_DECLSPEC int32_t opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType, opal_datatype_t** newType );
231-
OPAL_DECLSPEC int32_t opal_datatype_resize( opal_datatype_t* type, ptrdiff_t lb, ptrdiff_t extent );
232-
OPAL_DECLSPEC int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd, size_t count,
233-
ptrdiff_t disp, ptrdiff_t extent );
231+
232+
/**
233+
* Create a duplicate of the source datatype.
234+
*/
235+
OPAL_DECLSPEC int32_t
236+
opal_datatype_clone( const opal_datatype_t* src_type,
237+
opal_datatype_t* dest_type );
238+
/**
239+
* A contiguous array of identical datatypes.
240+
*/
241+
OPAL_DECLSPEC int32_t
242+
opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType,
243+
opal_datatype_t** newType );
244+
/**
245+
* Add a new datatype to the base type description. The count is the number of
246+
* repetitions of the same element to be added, and the extent is the extent
247+
* of each element. The displacement is the initial displacement of the
248+
* first element.
249+
*/
250+
OPAL_DECLSPEC int32_t
251+
opal_datatype_add( opal_datatype_t* pdtBase,
252+
const opal_datatype_t* pdtAdd, size_t count,
253+
ptrdiff_t disp, ptrdiff_t extent );
254+
255+
/**
256+
* Alter the lb and extent of an existing datatype in place.
257+
*/
258+
OPAL_DECLSPEC int32_t
259+
opal_datatype_resize( opal_datatype_t* type,
260+
ptrdiff_t lb,
261+
ptrdiff_t extent );
234262

235263
static inline int32_t
236264
opal_datatype_type_lb( const opal_datatype_t* pData, ptrdiff_t* disp )

opal/datatype/opal_datatype_add.c

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2017 The University of Tennessee and The University
6+
* Copyright (c) 2004-2019 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@@ -281,15 +281,23 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
281281
if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) {
282282
if( NULL != pdtBase->ptypes )
283283
pdtBase->ptypes[pdtAdd->id] += count;
284+
285+
pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED);
284286
pLast->elem.common.type = pdtAdd->id;
285-
pLast->elem.count = count;
286287
pLast->elem.disp = disp;
287-
pLast->elem.extent = extent;
288-
pdtBase->desc.used++;
289-
pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED);
290-
if( (extent != (ptrdiff_t)pdtAdd->size) && (count > 1) ) { /* gaps around the datatype */
291-
pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS);
288+
pLast->elem.extent = count * extent;
289+
/* assume predefined datatypes without extent, aka. contiguous */
290+
pLast->elem.count = 1;
291+
pLast->elem.blocklen = count;
292+
if( extent != (ptrdiff_t)pdtAdd->size ) { /* not contiguous: let's fix */
293+
pLast->elem.count = count;
294+
pLast->elem.blocklen = 1;
295+
pLast->elem.extent = extent;
296+
if( count > 1 ) { /* gaps around the predefined datatype */
297+
pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS);
298+
}
292299
}
300+
pdtBase->desc.used++;
293301
} else {
294302
/* keep trace of the total number of basic datatypes in the datatype definition */
295303
pdtBase->loops += pdtAdd->loops;
@@ -299,13 +307,40 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
299307
for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
300308
if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]);
301309
}
302-
if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) &&
303-
(extent == pdtAdd->desc.desc[0].elem.extent) ){
310+
if( 1 == pdtAdd->desc.used ) {
304311
pLast->elem = pdtAdd->desc.desc[0].elem;
305-
pLast->elem.count *= count;
306312
pLast->elem.disp += disp;
313+
if( 1 == count ) {
314+
/* Extent only has a meaning when there are multiple elements. Bail out */
315+
} else if( 1 == pLast->elem.count ) {
316+
/* The size and true_extent of the added datatype are identical, signaling a datatype
317+
* that is mostly contiguous with the exception of the initial and final gaps. These
318+
* gaps do not matter here as they will be amended (the initial gaps being shifted by the
319+
* new displacement and the final gap being replaced with the new gap).
320+
*/
321+
if( pdtAdd->desc.desc[0].elem.extent == extent ) {
322+
/* pure bliss everything is fully contiguous and we can collapse
323+
* everything by updating the blocklen and extent
324+
*/
325+
pLast->elem.blocklen *= count;
326+
pLast->elem.extent *= count;
327+
} else {
328+
pLast->elem.count = count;
329+
pLast->elem.extent = extent;
330+
}
331+
} else if( extent == (ptrdiff_t)(pLast->elem.count * pLast->elem.extent) ) {
332+
/* It's just a repetition of the same element, increase the count */
333+
pLast->elem.count *= count;
334+
} else {
335+
/* No luck here, no optimization can be applied. Fall back to the
336+
* normal case where we add a loop around the datatype.
337+
*/
338+
goto build_loop;
339+
}
307340
pdtBase->desc.used++;
308341
} else {
342+
343+
build_loop:
309344
/* if the extent of the datatype is the same as the extent of the loop
310345
* description of the datatype then we simply have to update the main loop.
311346
*/

opal/datatype/opal_datatype_copy.h

Lines changed: 28 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -48,37 +48,37 @@ static inline void _predefined_data( const dt_elem_desc_t* ELEM,
4848
unsigned char* DESTINATION,
4949
size_t* SPACE )
5050
{
51-
size_t _copy_count = (COUNT);
52-
size_t _copy_blength;
5351
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
5452
unsigned char* _source = (SOURCE) + _elem->disp;
5553
unsigned char* _destination = (DESTINATION) + _elem->disp;
54+
size_t total_count = _elem->count * _elem->blocklen;
55+
size_t do_now, do_now_bytes;
5656

57-
_copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size;
57+
assert( (COUNT) == total_count);
58+
assert( total_count <= ((*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size) );
5859

59-
if( _copy_blength == (size_t)_elem->extent ) {
60-
_copy_blength *= _copy_count;
61-
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE),
62-
(DATATYPE), (TOTAL_COUNT) );
63-
/* the extent and the size of the basic datatype are equals */
64-
DO_DEBUG( opal_output( 0, "copy 1. %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n",
65-
STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, _copy_blength, *(SPACE) ); );
66-
MEM_OP( _destination, _source, _copy_blength );
67-
_source += _copy_blength;
68-
_destination += _copy_blength;
69-
} else {
70-
for(size_t _i = 0; _i < _copy_count; _i++ ) {
71-
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE),
72-
(DATATYPE), (TOTAL_COUNT) );
73-
DO_DEBUG( opal_output( 0, "copy 2. %s( %p, %p, %lu ) => space %lu\n",
74-
STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); );
75-
MEM_OP( _destination, _source, _copy_blength );
76-
_source += _elem->extent;
60+
/* We don't need a prologue and epilogue here as we are __always__ working
61+
* with full copies of the data description.
62+
*/
63+
64+
/**
65+
* Compute how many full blocklen we need to do and do them.
66+
*/
67+
do_now = _elem->count;
68+
if( 0 != do_now ) {
69+
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
70+
for(size_t _i = 0; _i < do_now; _i++ ) {
71+
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE),
72+
(DATATYPE), (TOTAL_COUNT) );
73+
DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n",
74+
STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) ); );
75+
MEM_OP( _destination, _source, do_now_bytes );
7776
_destination += _elem->extent;
77+
_source += _elem->extent;
78+
*(SPACE) -= do_now_bytes;
7879
}
79-
_copy_blength *= _copy_count;
80+
(COUNT) -= total_count;
8081
}
81-
*(SPACE) -= _copy_blength;
8282
}
8383

8484
static inline void _contiguous_loop( const dt_elem_desc_t* ELEM,
@@ -147,12 +147,10 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i
147147
if( (ptrdiff_t)datatype->size == extent ) { /* all contiguous == no gaps around */
148148
size_t total_length = iov_len_local;
149149
size_t memop_chunk = opal_datatype_memop_block_size;
150+
OPAL_DATATYPE_SAFEGUARD_POINTER( source, iov_len_local,
151+
(unsigned char*)source_base, datatype, count );
150152
while( total_length > 0 ) {
151153
if( memop_chunk > total_length ) memop_chunk = total_length;
152-
OPAL_DATATYPE_SAFEGUARD_POINTER( destination, memop_chunk,
153-
(unsigned char*)destination_base, datatype, count );
154-
OPAL_DATATYPE_SAFEGUARD_POINTER( source, memop_chunk,
155-
(unsigned char*)source_base, datatype, count );
156154
DO_DEBUG( opal_output( 0, "copy c1. %s( %p, %p, %lu ) => space %lu\n",
157155
STRINGIFY(MEM_OP_NAME), (void*)destination, (void*)source, (unsigned long)memop_chunk, (unsigned long)total_length ); );
158156
MEM_OP( destination, source, memop_chunk );
@@ -184,17 +182,12 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i
184182
pos_desc = 0;
185183
stack_pos = 0;
186184

187-
if( datatype->opt_desc.desc != NULL ) {
188-
description = datatype->opt_desc.desc;
189-
} else {
185+
description = datatype->opt_desc.desc;
186+
if( NULL == description ) {
190187
description = datatype->desc.desc;
191188
}
192189

193-
if( description[0].elem.common.type == OPAL_DATATYPE_LOOP )
194-
count_desc = description[0].loop.loops;
195-
else
196-
count_desc = description[0].elem.count;
197-
pElem = &(description[pos_desc]);
190+
UPDATE_INTERNAL_COUNTERS( description, 0, pElem, count_desc );
198191

199192
while( 1 ) {
200193
while( OPAL_LIKELY(pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) ) {

opal/datatype/opal_datatype_get_count.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,14 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t
6969
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
7070
/* now here we have a basic datatype */
7171
const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
72-
local_size = pElems[pos_desc].elem.count * basic_type->size;
72+
local_size = (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen) * basic_type->size;
7373
if( local_size >= iSize ) {
7474
local_size = iSize / basic_type->size;
7575
nbElems += (int32_t)local_size;
7676
iSize -= local_size * basic_type->size;
7777
return (iSize == 0 ? nbElems : -1);
7878
}
79-
nbElems += pElems[pos_desc].elem.count;
79+
nbElems += (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen);
8080
iSize -= local_size;
8181
pos_desc++; /* advance to the next data */
8282
}
@@ -131,7 +131,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
131131
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
132132
/* now here we have a basic datatype */
133133
const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
134-
local_length = pElems[pos_desc].elem.count;
134+
local_length = (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen);
135135
if( local_length >= count ) {
136136
*length += count * basic_type->size;
137137
return 0;
@@ -188,8 +188,8 @@ int opal_datatype_compute_ptypes( opal_datatype_t* datatype )
188188
}
189189
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
190190
/* now here we have a basic datatype */
191-
datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count;
192-
nbElems += pElems[pos_desc].elem.count;
191+
datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen;
192+
nbElems += pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen;
193193

194194
DUMP( " compute_ptypes-add: type %d count %"PRIsize_t" (total type %"PRIsize_t" total %lld)\n",
195195
pElems[pos_desc].elem.common.type, datatype->ptypes[pElems[pos_desc].elem.common.type],

0 commit comments

Comments
 (0)