Skip to content

Commit 6e0ce95

Browse files
committed
Rework the datatype commit.
Optimize contiguous loops by collapsing them into a single element. During datatype optimization collapse similar elements into larger blocks. Signed-off-by: George Bosilca <[email protected]>
1 parent ecf7d9a commit 6e0ce95

File tree

2 files changed

+49
-23
lines changed

2 files changed

+49
-23
lines changed

opal/datatype/opal_datatype_internal.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,8 @@ union dt_elem_desc {
217217

218218

219219
/**
220-
* Create one or more elements depending on the value of _count. If the value
221-
* is too large for the type of elem.count then use oth the elem.count and
222-
* elem.blocklen to create it. If the number is prime then create a second
223-
* element to account for the difference.
220+
* Create an element entry in the description. If the element is contiguous
221+
* (its extent equals blocklen times the basic type size), fold the count into the blocklen and the extent, leaving count == 1.
224222
*/
225223
#define CREATE_ELEM(_place, _type, _flags, _blocklen, _count, _disp, _extent) \
226224
do { \
@@ -230,6 +228,12 @@ union dt_elem_desc {
230228
(_place)->elem.count = (_count); \
231229
(_place)->elem.extent = (_extent); \
232230
(_place)->elem.disp = (_disp); \
231+
if( _extent == (ptrdiff_t)(_blocklen * opal_datatype_basicDatatypes[_type]->size) ) { \
232+
/* collapse it into a single large blocklen */ \
233+
(_place)->elem.blocklen *= _count; \
234+
(_place)->elem.extent *= _count; \
235+
(_place)->elem.count = 1; \
236+
} \
233237
} while(0)
234238
/*
235239
* This array holds the descriptions desc.desc[2] of the predefined basic datatypes.

opal/datatype/opal_datatype_optimize.c

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -60,27 +60,27 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
6060
CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC,
6161
last.blocklen, last.count, last.disp, last.extent );
6262
pElemDesc++; nbElems++;
63-
last.disp += last.count;
6463
last.count= 0;
6564
}
6665
CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1, /* # of elems in this loop */
6766
end_loop->first_elem_disp, end_loop->size, end_loop->common.flags );
68-
pElemDesc++; nbElems++;
6967
if( --stack_pos >= 0 ) { /* still something to do ? */
7068
ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop);
71-
pStartLoop->items = end_loop->items;
69+
pStartLoop->items = pElemDesc->end_loop.items;
7270
total_disp = pStack->disp; /* update the displacement position */
7371
}
72+
pElemDesc++; nbElems++;
7473
pStack--; /* go down one position on the stack */
7574
pos_desc++;
7675
continue;
7776
}
7877
if( OPAL_DATATYPE_LOOP == pData->desc.desc[pos_desc].elem.common.type ) {
7978
ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
80-
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
8179
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
8280

8381
if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
82+
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
83+
8484
assert(pData->desc.desc[pos_desc + index].elem.disp == end_loop->first_elem_disp);
8585
compress.common.flags = loop->common.flags;
8686
compress.common.type = pData->desc.desc[pos_desc + index].elem.common.type;
@@ -99,7 +99,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
9999
compress.count = loop->loops;
100100
compress.extent = loop->extent;
101101
compress.disp = end_loop->first_elem_disp;
102-
102+
if( compress.extent == (ptrdiff_t)(compress.blocklen * opal_datatype_basicDatatypes[compress.common.type]->size) ) {
103+
/* The compressed element is contiguous: collapse it into a single large blocklen */
104+
compress.blocklen *= compress.count;
105+
compress.extent *= compress.count;
106+
compress.count = 1;
107+
}
103108
/**
104109
* The current loop has been compressed and can now be treated as if it
105110
* was a data element. We can now look if it can be fused with last,
@@ -161,26 +166,43 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
161166
}
162167

163168
/* are the two elements compatible: i.e. their blocks cover the same number of bytes and they
164-
* can be merged together by increasing the count. This optimizes the memory
165-
* required for storing the datatype description.
169+
* can be merged together by increasing the count, and/or changing the extent.
166170
*/
167-
if( ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ==
168-
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) &&
169-
(current->disp == (last.disp + (ptrdiff_t)last.count * last.extent)) &&
170-
((current->count == 1) || (last.extent == current->extent)) ) {
171-
last.count += current->count;
172-
/* find the lowest common denomitaor type */
171+
if( (last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ==
172+
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size) ) {
173+
ddt_elem_desc_t save = last; /* safekeep the type and blocklen */
173174
if( last.common.type != current->common.type ) {
174175
last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size;
175176
last.common.type = OPAL_DATATYPE_UINT1;
176177
}
177-
/* maximize the contiguous pieces */
178-
if( last.extent == (ptrdiff_t)(last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ) {
179-
last.blocklen *= last.count;
180-
last.count = 1;
181-
last.extent = last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size;
178+
179+
if( 1 == last.count ) {
180+
/* last has count == 1, so its extent is not used yet: we can recompute it and merge the two elements if their displacements match */
181+
if( 1 == current->count ) {
182+
last.extent = current->disp - last.disp;
183+
last.count++;
184+
continue;
185+
}
186+
/* can we compute a matching displacement ? */
187+
if( (last.disp + current->extent) == current->disp ) {
188+
last.extent = current->extent;
189+
last.count = current->count + 1;
190+
continue;
191+
}
182192
}
183-
continue; /* next data */
193+
if( (last.extent * (ptrdiff_t)last.count + last.disp) == current->disp ) {
194+
if( 1 == current->count ) {
195+
last.count++;
196+
continue;
197+
}
198+
if( last.extent == current->extent ) {
199+
last.count += current->count;
200+
continue;
201+
}
202+
}
203+
last.blocklen = save.blocklen;
204+
last.common.type = save.common.type;
205+
/* try other optimizations */
184206
}
185207
/* are the elements fusible, such that we can fuse the last blocklen of one with the first
186208
* blocklen of the other.

0 commit comments

Comments
 (0)