@@ -60,27 +60,27 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
60
60
CREATE_ELEM ( pElemDesc , last .common .type , OPAL_DATATYPE_FLAG_BASIC ,
61
61
last .blocklen , last .count , last .disp , last .extent );
62
62
pElemDesc ++ ; nbElems ++ ;
63
- last .disp += last .count ;
64
63
last .count = 0 ;
65
64
}
66
65
CREATE_LOOP_END ( pElemDesc , nbElems - pStack -> index + 1 , /* # of elems in this loop */
67
66
end_loop -> first_elem_disp , end_loop -> size , end_loop -> common .flags );
68
- pElemDesc ++ ; nbElems ++ ;
69
67
if ( -- stack_pos >= 0 ) { /* still something to do ? */
70
68
ddt_loop_desc_t * pStartLoop = & (pTypeDesc -> desc [pStack -> index - 1 ].loop );
71
- pStartLoop -> items = end_loop -> items ;
69
+ pStartLoop -> items = pElemDesc -> end_loop . items ;
72
70
total_disp = pStack -> disp ; /* update the displacement position */
73
71
}
72
+ pElemDesc ++ ; nbElems ++ ;
74
73
pStack -- ; /* go down one position on the stack */
75
74
pos_desc ++ ;
76
75
continue ;
77
76
}
78
77
if ( OPAL_DATATYPE_LOOP == pData -> desc .desc [pos_desc ].elem .common .type ) {
79
78
ddt_loop_desc_t * loop = (ddt_loop_desc_t * )& (pData -> desc .desc [pos_desc ]);
80
- ddt_endloop_desc_t * end_loop = (ddt_endloop_desc_t * )& (pData -> desc .desc [pos_desc + loop -> items ]);
81
79
int index = GET_FIRST_NON_LOOP ( & (pData -> desc .desc [pos_desc ]) );
82
80
83
81
if ( loop -> common .flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
82
+ ddt_endloop_desc_t * end_loop = (ddt_endloop_desc_t * )& (pData -> desc .desc [pos_desc + loop -> items ]);
83
+
84
84
assert (pData -> desc .desc [pos_desc + index ].elem .disp == end_loop -> first_elem_disp );
85
85
compress .common .flags = loop -> common .flags ;
86
86
compress .common .type = pData -> desc .desc [pos_desc + index ].elem .common .type ;
@@ -99,7 +99,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
99
99
compress .count = loop -> loops ;
100
100
compress .extent = loop -> extent ;
101
101
compress .disp = end_loop -> first_elem_disp ;
102
-
102
+ if ( compress .extent == (ptrdiff_t )(compress .blocklen * opal_datatype_basicDatatypes [compress .common .type ]-> size ) ) {
103
+ /* The compressed element is contiguous: collapse it into a single large blocklen */
104
+ compress .blocklen *= compress .count ;
105
+ compress .extent *= compress .count ;
106
+ compress .count = 1 ;
107
+ }
103
108
/**
104
109
* The current loop has been compressed and can now be treated as if it
105
110
* was a data element. We can now look if it can be fused with last,
@@ -161,26 +166,43 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
161
166
}
162
167
163
168
/* are the two elements compatible: aka they have very similar values and they
164
- * can be merged together by increasing the count. This optimizes the memory
165
- * required for storing the datatype description.
169
+ * can be merged together by increasing the count, and/or changing the extent.
166
170
*/
167
- if ( ((last .blocklen * opal_datatype_basicDatatypes [last .common .type ]-> size ) ==
168
- (current -> blocklen * opal_datatype_basicDatatypes [current -> common .type ]-> size )) &&
169
- (current -> disp == (last .disp + (ptrdiff_t )last .count * last .extent )) &&
170
- ((current -> count == 1 ) || (last .extent == current -> extent )) ) {
171
- last .count += current -> count ;
172
- /* find the lowest common denomitaor type */
171
+ if ( (last .blocklen * opal_datatype_basicDatatypes [last .common .type ]-> size ) ==
172
+ (current -> blocklen * opal_datatype_basicDatatypes [current -> common .type ]-> size ) ) {
173
+ ddt_elem_desc_t save = last ; /* safekeep the type and blocklen */
173
174
if ( last .common .type != current -> common .type ) {
174
175
last .blocklen *= opal_datatype_basicDatatypes [last .common .type ]-> size ;
175
176
last .common .type = OPAL_DATATYPE_UINT1 ;
176
177
}
177
- /* maximize the contiguous pieces */
178
- if ( last .extent == (ptrdiff_t )(last .blocklen * opal_datatype_basicDatatypes [last .common .type ]-> size ) ) {
179
- last .blocklen *= last .count ;
180
- last .count = 1 ;
181
- last .extent = last .blocklen * opal_datatype_basicDatatypes [last .common .type ]-> size ;
178
+
179
+ if ( 1 == last .count ) {
180
+ /* we can ignore the extent of the element with count == 1 and merge them together if their displacements match */
181
+ if ( 1 == current -> count ) {
182
+ last .extent = current -> disp - last .disp ;
183
+ last .count ++ ;
184
+ continue ;
185
+ }
186
+ /* can we compute a matching displacement ? */
187
+ if ( (last .disp + current -> extent ) == current -> disp ) {
188
+ last .extent = current -> extent ;
189
+ last .count = current -> count + 1 ;
190
+ continue ;
191
+ }
182
192
}
183
- continue ; /* next data */
193
+ if ( (last .extent * (ptrdiff_t )last .count + last .disp ) == current -> disp ) {
194
+ if ( 1 == current -> count ) {
195
+ last .count ++ ;
196
+ continue ;
197
+ }
198
+ if ( last .extent == current -> extent ) {
199
+ last .count += current -> count ;
200
+ continue ;
201
+ }
202
+ }
203
+ last .blocklen = save .blocklen ;
204
+ last .common .type = save .common .type ;
205
+ /* try other optimizations */
184
206
}
185
207
/* are the elements fusionable such that we can fusion the last blocklen of one with the first
186
208
* blocklen of the other.
0 commit comments