@@ -153,7 +153,7 @@ int32_t opal_unpack_homogeneous_contig_function(opal_convertor_t *pConv, struct
153
153
}
154
154
}
155
155
}
156
- * out_size = iov_idx ; /* we only reach this line after the for loop succesfully complete */
156
+ * out_size = iov_idx ; /* we only reach this line after the for loop successfully completes */
157
157
* max_data = pConv -> bConverted - initial_bytes_converted ;
158
158
if (pConv -> bConverted == pConv -> local_size ) {
159
159
pConv -> flags |= CONVERTOR_COMPLETED ;
@@ -183,36 +183,38 @@ static inline void opal_unpack_partial_datatype(opal_convertor_t *pConvertor, dt
183
183
unsigned char * user_data = * user_buffer + pElem -> elem .disp ;
184
184
size_t count_desc = 1 ;
185
185
size_t data_length = opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size ;
186
-
187
- DO_DEBUG (opal_output (0 ,
188
- "unpack partial data start %lu end %lu data_length %lu user %p\n"
189
- "\tbConverted %lu total_length %lu count %ld\n" ,
190
- (unsigned long ) start_position , (unsigned long ) start_position + length ,
191
- (unsigned long ) data_length , (void * ) * user_buffer ,
192
- (unsigned long ) pConvertor -> bConverted ,
193
- (unsigned long ) pConvertor -> local_size , pConvertor -> count ););
194
-
195
- /* Find a byte that is not used in the partial buffer */
196
- find_unused_byte :
197
- for (size_t i = 0 ; i < length ; i ++ ) {
198
- if (unused_byte == partial_data [i ]) {
186
+ dt_elem_desc_t single_elem = { .elem = { .common = pElem -> elem .common , .count = 1 , .blocklen = 1 ,
187
+ .extent = data_length , /* advance by a full data element */
188
+ .disp = pElem -> elem .disp } };
189
+ DO_DEBUG ( opal_output ( 0 , "unpack partial data start %lu end %lu data_length %lu user %p\n"
190
+ "\tbConverted %lu total_length %lu count %ld\n" ,
191
+ (unsigned long )start_position , (unsigned long )start_position + length ,
192
+ (unsigned long )data_length , (void * )* user_buffer ,
193
+ (unsigned long )pConvertor -> bConverted ,
194
+ (unsigned long )pConvertor -> local_size , pConvertor -> count ); );
195
+
196
+ /* Find a byte value that is not used in the partial buffer. We use it as a marker
197
+ * to identify what has not been modified by the unpack call. */
198
+ find_unused_byte :
199
+ for (size_t i = 0 ; i < length ; i ++ ) {
200
+ if ( unused_byte == partial_data [i ] ) {
199
201
unused_byte -- ;
200
202
goto find_unused_byte ;
201
203
}
202
204
}
203
205
204
- /* Copy and fill the rest of the buffer with the unused byte */
205
- memset (temporary , unused_byte , data_length );
206
- MEMCPY (temporary + start_position , partial_data , length );
206
+ /* Prepare a full element of the predefined type, by populating an entire type
207
+ * with the unused byte and then put the partial data at the right position. */
208
+ memset ( temporary , unused_byte , data_length );
209
+ MEMCPY ( temporary + start_position , partial_data , length );
207
210
211
+ /* Save the original content of the user memory */
208
212
#if OPAL_CUDA_SUPPORT
209
213
/* In the case where the data is being unpacked from device memory, need to
210
- * use the special host to device memory copy. Note this code path was only
211
- * seen on large receives of noncontiguous data via buffered sends. */
212
- pConvertor -> cbmemcpy (saved_data , user_data , data_length , pConvertor );
214
+ * use the special host to device memory copy. */
215
+ pConvertor -> cbmemcpy (saved_data , user_data , data_length , pConvertor );
213
216
#else
214
- /* Save the content of the user memory */
215
- MEMCPY (saved_data , user_data , data_length );
217
+ MEMCPY ( saved_data , user_data , data_length );
216
218
#endif
217
219
218
220
/* Then unpack the data into the user memory */
@@ -222,14 +224,11 @@ static inline void opal_unpack_partial_datatype(opal_convertor_t *pConvertor, dt
222
224
/* reload the length as it is reset by the macro */
223
225
data_length = opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size ;
224
226
225
- /* For every occurrence of the unused byte move data from the saved
226
- * buffer back into the user memory.
227
- */
227
+ /* Rebuild the data by pulling back the unmodified bytes from the original
228
+ * content in the user memory. */
228
229
#if OPAL_CUDA_SUPPORT
229
230
/* Need to copy the modified user_data again so we can see which
230
- * bytes need to be converted back to their original values. Note
231
- * this code path was only seen on large receives of noncontiguous
232
- * data via buffered sends. */
231
+ * bytes need to be converted back to their original values. */
233
232
{
234
233
char resaved_data [16 ];
235
234
pConvertor -> cbmemcpy (resaved_data , user_data , data_length , pConvertor );
@@ -271,9 +270,8 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct
271
270
size_t iov_len_local ;
272
271
uint32_t iov_count ;
273
272
274
- DO_DEBUG (opal_output (0 , "opal_convertor_generic_simple_unpack( %p, {%p, %lu}, %u )\n" ,
275
- (void * ) pConvertor , (void * ) iov [0 ].iov_base ,
276
- (unsigned long ) iov [0 ].iov_len , * out_size ););
273
+ DO_DEBUG ( opal_output ( 0 , "opal_convertor_generic_simple_unpack( %p, iov[%u] = {%p, %lu} )\n" ,
274
+ (void * )pConvertor , * out_size , (void * )iov [0 ].iov_base , (unsigned long )iov [0 ].iov_len ); );
277
275
278
276
description = pConvertor -> use_desc -> desc ;
279
277
@@ -300,26 +298,37 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct
300
298
iov_ptr = (unsigned char * ) iov [iov_count ].iov_base ;
301
299
iov_len_local = iov [iov_count ].iov_len ;
302
300
303
- if (0 != pConvertor -> partial_length ) {
304
- size_t element_length = opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size ;
305
- size_t missing_length = element_length - pConvertor -> partial_length ;
306
-
307
- assert (pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA );
308
- COMPUTE_CSUM (iov_ptr , missing_length , pConvertor );
309
- opal_unpack_partial_datatype (pConvertor , pElem , iov_ptr , pConvertor -> partial_length ,
310
- (size_t )(element_length - pConvertor -> partial_length ),
311
- & conv_ptr );
312
- -- count_desc ;
313
- if (0 == count_desc ) {
314
- conv_ptr = pConvertor -> pBaseBuf + pStack -> disp ;
315
- pos_desc ++ ; /* advance to the next data */
316
- UPDATE_INTERNAL_COUNTERS (description , pos_desc , pElem , count_desc );
317
- }
318
- iov_ptr += missing_length ;
319
- iov_len_local -= missing_length ;
320
- pConvertor -> partial_length = 0 ; /* nothing more inside */
321
- }
301
+ /* Deal with all types of partial predefined datatype unpacking, including when
302
+ * unpacking a partial predefined element and when unpacking a part smaller than
303
+ * the blocklen.
304
+ */
322
305
if (pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) {
306
+ if (0 != pConvertor -> partial_length ) { /* partial predefined element */
307
+ size_t element_length = opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size ;
308
+ size_t missing_length = element_length - pConvertor -> partial_length ;
309
+
310
+ assert ( pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA );
311
+ COMPUTE_CSUM ( iov_ptr , missing_length , pConvertor );
312
+ opal_unpack_partial_datatype ( pConvertor , pElem ,
313
+ iov_ptr ,
314
+ pConvertor -> partial_length , missing_length ,
315
+ & conv_ptr );
316
+ -- count_desc ;
317
+ if ( 0 == (count_desc % pElem -> elem .blocklen )) { /* did we reach the end of the blocklen ? */
318
+ if (0 == count_desc ) { /* the end of the vector ? */
319
+ conv_ptr = pConvertor -> pBaseBuf + pStack -> disp ;
320
+ pos_desc ++ ; /* advance to the next data */
321
+ UPDATE_INTERNAL_COUNTERS (description , pos_desc , pElem , count_desc );
322
+ } else {
323
+ conv_ptr += pElem -> elem .extent - (pElem -> elem .blocklen * element_length );
324
+ }
325
+ }
326
+ iov_ptr += missing_length ;
327
+ iov_len_local -= missing_length ;
328
+ pConvertor -> partial_length = 0 ; /* nothing more inside */
329
+ if ( 0 == iov_len_local )
330
+ goto complete_loop ;
331
+ }
323
332
if (((size_t ) pElem -> elem .count * pElem -> elem .blocklen ) != count_desc ) {
324
333
/* we have a partial (less than blocklen) basic datatype */
325
334
int rc = UNPACK_PARTIAL_BLOCKLEN (pConvertor , pElem , count_desc , iov_ptr , conv_ptr ,
@@ -401,14 +410,14 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct
401
410
}
402
411
}
403
412
complete_loop :
404
- assert (pElem -> elem .common .type < OPAL_DATATYPE_MAX_PREDEFINED );
405
- if (( pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) && (0 != iov_len_local )) {
406
- unsigned char * temp = conv_ptr ;
413
+ assert ( pElem -> elem .common .type < OPAL_DATATYPE_MAX_PREDEFINED );
414
+ if ( ( pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) && (0 != iov_len_local ) ) {
415
+ unsigned char * temp = conv_ptr ;
407
416
/* We have some partial data here. Let's copy it into the convertor
408
417
* and keep it hot until the next round.
409
418
*/
410
- assert (iov_len_local < opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size );
411
- COMPUTE_CSUM (iov_ptr , iov_len_local , pConvertor );
419
+ assert ( iov_len_local < opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size );
420
+ COMPUTE_CSUM ( iov_ptr , iov_len_local , pConvertor );
412
421
413
422
opal_unpack_partial_datatype (pConvertor , pElem , iov_ptr , 0 , iov_len_local , & temp );
414
423
@@ -543,11 +552,6 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec
543
552
unsigned char * conv_ptr , * iov_ptr ;
544
553
uint32_t iov_count ;
545
554
size_t iov_len_local ;
546
- #if 0
547
- const opal_convertor_master_t * master = pConvertor -> master ;
548
- ptrdiff_t advance ; /* number of bytes that we should advance the buffer */
549
- #endif
550
- size_t rc ;
551
555
552
556
DO_DEBUG (opal_output (0 , "opal_convertor_general_unpack( %p, {%p, %lu}, %d )\n" ,
553
557
(void * ) pConvertor , (void * ) iov [0 ].iov_base ,
0 commit comments