@@ -35,82 +35,74 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
35
35
size_t * SPACE )
36
36
{
37
37
const ddt_elem_desc_t * _elem = & ((ELEM )-> elem );
38
- size_t total_count = _elem -> count * _elem -> blocklen ;
39
38
size_t cando_count = (* SPACE ) / opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
40
39
size_t do_now , do_now_bytes ;
40
+ size_t blocklen_bytes = _elem -> blocklen * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
41
41
unsigned char * _memory = (* memory ) + _elem -> disp ;
42
+ unsigned char * _packed = * packed ;
42
43
43
44
assert ( * (COUNT ) <= _elem -> count * _elem -> blocklen );
44
45
45
46
if ( cando_count > * (COUNT ) )
46
47
cando_count = * (COUNT );
47
48
48
49
/**
49
- * First check if we already did something on this element ?
50
+ * First check if we already did something on this element ? The COUNT is the number
51
+ * of remaining predefined types in the current elem, not how many predefined types
52
+ * should be manipulated in the current call (this number is instead reflected on the
53
+ * SPACE).
50
54
*/
51
- do_now = (total_count - * (COUNT )); /* done elements */
55
+ do_now = * (COUNT ) % _elem -> blocklen ; /* any partial elements ? */
56
+ /* preemptively update the number of COUNT we will return. */
57
+ * (COUNT ) -= cando_count ;
52
58
if ( 0 != do_now ) {
53
- do_now = do_now % _elem -> blocklen ; /* partial blocklen? */
54
-
55
- if ( 0 != do_now ) {
56
- size_t left_in_block = _elem -> blocklen - do_now ; /* left in the current blocklen */
57
- do_now = (left_in_block > cando_count ) ? cando_count : left_in_block ;
58
- do_now_bytes = do_now * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
59
-
60
- OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
61
- (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
62
- DO_DEBUG ( opal_output ( 0 , "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n" ,
63
- (void * )* (packed ), (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
64
- MEMCPY_CSUM ( * (packed ), _memory , do_now_bytes , (CONVERTOR ) );
65
- _memory = (* memory ) + _elem -> disp + (ptrdiff_t )do_now_bytes ;
66
- /* compensate if we just completed a blocklen */
67
- if ( do_now == left_in_block )
68
- _memory += _elem -> extent - (_elem -> blocklen * opal_datatype_basicDatatypes [_elem -> common .type ]-> size );
69
- * (packed ) += do_now_bytes ;
70
- * (SPACE ) -= do_now_bytes ;
71
- * (COUNT ) -= do_now ;
72
- cando_count -= do_now ;
73
- }
59
+ size_t left_in_block = do_now ; /* left in the current blocklen */
60
+ do_now = (do_now > cando_count ) ? cando_count : do_now ;
61
+ do_now_bytes = do_now * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
62
+
63
+ OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
64
+ (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
65
+ DO_DEBUG ( opal_output ( 0 , "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n" ,
66
+ _packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
67
+ MEMCPY_CSUM ( _packed , _memory , do_now_bytes , (CONVERTOR ) );
68
+ _memory += (ptrdiff_t )do_now_bytes ;
69
+ /* compensate if we just completed a blocklen */
70
+ if ( do_now == left_in_block )
71
+ _memory += _elem -> extent - blocklen_bytes ;
72
+ _packed += do_now_bytes ;
73
+ cando_count -= do_now ;
74
74
}
75
75
76
- /**
77
- * Compute how many full blocklen we need to do and do them.
78
- */
79
- do_now = cando_count / _elem -> blocklen ;
80
- if ( 0 != do_now ) {
81
- do_now_bytes = _elem -> blocklen * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
82
- for (size_t _i = 0 ; _i < do_now ; _i ++ ) {
83
- OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
84
- (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
85
- DO_DEBUG ( opal_output ( 0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
86
- (void * )* (packed ), (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )* (SPACE ) ); );
87
- MEMCPY_CSUM ( * (packed ), _memory , do_now_bytes , (CONVERTOR ) );
88
- * (packed ) += do_now_bytes ;
89
- _memory += _elem -> extent ;
90
- * (SPACE ) -= do_now_bytes ;
91
- * (COUNT ) -= _elem -> blocklen ;
92
- cando_count -= _elem -> blocklen ;
93
- }
76
+ /* Do as many full blocklen as possible */
77
+ for (size_t _i = 0 ; _elem -> blocklen <= cando_count ; _i ++ ) {
78
+ OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , blocklen_bytes , (CONVERTOR )-> pBaseBuf ,
79
+ (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
80
+ DO_DEBUG ( opal_output ( 0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
81
+ (void * )_packed , (void * )_memory , (unsigned long )blocklen_bytes , (unsigned long )(* (SPACE ) - (_packed - * (packed ))) ); );
82
+ MEMCPY_CSUM ( _packed , _memory , blocklen_bytes , (CONVERTOR ) );
83
+ _packed += blocklen_bytes ;
84
+ _memory += _elem -> extent ;
85
+ cando_count -= _elem -> blocklen ;
94
86
}
95
87
96
88
/**
97
89
* As an epilog do anything left from the last blocklen.
98
90
*/
99
- do_now = cando_count ;
100
- if ( 0 != do_now ) {
101
- do_now_bytes = do_now * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
91
+ if ( 0 ! = cando_count ) {
92
+ assert ( cando_count < _elem -> blocklen );
93
+ do_now_bytes = cando_count * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
102
94
OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
103
95
(CONVERTOR )-> pDesc , (CONVERTOR )-> count );
104
96
DO_DEBUG ( opal_output ( 0 , "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n" ,
105
- (void * )* ( packed ) , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
106
- MEMCPY_CSUM ( * ( packed ) , _memory , do_now_bytes , (CONVERTOR ) );
97
+ (void * )_packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
98
+ MEMCPY_CSUM ( _packed , _memory , do_now_bytes , (CONVERTOR ) );
107
99
_memory += do_now_bytes ;
108
- * (packed ) += do_now_bytes ;
109
- * (SPACE ) -= do_now_bytes ;
110
- * (COUNT ) -= do_now ;
100
+ _packed += do_now_bytes ;
111
101
}
112
102
113
103
* (memory ) = _memory - _elem -> disp ;
104
+ * (SPACE ) -= (_packed - * packed );
105
+ * (packed ) = _packed ;
114
106
}
115
107
116
108
static inline void pack_contiguous_loop ( opal_convertor_t * CONVERTOR ,
0 commit comments