26
26
CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
27
27
#endif
28
28
29
+ /**
30
+ * This function deals only with partial elements. The COUNT points however to the whole leftover count,
31
+ * but this function is only expected to operate on an amount less than blength, that would allow the rest
32
+ * of the pack process to handle only entire blength blocks (plus the left over).
33
+ *
34
+ * Return 1 if we are now aligned on a block, 0 otherwise.
35
+ */
36
+ static inline int
37
+ pack_partial_blocklen ( opal_convertor_t * CONVERTOR ,
38
+ const dt_elem_desc_t * ELEM ,
39
+ size_t * COUNT ,
40
+ unsigned char * * memory ,
41
+ unsigned char * * packed ,
42
+ size_t * SPACE )
43
+ {
44
+ const ddt_elem_desc_t * _elem = & ((ELEM )-> elem );
45
+ size_t do_now_bytes = opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
46
+ size_t do_now = * (COUNT );
47
+ unsigned char * _memory = (* memory ) + _elem -> disp ;
48
+ unsigned char * _packed = * packed ;
49
+
50
+ assert ( * (COUNT ) <= _elem -> count * _elem -> blocklen );
51
+
52
+ /**
53
+ * First check if we already did something on this element ? The COUNT is the number
54
+ * of remaining predefined types in the current elem, not how many predefined types
55
+ * should be manipulated in the current call (this number is instead reflected on the
56
+ * SPACE).
57
+ */
58
+ if ( 0 == (do_now = (* COUNT ) % _elem -> blocklen ) )
59
+ return 1 ;
60
+
61
+ size_t left_in_block = do_now ; /* left in the current blocklen */
62
+
63
+ if ( (do_now_bytes * do_now ) > * (SPACE ) )
64
+ do_now = (* SPACE ) / do_now_bytes ;
65
+
66
+ do_now_bytes *= do_now ;
67
+
68
+ OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
69
+ (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
70
+ DO_DEBUG ( opal_output ( 0 , "pack memcpy( %p, %p, %lu ) => space %lu [partial]\n" ,
71
+ _packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
72
+ MEMCPY_CSUM ( _packed , _memory , do_now_bytes , (CONVERTOR ) );
73
+ * (memory ) += (ptrdiff_t )do_now_bytes ;
74
+ if ( do_now == left_in_block ) /* compensate if completed a blocklen */
75
+ * (memory ) += _elem -> extent - (_elem -> blocklen * opal_datatype_basicDatatypes [_elem -> common .type ]-> size );
76
+
77
+ * (COUNT ) -= do_now ;
78
+ * (SPACE ) -= do_now_bytes ;
79
+ * (packed ) += do_now_bytes ;
80
+ return (do_now == left_in_block );
81
+ }
82
+
83
+ /**
84
+ * Pack entire blocks, plus a possible remainder if SPACE is constrained to less than COUNT elements.
85
+ */
29
86
static inline void
30
87
pack_predefined_data ( opal_convertor_t * CONVERTOR ,
31
88
const dt_elem_desc_t * ELEM ,
@@ -36,27 +93,24 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
36
93
{
37
94
const ddt_elem_desc_t * _elem = & ((ELEM )-> elem );
38
95
size_t blocklen_bytes = opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
39
- size_t cando_count = * (COUNT ), do_now , do_now_bytes ;
96
+ size_t cando_count = * (COUNT ), do_now_bytes ;
40
97
unsigned char * _memory = (* memory ) + _elem -> disp ;
41
98
unsigned char * _packed = * packed ;
42
99
100
+ assert ( 0 == (cando_count % _elem -> blocklen ) ); /* no partials here */
43
101
assert ( * (COUNT ) <= _elem -> count * _elem -> blocklen );
44
102
45
103
if ( (blocklen_bytes * cando_count ) > * (SPACE ) )
46
104
cando_count = (* SPACE ) / blocklen_bytes ;
47
105
48
- do_now = * (COUNT ); /* save the COUNT for later */
49
106
/* preemptively update COUNT to the value we will return. */
50
107
* (COUNT ) -= cando_count ;
51
108
52
- if ( 1 == _elem -> count ) { /* Everything is contiguous, handle it as a prologue */
53
- goto do_epilog ;
54
- }
55
109
if ( 1 == _elem -> blocklen ) { /* Do as many full blocklen as possible */
56
110
for (; cando_count > 0 ; cando_count -- ) {
57
111
OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , blocklen_bytes , (CONVERTOR )-> pBaseBuf ,
58
112
(CONVERTOR )-> pDesc , (CONVERTOR )-> count );
59
- DO_DEBUG ( opal_output ( 0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
113
+ DO_DEBUG ( opal_output ( 0 , "pack memcpy( %p, %p, %lu ) => space %lu [blen = 1] \n" ,
60
114
(void * )_packed , (void * )_memory , (unsigned long )blocklen_bytes , (unsigned long )(* (SPACE ) - (_packed - * (packed ))) ); );
61
115
MEMCPY_CSUM ( _packed , _memory , blocklen_bytes , (CONVERTOR ) );
62
116
_packed += blocklen_bytes ;
@@ -65,61 +119,32 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
65
119
goto update_and_return ;
66
120
}
67
121
68
- blocklen_bytes *= _elem -> blocklen ;
69
- if ( (_elem -> count * _elem -> blocklen ) == cando_count ) {
70
- goto skip_prolog ;
71
- }
72
- /**
73
- * First check if we already did something on this element ? The COUNT is the number
74
- * of remaining predefined types in the current elem, not how many predefined types
75
- * should be manipulated in the current call (this number is instead reflected on the
76
- * SPACE).
77
- */
78
- do_now = do_now % _elem -> blocklen ; /* any partial elements ? */
122
+ if ( (1 < _elem -> count ) && (_elem -> blocklen <= cando_count ) ) {
123
+ blocklen_bytes *= _elem -> blocklen ;
79
124
80
- if ( 0 != do_now ) {
81
- size_t left_in_block = do_now ; /* left in the current blocklen */
82
- do_now = (do_now > cando_count ) ? cando_count : do_now ;
83
- do_now_bytes = do_now * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
84
-
85
- OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
86
- (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
87
- DO_DEBUG ( opal_output ( 0 , "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n" ,
88
- _packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
89
- MEMCPY_CSUM ( _packed , _memory , do_now_bytes , (CONVERTOR ) );
90
- _memory += (ptrdiff_t )do_now_bytes ;
91
- /* compensate if we just completed a blocklen */
92
- if ( do_now == left_in_block )
93
- _memory += _elem -> extent - blocklen_bytes ;
94
- _packed += do_now_bytes ;
95
- cando_count -= do_now ;
96
- }
97
-
98
- skip_prolog :
99
- /* Do as many full blocklen as possible */
100
- for (size_t _i = 0 ; _elem -> blocklen <= cando_count ; _i ++ ) {
101
- OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , blocklen_bytes , (CONVERTOR )-> pBaseBuf ,
102
- (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
103
- DO_DEBUG ( opal_output ( 0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
104
- (void * )_packed , (void * )_memory , (unsigned long )blocklen_bytes , (unsigned long )(* (SPACE ) - (_packed - * (packed ))) ); );
105
- MEMCPY_CSUM ( _packed , _memory , blocklen_bytes , (CONVERTOR ) );
106
- _packed += blocklen_bytes ;
107
- _memory += _elem -> extent ;
108
- cando_count -= _elem -> blocklen ;
125
+ do { /* Do as many full blocklen as possible */
126
+ OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , blocklen_bytes , (CONVERTOR )-> pBaseBuf ,
127
+ (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
128
+ DO_DEBUG ( opal_output ( 0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
129
+ (void * )_packed , (void * )_memory , (unsigned long )blocklen_bytes , (unsigned long )(* (SPACE ) - (_packed - * (packed ))) ); );
130
+ MEMCPY_CSUM ( _packed , _memory , blocklen_bytes , (CONVERTOR ) );
131
+ _packed += blocklen_bytes ;
132
+ _memory += _elem -> extent ;
133
+ cando_count -= _elem -> blocklen ;
134
+ } while (_elem -> blocklen <= cando_count );
109
135
}
110
136
111
137
/**
112
138
* As an epilog do anything left from the last blocklen.
113
139
*/
114
140
if ( 0 != cando_count ) {
115
-
116
- do_epilog :
117
- assert ( cando_count < _elem -> blocklen );
141
+ assert ( (cando_count < _elem -> blocklen ) ||
142
+ ((1 == _elem -> count ) && (cando_count <= _elem -> blocklen )) );
118
143
do_now_bytes = cando_count * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
119
144
OPAL_DATATYPE_SAFEGUARD_POINTER ( _memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
120
145
(CONVERTOR )-> pDesc , (CONVERTOR )-> count );
121
146
DO_DEBUG ( opal_output ( 0 , "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n" ,
122
- (void * )_packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE )) ); );
147
+ (void * )_packed , (void * )_memory , (unsigned long )do_now_bytes , (unsigned long )(* (SPACE ) - ( _packed - * ( packed )) ) ); );
123
148
MEMCPY_CSUM ( _packed , _memory , do_now_bytes , (CONVERTOR ) );
124
149
_memory += do_now_bytes ;
125
150
_packed += do_now_bytes ;
@@ -159,7 +184,15 @@ static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR,
159
184
* (COUNT ) -= _copy_loops ;
160
185
}
161
186
162
- #define PACK_PREDEFINED_DATATYPE ( CONVERTOR , /* the convertor */ \
187
/**
 * Call pack_partial_blocklen() with the addresses of COUNT, MEMORY, PACKED and
 * SPACE, which therefore must all be lvalues. The expansion is a single
 * expression whose value is the one returned by pack_partial_blocklen().
 */
#define PACK_PARTIAL_BLOCKLEN( CONVERTOR,  /* the convertor */                       \
                               ELEM,       /* the basic element to be packed */      \
                               COUNT,      /* the number of elements */              \
                               MEMORY,     /* the source pointer (char*) */          \
                               PACKED,     /* the destination pointer (char*) */     \
                               SPACE )     /* the space in the destination buffer */ \
    pack_partial_blocklen( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) )
194
+
195
+ #define PACK_PREDEFINED_DATATYPE ( CONVERTOR , /* the convertor */ \
163
196
ELEM , /* the basic element to be packed */ \
164
197
COUNT , /* the number of elements */ \
165
198
MEMORY , /* the source pointer (char*) */ \
0 commit comments