Skip to content

Commit 41aab40

Browse files
committed
Optimization for blocklen == 1
Signed-off-by: George Bosilca <[email protected]>
1 parent 7cd3aba commit 41aab40

File tree

3 files changed

+49
-8
lines changed

3 files changed

+49
-8
lines changed

opal/datatype/opal_datatype_pack.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
3737
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
3838
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
3939
size_t do_now, do_now_bytes;
40-
size_t blocklen_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
40+
size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
4141
unsigned char* _memory = (*memory) + _elem->disp;
4242
unsigned char* _packed = *packed;
4343

@@ -46,6 +46,21 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
4646
if( cando_count > *(COUNT) )
4747
cando_count = *(COUNT);
4848

49+
if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */
50+
*(COUNT) -= cando_count;
51+
for(; cando_count > 0; cando_count--) {
52+
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
53+
(CONVERTOR)->pDesc, (CONVERTOR)->count );
54+
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
55+
(void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
56+
MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) );
57+
_packed += blocklen_bytes;
58+
_memory += _elem->extent;
59+
}
60+
goto update_and_return;
61+
}
62+
blocklen_bytes *= _elem->blocklen;
63+
4964
/**
5065
* First check if we already did something on this element ? The COUNT is the number
5166
* of remaining predefined types in the current elem, not how many predefined types
@@ -92,14 +107,15 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
92107
assert( cando_count < _elem->blocklen );
93108
do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size;
94109
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
95-
(CONVERTOR)->pDesc, (CONVERTOR)->count );
110+
(CONVERTOR)->pDesc, (CONVERTOR)->count );
96111
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
97112
(void*)_packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
98113
MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) );
99114
_memory += do_now_bytes;
100115
_packed += do_now_bytes;
101116
}
102117

118+
update_and_return:
103119
*(memory) = _memory - _elem->disp;
104120
*(SPACE) -= (_packed - *packed);
105121
*(packed) = _packed;

opal/datatype/opal_datatype_position.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,8 @@ position_single_block(opal_convertor_t* CONVERTOR,
6565
}
6666

6767
/**
68-
* Advance the current position in the convertor based using the
69-
* current element and a left-over counter. Update the head pointer
70-
* and the leftover byte space.
68+
* Advance the convertor's position according to the current element and the
69+
* left-over count. Update the pointer and the remaining space accordingly.
7170
*/
7271
static inline void
7372
position_predefined_data( opal_convertor_t* CONVERTOR,
@@ -79,14 +78,23 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
7978
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
8079
size_t total_count = _elem->count * _elem->blocklen;
8180
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
82-
size_t do_now, do_now_bytes;
81+
size_t do_now, do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
8382
unsigned char* _memory = (*POINTER) + _elem->disp;
8483

8584
assert( *(COUNT) <= _elem->count * _elem->blocklen);
8685

8786
if( cando_count > *(COUNT) )
8887
cando_count = *(COUNT);
8988

89+
if( 1 == _elem->blocklen ) {
90+
DO_DEBUG( opal_output( 0, "position( %p, %" PRIsize_t " ) x (count %" PRIsize_t ", extent %ld) => space %lu [prolog]\n",
91+
(void*)_memory, (unsigned long)do_now_bytes, cando_count, _elem->extent, (unsigned long)(*SPACE) ); );
92+
_memory += cando_count * _elem->extent;
93+
*SPACE -= cando_count * do_now_bytes;
94+
*COUNT -= cando_count;
95+
goto update_and_return;
96+
}
97+
9098
/**
9199
* First check if we already did something on this element ?
92100
*/
@@ -139,6 +147,7 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
139147
SPACE, do_now_bytes, COUNT, do_now );
140148
}
141149

150+
update_and_return:
142151
*(POINTER) = _memory - _elem->disp;
143152
}
144153

opal/datatype/opal_datatype_unpack.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR,
3737
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
3838
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
3939
size_t do_now, do_now_bytes;
40-
size_t blocklen_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
40+
size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
4141
unsigned char* _memory = (*memory) + _elem->disp;
4242
unsigned char* _packed = *packed;
4343

@@ -46,6 +46,21 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR,
4646
if( cando_count > *(COUNT) )
4747
cando_count = *(COUNT);
4848

49+
if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */
50+
*(COUNT) -= cando_count;
51+
for(; cando_count > 0; cando_count--) {
52+
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
53+
(CONVERTOR)->pDesc, (CONVERTOR)->count );
54+
DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n",
55+
(void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
56+
MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) );
57+
_packed += blocklen_bytes;
58+
_memory += _elem->extent;
59+
}
60+
goto update_and_return;
61+
}
62+
blocklen_bytes *= _elem->blocklen;
63+
4964
/**
5065
* First check if we already did something on this element ? The COUNT is the number
5166
* of remaining predefined types in the current elem, not how many predefined types
@@ -92,14 +107,15 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR,
92107
assert( cando_count < _elem->blocklen );
93108
do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size;
94109
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
95-
(CONVERTOR)->pDesc, (CONVERTOR)->count );
110+
(CONVERTOR)->pDesc, (CONVERTOR)->count );
96111
DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
97112
(void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
98113
MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) );
99114
_memory += do_now_bytes;
100115
_packed += do_now_bytes;
101116
}
102117

118+
update_and_return:
103119
*(memory) = _memory - _elem->disp;
104120
*(SPACE) -= (_packed - *packed);
105121
*(packed) = _packed;

0 commit comments

Comments
 (0)