Skip to content

Force memcpy inlining to assignments during pack/unpack of some DDTs #6678

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions opal/datatype/opal_datatype_checksum.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,17 @@ do { \
(CONVERTOR)->checksum += OPAL_CSUM_PARTIAL( (SRC), (BLENGTH), &(CONVERTOR)->csum_ui1, &(CONVERTOR)->csum_ui2 ); \
} while (0)

/*
 * Checksum branch (see the matching "#else" below): copies of basic
 * datatypes take the same path as every other copy, i.e. all four
 * arguments are handed to MEMCPY_CSUM unchanged.
 */
#define BASIC_DTT_MEMCPY_CSUM( DSTPTR, SRCPTR, NBYTES, CONV ) \
    MEMCPY_CSUM( (DSTPTR), (SRCPTR), (NBYTES), (CONV) )

#else /* if CHECKSUM */

/*
 * No-checksum branch: the convertor is not needed, so the fourth argument
 * is dropped (it is never expanded) and the copy is a plain MEMCPY.
 */
#define MEMCPY_CSUM( DSTPTR, SRCPTR, NBYTES, CONV ) \
    MEMCPY( (DSTPTR), (SRCPTR), (NBYTES) )

/*
 * No-checksum branch: basic-datatype copies go to BASIC_DTT_MEMCPY.
 * The convertor argument is accepted for interface symmetry but is
 * never expanded.
 */
#define BASIC_DTT_MEMCPY_CSUM( DSTPTR, SRCPTR, NBYTES, CONV ) \
    BASIC_DTT_MEMCPY( (DSTPTR), (SRCPTR), (NBYTES) )

/* No-checksum branch: there is nothing to compute, so this expands to
 * nothing and none of the arguments are evaluated. */
#define COMPUTE_CSUM( SRCPTR, NBYTES, CONV )

#endif /* if CHECKSUM */
Expand Down
19 changes: 19 additions & 0 deletions opal/datatype/opal_datatype_memcpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,23 @@
/* Thin wrapper: MEMCPY is the C library memcpy() with each argument
 * parenthesized; like memcpy() it is usable as an expression. */
#define MEMCPY( DSTPTR, SRCPTR, NBYTES ) \
    memcpy( (DSTPTR), (SRCPTR), (NBYTES) )

/*
 * Copy BLENGTH bytes from SRC to DST.  This macro is called whenever we
 * are packing/unpacking a DDT that is built with basic datatypes.
 *
 * The 4-byte and 8-byte cases call memcpy() with a literal size: giving
 * memcpy() a fixed size makes the Intel compiler inline it as an
 * assignment operation.  Any other length falls back to a regular
 * memcpy().  This code is a bit hacky, but doing this we can divide the
 * latency by up to 2 during DDT exchanges.
 *
 * BLENGTH is captured once in a block-local variable, so a length
 * argument with side effects is evaluated exactly one time whichever
 * branch is taken.  DST and SRC are each expanded once per branch.
 */
#define BASIC_DTT_MEMCPY( DST, SRC, BLENGTH )                              \
do {                                                                       \
    const size_t basic_dtt_blength_ = (size_t)(BLENGTH);                   \
    if (4 == basic_dtt_blength_) {        /* We are copying an int */      \
        memcpy((DST), (SRC), 4);                                           \
    } else if (8 == basic_dtt_blength_) { /* We are copying a double */    \
        memcpy((DST), (SRC), 8);                                           \
    } else {                                                               \
        memcpy((DST), (SRC), basic_dtt_blength_);                          \
    }                                                                      \
} while (0)

#endif /* OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED */
7 changes: 6 additions & 1 deletion opal/datatype/opal_datatype_pack.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
/*
 * Replace any earlier MEMCPY_CSUM with a version that forwards the copy
 * to the cbmemcpy member reached through the convertor pointer, passing
 * the convertor itself as the last argument.
 *
 * CONVERTOR is parenthesized before the "->" so that an argument which is
 * itself an expression (a cast, a conditional, "&obj", ...) binds as a
 * whole.  Note that CONVERTOR is expanded twice.
 */
#undef MEMCPY_CSUM
#define MEMCPY_CSUM( DST, SRC, BLENGTH, CONVERTOR ) \
    (CONVERTOR)->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )

/*
 * Discard the previous BASIC_DTT_MEMCPY_CSUM and forward basic-datatype
 * copies to MEMCPY_CSUM, so that they use whichever MEMCPY_CSUM
 * definition is in effect where this macro is expanded.
 */
#undef BASIC_DTT_MEMCPY_CSUM
#define BASIC_DTT_MEMCPY_CSUM( DSTPTR, SRCPTR, NBYTES, CONV ) \
    MEMCPY_CSUM( (DSTPTR), (SRCPTR), (NBYTES), (CONV) )

#endif

static inline void pack_predefined_data( opal_convertor_t* CONVERTOR,
Expand Down Expand Up @@ -53,7 +58,7 @@ static inline void pack_predefined_data( opal_convertor_t* CONVERTOR,
(CONVERTOR)->pDesc, (CONVERTOR)->count );
DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu\n",
(void*)*(DESTINATION), (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); );
MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) );
BASIC_DTT_MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) );
_source += _copy_blength;
*(DESTINATION) += _copy_blength;
} else {
Expand Down
7 changes: 6 additions & 1 deletion opal/datatype/opal_datatype_unpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
/*
 * Replace any earlier MEMCPY_CSUM with a version that forwards the copy
 * to the cbmemcpy member reached through the convertor pointer, passing
 * the convertor itself as the last argument.
 *
 * CONVERTOR is parenthesized before the "->" so that an argument which is
 * itself an expression (a cast, a conditional, "&obj", ...) binds as a
 * whole.  Note that CONVERTOR is expanded twice.
 */
#undef MEMCPY_CSUM
#define MEMCPY_CSUM( DST, SRC, BLENGTH, CONVERTOR ) \
    (CONVERTOR)->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )

/*
 * Discard the previous BASIC_DTT_MEMCPY_CSUM and forward basic-datatype
 * copies to MEMCPY_CSUM, so that they use whichever MEMCPY_CSUM
 * definition is in effect where this macro is expanded.
 */
#undef BASIC_DTT_MEMCPY_CSUM
#define BASIC_DTT_MEMCPY_CSUM( DSTPTR, SRCPTR, NBYTES, CONV ) \
    MEMCPY_CSUM( (DSTPTR), (SRCPTR), (NBYTES), (CONV) )

#endif

static inline void
Expand Down Expand Up @@ -52,7 +57,7 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, /* the convertor */
(CONVERTOR)->pDesc, (CONVERTOR)->count );
DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %lu ) => space %lu\n",
(void*)_destination, (void*)*(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); );
MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) );
BASIC_DTT_MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) );
*(SOURCE) += _copy_blength;
_destination += _copy_blength;
} else {
Expand Down