Skip to content

Reenable heterogeneous support. #8735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ompi/datatype/ompi_datatype.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ BEGIN_C_DECLS
#define OMPI_DATATYPE_FLAG_DATA_FORTRAN 0xC000
#define OMPI_DATATYPE_FLAG_DATA_LANGUAGE 0xC000

#define OMPI_DATATYPE_MAX_PREDEFINED 50
#define OMPI_DATATYPE_MAX_PREDEFINED 52

#if OMPI_DATATYPE_MAX_PREDEFINED > OPAL_DATATYPE_MAX_SUPPORTED
#error Need to increase the number of supported dataypes by OPAL (value OPAL_DATATYPE_MAX_SUPPORTED).
Expand Down
2 changes: 1 addition & 1 deletion ompi/datatype/ompi_datatype_external.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ int ompi_datatype_pack_external_size(const char datarep[], int incount,
CONVERTOR_SEND_CONVERSION,
&local_convertor );

opal_convertor_get_unpacked_size( &local_convertor, &length );
opal_convertor_get_packed_size( &local_convertor, &length );
*size = (MPI_Aint)length;
OBJ_DESTRUCT( &local_convertor );

Expand Down
97 changes: 66 additions & 31 deletions ompi/datatype/ompi_datatype_external32.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,39 +26,74 @@

/* From the MPI standard. external32 uses the following types:
* Type Length
* MPI_PACKED 1
* MPI_BYTE 1
* MPI_CHAR 1
* MPI_UNSIGNED_CHAR 1
* MPI_SIGNED_CHAR 1
* MPI_WCHAR 2
* MPI_SHORT 2
* MPI_UNSIGNED_SHORT 2
* MPI_INT 4
* MPI_UNSIGNED 4
* MPI_LONG 4
* MPI_UNSIGNED_LONG 4
* MPI_FLOAT 4
* MPI_DOUBLE 8
* MPI_LONG_DOUBLE 16
* MPI_PACKED 1
* MPI_BYTE 1
* MPI_CHAR 1
* MPI_UNSIGNED_CHAR 1
* MPI_SIGNED_CHAR 1
* MPI_WCHAR 2
* MPI_SHORT 2
* MPI_UNSIGNED_SHORT 2
* MPI_INT 4
* MPI_LONG 4
* MPI_UNSIGNED 4
* MPI_UNSIGNED_LONG 4
* MPI_LONG_LONG_INT 8
* MPI_UNSIGNED_LONG_LONG 8
* MPI_FLOAT 4
* MPI_DOUBLE 8
* MPI_LONG_DOUBLE 16
*
* MPI_C_BOOL 1
* MPI_INT8_T 1
* MPI_INT16_T 2
* MPI_INT32_T 4
* MPI_INT64_T 8
* MPI_UINT8_T 1
* MPI_UINT16_T 2
* MPI_UINT32_T 4
* MPI_UINT64_T 8
* MPI_AINT 8
* MPI_COUNT 8
* MPI_OFFSET 8
* MPI_C_COMPLEX 2*4
* MPI_C_FLOAT_COMPLEX 2*4
* MPI_C_DOUBLE_COMPLEX 2*8
* MPI_C_LONG_DOUBLE_COMPLEX 2*16
*
* Fortran types
* MPI_CHARACTER 1
* MPI_LOGICAL 4
* MPI_INTEGER 4
* MPI_REAL 4
* MPI_DOUBLE_PRECISION 8
* MPI_COMPLEX 2*4
* MPI_DOUBLE_COMPLEX 2*8
* MPI_CHARACTER 1
* MPI_LOGICAL 4
* MPI_INTEGER 4
* MPI_REAL 4
* MPI_DOUBLE_PRECISION 8
* MPI_COMPLEX 2*4
* MPI_DOUBLE_COMPLEX 2*8
*
* MPI_CXX_BOOL 1
* MPI_CXX_FLOAT_COMPLEX 2*4
* MPI_CXX_DOUBLE_COMPLEX 2*8
* MPI_CXX_LONG_DOUBLE_COMPLEX 2*16
*
* Optional types
* MPI_INTEGER1 1
* MPI_INTEGER2 2
* MPI_INTEGER4 4
* MPI_INTEGER8 8
* MPI_LONG_LONG_INT 8
* MPI_UNSIGNED_LONG_LONG 8
* MPI_REAL4 4
* MPI_REAL8 8
* MPI_REAL16 16
* MPI_INTEGER1 1
* MPI_INTEGER2 2
* MPI_INTEGER4 4
* MPI_INTEGER8 8
* MPI_INTEGER16 16
* MPI_REAL2 2
* MPI_REAL4 4
* MPI_REAL8 8
* MPI_REAL16 16
* MPI_COMPLEX4 2*2
* MPI_COMPLEX8 2*4
* MPI_COMPLEX16 2*8
* MPI_COMPLEX32 2*16
*
* MPI_CXX_BOOL 1
* MPI_CXX_FLOAT_COMPLEX 2*4
* MPI_CXX_DOUBLE_COMPLEX 2*8
* MPI_CXX_LONG_DOUBLE_COMPLEX 2*16
*
* All floating point values are in big-endian IEEE format. Double extended uses 16 bytes, with
* 15 exponent bits (bias = 16383), 112 mantissa bits and the same encoding as double. All
Expand Down
34 changes: 9 additions & 25 deletions ompi/datatype/ompi_datatype_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,14 @@
#define OMPI_DATATYPE_MPI_SHORT_FLOAT 0x30
#define OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX 0x31

/*
* Datatypes that have a different external32 length.
*/
#define OMPI_DATATYPE_MPI_LONG 0x32
#define OMPI_DATATYPE_MPI_UNSIGNED_LONG 0x33

/* This should __ALWAYS__ stay last */
#define OMPI_DATATYPE_MPI_UNAVAILABLE 0x32
#define OMPI_DATATYPE_MPI_UNAVAILABLE 0x34


#define OMPI_DATATYPE_MPI_MAX_PREDEFINED (OMPI_DATATYPE_MPI_UNAVAILABLE+1)
Expand Down Expand Up @@ -177,20 +183,6 @@
#define OMPI_DATATYPE_MPI_UNSIGNED OMPI_DATATYPE_MPI_UINT64_T
#endif

#if SIZEOF_LONG == 1
#define OMPI_DATATYPE_MPI_LONG OMPI_DATATYPE_MPI_INT8_T
#define OMPI_DATATYPE_MPI_UNSIGNED_LONG OMPI_DATATYPE_MPI_UINT8_T
#elif SIZEOF_LONG == 2
#define OMPI_DATATYPE_MPI_LONG OMPI_DATATYPE_MPI_INT16_T
#define OMPI_DATATYPE_MPI_UNSIGNED_LONG OMPI_DATATYPE_MPI_UINT16_T
#elif SIZEOF_LONG == 4
#define OMPI_DATATYPE_MPI_LONG OMPI_DATATYPE_MPI_INT32_T
#define OMPI_DATATYPE_MPI_UNSIGNED_LONG OMPI_DATATYPE_MPI_UINT32_T
#elif SIZEOF_LONG == 8
#define OMPI_DATATYPE_MPI_LONG OMPI_DATATYPE_MPI_INT64_T
#define OMPI_DATATYPE_MPI_UNSIGNED_LONG OMPI_DATATYPE_MPI_UINT64_T
#endif

#if SIZEOF_LONG_LONG == 1
#define OMPI_DATATYPE_MPI_LONG_LONG_INT OMPI_DATATYPE_MPI_INT8_T
#define OMPI_DATATYPE_MPI_UNSIGNED_LONG_LONG OMPI_DATATYPE_MPI_UINT8_T
Expand Down Expand Up @@ -571,16 +563,8 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED OPAL_DATATYPE_INITIALIZER_UINT8
#endif

#if SIZEOF_LONG == 4
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT4
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT4
#elif SIZEOF_LONG == 8
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT8
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT8
#elif SIZEOF_LONG == 16
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT16
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT16
#endif
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_LONG
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UNSIGNED_LONG

#if SIZEOF_LONG_LONG == 4
#define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INITIALIZER_INT4
Expand Down
2 changes: 2 additions & 0 deletions ompi/datatype/ompi_datatype_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,8 @@ const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX_PREDEF
[OMPI_DATATYPE_MPI_LB] = &ompi_mpi_lb.dt,
[OMPI_DATATYPE_MPI_UB] = &ompi_mpi_ub.dt,

/* Datatypes that have a different external32 length get dedicated slots.
 * Each slot must reference its own predefined datatype: the unsigned long
 * entry previously pointed at the signed long datatype. */
[OMPI_DATATYPE_MPI_LONG] = &ompi_mpi_long.dt,
[OMPI_DATATYPE_MPI_UNSIGNED_LONG] = &ompi_mpi_unsigned_long.dt,
/* MPI 3.0 types */
[OMPI_DATATYPE_MPI_COUNT] = &ompi_mpi_count.dt,

Expand Down
4 changes: 2 additions & 2 deletions ompi/mca/pml/ob1/pml_ob1_recvreq.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,8 @@ static inline void prepare_recv_req_converter(mca_pml_ob1_recv_request_t *req)
req->req_recv.req_base.req_addr,
0,
&req->req_recv.req_base.req_convertor);
opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor,
&req->req_bytes_expected);
opal_convertor_get_packed_size(&req->req_recv.req_base.req_convertor,
&req->req_bytes_expected);
}
}

Expand Down
14 changes: 10 additions & 4 deletions opal/datatype/opal_convertor.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,13 @@ opal_convertor_master_t *opal_convertor_find_or_create_master(uint32_t remote_ar
} else {
opal_output(0, "Unknown sizeof(bool) for the remote architecture\n");
}

if (opal_arch_checkmask(&master->remote_arch, OPAL_ARCH_LONGIS64)) {
remote_sizes[OPAL_DATATYPE_LONG] = 8;
remote_sizes[OPAL_DATATYPE_UNSIGNED_LONG] = 8;
} else {
remote_sizes[OPAL_DATATYPE_LONG] = 4;
remote_sizes[OPAL_DATATYPE_UNSIGNED_LONG] = 4;
}
/**
* Now we can compute the conversion mask. For all sizes where the remote
* and local architecture differ a conversion is needed. Moreover, if the
Expand Down Expand Up @@ -434,7 +440,7 @@ int32_t opal_convertor_set_position_nocheck(opal_convertor_t *convertor, size_t
}
rc = opal_convertor_generic_simple_position(convertor, position);
/**
* If we have a non-contigous send convertor don't allow it move in the middle
* If we have a non-contiguous send convertor don't allow it move in the middle
* of a predefined datatype, it won't be able to copy out the left-overs
* anyway. Instead force the position to stay on predefined datatypes
* boundaries. As we allow partial predefined datatypes on the contiguous
Expand Down Expand Up @@ -484,8 +490,8 @@ size_t opal_convertor_compute_remote_size(opal_convertor_t *pConvertor)
pConvertor->remote_size = pConvertor->local_size;
if (OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask)) {
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
if (!(pConvertor->flags & CONVERTOR_SEND
&& pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) {
/* Can we use the optimized description? */
if (pConvertor->flags & OPAL_DATATYPE_OPTIMIZED_RESTRICTED) {
pConvertor->use_desc = &(datatype->desc);
}
if (0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE)) {
Expand Down
23 changes: 10 additions & 13 deletions opal/datatype/opal_convertor.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,22 +199,19 @@ static inline int32_t opal_convertor_need_buffers(const opal_convertor_t *pConve
size_t opal_convertor_compute_remote_size(opal_convertor_t *pConv);

/**
* Return the local size of the convertor (count times the size of the datatype).
* Return the packed size of the memory layout represented by this
* convertor. This is the size of the buffer that would be needed
* for the conversion (takes into account the type of the operation,
* aka pack or unpack, as well as which side is supposed to do the
* type conversion).
*/
static inline void opal_convertor_get_packed_size(const opal_convertor_t *pConv, size_t *pSize)
static inline void
opal_convertor_get_packed_size(const opal_convertor_t *pConv, size_t *pSize)
{
*pSize = pConv->local_size;
}

/**
* Return the remote size of the convertor (count times the remote size of the
* datatype). On homogeneous environments the local and remote sizes are
* identical.
*/
static inline void opal_convertor_get_unpacked_size(const opal_convertor_t *pConv, size_t *pSize)
{
if (pConv->flags & CONVERTOR_HOMOGENEOUS) {
*pSize = pConv->local_size;
if ((pConv->flags & CONVERTOR_HOMOGENEOUS) ||
((pConv->flags & CONVERTOR_SEND) && !(pConv->flags & CONVERTOR_SEND_CONVERSION)) ||
((pConv->flags & CONVERTOR_RECV) && (pConv->flags & CONVERTOR_SEND_CONVERSION))) {
return;
}
if (0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags)) {
Expand Down
63 changes: 35 additions & 28 deletions opal/datatype/opal_copy_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@
\
if ((from_extent == (ptrdiff_t) local_TYPE_size) \
&& (to_extent == (ptrdiff_t) remote_TYPE_size)) { \
/* copy of contigous data at both source and destination */ \
/* copy of contiguous data at both source and destination */ \
MEMCPY(to, from, count *local_TYPE_size); \
} else { \
/* source or destination are non-contigous */ \
/* source or destination are non-contiguous */ \
for (size_t i = 0; i < count; i++) { \
MEMCPY(to, from, local_TYPE_size); \
to += to_extent; \
Expand Down Expand Up @@ -254,30 +254,37 @@ COPY_TYPE(wchar, wchar_t, 1)
/* Table of predefined copy functions - one for each OPAL type */
/* NOTE: The order of this array *MUST* match the order in opal_datatype_basicDatatypes */
conversion_fct_t opal_datatype_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = {
(conversion_fct_t) NULL, /* OPAL_DATATYPE_LOOP */
(conversion_fct_t) NULL, /* OPAL_DATATYPE_END_LOOP */
(conversion_fct_t) NULL, /* OPAL_DATATYPE_LB */
(conversion_fct_t) NULL, /* OPAL_DATATYPE_UB */
(conversion_fct_t) copy_bytes_1, /* OPAL_DATATYPE_INT1 */
(conversion_fct_t) copy_bytes_2, /* OPAL_DATATYPE_INT2 */
(conversion_fct_t) copy_bytes_4, /* OPAL_DATATYPE_INT4 */
(conversion_fct_t) copy_bytes_8, /* OPAL_DATATYPE_INT8 */
(conversion_fct_t) copy_bytes_16, /* OPAL_DATATYPE_INT16 */
(conversion_fct_t) copy_bytes_1, /* OPAL_DATATYPE_UINT1 */
(conversion_fct_t) copy_bytes_2, /* OPAL_DATATYPE_UINT2 */
(conversion_fct_t) copy_bytes_4, /* OPAL_DATATYPE_UINT4 */
(conversion_fct_t) copy_bytes_8, /* OPAL_DATATYPE_UINT8 */
(conversion_fct_t) copy_bytes_16, /* OPAL_DATATYPE_UINT16 */
(conversion_fct_t) copy_float_2, /* OPAL_DATATYPE_FLOAT2 */
(conversion_fct_t) copy_float_4, /* OPAL_DATATYPE_FLOAT4 */
(conversion_fct_t) copy_float_8, /* OPAL_DATATYPE_FLOAT8 */
(conversion_fct_t) copy_float_12, /* OPAL_DATATYPE_FLOAT12 */
(conversion_fct_t) copy_float_16, /* OPAL_DATATYPE_FLOAT16 */
(conversion_fct_t) copy_short_float_complex, /* OPAL_DATATYPE_SHORT_FLOAT_COMPLEX */
(conversion_fct_t) copy_float_complex, /* OPAL_DATATYPE_FLOAT_COMPLEX */
(conversion_fct_t) copy_double_complex, /* OPAL_DATATYPE_DOUBLE_COMPLEX */
(conversion_fct_t) copy_long_double_complex, /* OPAL_DATATYPE_LONG_DOUBLE_COMPLEX */
(conversion_fct_t) copy_bool, /* OPAL_DATATYPE_BOOL */
(conversion_fct_t) copy_wchar, /* OPAL_DATATYPE_WCHAR */
(conversion_fct_t) NULL /* OPAL_DATATYPE_UNAVAILABLE */
[OPAL_DATATYPE_LOOP] = (conversion_fct_t) NULL,
[OPAL_DATATYPE_END_LOOP] = (conversion_fct_t) NULL,
[OPAL_DATATYPE_LB] = (conversion_fct_t) NULL,
[OPAL_DATATYPE_UB] = (conversion_fct_t) NULL,
[OPAL_DATATYPE_INT1] = (conversion_fct_t) copy_bytes_1,
[OPAL_DATATYPE_INT2] = (conversion_fct_t) copy_bytes_2,
[OPAL_DATATYPE_INT4] = (conversion_fct_t) copy_bytes_4,
[OPAL_DATATYPE_INT8] = (conversion_fct_t) copy_bytes_8,
[OPAL_DATATYPE_INT16] = (conversion_fct_t) copy_bytes_16,
[OPAL_DATATYPE_UINT1] = (conversion_fct_t) copy_bytes_1,
[OPAL_DATATYPE_UINT2] = (conversion_fct_t) copy_bytes_2,
[OPAL_DATATYPE_UINT4] = (conversion_fct_t) copy_bytes_4,
[OPAL_DATATYPE_UINT8] = (conversion_fct_t) copy_bytes_8,
[OPAL_DATATYPE_UINT16] = (conversion_fct_t) copy_bytes_16,
[OPAL_DATATYPE_FLOAT2] = (conversion_fct_t) copy_float_2,
[OPAL_DATATYPE_FLOAT4] = (conversion_fct_t) copy_float_4,
[OPAL_DATATYPE_FLOAT8] = (conversion_fct_t) copy_float_8,
[OPAL_DATATYPE_FLOAT12] = (conversion_fct_t) copy_float_12,
[OPAL_DATATYPE_FLOAT16] = (conversion_fct_t) copy_float_16,
[OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = (conversion_fct_t) copy_short_float_complex,
[OPAL_DATATYPE_FLOAT_COMPLEX] = (conversion_fct_t) copy_float_complex,
[OPAL_DATATYPE_DOUBLE_COMPLEX] = (conversion_fct_t) copy_double_complex,
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = (conversion_fct_t) copy_long_double_complex,
[OPAL_DATATYPE_BOOL] = (conversion_fct_t) copy_bool,
[OPAL_DATATYPE_WCHAR] = (conversion_fct_t) copy_wchar,
#if SIZEOF_LONG == 4
[OPAL_DATATYPE_LONG] = (conversion_fct_t)copy_bytes_4,
[OPAL_DATATYPE_UNSIGNED_LONG] = (conversion_fct_t)copy_bytes_4,
#elif SIZEOF_LONG == 8
[OPAL_DATATYPE_LONG] = (conversion_fct_t)copy_bytes_8,
[OPAL_DATATYPE_UNSIGNED_LONG] = (conversion_fct_t)copy_bytes_8,
#endif
[OPAL_DATATYPE_UNAVAILABLE] = NULL,
};
Loading