Skip to content

Commit 6fb7a79

Browse files
committed
external32 fix for MPI_LONG
Here's a gist for a testcase pack.c https://gist.github.com/markalle/ea3e48e8987bcd8a18923304e80833d4 % mpicc -o x pack.c % mpirun -np 1 ./x Without this fix if sizeof(long) is 8 and a machine is little endian an input buffer of (long)1 == [01 00 00 00 00 00 00 00] packs external32 as [00 00 00 00 00 00 00 01], using 8 bytes. But the external32 representation for MPI_LONG is supposed to be 4 bytes. I don't see much in the design to support the sizing of packed datatypes. Convertors have a remote_sizes[] array, but those are indexed by OPAL_DATATYPE_* which are buckets that contain multiple MPI datatypes. Eg MPI_LONG is OPAL_DATATYPE_INT8 (if sizeof(long) == 8), but so is MPI_INT64_T which has an external32 size of 8. So the OPAL bucket is too big and as far as I can tell the information is lost that would allow MPI_LONG to be packed to a different size. For this PR I followed the design of BOOL (which has its own OPAL_DATATYPE_ entry as opposed to mapping to OPAL_DATATYPE_INT1), and separated out another OPAL_DATATYPE_ bucket for MPI_LONG and MPI_UNSIGNED_LONG. I don't like this solution because there's no guarantee that any MPI datatype has a sizeof() that matches the size specified in external32. This PR only carves out another special case for long and unsigned long. The MPI_LONG datatype now maps to the new OPAL_DATATYPE_LONG which allows the external32 converter to set a remote_sizes[] of 4 for it. Then in the functions like copy_int8_heterogenous() that are made from pack/unpack with pFunction[], there are new arguments for from_is_bigendian and to_is_bigendian that allow conversions when from/to have different sizes, and the input sizes arguments are for the base elements to be converted in a contiguous chunk, the extents between contigous chunks are handled at the next level up in the pack/unpack functions. Prior to this checkin the "bottom" of the pack/unpack loops processed a single element from the datatype description[] entry, but inside the pFunction[] call it didn't loop through the elem->count and elem->blocklen right. In this PR I'm putting that loop in pack/unpack and having pFunctions like copy_int8_heterogeneous() processing contiguous chunks. I don't believe partial copies are handled correctly before or after this PR. pack doesn't try at all to handle this, it exclusively loops over all the blocks and all the elements in each block. Unpack has more code for partial copies, but doesn't seems to account for how each elem is basically a type_vector. Other misc changes that went into this: - added a datatype converter arch_id flag for OPAL_ARCH_LONGIS32 and set it for the external32 convertor - changed the bit location for OPAL_ARCH_LONGIS64 because it wasn't living under the OPAL_ARCH_LONGISxx mask which appears to be how those masks are designed to work - the terminating condition in pack used to say "bConverted == local_size" but bConverted is counting bytes on the "to" side and for pack the "to" side is the remote_size. Signed-off-by: Mark Allen <[email protected]>
1 parent 834cbff commit 6fb7a79

13 files changed

+296
-137
lines changed

ompi/datatype/ompi_datatype_external32.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* Copyright (c) 2004-2006 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
14+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1415
* $COPYRIGHT$
1516
*
1617
* Additional copyrights may follow
@@ -71,7 +72,8 @@
7172
uint32_t ompi_datatype_external32_arch_id = OPAL_ARCH_LDEXPSIZEIS15 | OPAL_ARCH_LDMANTDIGIS113 |
7273
OPAL_ARCH_LONGDOUBLEIS128 | OPAL_ARCH_ISBIGENDIAN |
7374
OPAL_ARCH_HEADERMASK | OPAL_ARCH_HEADERMASK2 |
74-
OPAL_ARCH_BOOLIS8 | OPAL_ARCH_LOGICALIS8;
75+
OPAL_ARCH_BOOLIS8 | OPAL_ARCH_LOGICALIS8 |
76+
OPAL_ARCH_LONGIS32;
7577

7678
opal_convertor_t* ompi_mpi_external32_convertor = NULL;
7779
opal_convertor_t* ompi_mpi_local_convertor = NULL;

ompi/datatype/ompi_datatype_internal.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Copyright (c) 2015-2018 Research Organization for Information Science
1111
* and Technology (RIST). All rights reserved.
1212
* Copyright (c) 2016-2018 FUJITSU LIMITED. All rights reserved.
13+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1314
* $COPYRIGHT$
1415
*
1516
* Additional copyrights may follow
@@ -570,16 +571,8 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
570571
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED OPAL_DATATYPE_INITIALIZER_UINT8
571572
#endif
572573

573-
#if SIZEOF_LONG == 4
574-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT4
575-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT4
576-
#elif SIZEOF_LONG == 8
577-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT8
578-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT8
579-
#elif SIZEOF_LONG == 16
580-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT16
581-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT16
582-
#endif
574+
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_LONG
575+
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_ULONG
583576

584577
#if SIZEOF_LONG_LONG == 4
585578
#define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INITIALIZER_INT4

ompi/mca/coll/hcoll/coll_hcoll_dtypes.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,19 @@ static dte_data_representation_t* ompi_datatype_2_dte_data_rep[OMPI_DATATYPE_MAX
9292
#else
9393
&DTE_ZERO,
9494
#endif
95-
&DTE_ZERO /*OPAL_DATATYPE_UNAVAILABLE 25 */
95+
96+
#if SIZEOF_LONG == 4
97+
&DTE_INT32, /*OPAL_DATATYPE_LONG 25 */
98+
&DTE_UINT32, /*OPAL_DATATYPE_ULONG 26 */
99+
#elif SIZEOF_LONG == 8
100+
&DTE_INT64, /*OPAL_DATATYPE_LONG 25 */
101+
&DTE_UINT64, /*OPAL_DATATYPE_ULONG 26 */
102+
#elif SIZEOF_LONG == 16
103+
&DTE_INT128, /*OPAL_DATATYPE_LONG 25 */
104+
&DTE_UINT128, /*OPAL_DATATYPE_ULONG 26 */
105+
#endif
106+
107+
&DTE_ZERO /*OPAL_DATATYPE_UNAVAILABLE 27 */
96108
};
97109

98110
enum {

opal/datatype/opal_convertor.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@ opal_convertor_master_t* opal_convertor_find_or_create_master( uint32_t remote_a
142142
opal_output( 0, "Unknown sizeof(bool) for the remote architecture\n" );
143143
}
144144

145+
/* Same for long */
146+
if( opal_arch_checkmask( &master->remote_arch, OPAL_ARCH_LONGIS32 ) ) {
147+
remote_sizes[OPAL_DATATYPE_LONG] = 4;
148+
remote_sizes[OPAL_DATATYPE_ULONG] = 4;
149+
}
150+
145151
/**
146152
* Now we can compute the conversion mask. For all sizes where the remote
147153
* and local architecture differ a conversion is needed. Moreover, if the

opal/datatype/opal_convertor_internal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
88
* Copyright (c) 2017 Research Organization for Information Science
99
* and Technology (RIST). All rights reserved.
10+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1011
* $COPYRIGHT$
1112
*
1213
* Additional copyrights may follow
@@ -24,7 +25,9 @@ BEGIN_C_DECLS
2425

2526
typedef int32_t (*conversion_fct_t)( opal_convertor_t* pConvertor, uint32_t count,
2627
const void* from, size_t from_len, ptrdiff_t from_extent,
28+
int from_is_bigendian,
2729
void* to, size_t to_length, ptrdiff_t to_extent,
30+
int to_is_bigendian,
2831
ptrdiff_t *advance );
2932

3033
typedef struct opal_convertor_master_t {

opal/datatype/opal_copy_functions.c

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* and Technology (RIST). All rights reserved.
99
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
1010
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
11+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1112
* $COPYRIGHT$
1213
*
1314
* Additional copyrights may follow
@@ -41,8 +42,12 @@
4142
*/
4243
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
4344
static int copy_##TYPENAME( opal_convertor_t *pConvertor, size_t count, \
44-
char* from, size_t from_len, ptrdiff_t from_extent, \
45-
char* to, size_t to_len, ptrdiff_t to_extent, \
45+
char* from, size_t from_len, \
46+
size_t from_element_size, \
47+
int from_is_bigendian, \
48+
char* to, size_t to_len, \
49+
size_t to_element_size, \
50+
int to_is_bigendian, \
4651
ptrdiff_t *advance) \
4752
{ \
4853
size_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
@@ -61,19 +66,9 @@ static int copy_##TYPENAME( opal_convertor_t *pConvertor, size_t count,
6166
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
6267
#TYPE, count, from, from_len, to, to_len ); \
6368
\
64-
if( (from_extent == (ptrdiff_t)local_TYPE_size) && \
65-
(to_extent == (ptrdiff_t)remote_TYPE_size) ) { \
66-
/* copy of contigous data at both source and destination */ \
67-
MEMCPY( to, from, count * local_TYPE_size ); \
68-
} else { \
69-
/* source or destination are non-contigous */ \
70-
for(size_t i = 0; i < count; i++ ) { \
71-
MEMCPY( to, from, local_TYPE_size ); \
72-
to += to_extent; \
73-
from += from_extent; \
74-
} \
75-
} \
76-
*advance = count * from_extent; \
69+
MEMCPY( to, from, count * to_element_size ); \
70+
\
71+
*advance = count * from_element_size; \
7772
return count; \
7873
}
7974

@@ -93,8 +88,12 @@ static int copy_##TYPENAME( opal_convertor_t *pConvertor, size_t count,
9388
*/
9489
#define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \
9590
static size_t copy_##TYPENAME##_##COUNT( opal_convertor_t *pConvertor, size_t count, \
96-
char* from, size_t from_len, ptrdiff_t from_extent, \
97-
char* to, size_t to_len, ptrdiff_t to_extent, \
91+
char* from, size_t from_len, \
92+
size_t from_element_size, \
93+
int from_is_bigendian, \
94+
char* to, size_t to_len, \
95+
size_t to_element_size, \
96+
int to_is_bigendian, \
9897
ptrdiff_t *advance ) \
9998
{ \
10099
size_t remote_TYPE_size = (size_t)(COUNT); /* TODO */ \
@@ -112,17 +111,8 @@ static size_t copy_##TYPENAME##_##COUNT( opal_convertor_t *pConvertor, size_t co
112111
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
113112
#TYPENAME, count, from, from_len, to, to_len ); \
114113
\
115-
if( (from_extent == (ptrdiff_t)local_TYPE_size) && \
116-
(to_extent == (ptrdiff_t)remote_TYPE_size) ) { \
117-
MEMCPY( to, from, count * local_TYPE_size ); \
118-
} else { \
119-
for(size_t i = 0; i < count; i++ ) { \
120-
MEMCPY( to, from, local_TYPE_size ); \
121-
to += to_extent; \
122-
from += from_extent; \
123-
} \
124-
} \
125-
*advance = count * from_extent; \
114+
MEMCPY( to, from, count * to_element_size ); \
115+
*advance = count * from_element_size; \
126116
return count; \
127117
}
128118

@@ -281,5 +271,15 @@ conversion_fct_t opal_datatype_copy_functions[OPAL_DATATYPE_MAX_PREDEFINED] = {
281271
(conversion_fct_t)copy_long_double_complex, /* OPAL_DATATYPE_LONG_DOUBLE_COMPLEX */
282272
(conversion_fct_t)copy_bool, /* OPAL_DATATYPE_BOOL */
283273
(conversion_fct_t)copy_wchar, /* OPAL_DATATYPE_WCHAR */
274+
#if SIZEOF_LONG == 4
275+
(conversion_fct_t)copy_bytes_4, /* OPAL_DATATYPE_LONG */
276+
(conversion_fct_t)copy_bytes_4, /* OPAL_DATATYPE_ULONG */
277+
#elif SIZEOF_LONG == 8
278+
(conversion_fct_t)copy_bytes_8, /* OPAL_DATATYPE_LONG */
279+
(conversion_fct_t)copy_bytes_8, /* OPAL_DATATYPE_ULONG */
280+
#elif SIZEOF_LONG == 16
281+
(conversion_fct_t)copy_bytes_16, /* OPAL_DATATYPE_LONG */
282+
(conversion_fct_t)copy_bytes_16, /* OPAL_DATATYPE_ULONG */
283+
#endif
284284
(conversion_fct_t)NULL /* OPAL_DATATYPE_UNAVAILABLE */
285285
};

0 commit comments

Comments
 (0)