Skip to content

Commit fc06a8d

Browse files
committed
external32 fixes
* let external32 use the non-optimized datatype description so it can know what it's supposed to be packing * add another OPAL_DATATYPE_* bucket for MPI_LONG * redo the pFunctions to take more arguments, including describing a vector with nblocks/blocklen/extent to be iterated over and specifying the size and endianness of the two buffers (one being the vector, the other being a packed buffer) * VECTOR traversal macros to walk an _elem from a datatype's description[] Here's a gist for a testcase pack2.c https://gist.github.com/markalle/6d70cf8ca14761e94bce9d0240000a3e % mpicc -o x pack2.c % mpirun -np 1 ./x Without this fix if sizeof(long) is 8 and a machine is little endian an input buffer of (long)1 == [01 00 00 00 00 00 00 00] packs external32 as [00 00 00 00 00 00 00 01], using 8 bytes. But the external32 representation for MPI_LONG is supposed to be 4 bytes. I don't see much in the design to support the sizing of packed datatypes. Convertors have a remote_sizes[] array, but those are indexed by OPAL_DATATYPE_* which are buckets that contain multiple MPI datatypes. Eg MPI_LONG is OPAL_DATATYPE_INT8 (if sizeof(long) == 8), but so is MPI_INT64_T which has an external32 size of 8. So the OPAL bucket is too big and as far as I can tell the information is lost that would allow MPI_LONG to be packed to a different size. For this PR I followed the design of BOOL (which has its own OPAL_DATATYPE_ entry as opposed to mapping to OPAL_DATATYPE_INT1), and separated out another OPAL_DATATYPE_ bucket for MPI_LONG and MPI_UNSIGNED_LONG. I don't like this solution because there's no guarantee that any MPI datatype has a sizeof() that matches the size specified in external32. This PR only carves out another special case for long and unsigned long. The MPI_LONG datatype now maps to the new OPAL_DATATYPE_LONG which allows the external32 converter to set a remote_sizes[] of 4 for it. Then in the functions like copy_int8_heterogenous() that are made from pack/unpack with pFunction[], there are new arguments for from_is_bigendian and to_is_bigendian that allow conversions when from/to have different sizes. The calls are also more explicitly oriented around the concept of a vector (with a number of blocks, blocklen, and extent between blocks, like an MPI_Type-vector: this is what the elements of a datatype's description[] entries are). There's a chance that partial copies are being handled right in unpack. The VECTOR walking macroes at least are built to take an input "elements_already_done" to pick back up in the middle of a vector, and are able to stop in the middle of walking the vector too. I didn't change the loops in opal_datatype_pack.c though, and it's not currently making any effort to allow partial walking of the vector described by an _elem. A few other misc changes that went into this: - added a datatype converter arch_id flag for OPAL_ARCH_LONGIS32 and set it for the external32 convertor - changed the bit location for OPAL_ARCH_LONGIS64 because it wasn't living under the OPAL_ARCH_LONGISxx mask which appears to be how those masks are designed to work - the terminating condition in pack used to say "bConverted == local_size" but bConverted is counting bytes on the "to" side and for pack the "to" side is the remote_size. Signed-off-by: Mark Allen <[email protected]>
1 parent 963d82a commit fc06a8d

13 files changed

+622
-200
lines changed

ompi/datatype/ompi_datatype_external32.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* Copyright (c) 2004-2006 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
14+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1415
* $COPYRIGHT$
1516
*
1617
* Additional copyrights may follow
@@ -71,7 +72,8 @@
7172
uint32_t ompi_datatype_external32_arch_id = OPAL_ARCH_LDEXPSIZEIS15 | OPAL_ARCH_LDMANTDIGIS113 |
7273
OPAL_ARCH_LONGDOUBLEIS128 | OPAL_ARCH_ISBIGENDIAN |
7374
OPAL_ARCH_HEADERMASK | OPAL_ARCH_HEADERMASK2 |
74-
OPAL_ARCH_BOOLIS8 | OPAL_ARCH_LOGICALIS8;
75+
OPAL_ARCH_BOOLIS8 | OPAL_ARCH_LOGICALIS8 |
76+
OPAL_ARCH_LONGIS32;
7577

7678
opal_convertor_t* ompi_mpi_external32_convertor = NULL;
7779
opal_convertor_t* ompi_mpi_local_convertor = NULL;

ompi/datatype/ompi_datatype_internal.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Copyright (c) 2015-2018 Research Organization for Information Science
1111
* and Technology (RIST). All rights reserved.
1212
* Copyright (c) 2016-2018 FUJITSU LIMITED. All rights reserved.
13+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1314
* $COPYRIGHT$
1415
*
1516
* Additional copyrights may follow
@@ -570,16 +571,8 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
570571
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED OPAL_DATATYPE_INITIALIZER_UINT8
571572
#endif
572573

573-
#if SIZEOF_LONG == 4
574-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT4
575-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT4
576-
#elif SIZEOF_LONG == 8
577-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT8
578-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT8
579-
#elif SIZEOF_LONG == 16
580-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT16
581-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT16
582-
#endif
574+
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_LONG
575+
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_ULONG
583576

584577
#if SIZEOF_LONG_LONG == 4
585578
#define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INITIALIZER_INT4

ompi/mca/coll/hcoll/coll_hcoll_dtypes.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,19 @@ static dte_data_representation_t* ompi_datatype_2_dte_data_rep[OMPI_DATATYPE_MAX
9292
#else
9393
&DTE_ZERO,
9494
#endif
95-
&DTE_ZERO /*OPAL_DATATYPE_UNAVAILABLE 25 */
95+
96+
#if SIZEOF_LONG == 4
97+
&DTE_INT32, /*OPAL_DATATYPE_LONG 25 */
98+
&DTE_UINT32, /*OPAL_DATATYPE_ULONG 26 */
99+
#elif SIZEOF_LONG == 8
100+
&DTE_INT64, /*OPAL_DATATYPE_LONG 25 */
101+
&DTE_UINT64, /*OPAL_DATATYPE_ULONG 26 */
102+
#elif SIZEOF_LONG == 16
103+
&DTE_INT128, /*OPAL_DATATYPE_LONG 25 */
104+
&DTE_UINT128, /*OPAL_DATATYPE_ULONG 26 */
105+
#endif
106+
107+
&DTE_ZERO /*OPAL_DATATYPE_UNAVAILABLE 27 */
96108
};
97109

98110
enum {

opal/datatype/opal_convertor.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2013-2018 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
1717
* Copyright (c) 2017 Intel, Inc. All rights reserved
18+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -141,6 +142,12 @@ opal_convertor_master_t* opal_convertor_find_or_create_master( uint32_t remote_a
141142
opal_output( 0, "Unknown sizeof(bool) for the remote architecture\n" );
142143
}
143144

145+
/* Same for long */
146+
if( opal_arch_checkmask( &master->remote_arch, OPAL_ARCH_LONGIS32 ) ) {
147+
remote_sizes[OPAL_DATATYPE_LONG] = 4;
148+
remote_sizes[OPAL_DATATYPE_ULONG] = 4;
149+
}
150+
144151
/**
145152
* Now we can compute the conversion mask. For all sizes where the remote
146153
* and local architecture differ a conversion is needed. Moreover, if the
@@ -482,8 +489,24 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
482489

483490
pConvertor->remote_size = pConvertor->local_size;
484491
if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
492+
int is_send_conversion = 0;
493+
if (pConvertor->flags & CONVERTOR_SEND_CONVERSION) {
494+
// Adding to the conditions for keeping the optimized description.
495+
// Now it's only optimized if (send && contiguous &&
496+
// !something like external32 that needs conversion)
497+
//
498+
// Note, elsewhere there are similar checks that boil down to
499+
// checking that CONVERTOR_SEND_CONVERSION is on but that
500+
// HOMOGENEOUS is off. That kind of makes sense, except
501+
// OPAL_CONVERTOR_PREPARE seems to universally set HOMOGENEOUS
502+
// so I don't think that setting means what it looks like it
503+
// means, so I'm not using it.
504+
is_send_conversion = 1;
505+
}
485506
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
486-
if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) {
507+
if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS
508+
&& !is_send_conversion))
509+
{
487510
pConvertor->use_desc = &(datatype->desc);
488511
}
489512
if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {

opal/datatype/opal_convertor_internal.h

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
88
* Copyright (c) 2017 Research Organization for Information Science
99
* and Technology (RIST). All rights reserved.
10+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1011
* $COPYRIGHT$
1112
*
1213
* Additional copyrights may follow
@@ -22,10 +23,98 @@
2223

2324
BEGIN_C_DECLS
2425

25-
typedef int32_t (*conversion_fct_t)( opal_convertor_t* pConvertor, uint32_t count,
26-
const void* from, size_t from_len, ptrdiff_t from_extent,
27-
void* to, size_t to_length, ptrdiff_t to_extent,
28-
ptrdiff_t *advance );
26+
#define COPY_TO_VECTOR 1
27+
#define COPY_FROM_VECTOR 2
28+
typedef size_t (*conversion_fct_t)( opal_convertor_t* pConvertor, int mode,
29+
char* vector_buf,
30+
size_t vector_len,
31+
size_t nblocks,
32+
size_t count_per_block,
33+
size_t extent_between_blocks,
34+
size_t vector_element_size,
35+
int vector_is_bigendian,
36+
char* packed_buf,
37+
size_t packed_len,
38+
size_t packed_element_size,
39+
int packed_is_bigendian,
40+
size_t *elements_done );
41+
42+
typedef struct {
43+
char *buf; // where to write next data
44+
size_t count_per_block;
45+
size_t extent_between_blocks;
46+
size_t element_size;
47+
size_t i;
48+
size_t j;
49+
size_t elements_done;
50+
size_t max_elements;
51+
} vector_iteration_state_t;
52+
53+
#define VECTOR_INIT(buf_, \
54+
len_, \
55+
nblocks_, \
56+
count_per_block_, \
57+
extent_between_blocks_, \
58+
element_size_, \
59+
elements_already_done_, \
60+
max_elements_) \
61+
do { \
62+
vec.buf = buf_; \
63+
vec.count_per_block = count_per_block_; \
64+
vec.extent_between_blocks = extent_between_blocks_; \
65+
vec.element_size = element_size_; \
66+
vec.i = 0; \
67+
vec.j = 0; \
68+
vec.elements_done = 0; \
69+
\
70+
/* vec.max_elements is the min of several factors: */ \
71+
/* 1. what will fit in vec.buf based on its vec.len */ \
72+
/* 2. nblocks * count_per_block */ \
73+
/* 3. what is specified as max_elements to the macro */ \
74+
/* for computing what will fit in vec.buf base on its len: */ \
75+
/* number of complete blocks : vector_len / extent_between_blocks */ \
76+
/* bytes left for the next block : vector_len % extent_between_blocks */ \
77+
vec.max_elements = ((len_) / (extent_between_blocks_)) * (count_per_block_) \
78+
+ ((len_) % (extent_between_blocks_)) / (element_size_); \
79+
if ((nblocks_) * (count_per_block_) < vec.max_elements) { \
80+
vec.max_elements = (nblocks_) * (count_per_block_); \
81+
} \
82+
if ((max_elements_) < vec.max_elements) { \
83+
vec.max_elements = (max_elements_); \
84+
} \
85+
\
86+
if (elements_already_done_ != 0) { \
87+
vec.i = elements_already_done_ / count_per_block_; \
88+
vec.j = elements_already_done_ % count_per_block_; \
89+
vec.buf += (vec.i * extent_between_blocks_ + vec.j * element_size_); \
90+
vec.elements_done = elements_already_done_; \
91+
} \
92+
} while (0);
93+
94+
#define VECTOR_GET_NEXT_CONTIG_BLOCK(elements_for_this_block) \
95+
do { \
96+
elements_for_this_block = vec.count_per_block - vec.j; \
97+
/*printf("elements done so far: %d, j=%d\n", vec.elements_done, vec.j);*/ \
98+
if (vec.elements_done + elements_for_this_block > vec.max_elements) { \
99+
elements_for_this_block = vec.max_elements - vec.elements_done; \
100+
} \
101+
} while (0);
102+
103+
#define VECTOR_UPDATE(elements_for_this_block) \
104+
do { \
105+
if (elements_for_this_block == vec.count_per_block) { \
106+
vec.buf += vec.extent_between_blocks; \
107+
} else { \
108+
vec.j += elements_for_this_block; \
109+
vec.buf += (elements_for_this_block * vec.element_size); \
110+
if (vec.j == vec.count_per_block) { \
111+
vec.j = 0; \
112+
vec.buf -= (vec.count_per_block * vec.element_size); \
113+
vec.buf += vec.extent_between_blocks; \
114+
} \
115+
} \
116+
vec.elements_done += elements_for_this_block; \
117+
} while (0);
29118

30119
typedef struct opal_convertor_master_t {
31120
struct opal_convertor_master_t* next;

0 commit comments

Comments
 (0)