Skip to content

Commit 9a794e3

Browse files
committed
external32 fixes
* let external32 use the non-optimized datatype description so it can know what it's supposed to be packing * add another OPAL_DATATYPE_* bucket for MPI_LONG * redo the pFunctions to take more arguments, including describing a vector with nblocks/blocklen/extent to be iterated over and specifying the size and endianness of the two buffers (one being the vector, the other being a packed buffer) * VECTOR traversal macros to walk an _elem from a datatype's description[] Here's a gist for a testcase pack2.c https://gist.github.com/markalle/6d70cf8ca14761e94bce9d0240000a3e % mpicc -o x pack2.c % mpirun -np 1 ./x Without this fix if sizeof(long) is 8 and a machine is little endian an input buffer of (long)1 == [01 00 00 00 00 00 00 00] packs external32 as [00 00 00 00 00 00 00 01], using 8 bytes. But the external32 representation for MPI_LONG is supposed to be 4 bytes. I don't see much in the design to support the sizing of packed datatypes. Convertors have a remote_sizes[] array, but those are indexed by OPAL_DATATYPE_* which are buckets that contain multiple MPI datatypes. Eg MPI_LONG is OPAL_DATATYPE_INT8 (if sizeof(long) == 8), but so is MPI_INT64_T which has an external32 size of 8. So the OPAL bucket is too big and as far as I can tell the information is lost that would allow MPI_LONG to be packed to a different size. For this PR I followed the design of BOOL (which has its own OPAL_DATATYPE_ entry as opposed to mapping to OPAL_DATATYPE_INT1), and separated out another OPAL_DATATYPE_ bucket for MPI_LONG and MPI_UNSIGNED_LONG. I don't like this solution because there's no guarantee that any MPI datatype has a sizeof() that matches the size specified in external32. This PR only carves out another special case for long and unsigned long. The MPI_LONG datatype now maps to the new OPAL_DATATYPE_LONG which allows the external32 converter to set a remote_sizes[] of 4 for it. Then in the functions like copy_int8_heterogenous() that are made from pack/unpack with pFunction[], there are new arguments for from_is_bigendian and to_is_bigendian that allow conversions when from/to have different sizes. The calls are also more explicitly oriented around the concept of a vector (with a number of blocks, blocklen, and extent between blocks, like an MPI_Type-vector: this is what the elements of a datatype's description[] entries are). There's a chance that partial copies are being handled right in unpack. The VECTOR walking macroes at least are built to take an input "elements_already_done" to pick back up in the middle of a vector, and are able to stop in the middle of walking the vector too. I didn't change the loops in opal_datatype_pack.c though, and it's not currently making any effort to allow partial walking of the vector described by an _elem. A few other misc changes that went into this: - added a datatype converter arch_id flag for OPAL_ARCH_LONGIS32 and set it for the external32 convertor - changed the bit location for OPAL_ARCH_LONGIS64 because it wasn't living under the OPAL_ARCH_LONGISxx mask which appears to be how those masks are designed to work - the terminating condition in pack used to say "bConverted == local_size" but bConverted is counting bytes on the "to" side and for pack the "to" side is the remote_size. Signed-off-by: Mark Allen <[email protected]>
1 parent 1139d9e commit 9a794e3

13 files changed

+723
-201
lines changed

ompi/datatype/ompi_datatype_external32.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* Copyright (c) 2004-2006 The Regents of the University of California.
1212
* All rights reserved.
1313
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
14+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1415
* $COPYRIGHT$
1516
*
1617
* Additional copyrights may follow
@@ -71,7 +72,8 @@
7172
uint32_t ompi_datatype_external32_arch_id = OPAL_ARCH_LDEXPSIZEIS15 | OPAL_ARCH_LDMANTDIGIS113 |
7273
OPAL_ARCH_LONGDOUBLEIS128 | OPAL_ARCH_ISBIGENDIAN |
7374
OPAL_ARCH_HEADERMASK | OPAL_ARCH_HEADERMASK2 |
74-
OPAL_ARCH_BOOLIS8 | OPAL_ARCH_LOGICALIS8;
75+
OPAL_ARCH_BOOLIS8 | OPAL_ARCH_LOGICALIS8 |
76+
OPAL_ARCH_LONGIS32;
7577

7678
opal_convertor_t* ompi_mpi_external32_convertor = NULL;
7779
opal_convertor_t* ompi_mpi_local_convertor = NULL;

ompi/datatype/ompi_datatype_internal.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Copyright (c) 2015-2018 Research Organization for Information Science
1111
* and Technology (RIST). All rights reserved.
1212
* Copyright (c) 2016-2018 FUJITSU LIMITED. All rights reserved.
13+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1314
* $COPYRIGHT$
1415
*
1516
* Additional copyrights may follow
@@ -570,16 +571,8 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
570571
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED OPAL_DATATYPE_INITIALIZER_UINT8
571572
#endif
572573

573-
#if SIZEOF_LONG == 4
574-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT4
575-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT4
576-
#elif SIZEOF_LONG == 8
577-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT8
578-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT8
579-
#elif SIZEOF_LONG == 16
580-
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_INT16
581-
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_UINT16
582-
#endif
574+
#define OMPI_DATATYPE_INITIALIZER_LONG OPAL_DATATYPE_INITIALIZER_LONG
575+
#define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG OPAL_DATATYPE_INITIALIZER_ULONG
583576

584577
#if SIZEOF_LONG_LONG == 4
585578
#define OMPI_DATATYPE_INITIALIZER_LONG_LONG_INT OPAL_DATATYPE_INITIALIZER_INT4

ompi/mca/coll/hcoll/coll_hcoll_dtypes.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,19 @@ static dte_data_representation_t* ompi_datatype_2_dte_data_rep[OMPI_DATATYPE_MAX
9292
#else
9393
&DTE_ZERO,
9494
#endif
95-
&DTE_ZERO /*OPAL_DATATYPE_UNAVAILABLE 25 */
95+
96+
#if SIZEOF_LONG == 4
97+
&DTE_INT32, /*OPAL_DATATYPE_LONG 25 */
98+
&DTE_UINT32, /*OPAL_DATATYPE_ULONG 26 */
99+
#elif SIZEOF_LONG == 8
100+
&DTE_INT64, /*OPAL_DATATYPE_LONG 25 */
101+
&DTE_UINT64, /*OPAL_DATATYPE_ULONG 26 */
102+
#elif SIZEOF_LONG == 16
103+
&DTE_INT128, /*OPAL_DATATYPE_LONG 25 */
104+
&DTE_UINT128, /*OPAL_DATATYPE_ULONG 26 */
105+
#endif
106+
107+
&DTE_ZERO /*OPAL_DATATYPE_UNAVAILABLE 27 */
96108
};
97109

98110
enum {

opal/datatype/opal_convertor.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2013-2018 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
1717
* Copyright (c) 2017 Intel, Inc. All rights reserved
18+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -141,6 +142,12 @@ opal_convertor_master_t* opal_convertor_find_or_create_master( uint32_t remote_a
141142
opal_output( 0, "Unknown sizeof(bool) for the remote architecture\n" );
142143
}
143144

145+
/* Same for long */
146+
if( opal_arch_checkmask( &master->remote_arch, OPAL_ARCH_LONGIS32 ) ) {
147+
remote_sizes[OPAL_DATATYPE_LONG] = 4;
148+
remote_sizes[OPAL_DATATYPE_ULONG] = 4;
149+
}
150+
144151
/**
145152
* Now we can compute the conversion mask. For all sizes where the remote
146153
* and local architecture differ a conversion is needed. Moreover, if the
@@ -482,8 +489,24 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
482489

483490
pConvertor->remote_size = pConvertor->local_size;
484491
if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
492+
int is_send_conversion = 0;
493+
if (pConvertor->flags & CONVERTOR_SEND_CONVERSION) {
494+
// Adding to the conditions for keeping the optimized description.
495+
// Now it's only optimized if (send && contiguous &&
496+
// !something like external32 that needs conversion)
497+
//
498+
// Note, elsewhere there are similar checks that boil down to
499+
// checking that CONVERTOR_SEND_CONVERSION is on but that
500+
// HOMOGENEOUS is off. That kind of makes sense, except
501+
// OPAL_CONVERTOR_PREPARE seems to universally set HOMOGENEOUS
502+
// so I don't think that setting means what it looks like it
503+
// means, so I'm not using it.
504+
is_send_conversion = 1;
505+
}
485506
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
486-
if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) {
507+
if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS
508+
&& !is_send_conversion))
509+
{
487510
pConvertor->use_desc = &(datatype->desc);
488511
}
489512
if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {

opal/datatype/opal_convertor_internal.h

Lines changed: 233 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
88
* Copyright (c) 2017 Research Organization for Information Science
99
* and Technology (RIST). All rights reserved.
10+
* Copyright (c) 2020 IBM Corporation. All rights reserved.
1011
* $COPYRIGHT$
1112
*
1213
* Additional copyrights may follow
@@ -19,13 +20,241 @@
1920
#include "opal_config.h"
2021

2122
#include "opal/datatype/opal_convertor.h"
23+
#include "opal/datatype/opal_datatype_internal.h"
2224

2325
BEGIN_C_DECLS
2426

25-
typedef int32_t (*conversion_fct_t)( opal_convertor_t* pConvertor, uint32_t count,
26-
const void* from, size_t from_len, ptrdiff_t from_extent,
27-
void* to, size_t to_length, ptrdiff_t to_extent,
28-
ptrdiff_t *advance );
27+
#define COPY_TO_VECTOR 1
28+
#define COPY_FROM_VECTOR 2
29+
// returns elements processed in the packed_buf for this call
30+
typedef size_t (*conversion_fct_t)( opal_convertor_t* pConvertor, int mode,
31+
char* vector_buf,
32+
const ddt_elem_desc_t *vector_info,
33+
char* packed_buf,
34+
size_t packed_len,
35+
size_t packed_element_size,
36+
size_t *total_elements_done );
37+
38+
/*
39+
* The VECTOR_* macros are made to help iterate a buffer pointer over
40+
* data that resembles an MPI_Type_vector, ie it has a number of
41+
* blocks, a count_per_block, an extent_between_blocks, and an element
42+
* size.
43+
*
44+
* For example if a buffer's data were
45+
* [INT4,INT4,----,INT4,INT4,----,INT4,INT4]
46+
* the vars describing the vector would be
47+
* nblocks = 3
48+
* count_per_block = 2
49+
* extent_between_blocks = 12
50+
* element_size = 4
51+
*
52+
* The macros also have inputs for stopping part of the way through
53+
* a vector, and for picking back up in the middle of the vector.
54+
* The granularity for that is an "element" though, so you could
55+
* stop after writing 3 INT4 in the above example, then pick
56+
* back up at the next INT4, but you can't stop in the middle of
57+
* an INT4.
58+
*
59+
* Interface description
60+
* VECTOR_INIT():
61+
* arguments:
62+
* buf : starting point of a buffer whose
63+
* : data is a vector, like
64+
* : [INT4,INT4,----,INT4,INT4,----,INT4,INT4]
65+
* nblocks : vector description for number of
66+
* : contiguous chunks, in this example 3
67+
* count_per_block : vector description for number of elements
68+
* : per chunk, in this example 2
69+
* extent_between_blocks : vector description for bytes between
70+
* : blocks, in this example 12
71+
* element_size : vector description for element size,
72+
* : in this example 4 for size of the INT4
73+
* elements_already_done : allow iteration to pick up in the middle
74+
* max_elements : specify an external factor limiting the
75+
* : number of vector elements to itearte over
76+
* : (beyond the current elements_alrady_done).
77+
* : The expected use case for this is that
78+
* : we're iterating over two buffers: one
79+
* : a vector, the other a packed buf, and
80+
* : we're probably copying data from one to
81+
* : the other. So "max_elements" could be
82+
* : given as the number of elements the
83+
* : packed buffers has, so that constraint
84+
* : will then be included in deciding how
85+
* : many elements are to be iterated over
86+
* : in the vector. If no external factor like
87+
* : that is relevant, then using max_elements
88+
* : = (nblocks * count_per_block) would make
89+
* : this argument a no-op. Note that this
90+
* : arg is relative to elements_already_done,
91+
* : so if you've already done 3, and you have
92+
* : room to iterate 3 more elements, you'd
93+
* : specify max_elements=3
94+
* VECTOR_GET_NEXT_CONTIG_BLOCK():
95+
* updates vec.buf and vec.len
96+
* VECTOR_GET_NEXT_CONTIG_BLOCK_LIMITED_TO(max):
97+
* updates vec.buf and vec.len but only gives a vec.len
98+
* of at most max
99+
* VECTOR_UPDATE():
100+
* vector_iteration_state_t vec public fields;
101+
* char *buf : where to write next data
102+
* size_t len : length of next contiguous block of elements
103+
* size_t elements_done : num elements (total) iterated over
104+
* : (so if you initialize with done=3 then
105+
* : iterate over 2 then iterate over 3 more,
106+
* : done=8)
107+
* size_t max_elements : num elements till done iterating the vector
108+
*
109+
* Documentation by example:
110+
*
111+
* vector_iteration_state_t vec;
112+
* // [INT4,INT4,....,INT4,INT4,....,INT4,INT4]
113+
* int buf[] = {1, 2, -1, 3, 4, -1, 5, 6};
114+
* int i;
115+
* VECTOR_INIT(buf,
116+
* 3, 2, 12, 4, // vector description
117+
* 0, 6); // start at the beginning, no external count limitation
118+
* printf("num elements in vector: %d\n", vec.max_elements);
119+
* while (vec.elements_done < vec.max_elements) {
120+
* VECTOR_GET_NEXT_CONTIG_BLOCK();
121+
* printf("(buf offset %d) contig count %d\n",
122+
* (int)(vec.buf - (char*)buf), vec.len);
123+
* for (i=0; i<vec.len; ++i) {
124+
* printf(" vec.buf[%d] = %d\n", i, ((int*)vec.buf)[i]);
125+
* }
126+
* VECTOR_UPDATE();
127+
* printf(" total iterated over so far: %d\n", vec.elements_done);
128+
* }
129+
*
130+
* > num elements in vector: 6
131+
* > (buf offset 0) contig count 2
132+
* > vec.buf[0] = 1
133+
* > vec.buf[1] = 2
134+
* > total iterated over so far: 2
135+
* > (buf offset 12) contig count 2
136+
* > vec.buf[0] = 3
137+
* > vec.buf[1] = 4
138+
* > total iterated over so far: 4
139+
* > (buf offset 24) contig count 2
140+
* > vec.buf[0] = 5
141+
* > vec.buf[1] = 6
142+
* > total iterated over so far: 6
143+
*
144+
* VECTOR_INIT(buf,
145+
* 3, 2, 12, 4,
146+
* 3, 2); // This usage might represent walking the vector
147+
* // piecemeal putting it into a packed buffer of
148+
* // limited size. So the 3 indicates what has
149+
* // already been iterated over in the past and
150+
* // 2 is the limit for the current packed buf
151+
* printf("num elements in vector: %d, starting at %d\n",
152+
* vec.max_elements, vec.elements_done);
153+
* while (vec.elements_done < vec.max_elements) {
154+
* VECTOR_GET_NEXT_CONTIG_BLOCK();
155+
* printf("(buf offset %d) contig count %d\n",
156+
* (int)(vec.buf - (char*)buf), vec.len);
157+
* for (i=0; i<vec.len; ++i) {
158+
* printf(" vec.buf[%d] = %d\n", i, ((int*)vec.buf)[i]);
159+
* }
160+
* VECTOR_UPDATE();
161+
* printf(" total iterated over so far: %d\n", vec.elements_done);
162+
* }
163+
*
164+
* > num elements in vector: 5, starting at 3
165+
* > (buf offset 16) contig count 1
166+
* > vec.buf[0] = 4
167+
* > total iterated over so far: 4
168+
* > (buf offset 24) contig count 1
169+
* > vec.buf[0] = 5
170+
* > total iterated over so far: 5
171+
*
172+
*/
173+
174+
typedef struct {
175+
// fields intended to be used by the user:
176+
char *buf; // where to write next data
177+
size_t len; // length of next contiguous block of elements
178+
size_t elements_done; // num elements (total) iterated over
179+
// (so if you initialize with done=3 then iterate over 2
180+
// then iterate over 3 more, done=8)
181+
size_t max_elements; // num elements till done iterating the vector
182+
// internal:
183+
size_t count_per_block;
184+
size_t extent_between_blocks;
185+
size_t element_size;
186+
size_t i;
187+
size_t j;
188+
} vector_iteration_state_t;
189+
190+
#define VECTOR_INIT(buf_, \
191+
nblocks_, \
192+
count_per_block_, \
193+
extent_between_blocks_, \
194+
element_size_, \
195+
elements_already_done_, \
196+
max_elements_) \
197+
do { \
198+
vec.buf = (char*)(buf_); \
199+
vec.count_per_block = (count_per_block_); \
200+
vec.extent_between_blocks = (extent_between_blocks_); \
201+
vec.element_size = (element_size_); \
202+
vec.i = 0; \
203+
vec.j = 0; \
204+
vec.elements_done = (elements_already_done_); \
205+
\
206+
/* vec.max_elements is the min of two factors: */ \
207+
/* 1. nblocks * count_per_block */ \
208+
/* 2. what is specified as max_elements to the macro */ \
209+
/* for computing what will fit in vec.buf base on its len: */ \
210+
/* number of complete blocks : vector_len / extent_between_blocks */ \
211+
/* bytes left for the next block : vector_len % extent_between_blocks */ \
212+
vec.max_elements = (nblocks_) * (count_per_block_); \
213+
if (vec.elements_done + (max_elements_) < vec.max_elements) { \
214+
vec.max_elements = vec.elements_done + (max_elements_); \
215+
} \
216+
\
217+
if ((elements_already_done_) != 0) { \
218+
vec.i = (elements_already_done_) / (count_per_block_); \
219+
vec.j = (elements_already_done_) % (count_per_block_); \
220+
vec.buf += (vec.i * (extent_between_blocks_) + vec.j * (element_size_)); \
221+
} \
222+
} while (0);
223+
224+
#define VECTOR_GET_NEXT_CONTIG_BLOCK() \
225+
do { \
226+
vec.len = vec.count_per_block - vec.j; \
227+
if (vec.elements_done + vec.len > vec.max_elements) { \
228+
vec.len = vec.max_elements - vec.elements_done; \
229+
} \
230+
} while (0);
231+
232+
#define VECTOR_GET_NEXT_CONTIG_BLOCK_LIMITED_TO(max_) \
233+
do { \
234+
vec.len = vec.count_per_block - vec.j; \
235+
if (vec.elements_done + vec.len > vec.max_elements) { \
236+
vec.len = vec.max_elements - vec.elements_done; \
237+
} \
238+
if (vec.len > (max_)) { \
239+
vec.len = (max_); \
240+
} \
241+
} while (0);
242+
243+
#define VECTOR_UPDATE() \
244+
do { \
245+
if (vec.len == vec.count_per_block) { \
246+
vec.buf += vec.extent_between_blocks; \
247+
} else { \
248+
vec.j += vec.len; \
249+
vec.buf += (vec.len * vec.element_size); \
250+
if (vec.j == vec.count_per_block) { \
251+
vec.j = 0; \
252+
vec.buf -= (vec.count_per_block * vec.element_size); \
253+
vec.buf += vec.extent_between_blocks; \
254+
} \
255+
} \
256+
vec.elements_done += vec.len; \
257+
} while (0);
29258

30259
typedef struct opal_convertor_master_t {
31260
struct opal_convertor_master_t* next;

0 commit comments

Comments
 (0)