|
7 | 7 | * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
8 | 8 | * Copyright (c) 2017 Research Organization for Information Science
|
9 | 9 | * and Technology (RIST). All rights reserved.
|
| 10 | + * Copyright (c) 2020 IBM Corporation. All rights reserved. |
10 | 11 | * $COPYRIGHT$
|
11 | 12 | *
|
12 | 13 | * Additional copyrights may follow
|
|
19 | 20 | #include "opal_config.h"
|
20 | 21 |
|
21 | 22 | #include "opal/datatype/opal_convertor.h"
|
| 23 | +#include "opal/datatype/opal_datatype_internal.h" |
22 | 24 |
|
23 | 25 | BEGIN_C_DECLS
|
24 | 26 |
|
25 |
| -typedef int32_t (*conversion_fct_t)( opal_convertor_t* pConvertor, uint32_t count, |
26 |
| - const void* from, size_t from_len, ptrdiff_t from_extent, |
27 |
| - void* to, size_t to_length, ptrdiff_t to_extent, |
28 |
| - ptrdiff_t *advance ); |
| 27 | +#define COPY_TO_VECTOR 1 |
| 28 | +#define COPY_FROM_VECTOR 2 |
| 29 | +/* returns elements processed in the packed_buf for this call */ |
| 30 | +typedef size_t (*conversion_fct_t)( opal_convertor_t* pConvertor, int mode, |
| 31 | + char* vector_buf, |
| 32 | + const ddt_elem_desc_t *vector_info, |
| 33 | + char* packed_buf, |
| 34 | + size_t packed_len, |
| 35 | + size_t packed_element_size, |
| 36 | + size_t *total_elements_done ); |
| 37 | + |
| 38 | +/* |
| 39 | + * The VECTOR_* macros are made to help iterate a buffer pointer over |
| 40 | + * data that resembles an MPI_Type_vector, ie it has a number of |
| 41 | + * blocks, a count_per_block, an extent_between_blocks, and an element |
| 42 | + * size. |
| 43 | + * |
| 44 | + * For example if a buffer's data were |
| 45 | + * [INT4,INT4,----,INT4,INT4,----,INT4,INT4] |
| 46 | + * the vars describing the vector would be |
| 47 | + * nblocks = 3 |
| 48 | + * count_per_block = 2 |
| 49 | + * extent_between_blocks = 12 |
| 50 | + * element_size = 4 |
| 51 | + * |
| 52 | + * The macros also have inputs for stopping part of the way through |
| 53 | + * a vector, and for picking back up in the middle of the vector. |
| 54 | + * The granularity for that is an "element" though, so you could |
| 55 | + * stop after writing 3 INT4 in the above example, then pick |
| 56 | + * back up at the next INT4, but you can't stop in the middle of |
| 57 | + * an INT4. |
| 58 | + * |
| 59 | + * Interface description |
| 60 | + * VECTOR_INIT(): |
| 61 | + * arguments: |
| 62 | + * buf : starting point of a buffer whose |
| 63 | + * : data is a vector, like |
| 64 | + * : [INT4,INT4,----,INT4,INT4,----,INT4,INT4] |
| 65 | + * nblocks : vector description for number of |
| 66 | + * : contiguous chunks, in this example 3 |
| 67 | + * count_per_block : vector description for number of elements |
| 68 | + * : per chunk, in this example 2 |
| 69 | + * extent_between_blocks : vector description for bytes between |
| 70 | + * : blocks, in this example 12 |
| 71 | + * element_size : vector description for element size, |
| 72 | + * : in this example 4 for size of the INT4 |
| 73 | + * elements_already_done : allow iteration to pick up in the middle |
| 74 | + * max_elements : specify an external factor limiting the |
| 75 | + * : number of vector elements to itearte over |
| 76 | + * : (beyond the current elements_alrady_done). |
| 77 | + * : The expected use case for this is that |
| 78 | + * : we're iterating over two buffers: one |
| 79 | + * : a vector, the other a packed buf, and |
| 80 | + * : we're probably copying data from one to |
| 81 | + * : the other. So "max_elements" could be |
| 82 | + * : given as the number of elements the |
| 83 | + * : packed buffers has, so that constraint |
| 84 | + * : will then be included in deciding how |
| 85 | + * : many elements are to be iterated over |
| 86 | + * : in the vector. If no external factor like |
| 87 | + * : that is relevant, then using max_elements |
| 88 | + * : = (nblocks * count_per_block) would make |
| 89 | + * : this argument a no-op. Note that this |
| 90 | + * : arg is relative to elements_already_done, |
| 91 | + * : so if you've already done 3, and you have |
| 92 | + * : room to iterate 3 more elements, you'd |
| 93 | + * : specify max_elements=3 |
| 94 | + * VECTOR_GET_NEXT_CONTIG_BLOCK(): |
| 95 | + * updates vec.buf and vec.len |
| 96 | + * VECTOR_GET_NEXT_CONTIG_BLOCK_LIMITED_TO(max): |
| 97 | + * updates vec.buf and vec.len but only gives a vec.len |
| 98 | + * of at most max |
| 99 | + * VECTOR_UPDATE(): |
| 100 | + * vector_iteration_state_t vec public fields; |
| 101 | + * char *buf : where to write next data |
| 102 | + * size_t len : length of next contiguous block of elements |
| 103 | + * size_t elements_done : num elements (total) iterated over |
| 104 | + * : (so if you initialize with done=3 then |
| 105 | + * : iterate over 2 then iterate over 3 more, |
| 106 | + * : done=8) |
| 107 | + * size_t max_elements : num elements till done iterating the vector |
| 108 | + * |
| 109 | + * Documentation by example: |
| 110 | + * |
| 111 | + * vector_iteration_state_t vec; |
| 112 | + * // [INT4,INT4,....,INT4,INT4,....,INT4,INT4] |
| 113 | + * int buf[] = {1, 2, -1, 3, 4, -1, 5, 6}; |
| 114 | + * int i; |
| 115 | + * VECTOR_INIT(buf, |
| 116 | + * 3, 2, 12, 4, // vector description |
| 117 | + * 0, 6); // start at the beginning, no external count limitation |
| 118 | + * printf("num elements in vector: %d\n", vec.max_elements); |
| 119 | + * while (vec.elements_done < vec.max_elements) { |
| 120 | + * VECTOR_GET_NEXT_CONTIG_BLOCK(); |
| 121 | + * printf("(buf offset %d) contig count %d\n", |
| 122 | + * (int)(vec.buf - (char*)buf), vec.len); |
| 123 | + * for (i=0; i<vec.len; ++i) { |
| 124 | + * printf(" vec.buf[%d] = %d\n", i, ((int*)vec.buf)[i]); |
| 125 | + * } |
| 126 | + * VECTOR_UPDATE(); |
| 127 | + * printf(" total iterated over so far: %d\n", vec.elements_done); |
| 128 | + * } |
| 129 | + * |
| 130 | + * > num elements in vector: 6 |
| 131 | + * > (buf offset 0) contig count 2 |
| 132 | + * > vec.buf[0] = 1 |
| 133 | + * > vec.buf[1] = 2 |
| 134 | + * > total iterated over so far: 2 |
| 135 | + * > (buf offset 12) contig count 2 |
| 136 | + * > vec.buf[0] = 3 |
| 137 | + * > vec.buf[1] = 4 |
| 138 | + * > total iterated over so far: 4 |
| 139 | + * > (buf offset 24) contig count 2 |
| 140 | + * > vec.buf[0] = 5 |
| 141 | + * > vec.buf[1] = 6 |
| 142 | + * > total iterated over so far: 6 |
| 143 | + * |
| 144 | + * VECTOR_INIT(buf, |
| 145 | + * 3, 2, 12, 4, |
| 146 | + * 3, 2); // This usage might represent walking the vector |
| 147 | + * // piecemeal putting it into a packed buffer of |
| 148 | + * // limited size. So the 3 indicates what has |
| 149 | + * // already been iterated over in the past and |
| 150 | + * // 2 is the limit for the current packed buf |
| 151 | + * printf("num elements in vector: %d, starting at %d\n", |
| 152 | + * vec.max_elements, vec.elements_done); |
| 153 | + * while (vec.elements_done < vec.max_elements) { |
| 154 | + * VECTOR_GET_NEXT_CONTIG_BLOCK(); |
| 155 | + * printf("(buf offset %d) contig count %d\n", |
| 156 | + * (int)(vec.buf - (char*)buf), vec.len); |
| 157 | + * for (i=0; i<vec.len; ++i) { |
| 158 | + * printf(" vec.buf[%d] = %d\n", i, ((int*)vec.buf)[i]); |
| 159 | + * } |
| 160 | + * VECTOR_UPDATE(); |
| 161 | + * printf(" total iterated over so far: %d\n", vec.elements_done); |
| 162 | + * } |
| 163 | + * |
| 164 | + * > num elements in vector: 5, starting at 3 |
| 165 | + * > (buf offset 16) contig count 1 |
| 166 | + * > vec.buf[0] = 4 |
| 167 | + * > total iterated over so far: 4 |
| 168 | + * > (buf offset 24) contig count 1 |
| 169 | + * > vec.buf[0] = 5 |
| 170 | + * > total iterated over so far: 5 |
| 171 | + * |
| 172 | + */ |
| 173 | + |
| 174 | +typedef struct { |
| 175 | + /* fields intended to be used by the user: */ |
| 176 | + char *buf; /* where to write next data */ |
| 177 | + size_t len; /* length of next contiguous block of elements */ |
| 178 | + size_t elements_done; /* num elements (total) iterated over */ |
| 179 | + /* (so if you initialize with done=3 then iterate over 2 */ |
| 180 | + /* then iterate over 3 more, done=8) */ |
| 181 | + size_t max_elements; /* num elements till done iterating the vector */ |
| 182 | + /* internal: */ |
| 183 | + size_t count_per_block; |
| 184 | + size_t extent_between_blocks; |
| 185 | + size_t element_size; |
| 186 | + size_t i; |
| 187 | + size_t j; |
| 188 | +} vector_iteration_state_t; |
| 189 | + |
| 190 | +#define VECTOR_INIT(buf_, \ |
| 191 | + nblocks_, \ |
| 192 | + count_per_block_, \ |
| 193 | + extent_between_blocks_, \ |
| 194 | + element_size_, \ |
| 195 | + elements_already_done_, \ |
| 196 | + max_elements_) \ |
| 197 | +do { \ |
| 198 | + vec.buf = (char*)(buf_); \ |
| 199 | + vec.count_per_block = (count_per_block_); \ |
| 200 | + vec.extent_between_blocks = (extent_between_blocks_); \ |
| 201 | + vec.element_size = (element_size_); \ |
| 202 | + vec.i = 0; \ |
| 203 | + vec.j = 0; \ |
| 204 | + vec.elements_done = (elements_already_done_); \ |
| 205 | + \ |
| 206 | + /* vec.max_elements is the min of two factors: */ \ |
| 207 | + /* 1. nblocks * count_per_block */ \ |
| 208 | + /* 2. what is specified as max_elements to the macro */ \ |
| 209 | + /* for computing what will fit in vec.buf base on its len: */ \ |
| 210 | + /* number of complete blocks : vector_len / extent_between_blocks */ \ |
| 211 | + /* bytes left for the next block : vector_len % extent_between_blocks */ \ |
| 212 | + vec.max_elements = (nblocks_) * (count_per_block_); \ |
| 213 | + if (vec.elements_done + (max_elements_) < vec.max_elements) { \ |
| 214 | + vec.max_elements = vec.elements_done + (max_elements_); \ |
| 215 | + } \ |
| 216 | + \ |
| 217 | + if ((elements_already_done_) != 0) { \ |
| 218 | + vec.i = (elements_already_done_) / (count_per_block_); \ |
| 219 | + vec.j = (elements_already_done_) % (count_per_block_); \ |
| 220 | + vec.buf += (vec.i * (extent_between_blocks_) + vec.j * (element_size_)); \ |
| 221 | + } \ |
| 222 | +} while (0); |
| 223 | + |
| 224 | +#define VECTOR_GET_NEXT_CONTIG_BLOCK() \ |
| 225 | +do { \ |
| 226 | + vec.len = vec.count_per_block - vec.j; \ |
| 227 | + if (vec.elements_done + vec.len > vec.max_elements) { \ |
| 228 | + vec.len = vec.max_elements - vec.elements_done; \ |
| 229 | + } \ |
| 230 | +} while (0); |
| 231 | + |
| 232 | +#define VECTOR_GET_NEXT_CONTIG_BLOCK_LIMITED_TO(max_) \ |
| 233 | +do { \ |
| 234 | + vec.len = vec.count_per_block - vec.j; \ |
| 235 | + if (vec.elements_done + vec.len > vec.max_elements) { \ |
| 236 | + vec.len = vec.max_elements - vec.elements_done; \ |
| 237 | + } \ |
| 238 | + if (vec.len > (max_)) { \ |
| 239 | + vec.len = (max_); \ |
| 240 | + } \ |
| 241 | +} while (0); |
| 242 | + |
| 243 | +#define VECTOR_UPDATE() \ |
| 244 | +do { \ |
| 245 | + if (vec.len == vec.count_per_block) { \ |
| 246 | + vec.buf += vec.extent_between_blocks; \ |
| 247 | + } else { \ |
| 248 | + vec.j += vec.len; \ |
| 249 | + vec.buf += (vec.len * vec.element_size); \ |
| 250 | + if (vec.j == vec.count_per_block) { \ |
| 251 | + vec.j = 0; \ |
| 252 | + vec.buf -= (vec.count_per_block * vec.element_size); \ |
| 253 | + vec.buf += vec.extent_between_blocks; \ |
| 254 | + } \ |
| 255 | + } \ |
| 256 | + vec.elements_done += vec.len; \ |
| 257 | +} while (0); |
29 | 258 |
|
30 | 259 | typedef struct opal_convertor_master_t {
|
31 | 260 | struct opal_convertor_master_t* next;
|
|
0 commit comments