|
9 | 9 | * University of Stuttgart. All rights reserved.
|
10 | 10 | * Copyright (c) 2004-2005 The Regents of the University of California.
|
11 | 11 | * All rights reserved.
|
12 |
| - * Copyright (c) 2014-2017 Research Organization for Information Science |
13 |
| - * and Technology (RIST). All rights reserved. |
| 12 | + * Copyright (c) 2014-2019 Research Organization for Information Science |
| 13 | + * and Technology (RIST). All rights reserved. |
14 | 14 | * $COPYRIGHT$
|
15 | 15 | *
|
16 | 16 | * Additional copyrights may follow
|
|
26 | 26 | #include "ompi/communicator/communicator.h"
|
27 | 27 | #include "ompi/mca/coll/base/coll_tags.h"
|
28 | 28 | #include "ompi/mca/coll/base/coll_base_functions.h"
|
| 29 | +#include "ompi/mca/topo/base/base.h" |
29 | 30 | #include "ompi/mca/pml/pml.h"
|
30 | 31 | #include "coll_base_util.h"
|
31 | 32 |
|
@@ -103,3 +104,187 @@ int ompi_rounddown(int num, int factor)
|
103 | 104 | num /= factor;
|
104 | 105 | return num * factor; /* floor(num / factor) * factor */
|
105 | 106 | }
|
| 107 | + |
| 108 | +static void release_objs_callback(struct ompi_coll_base_nbc_request_t *request) { |
| 109 | + if (NULL != request->data.objs.objs[0]) { |
| 110 | + OBJ_RELEASE(request->data.objs.objs[0]); |
| 111 | + } |
| 112 | + if (NULL != request->data.objs.objs[1]) { |
| 113 | + OBJ_RELEASE(request->data.objs.objs[1]); |
| 114 | + } |
| 115 | +} |
| 116 | + |
| 117 | +static int complete_objs_callback(struct ompi_request_t *req) { |
| 118 | + struct ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; |
| 119 | + int rc = OMPI_SUCCESS; |
| 120 | + assert (NULL != request); |
| 121 | + if (NULL != request->cb.req_complete_cb) { |
| 122 | + rc = request->cb.req_complete_cb(request->req_complete_cb_data); |
| 123 | + } |
| 124 | + release_objs_callback(request); |
| 125 | + return rc; |
| 126 | +} |
| 127 | + |
| 128 | +static int free_objs_callback(struct ompi_request_t **rptr) { |
| 129 | + struct ompi_coll_base_nbc_request_t *request = *(ompi_coll_base_nbc_request_t **)rptr; |
| 130 | + int rc = OMPI_SUCCESS; |
| 131 | + if (NULL != request->cb.req_free) { |
| 132 | + rc = request->cb.req_free(rptr); |
| 133 | + } |
| 134 | + release_objs_callback(request); |
| 135 | + return rc; |
| 136 | +} |
| 137 | + |
| 138 | +int ompi_coll_base_retain_op( ompi_request_t *req, ompi_op_t *op, |
| 139 | + ompi_datatype_t *type) { |
| 140 | + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; |
| 141 | + bool retain = false; |
| 142 | + if (!ompi_op_is_intrinsic(op)) { |
| 143 | + OBJ_RETAIN(op); |
| 144 | + request->data.op.op = op; |
| 145 | + retain = true; |
| 146 | + } |
| 147 | + if (!ompi_datatype_is_predefined(type)) { |
| 148 | + OBJ_RETAIN(type); |
| 149 | + request->data.op.datatype = type; |
| 150 | + retain = true; |
| 151 | + } |
| 152 | + if (OPAL_UNLIKELY(retain)) { |
| 153 | + /* We need to consider two cases : |
| 154 | + * - non blocking collectives: |
| 155 | + * the objects can be released when MPI_Wait() completes |
| 156 | + * and we use the req_complete_cb callback |
| 157 | + * - persistent non blocking collectives: |
| 158 | + * the objects can only be released when the request is freed |
| 159 | + * (e.g. MPI_Request_free() completes) and we use req_free callback |
| 160 | + */ |
| 161 | + if (req->req_persistent) { |
| 162 | + request->cb.req_free = req->req_free; |
| 163 | + req->req_free = free_objs_callback; |
| 164 | + } else { |
| 165 | + request->cb.req_complete_cb = req->req_complete_cb; |
| 166 | + request->req_complete_cb_data = req->req_complete_cb_data; |
| 167 | + req->req_complete_cb = complete_objs_callback; |
| 168 | + req->req_complete_cb_data = request; |
| 169 | + } |
| 170 | + } |
| 171 | + return OMPI_SUCCESS; |
| 172 | +} |
| 173 | + |
| 174 | +int ompi_coll_base_retain_datatypes( ompi_request_t *req, ompi_datatype_t *stype, |
| 175 | + ompi_datatype_t *rtype) { |
| 176 | + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; |
| 177 | + bool retain = false; |
| 178 | + if (NULL != stype && !ompi_datatype_is_predefined(stype)) { |
| 179 | + OBJ_RETAIN(stype); |
| 180 | + request->data.types.stype = stype; |
| 181 | + retain = true; |
| 182 | + } |
| 183 | + if (NULL != rtype && !ompi_datatype_is_predefined(rtype)) { |
| 184 | + OBJ_RETAIN(rtype); |
| 185 | + request->data.types.rtype = rtype; |
| 186 | + retain = true; |
| 187 | + } |
| 188 | + if (OPAL_UNLIKELY(retain)) { |
| 189 | + if (req->req_persistent) { |
| 190 | + request->cb.req_free = req->req_free; |
| 191 | + req->req_free = free_objs_callback; |
| 192 | + } else { |
| 193 | + request->cb.req_complete_cb = req->req_complete_cb; |
| 194 | + request->req_complete_cb_data = req->req_complete_cb_data; |
| 195 | + req->req_complete_cb = complete_objs_callback; |
| 196 | + req->req_complete_cb_data = request; |
| 197 | + } |
| 198 | + } |
| 199 | + return OMPI_SUCCESS; |
| 200 | +} |
| 201 | + |
| 202 | +static void release_vecs_callback(ompi_coll_base_nbc_request_t *request) { |
| 203 | + ompi_communicator_t *comm = request->super.req_mpi_object.comm; |
| 204 | + int scount, rcount; |
| 205 | + if (OMPI_COMM_IS_TOPO(comm)) { |
| 206 | + (void)mca_topo_base_neighbor_count (comm, &rcount, &scount); |
| 207 | + } else { |
| 208 | + scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); |
| 209 | + } |
| 210 | + for (int i=0; i<scount; i++) { |
| 211 | + if (NULL != request->data.vecs.stypes && NULL != request->data.vecs.stypes[i]) { |
| 212 | + OMPI_DATATYPE_RELEASE(request->data.vecs.stypes[i]); |
| 213 | + } |
| 214 | + } |
| 215 | + for (int i=0; i<rcount; i++) { |
| 216 | + if (NULL != request->data.vecs.rtypes && NULL != request->data.vecs.rtypes[i]) { |
| 217 | + OMPI_DATATYPE_RELEASE(request->data.vecs.rtypes[i]); |
| 218 | + } |
| 219 | + } |
| 220 | +} |
| 221 | + |
| 222 | +static int complete_vecs_callback(struct ompi_request_t *req) { |
| 223 | + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; |
| 224 | + int rc = OMPI_SUCCESS; |
| 225 | + assert (NULL != request); |
| 226 | + if (NULL != request->cb.req_complete_cb) { |
| 227 | + rc = request->cb.req_complete_cb(request->req_complete_cb_data); |
| 228 | + } |
| 229 | + release_vecs_callback(request); |
| 230 | + return rc; |
| 231 | +} |
| 232 | + |
| 233 | +static int free_vecs_callback(struct ompi_request_t **rptr) { |
| 234 | + struct ompi_coll_base_nbc_request_t *request = *(ompi_coll_base_nbc_request_t **)rptr; |
| 235 | + int rc = OMPI_SUCCESS; |
| 236 | + if (NULL != request->cb.req_free) { |
| 237 | + rc = request->cb.req_free(rptr); |
| 238 | + } |
| 239 | + release_vecs_callback(request); |
| 240 | + return rc; |
| 241 | +} |
| 242 | + |
| 243 | +int ompi_coll_base_retain_datatypes_w( ompi_request_t *req, |
| 244 | + ompi_datatype_t *stypes[], ompi_datatype_t *rtypes[]) { |
| 245 | + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; |
| 246 | + bool retain = false; |
| 247 | + ompi_communicator_t *comm = request->super.req_mpi_object.comm; |
| 248 | + int scount, rcount; |
| 249 | + if (OMPI_COMM_IS_TOPO(comm)) { |
| 250 | + (void)mca_topo_base_neighbor_count (comm, &rcount, &scount); |
| 251 | + } else { |
| 252 | + scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); |
| 253 | + } |
| 254 | + |
| 255 | + for (int i=0; i<scount; i++) { |
| 256 | + if (NULL != stypes && NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) { |
| 257 | + OBJ_RETAIN(stypes[i]); |
| 258 | + retain = true; |
| 259 | + } |
| 260 | + } |
| 261 | + for (int i=0; i<rcount; i++) { |
| 262 | + if (NULL != rtypes && NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) { |
| 263 | + OBJ_RETAIN(rtypes[i]); |
| 264 | + retain = true; |
| 265 | + } |
| 266 | + } |
| 267 | + if (OPAL_UNLIKELY(retain)) { |
| 268 | + request->data.vecs.stypes = stypes; |
| 269 | + request->data.vecs.rtypes = rtypes; |
| 270 | + if (req->req_persistent) { |
| 271 | + request->cb.req_free = req->req_free; |
| 272 | + req->req_free = free_vecs_callback; |
| 273 | + } else { |
| 274 | + request->cb.req_complete_cb = req->req_complete_cb; |
| 275 | + request->req_complete_cb_data = req->req_complete_cb_data; |
| 276 | + req->req_complete_cb = complete_vecs_callback; |
| 277 | + req->req_complete_cb_data = request; |
| 278 | + } |
| 279 | + } |
| 280 | + return OMPI_SUCCESS; |
| 281 | +} |
| 282 | + |
| 283 | +static void nbc_req_cons(ompi_coll_base_nbc_request_t *req) { |
| 284 | + req->cb.req_complete_cb = NULL; |
| 285 | + req->req_complete_cb_data = NULL; |
| 286 | + req->data.objs.objs[0] = NULL; |
| 287 | + req->data.objs.objs[1] = NULL; |
| 288 | +} |
| 289 | + |
| 290 | +OBJ_CLASS_INSTANCE(ompi_coll_base_nbc_request_t, ompi_request_t, nbc_req_cons, NULL); |
0 commit comments