diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index e9c37568d93..7aecf80df50 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -541,9 +541,7 @@ static inline int allred_sched_linear(int rank, int rsize, void *sendbuf, void * } else { res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, 0, schedule); } - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } if (0 == rank) { char *rbuf, *lbuf, *buf; @@ -574,9 +572,7 @@ static inline int allred_sched_linear(int rank, int rsize, void *sendbuf, void * if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } res = NBC_Sched_op (lbuf, tmplbuf, rbuf, tmprbuf, count, datatype, op, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } + if (NBC_OK != res) { printf("Error in NBC_Sched_op() (%i)\n", res); return res; } res = NBC_Sched_barrier(schedule); if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c index 62b3bdd719d..77e096d79f0 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c @@ -7,6 +7,8 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler * @@ -16,6 +18,7 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); +static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, void* buf, int count, MPI_Datatype type, MPI_Aint ext, ptrdiff_t gap, MPI_Comm comm); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ @@ -48,10 +51,11 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty NBC_Alltoall_args *args, *found, search; #endif char *rbuf, *sbuf, inplace; - enum {NBC_A2A_LINEAR, NBC_A2A_PAIRWISE, NBC_A2A_DISS} alg; + enum {NBC_A2A_LINEAR, NBC_A2A_PAIRWISE, NBC_A2A_DISS, NBC_A2A_INPLACE} alg; NBC_Handle *handle; ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + ptrdiff_t span, gap; NBC_IN_PLACE(sendbuf, recvbuf, inplace); @@ -72,7 +76,9 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty /* algorithm selection */ a2asize = sndsize*sendcount*p; /* this number is optimized for TCP on odin.cs.indiana.edu */ - if((p <= 8) && ((a2asize < 1<<17) || (sndsize*sendcount < 1<<12))) { + if (inplace) { + alg = NBC_A2A_INPLACE; + } else if((p <= 8) && ((a2asize < 1<<17) || (sndsize*sendcount < 1<<12))) { /* just send as fast as we can if we have less than 8 peers, if the * total communicated size is smaller than 1<<17 *and* if we don't * have eager messages (msgsize < 1<<13) */ @@ -92,7 +98,11 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty } /* allocate temp buffer if we need one */ - if(alg == NBC_A2A_DISS) { + if (alg == NBC_A2A_INPLACE) { + span = opal_datatype_span(&recvtype->super, recvcount, &gap); + handle->tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { printf("Error in malloc()\n"); return NBC_OOR; } + } else if (alg == NBC_A2A_DISS) { /* only A2A_DISS needs buffers */ if(NBC_Type_intrinsic(sendtype)) { datasize = sndext*sendcount; @@ -146,13 +156,16 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty #endif /* not found - generate new schedule */ schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } + if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } res = NBC_Sched_create(schedule); if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } switch(alg) { - case NBC_A2A_LINEAR: + case NBC_A2A_INPLACE: + res = a2a_sched_inplace(rank, p, schedule, recvbuf, recvcount, recvtype, rcvext, gap, comm); + break; + case NBC_A2A_LINEAR: res = a2a_sched_linear(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); break; case NBC_A2A_DISS: @@ -224,7 +237,7 @@ int ompi_coll_libnbc_ialltoall_inter (void* sendbuf, int sendcount, MPI_Datatype if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } handle->tmpbuf=NULL; @@ -378,3 +391,48 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve return NBC_OK; } +static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, void* buf, int count, + MPI_Datatype type, MPI_Aint ext, ptrdiff_t gap, MPI_Comm comm) { + int res; + + for (int i = 1 ; i < (p+1)/2 ; i++) { + int speer = (rank + i) % p; + int rpeer = (rank + p - i) % p; + char *sbuf = (char *) buf + speer * count * ext; + char *rbuf = (char *) buf + rpeer * count * ext; + + res = NBC_Sched_copy (rbuf, false, count, type, + (void *)(-gap), true, count, type, + schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + res = NBC_Sched_send (sbuf, false , count, type, speer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_recv (rbuf, false , count, type, rpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + res = NBC_Sched_send ((void *)(-gap), true, count, type, rpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_recv (sbuf, false, count, type, speer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + if (0 == (p%2)) { + int peer = (rank + p/2) % p; + + char *tbuf = (char *) buf + peer * count * ext; + res = NBC_Sched_copy (tbuf, false, count, type, (void *)(-gap), true, count, type, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + res = NBC_Sched_send ((void *)(-gap), true , count, type, peer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_recv (tbuf, false , count, type, peer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + } + + return NBC_OK; +} diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallv.c b/ompi/mca/coll/libnbc/nbc_ialltoallv.c index 0096f30b4ca..0db774dbe96 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallv.c @@ -1,15 +1,33 @@ /* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. + * Copyright (c) 2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2006 The Technical University of Chemnitz. All + * rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler * */ #include "nbc_internal.h" +static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, + const int *sdispls, MPI_Aint sndext, MPI_Datatype sendtype, + void *recvbuf, const int *recvcounts, + const int *rdispls, MPI_Aint rcvext, MPI_Datatype recvtype); + +static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + MPI_Aint sndext, MPI_Datatype sendtype, + void *recvbuf, const int *recvcounts, const int *rdispls, + MPI_Aint rcvext, MPI_Datatype recvtype); + +static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, + void *buf, const int *counts, const int *displs, + MPI_Aint ext, MPI_Datatype type, ptrdiff_t gap); + /* an alltoallv schedule can not be cached easily because the contents * ot the recvcounts array may change, so a comparison of the address * would not be sufficient ... we simply do not cache it */ @@ -20,10 +38,11 @@ int ompi_coll_libnbc_ialltoallv(void* sendbuf, int *sendcounts, int *sdispls, MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_0_0_t *module) { - int rank, p, res, i; + int rank, p, res; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; + ptrdiff_t gap, span; NBC_Handle *handle; ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; @@ -43,7 +62,7 @@ int ompi_coll_libnbc_ialltoallv(void* sendbuf, int *sendcounts, int *sdispls, if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } handle->tmpbuf=NULL; @@ -51,32 +70,36 @@ int ompi_coll_libnbc_ialltoallv(void* sendbuf, int *sendcounts, int *sdispls, if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } /* copy data to receivbuffer */ - if((sendcounts[rank] != 0) && !inplace) { - rbuf = ((char *) recvbuf) + (rdispls[rank] * rcvext); - sbuf = ((char *) sendbuf) + (sdispls[rank] * sndext); - res = NBC_Copy(sbuf, sendcounts[rank], sendtype, rbuf, recvcounts[rank], recvtype, comm); + if (inplace) { + int count = 0; + for (int i = 0; i < p; i++) { + if (recvcounts[i] > count) { + count = recvcounts[i]; + } + } + span = opal_datatype_span(&recvtype->super, count, &gap); + handle->tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { printf("Error in malloc()\n"); return NBC_OOR; } + sendcounts = recvcounts; + sdispls = rdispls; + } else if (sendcounts[rank] != 0) { + rbuf = (char *) recvbuf + rdispls[rank] * rcvext; + sbuf = (char *) sendbuf + sdispls[rank] * sndext; + res = NBC_Copy (sbuf, sendcounts[rank], sendtype, rbuf, recvcounts[rank], recvtype, comm); if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } } - for (i = 0; i < p; i++) { - if (i == rank) { continue; } - /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - /* post all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } + if (inplace) { + res = a2av_sched_inplace(rank, p, schedule, recvbuf, recvcounts, + rdispls, rcvext, recvtype, gap); + } else { + res = a2av_sched_linear(rank, p, schedule, + sendbuf, sendcounts, sdispls, sndext, sendtype, + recvbuf, recvcounts, rdispls, rcvext, recvtype); } + if (OPAL_UNLIKELY(NBC_OK != res)) { return res; } - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); + res = NBC_Sched_commit (schedule); if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } res = NBC_Start(handle, schedule); @@ -94,7 +117,6 @@ int ompi_coll_libnbc_ialltoallv_inter (void* sendbuf, int *sendcounts, int *sdis int rank, res, i, rsize; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; - char *rbuf, *sbuf; NBC_Handle *handle; ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; @@ -112,7 +134,7 @@ int ompi_coll_libnbc_ialltoallv_inter (void* sendbuf, int *sendcounts, int *sdis MPI_Comm_remote_size (comm, &rsize); schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } + if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } handle->tmpbuf=NULL; @@ -122,13 +144,13 @@ int ompi_coll_libnbc_ialltoallv_inter (void* sendbuf, int *sendcounts, int *sdis for (i = 0; i < rsize; i++) { /* post all sends */ if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); + char *sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } } /* post all receives */ if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); + char *rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } } @@ -144,3 +166,122 @@ int ompi_coll_libnbc_ialltoallv_inter (void* sendbuf, int *sendcounts, int *sdis return NBC_OK; } + +static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + MPI_Aint sndext, MPI_Datatype sendtype, + void *recvbuf, const int *recvcounts, const int *rdispls, + MPI_Aint rcvext, MPI_Datatype recvtype) { + int res; + + for (int i = 0 ; i < p ; ++i) { + if (i == rank) { + continue; + } + + /* post send */ + if (sendcounts[i] != 0) { + char *sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); + res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + } + + /* post receive */ + if (recvcounts[i] != 0) { + char *rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); + res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + } + } + + return NBC_OK; +} + +static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + MPI_Aint sndext, MPI_Datatype sendtype, + void *recvbuf, const int *recvcounts, const int *rdispls, + MPI_Aint rcvext, MPI_Datatype recvtype) { + int res; + + for (int i = 1 ; i < p ; ++i) { + int sndpeer = (rank + i) % p; + int rcvpeer = (rank + p - i) %p; + + /* post send */ + if (sendcounts[sndpeer] != 0) { + char *sbuf = ((char *) sendbuf) + (sdispls[sndpeer] * sndext); + res = NBC_Sched_send(sbuf, false, sendcounts[sndpeer], sendtype, sndpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + } + + /* post receive */ + if (recvcounts[rcvpeer] != 0) { + char *rbuf = ((char *) recvbuf) + (rdispls[rcvpeer] * rcvext); + res = NBC_Sched_recv(rbuf, false, recvcounts[rcvpeer], recvtype, rcvpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + } + + return NBC_OK; +} + +static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, + void *buf, const int *counts, const int *displs, + MPI_Aint ext, MPI_Datatype type, ptrdiff_t gap) { + int res; + + for (int i = 1; i < (p+1)/2; i++) { + int speer = (rank + i) % p; + int rpeer = (rank + p - i) % p; + char *sbuf = (char *) buf + displs[speer] * ext; + char *rbuf = (char *) buf + displs[rpeer] * ext; + + if (0 != counts[rpeer]) { + res = NBC_Sched_copy (rbuf, false, counts[rpeer], type, (void *)(-gap), true, counts[rpeer], type, schedule); + if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + if (0 != counts[speer]) { + res = NBC_Sched_send (sbuf, false , counts[speer], type, speer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + } + if (0 != counts[rpeer]) { + res = NBC_Sched_recv (rbuf, false , counts[rpeer], type, rpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + + if (0 != counts[rpeer]) { + res = NBC_Sched_send ((void *)(-gap), true, counts[rpeer], type, rpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + } + if (0 != counts[speer]) { + res = NBC_Sched_recv (sbuf, false, counts[speer], type, speer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + } + if (0 == (p%2)) { + int peer = (rank + p/2) % p; + + char *tbuf = (char *) buf + displs[peer] * ext; + res = NBC_Sched_copy (tbuf, false, counts[peer], type, (void *)(-gap), true, counts[peer], type, schedule); + if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + res = NBC_Sched_send ((void *)(-gap), true , counts[peer], type, peer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_recv (tbuf, false , counts[peer], type, peer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + + return NBC_OK; +} diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallw.c b/ompi/mca/coll/libnbc/nbc_ialltoallw.c index 4927f614823..7627108bd26 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallw.c @@ -1,15 +1,33 @@ /* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. + * Copyright (c) 2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2006 The Technical University of Chemnitz. All + * rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * Author(s): Torsten Hoefler * */ #include "nbc_internal.h" +static inline int a2aw_sched_linear(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const * sendtypes, + void *recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const * recvtypes); + +static inline int a2aw_sched_pairwise(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const * sendtypes, + void *recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const * recvtypes); + +static inline int a2aw_sched_inplace(int rank, int p, NBC_Schedule *schedule, + void *buf, const int *counts, const int *displs, + struct ompi_datatype_t * const * types); + /* an alltoallw schedule can not be cached easily because the contents * ot the recvcounts array may change, so a comparison of the address * would not be sufficient ... we simply do not cache it */ @@ -20,9 +38,10 @@ int ompi_coll_libnbc_ialltoallw(void* sendbuf, int *sendcounts, int *sdispls, MPI_Datatype recvtypes[], struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_0_0_t *module) { - int rank, p, res, i; + int rank, p, res; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; + ptrdiff_t span=0; NBC_Handle *handle; ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; @@ -46,32 +65,37 @@ int ompi_coll_libnbc_ialltoallw(void* sendbuf, int *sendcounts, int *sdispls, if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } /* copy data to receivbuffer */ - if((sendcounts[rank] != 0) && !inplace) { - rbuf = ((char *) recvbuf) + rdispls[rank]; - sbuf = ((char *) sendbuf) + sdispls[rank]; + if (inplace) { + ptrdiff_t lgap, lspan; + for (int i = 0; i < p; i++) { + lspan = opal_datatype_span(&recvtypes[i]->super, recvcounts[i], &lgap); + if (lspan > span) { + span = lspan; + } + } + handle->tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { printf("Error in malloc()\n"); return NBC_OOR; } + sendcounts = recvcounts; + sdispls = rdispls; + sendtypes = recvtypes; + } else if (sendcounts[rank] != 0) { + rbuf = (char *) recvbuf + rdispls[rank]; + sbuf = (char *) sendbuf + sdispls[rank]; res = NBC_Copy(sbuf, sendcounts[rank], sendtypes[rank], rbuf, recvcounts[rank], recvtypes[rank], comm); if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } } - for (i = 0; i < p; i++) { - if (i == rank) { continue; } - /* post all sends */ - if(sendcounts[i] != 0) { - sbuf = ((char *) sendbuf) + sdispls[i]; - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - /* post all receives */ - if(recvcounts[i] != 0) { - rbuf = ((char *) recvbuf) + rdispls[i]; - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtypes[i], i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } + if (inplace) { + res = a2aw_sched_inplace(rank, p, schedule, recvbuf, + recvcounts, rdispls, recvtypes); + } else { + res = a2aw_sched_linear(rank, p, schedule, + sendbuf, sendcounts, sdispls, sendtypes, + recvbuf, recvcounts, rdispls, recvtypes); } + if (OPAL_UNLIKELY(NBC_OK != res)) { return res; } - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); + res = NBC_Sched_commit (schedule); if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } res = NBC_Start(handle, schedule); @@ -134,3 +158,123 @@ int ompi_coll_libnbc_ialltoallw_inter (void* sendbuf, int *sendcounts, int *sdis return NBC_OK; } + +static inline int a2aw_sched_linear(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const * sendtypes, + void *recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const * recvtypes) { + int res; + + for (int i = 0; i < p; i++) { + if (i == rank) { + continue; + } + + /* post send */ + if (sendcounts[i] != 0) { + char *sbuf = (char *) sendbuf + sdispls[i]; + res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtypes[i], i, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + } + /* post receive */ + if (recvcounts[i] != 0) { + char *rbuf = (char *) recvbuf + rdispls[i]; + res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtypes[i], i, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + } + } + + return NBC_OK; +} + +static inline int a2aw_sched_pairwise(int rank, int p, NBC_Schedule *schedule, + const void *sendbuf, const int *sendcounts, const int *sdispls, + struct ompi_datatype_t * const * sendtypes, + void *recvbuf, const int *recvcounts, const int *rdispls, + struct ompi_datatype_t * const * recvtypes) { + int res; + + for (int i = 1; i < p; i++) { + int sndpeer = (rank + i) % p; + int rcvpeer = (rank + p - i) % p; + + /* post send */ + if (sendcounts[sndpeer] != 0) { + char *sbuf = (char *) sendbuf + sdispls[sndpeer]; + res = NBC_Sched_send (sbuf, false, sendcounts[sndpeer], sendtypes[sndpeer], sndpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + } + /* post receive */ + if (recvcounts[rcvpeer] != 0) { + char *rbuf = (char *) recvbuf + rdispls[rcvpeer]; + res = NBC_Sched_recv (rbuf, false, recvcounts[rcvpeer], recvtypes[rcvpeer], rcvpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + } + + return NBC_OK; +} + +static inline int a2aw_sched_inplace(int rank, int p, NBC_Schedule *schedule, + void *buf, const int *counts, const int *displs, + struct ompi_datatype_t * const * types) { + ptrdiff_t gap; + int res; + + for (int i = 1; i < (p+1)/2; i++) { + int speer = (rank + i) % p; + int rpeer = (rank + p - i) % p; + char *sbuf = (char *) buf + displs[speer]; + char *rbuf = (char *) buf + displs[rpeer]; + + if (0 != counts[rpeer]) { + (void)opal_datatype_span(&types[rpeer]->super, counts[rpeer], &gap); + res = NBC_Sched_copy (rbuf, false, counts[rpeer], types[rpeer], (void *)(-gap), true, counts[rpeer], types[rpeer], schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + if (0 != counts[speer]) { + res = NBC_Sched_send (sbuf, false , counts[speer], types[speer], speer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + } + if (0 != counts[rpeer]) { + res = NBC_Sched_recv (rbuf, false , counts[rpeer], types[rpeer], rpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + + if (0 != counts[rpeer]) { + res = NBC_Sched_send ((void *)(-gap), true, counts[rpeer], types[rpeer], rpeer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + } + if (0 != counts[speer]) { + res = NBC_Sched_recv (sbuf, false, counts[speer], types[speer], speer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + } + if (0 == (p%2)) { + int peer = (rank + p/2) % p; + + char *tbuf = (char *) buf + displs[peer]; + (void)opal_datatype_span(&types[peer]->super, counts[peer], &gap); + res = NBC_Sched_copy (tbuf, false, counts[peer], types[peer], (void *)(-gap), true, counts[peer], types[peer], schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + res = NBC_Sched_send ((void *)(-gap), true , counts[peer], types[peer], peer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } + res = NBC_Sched_recv (tbuf, false , counts[peer], types[peer], peer, schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } + res = NBC_Sched_barrier(schedule); + if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; } + } + + return NBC_OK; +} diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index c7223b731df..2873458b399 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -515,7 +515,7 @@ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void * } if (0 == size) { - return OMPI_SUCCESS; + return NBC_OK; } packbuf = malloc(size); if (NULL == packbuf) { printf("Error in malloc()\n"); return res; } diff --git a/ompi/mpi/c/ialltoall.c b/ompi/mpi/c/ialltoall.c index 9cde56a6ec7..5d08eeb50e4 100644 --- a/ompi/mpi/c/ialltoall.c +++ b/ompi/mpi/c/ialltoall.c @@ -14,7 +14,7 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -74,11 +74,6 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, } else if (MPI_IN_PLACE == recvbuf) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME); - } else if (MPI_IN_PLACE == sendbuf) { - /* MPI_IN_PLACE is not fully implemented yet, - return MPI_ERR_INTERN for now */ - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, - FUNC_NAME); } else { OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcount); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); diff --git a/ompi/mpi/c/ialltoallv.c b/ompi/mpi/c/ialltoallv.c index b10e7dceea8..f3569059846 100644 --- a/ompi/mpi/c/ialltoallv.c +++ b/ompi/mpi/c/ialltoallv.c @@ -13,7 +13,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -101,11 +101,6 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl (NULL == recvcounts) || (NULL == rdispls) || MPI_IN_PLACE == recvbuf) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); - } else if (MPI_IN_PLACE == sendbuf) { - /* MPI_IN_PLACE is not fully implemented yet, - return MPI_ERR_INTERN for now */ - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, - FUNC_NAME); } size = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); diff --git a/ompi/mpi/c/ialltoallw.c b/ompi/mpi/c/ialltoallw.c index 43da873a0b9..011dc628afe 100644 --- a/ompi/mpi/c/ialltoallw.c +++ b/ompi/mpi/c/ialltoallw.c @@ -13,7 +13,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -97,11 +97,6 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl (NULL == recvcounts) || (NULL == rdispls) || (NULL == recvtypes) || MPI_IN_PLACE == recvbuf) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); - } else if (MPI_IN_PLACE == sendbuf) { - /* MPI_IN_PLACE is not fully implemented yet, - return MPI_ERR_INTERN for now */ - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INTERN, - FUNC_NAME); } size = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm);