@@ -43,17 +43,16 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
4343 int i , err , rank , size ;
4444 char * wkg = NULL , * workbuf = NULL ;
4545 MPI_Status status ;
46- MPI_Aint incr , extent , lb ;
4746 MPI_Aint sextent , sgap = 0 , ssize ;
48- MPI_Aint rextent , rgap = 0 , rsize ;
47+ MPI_Aint rextent ;
4948 int total_recv = 0 ;
5049 int sg_cnt , node_cnt ;
5150 int cur_sg , root_sg ;
5251 int cur_node , root_node ;
5352 int is_base , is_local_root ;
5453 int startr , endr , inc ;
55- int startn , endn , incn ;
56- int num_nodes , node_id ;
54+ int startn , endn ;
55+ int num_nodes ;
5756 mca_coll_acoll_module_t * acoll_module = (mca_coll_acoll_module_t * ) module ;
5857 coll_acoll_reserve_mem_t * reserve_mem_gather = & (acoll_module -> reserve_mem_s );
5958
@@ -70,17 +69,13 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
7069 num_nodes = 1 ;
7170 }
7271
73- ompi_datatype_get_extent (rdtype , & lb , & extent );
74- incr = extent * (ptrdiff_t ) rcount ;
75-
76- /* Setup root for reveive */
72+ /* Setup root for receive */
7773 if (rank == root ) {
7874 ompi_datatype_type_extent (rdtype , & rextent );
79- rsize = opal_datatype_span (& rdtype -> super , (int64_t ) rcount * size , & rgap );
8075 /* Just use the recv buffer */
8176 wkg = (char * ) rbuf ;
8277 if (sbuf != MPI_IN_PLACE ) {
83- MPI_Aint root_ofst = extent * (ptrdiff_t ) (rcount * root );
78+ MPI_Aint root_ofst = rextent * (ptrdiff_t ) (rcount * root );
8479 err = ompi_datatype_sndrcv ((void * ) sbuf , scount , sdtype , wkg + (ptrdiff_t ) root_ofst ,
8580 rcount , rdtype );
8681 if (MPI_SUCCESS != err ) {
@@ -100,7 +95,7 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
10095 is_local_root = (rank % node_cnt == 0 ) && (cur_node != root_node );
10196 startn = (rank / node_cnt ) * node_cnt ;
10297
103- if (is_base || ( rank == root ) ) {
98+ if (is_base ) {
10499 int64_t buf_size = is_local_root ? (int64_t ) scount * node_cnt : (int64_t ) scount * sg_cnt ;
105100 ompi_datatype_type_extent (sdtype , & sextent );
106101 ssize = opal_datatype_span (& sdtype -> super , buf_size , & sgap );
@@ -111,7 +106,7 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
111106 return OMPI_ERR_OUT_OF_RESOURCE ;
112107 }
113108 wkg = workbuf - sgap ;
114- tmprecv = wkg + extent * (ptrdiff_t ) (rcount * (rank - startr ));
109+ tmprecv = wkg + sextent * (ptrdiff_t ) (rcount * (rank - startr ));
115110 /* local copy to workbuf */
116111 err = ompi_datatype_sndrcv ((void * ) sbuf , scount , sdtype , tmprecv , scount , sdtype );
117112 if (MPI_SUCCESS != err ) {
@@ -123,7 +118,7 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
123118 rcount = scount ;
124119 rextent = sextent ;
125120 total_recv = rcount ;
126- } else {
121+ } else if ( rank != root ) {
127122 wkg = (char * ) sbuf ;
128123 total_recv = scount ;
129124 }
@@ -141,9 +136,9 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
141136 continue ;
142137 }
143138 if (rank == root ) {
144- tmprecv = wkg + extent * (ptrdiff_t ) (rcount * i );
139+ tmprecv = wkg + rextent * (ptrdiff_t ) (rcount * i );
145140 } else {
146- tmprecv = wkg + extent * (ptrdiff_t ) (rcount * (i - startr ));
141+ tmprecv = wkg + rextent * (ptrdiff_t ) (rcount * (i - startr ));
147142 }
148143 err = MCA_PML_CALL (
149144 recv (tmprecv , rcount , rdtype , i , MCA_COLL_BASE_TAG_GATHER , comm , & status ));
@@ -161,10 +156,9 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
161156 if (endn > size ) {
162157 endn = size ;
163158 }
164- incn = (rank == root ) ? ((root != startn ) ? 0 : sg_cnt ) : sg_cnt ;
165159 if (sg_cnt < size ) {
166160 int local_root = (root_node == cur_node ) ? root : startn ;
167- for (i = startn + incn ; i < endn ; i += sg_cnt ) {
161+ for (i = startn ; i < endn ; i += sg_cnt ) {
168162 int i_sg = i / sg_cnt ;
169163 if ((rank != local_root ) && (rank == i ) && is_base ) {
170164 err = MCA_PML_CALL (send (workbuf - sgap , total_recv , sdtype , local_root ,
@@ -173,7 +167,7 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
173167 }
174168 if ((rank == local_root ) && (rank != i ) && (i_sg != root_sg )) {
175169 int recv_amt = (i + sg_cnt > size ) ? rcount * (size - i ) : rcount * sg_cnt ;
176- MPI_Aint rcv_ofst = extent * (ptrdiff_t ) (rcount * (i - startn ));
170+ MPI_Aint rcv_ofst = rextent * (ptrdiff_t ) (rcount * (i - startn ));
177171
178172 err = MCA_PML_CALL (recv (wkg + (ptrdiff_t ) rcv_ofst , recv_amt , rdtype , i ,
179173 MCA_COLL_BASE_TAG_GATHER , comm , & status ));
@@ -189,7 +183,7 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
189183 }
190184
191185 /* All local roots ranks send to root */
192- if (node_cnt < size ) {
186+ if (node_cnt < size && num_nodes > 1 ) {
193187 for (i = 0 ; i < size ; i += node_cnt ) {
194188 int i_node = i / node_cnt ;
195189 if ((rank != root ) && (rank == i ) && is_base ) {
@@ -199,7 +193,7 @@ int mca_coll_acoll_gather_intra(const void *sbuf, int scount, struct ompi_dataty
199193 }
200194 if ((rank == root ) && (rank != i ) && (i_node != root_node )) {
201195 int recv_amt = (i + node_cnt > size ) ? rcount * (size - i ) : rcount * node_cnt ;
202- MPI_Aint rcv_ofst = extent * (ptrdiff_t ) (rcount * i );
196+ MPI_Aint rcv_ofst = rextent * (ptrdiff_t ) (rcount * i );
203197
204198 err = MCA_PML_CALL (recv ((char * ) rbuf + (ptrdiff_t ) rcv_ofst , recv_amt , rdtype , i ,
205199 MCA_COLL_BASE_TAG_GATHER , comm , & status ));
0 commit comments