Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 37 additions & 42 deletions ompi/mca/coll/libnbc/nbc_iallreduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,14 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI
return OMPI_ERR_OUT_OF_RESOURCE;
}

alg = NBC_ARED_RING; /* default generic selection */
/* algorithm selection */
int nprocs_pof2 = opal_next_poweroftwo(p) >> 1;
if (libnbc_iallreduce_algorithm == 0) {
if(p < 4 || size*count < 65536 || !ompi_op_is_commute(op) || inplace) {
alg = NBC_ARED_BINOMIAL;
} else if (count >= nprocs_pof2 && ompi_op_is_commute(op)) {
alg = NBC_ARED_REDSCAT_ALLGATHER;
} else {
alg = NBC_ARED_RING;
}
} else {
if (libnbc_iallreduce_algorithm == 1)
Expand All @@ -131,8 +130,6 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI
alg = NBC_ARED_REDSCAT_ALLGATHER;
else if (libnbc_iallreduce_algorithm == 4)
alg = NBC_ARED_RDBL;
else
alg = NBC_ARED_RING;
}
#ifdef NBC_CACHE_SCHEDULE
/* search schedule in communicator specific tree */
Expand Down Expand Up @@ -633,38 +630,37 @@ static inline int allred_sched_recursivedoubling(int rank, int p, const void *se
return OMPI_SUCCESS;
}

static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf, MPI_Op op,
int size, int ext, NBC_Schedule *schedule, void *tmpbuf) {
static inline int
allred_sched_ring(int r, int p,
int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf,
MPI_Op op, int size, int ext, NBC_Schedule *schedule, void *tmpbuf)
{
int segsize, *segsizes, *segoffsets; /* segment sizes and offsets per segment (number of segments == number of nodes) */
int speer, rpeer; /* send and recvpeer */
int speer, rpeer; /* send and recv peers */
int res = OMPI_SUCCESS;

if (count == 0) {
if (0 == count) {
return OMPI_SUCCESS;
}

segsizes = (int *) malloc (sizeof (int) * p);
segoffsets = (int *) malloc (sizeof (int) * p);
if (NULL == segsizes || NULL == segoffsets) {
free (segsizes);
free (segoffsets);
segsizes = (int *) malloc((2 * p + 1 ) *sizeof (int));
if (NULL == segsizes) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
segoffsets = segsizes + p;

segsize = (count + p - 1) / p; /* size of the segments */
segsize = count / p; /* base size of every segment; the count % p leftover
elements are handed out one each to the lowest-indexed segments below */

segoffsets[0] = 0;
for (int i = 0, mycount = count ; i < p ; ++i) {
mycount -= segsize;
for (int i = 0, mycount = count % p; i < p ; ++i) {
segsizes[i] = segsize;
if (mycount < 0) {
segsizes[i] = segsize + mycount;
mycount = 0;
if( mycount > 0 ) { /* We still have leftover elements to distribute */
segsizes[i]++;
mycount--;
}

if (i) {
segoffsets[i] = segoffsets[i-1] + segsizes[i-1];
}
segoffsets[i+1] = segoffsets[i] + segsizes[i];
}

/* reduce peers */
Expand Down Expand Up @@ -786,28 +782,29 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat
}

if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
break;
goto free_and_return;
}

res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer,
schedule, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
break;
if( recvbuf != sendbuf ) { /* check for MPI_IN_PLACE */
res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer,
schedule, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
goto free_and_return;
}
res = NBC_Sched_op ((char *) sendbuf + roffset, false, (char *) recvbuf + roffset, false,
segsizes[relement], datatype, op, schedule, true);
} else {
res = NBC_Sched_recv ((char *) tmpbuf, false, segsizes[relement], datatype, rpeer,
schedule, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
goto free_and_return;
}
res = NBC_Sched_op ((char *) tmpbuf, false, (char *) recvbuf + roffset, false,
segsizes[relement], datatype, op, schedule, true);
}

res = NBC_Sched_op ((char *) sendbuf + roffset, false, (char *) recvbuf + roffset, false,
segsizes[relement], datatype, op, schedule, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
break;
goto free_and_return;
}
}

if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
free (segsizes);
free (segoffsets);
return res;
}

for (int round = p - 1 ; round < 2 * p - 2 ; ++round) {
int selement = (r+1-round + 2*p /*2*p avoids negative mod*/)%p; /* the element I am sending */
int soffset = segoffsets[selement]*ext;
Expand All @@ -819,16 +816,14 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
break;
}

res = NBC_Sched_recv ((char *) recvbuf + roffset, false, segsizes[relement], datatype, rpeer,
schedule, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
break;
}
}

free_and_return:
free (segsizes);
free (segoffsets);

return res;
}
Expand Down