coll: merge barrier dissemination into the high radix dissemination

Make the optimized version of the dissemination algorithm a
special case of the high radix dissemination algorithm.
Esse commit está contido em:
Gengbin Zheng
2022-02-23 22:27:08 -06:00
commit 7e03fe8544
9 arquivos alterados com 45 adições e 56 exclusões
+3 -1
Ver Arquivo
@@ -421,7 +421,9 @@
},
"comm_hierarchy=any":
{
"algorithm=MPIR_Barrier_intra_dissemination":{}
"algorithm=MPIR_Barrier_intra_k_dissemination":{
"k=2": {}
}
}
},
"comm_type=inter":
+2
Ver Arquivo
@@ -53,6 +53,8 @@ int MPIC_Waitall(int numreq, MPIR_Request * requests[], MPI_Status statuses[],
int MPIR_Reduce_local(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Datatype datatype,
MPI_Op op);
int MPIR_Barrier_intra_dissemination(MPIR_Comm * comm_ptr, MPIR_Errflag_t * errflag);
/* TSP auto */
int MPIR_TSP_Iallreduce_sched_intra_tsp_auto(const void *sendbuf, void *recvbuf, MPI_Aint count,
MPI_Datatype datatype, MPI_Op op,
-1
Ver Arquivo
@@ -9,7 +9,6 @@
mpi_core_sources += \
src/mpi/coll/barrier/barrier_allcomm_nb.c \
src/mpi/coll/barrier/barrier_intra_dissemination.c \
src/mpi/coll/barrier/barrier_intra_k_dissemination.c \
src/mpi/coll/barrier/barrier_intra_recexch.c \
src/mpi/coll/barrier/barrier_intra_smp.c \
@@ -1,50 +0,0 @@
/*
* Copyright (C) by Argonne National Laboratory
* See COPYRIGHT in top-level directory
*/
#include "mpiimpl.h"
/* Algorithm: MPI_Barrier (dissemination)
 *
 * Reference:
 * Debra Hensgen, Raphael Finkel, and Udi Manber, "Two Algorithms for
 * Barrier Synchronization," International Journal of Parallel
 * Programming, 17(1):1-17, 1988.
 *
 * With p processes the barrier takes ceiling(lg p) rounds. In round k
 * (k = 0, 1, ...), process i exchanges zero-byte messages: it sends to
 * process (i + 2^k) % p and receives from process (i - 2^k + p) % p.
 *
 * A failed exchange does not stop the loop: the failure is written to
 * *errflag, folded into mpi_errno_ret, and the remaining rounds still run.
 *
 * Returns mpi_errno_ret when any round reported an error; otherwise, when
 * *errflag is not MPIR_ERR_NONE, an error built with "**coll_fail";
 * otherwise the value left in mpi_errno.
 */
int MPIR_Barrier_intra_dissemination(MPIR_Comm * comm_ptr, MPIR_Errflag_t * errflag)
{
    int mpi_errno = MPI_SUCCESS;
    int mpi_errno_ret = MPI_SUCCESS;
    const int nprocs = comm_ptr->local_size;
    const int self = comm_ptr->rank;

    /* dist is the distance to this round's partners; it doubles every round */
    for (int dist = 1; dist < nprocs; dist <<= 1) {
        const int send_to = (self + dist) % nprocs;
        const int recv_from = (self - dist + nprocs) % nprocs;

        mpi_errno = MPIC_Sendrecv(NULL, 0, MPI_BYTE, send_to, MPIR_BARRIER_TAG,
                                  NULL, 0, MPI_BYTE, recv_from, MPIR_BARRIER_TAG,
                                  comm_ptr, MPI_STATUS_IGNORE, errflag);
        if (!mpi_errno)
            continue;

        /* communication error: record it, but carry on with the next round */
        if (MPIX_ERR_PROC_FAILED == MPIR_ERR_GET_CLASS(mpi_errno)) {
            *errflag = MPIR_ERR_PROC_FAILED;
        } else {
            *errflag = MPIR_ERR_OTHER;
        }
        MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
        MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
    }

    if (mpi_errno_ret) {
        mpi_errno = mpi_errno_ret;
    } else if (*errflag != MPIR_ERR_NONE) {
        /* no local failure, but the error flag is set */
        MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");
    }

    return mpi_errno;
}
@@ -5,6 +5,42 @@
#include "mpiimpl.h"
/* Algorithm: MPI_Barrier (dissemination)
 *
 * Reference:
 * Debra Hensgen, Raphael Finkel, and Udi Manber, "Two Algorithms for
 * Barrier Synchronization," International Journal of Parallel
 * Programming, 17(1):1-17, 1988.
 *
 * With p processes the barrier takes ceiling(lg p) rounds. In round k
 * (k = 0, 1, ...), process i exchanges zero-byte messages: it sends to
 * process (i + 2^k) % p and receives from process (i - 2^k + p) % p.
 *
 * The loop never exits early; each round's result is handed to
 * MPIR_ERR_COLL_CHECKANDCONT before the next round starts.
 *
 * Returns mpi_errno_ret when it is non-zero; otherwise, when *errflag is
 * not MPIR_ERR_NONE, an error built with "**coll_fail"; otherwise the
 * value left in mpi_errno.
 */
int MPIR_Barrier_intra_dissemination(MPIR_Comm * comm_ptr, MPIR_Errflag_t * errflag)
{
    int mpi_errno = MPI_SUCCESS;
    /* NOTE(review): MPIR_ERR_COLL_CHECKANDCONT is not passed this variable;
     * presumably it updates it by name - confirm against the macro
     * definition before renaming. */
    int mpi_errno_ret = MPI_SUCCESS;
    const int nprocs = comm_ptr->local_size;
    const int self = comm_ptr->rank;

    /* dist is the distance to this round's partners; it doubles every round */
    for (int dist = 1; dist < nprocs; dist <<= 1) {
        const int send_to = (self + dist) % nprocs;
        const int recv_from = (self - dist + nprocs) % nprocs;

        mpi_errno = MPIC_Sendrecv(NULL, 0, MPI_BYTE, send_to, MPIR_BARRIER_TAG,
                                  NULL, 0, MPI_BYTE, recv_from, MPIR_BARRIER_TAG,
                                  comm_ptr, MPI_STATUS_IGNORE, errflag);
        /* presumably records a failure in *errflag and continues - TODO confirm */
        MPIR_ERR_COLL_CHECKANDCONT(mpi_errno, *errflag);
    }

    if (mpi_errno_ret) {
        mpi_errno = mpi_errno_ret;
    } else if (*errflag != MPIR_ERR_NONE) {
        /* no accumulated error code, but the error flag is set */
        MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");
    }

    return mpi_errno;
}
/* Algorithm: high radix dissemination
* Similar to dissemination algorithm, but generalized with high radix k
@@ -28,6 +64,10 @@ int MPIR_Barrier_intra_k_dissemination(MPIR_Comm * comm, int k, MPIR_Errflag_t *
if (nranks < k)
k = nranks;
if (k == 2) {
return MPIR_Barrier_intra_dissemination(comm, errflag);
}
/* If k value is greater than the maximum radix defined by MAX_RADIX macro,
* we allocate memory for requests here. Otherwise we use the requests defined
* in the communicator for allreduce/barrier recexch */
-1
Ver Arquivo
@@ -41,7 +41,6 @@
# Some algorithm use another algorithm or use a different function name.
barrier-intra:
dissemination
k_dissemination
extra_params: k
cvar_params: DISSEM_KVAL
-1
Ver Arquivo
@@ -19,7 +19,6 @@ cvars:
auto - Internal algorithm selection (can be overridden with MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE)
nb - Force nonblocking algorithm
smp - Force smp algorithm
dissemination - Force dissemination algorithm
k_dissemination - Force high radix dissemination algorithm
recexch - Force recursive exchange algorithm
-1
Ver Arquivo
@@ -44,7 +44,6 @@ typedef enum {
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_scattered,
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_inter_pairwise_exchange,
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_allcomm_nb,
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_dissemination,
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination,
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch,
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_smp,
-1
Ver Arquivo
@@ -308,7 +308,6 @@ algorithms:
inter-nonblocking:
sched_bcast
intra-blocking:
dissemination
k_dissemination
.MPIR_CVAR_BARRIER_DISSEM_KVAL=2,3,4,8
recexch