coll: merge barrier dissemination to the high radix dissemination
Make the optimized version of the dissemination algorithm a special case of the high radix dissemination algorithm.
Esse commit está contido em:
@@ -421,7 +421,9 @@
|
||||
},
|
||||
"comm_hierarchy=any":
|
||||
{
|
||||
"algorithm=MPIR_Barrier_intra_dissemination":{}
|
||||
"algorithm=MPIR_Barrier_intra_k_dissemination":{
|
||||
"k=2": {}
|
||||
}
|
||||
}
|
||||
},
|
||||
"comm_type=inter":
|
||||
|
||||
@@ -53,6 +53,8 @@ int MPIC_Waitall(int numreq, MPIR_Request * requests[], MPI_Status statuses[],
|
||||
int MPIR_Reduce_local(const void *inbuf, void *inoutbuf, MPI_Aint count, MPI_Datatype datatype,
|
||||
MPI_Op op);
|
||||
|
||||
int MPIR_Barrier_intra_dissemination(MPIR_Comm * comm_ptr, MPIR_Errflag_t * errflag);
|
||||
|
||||
/* TSP auto */
|
||||
int MPIR_TSP_Iallreduce_sched_intra_tsp_auto(const void *sendbuf, void *recvbuf, MPI_Aint count,
|
||||
MPI_Datatype datatype, MPI_Op op,
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
mpi_core_sources += \
|
||||
src/mpi/coll/barrier/barrier_allcomm_nb.c \
|
||||
src/mpi/coll/barrier/barrier_intra_dissemination.c \
|
||||
src/mpi/coll/barrier/barrier_intra_k_dissemination.c \
|
||||
src/mpi/coll/barrier/barrier_intra_recexch.c \
|
||||
src/mpi/coll/barrier/barrier_intra_smp.c \
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) by Argonne National Laboratory
|
||||
* See COPYRIGHT in top-level directory
|
||||
*/
|
||||
|
||||
#include "mpiimpl.h"
|
||||
|
||||
/* Algorithm: MPI_Barrier
|
||||
*
|
||||
* We use the dissemination algorithm described in:
|
||||
* Debra Hensgen, Raphael Finkel, and Udi Manber, "Two Algorithms for
|
||||
* Barrier Synchronization," International Journal of Parallel
|
||||
* Programming, 17(1):1-17, 1988.
|
||||
*
|
||||
* It uses ceiling(lgp) steps. In step k, 0 <= k <= (ceiling(lgp)-1),
|
||||
* process i sends to process (i + 2^k) % p and receives from process
|
||||
* (i - 2^k + p) % p.
|
||||
*/
|
||||
int MPIR_Barrier_intra_dissemination(MPIR_Comm * comm_ptr, MPIR_Errflag_t * errflag)
|
||||
{
/* Barrier built from rounds of zero-byte send/receive exchanges at doubling
 * distances (1, 2, 4, ...) around the ranks of comm_ptr.
 *
 * comm_ptr - communicator; only local_size and rank are read directly here.
 * errflag  - in/out error flag; overwritten when an exchange reports an error
 *            and also passed through to MPIC_Sendrecv.
 *
 * Returns the accumulated error code if any exchange failed, a "**coll_fail"
 * error if *errflag is set without a locally accumulated error, and
 * MPI_SUCCESS when the loop body never runs (size <= 1) and *errflag is clear. */
|
||||
int size, rank, src, dst, mask, mpi_errno = MPI_SUCCESS;
|
||||
/* Accumulates errors across rounds so that one failure does not end the loop. */
int mpi_errno_ret = MPI_SUCCESS;
|
||||
|
||||
size = comm_ptr->local_size;
|
||||
rank = comm_ptr->rank;
|
||||
|
||||
/* mask is the distance to this round's partners; it doubles every round. */
mask = 0x1;
|
||||
while (mask < size) {
|
||||
/* Send to the rank `mask` positions ahead, receive from the rank `mask`
 * positions behind; adding size before the modulo keeps src non-negative. */
dst = (rank + mask) % size;
|
||||
src = (rank - mask + size) % size;
|
||||
/* NULL buffers with count 0: the exchange carries no payload. */
mpi_errno = MPIC_Sendrecv(NULL, 0, MPI_BYTE, dst,
|
||||
MPIR_BARRIER_TAG, NULL, 0, MPI_BYTE,
|
||||
src, MPIR_BARRIER_TAG, comm_ptr, MPI_STATUS_IGNORE, errflag);
|
||||
if (mpi_errno) {
|
||||
/* for communication errors, just record the error but continue */
|
||||
/* *errflag becomes MPIR_ERR_PROC_FAILED when the error class is
 * MPIX_ERR_PROC_FAILED, and MPIR_ERR_OTHER for every other class. */
*errflag =
|
||||
MPIX_ERR_PROC_FAILED ==
|
||||
MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
|
||||
MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
|
||||
MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
|
||||
}
|
||||
mask <<= 1;
|
||||
}
|
||||
|
||||
/* Prefer the locally accumulated error; otherwise report a collective
 * failure if *errflag is set even though no local exchange returned an error. */
if (mpi_errno_ret)
|
||||
mpi_errno = mpi_errno_ret;
|
||||
else if (*errflag != MPIR_ERR_NONE)
|
||||
MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");
|
||||
return mpi_errno;
|
||||
}
|
||||
@@ -5,6 +5,42 @@
|
||||
|
||||
#include "mpiimpl.h"
|
||||
|
||||
/* Algorithm: MPI_Barrier
|
||||
*
|
||||
* We use the dissemination algorithm described in:
|
||||
* Debra Hensgen, Raphael Finkel, and Udi Manber, "Two Algorithms for
|
||||
* Barrier Synchronization," International Journal of Parallel
|
||||
* Programming, 17(1):1-17, 1988.
|
||||
*
|
||||
* It uses ceiling(lgp) steps. In step k, 0 <= k <= (ceiling(lgp)-1),
|
||||
* process i sends to process (i + 2^k) % p and receives from process
|
||||
* (i - 2^k + p) % p.
|
||||
*/
|
||||
int MPIR_Barrier_intra_dissemination(MPIR_Comm * comm_ptr, MPIR_Errflag_t * errflag)
|
||||
{
/* Barrier built from rounds of zero-byte send/receive exchanges at doubling
 * distances (1, 2, 4, ...) around the ranks of comm_ptr.
 *
 * comm_ptr - communicator; only local_size and rank are read directly here.
 * errflag  - in/out error flag; passed to MPIC_Sendrecv and to the error
 *            macro below, and inspected after the loop.
 *
 * Returns mpi_errno_ret if it is non-zero after the loop, otherwise a
 * "**coll_fail" error when *errflag != MPIR_ERR_NONE, otherwise the last
 * value of mpi_errno. */
|
||||
int size, rank, src, dst, mask, mpi_errno = MPI_SUCCESS;
|
||||
int mpi_errno_ret = MPI_SUCCESS;
|
||||
|
||||
size = comm_ptr->local_size;
|
||||
rank = comm_ptr->rank;
|
||||
|
||||
/* mask is the distance to this round's partners; it doubles every round. */
mask = 0x1;
|
||||
while (mask < size) {
|
||||
/* Send to the rank `mask` positions ahead, receive from the rank `mask`
 * positions behind; adding size before the modulo keeps src non-negative. */
dst = (rank + mask) % size;
|
||||
src = (rank - mask + size) % size;
|
||||
/* NULL buffers with count 0: the exchange carries no payload. */
mpi_errno = MPIC_Sendrecv(NULL, 0, MPI_BYTE, dst,
|
||||
MPIR_BARRIER_TAG, NULL, 0, MPI_BYTE,
|
||||
src, MPIR_BARRIER_TAG, comm_ptr, MPI_STATUS_IGNORE, errflag);
|
||||
/* NOTE(review): MPIR_ERR_COLL_CHECKANDCONT is defined outside this view.
 * Presumably it records a failure into *errflag and mpi_errno_ret and lets
 * the loop continue; confirm, because mpi_errno_ret is not assigned
 * anywhere else in this function. */
MPIR_ERR_COLL_CHECKANDCONT(mpi_errno, *errflag);
|
||||
mask <<= 1;
|
||||
}
|
||||
|
||||
/* Prefer the accumulated error; otherwise report a collective failure if
 * *errflag is set. */
if (mpi_errno_ret)
|
||||
mpi_errno = mpi_errno_ret;
|
||||
else if (*errflag != MPIR_ERR_NONE)
|
||||
MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");
|
||||
return mpi_errno;
|
||||
}
|
||||
|
||||
/* Algorithm: high radix dissemination
|
||||
* Similar to dissemination algorithm, but generalized with high radix k
|
||||
@@ -28,6 +64,10 @@ int MPIR_Barrier_intra_k_dissemination(MPIR_Comm * comm, int k, MPIR_Errflag_t *
|
||||
if (nranks < k)
|
||||
k = nranks;
|
||||
|
||||
if (k == 2) {
|
||||
return MPIR_Barrier_intra_dissemination(comm, errflag);
|
||||
}
|
||||
|
||||
/* If k value is greater than the maximum radix defined by MAX_RADIX macro,
|
||||
* we allocate memory for requests here. Otherwise we use the requests defined
|
||||
* in the communicator for allreduce/barrier recexch */
|
||||
|
||||
@@ -41,7 +41,6 @@
|
||||
# Some algorithms use another algorithm or use a different function name.
|
||||
|
||||
barrier-intra:
|
||||
dissemination
|
||||
k_dissemination
|
||||
extra_params: k
|
||||
cvar_params: DISSEM_KVAL
|
||||
|
||||
@@ -19,7 +19,6 @@ cvars:
|
||||
auto - Internal algorithm selection (can be overridden with MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE)
|
||||
nb - Force nonblocking algorithm
|
||||
smp - Force smp algorithm
|
||||
dissemination - Force dissemination algorithm
|
||||
k_dissemination - Force high radix dissemination algorithm
|
||||
recexch - Force recursive exchange algorithm
|
||||
|
||||
|
||||
@@ -44,7 +44,6 @@ typedef enum {
|
||||
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_intra_scattered,
|
||||
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_inter_pairwise_exchange,
|
||||
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Alltoallw_allcomm_nb,
|
||||
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_dissemination,
|
||||
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_k_dissemination,
|
||||
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_recexch,
|
||||
MPII_CSEL_CONTAINER_TYPE__ALGORITHM__MPIR_Barrier_intra_smp,
|
||||
|
||||
@@ -308,7 +308,6 @@ algorithms:
|
||||
inter-nonblocking:
|
||||
sched_bcast
|
||||
intra-blocking:
|
||||
dissemination
|
||||
k_dissemination
|
||||
.MPIR_CVAR_BARRIER_DISSEM_KVAL=2,3,4,8
|
||||
recexch
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário