misc: Replace all direct usage of MPL_GPU_POINTER_DEV
Always use MPL_gpu_attr_is_{dev,strict_dev} and the MPIR_GPU_
equivalents. The subtleties of non-strict device buffers (e.g. ZE) are
not obvious. Using the attr query wrappers makes the semantics explicit.
Esse commit está contido em:
@@ -41,7 +41,8 @@ MPL_STATIC_INLINE_PREFIX void *MPIR_gpu_host_alloc(const void *buf,
|
||||
MPL_pointer_attr_t attr;
|
||||
MPIR_GPU_query_pointer_attr(buf, &attr);
|
||||
|
||||
if (attr.type != MPL_GPU_POINTER_DEV) {
|
||||
/* FIXME: do we allocate a buffer for a non-strict dev buffer? */
|
||||
if (!MPL_gpu_attr_is_strict_dev(&attr)) {
|
||||
return NULL;
|
||||
} else {
|
||||
return MPIR_alloc_buffer(count, datatype);
|
||||
|
||||
@@ -25,7 +25,7 @@ yaksa_type_t MPII_Typerep_get_yaksa_op(MPI_Op op);
|
||||
static inline yaksa_info_t MPII_yaksa_get_info(MPL_pointer_attr_t * inattr,
|
||||
MPL_pointer_attr_t * outattr)
|
||||
{
|
||||
if (inattr->type != MPL_GPU_POINTER_DEV && outattr->type != MPL_GPU_POINTER_DEV) {
|
||||
if (!MPL_gpu_attr_is_dev(inattr) && !MPL_gpu_attr_is_dev(outattr)) {
|
||||
return MPII_yaksa_info_nogpu;
|
||||
}
|
||||
|
||||
|
||||
@@ -134,9 +134,9 @@ static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
|
||||
MPIR_GPU_query_pointer_attr(sendbuf, &send_attr);
|
||||
MPIR_GPU_query_pointer_attr(recvbuf, &recv_attr);
|
||||
|
||||
if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&send_attr) && MPL_gpu_attr_is_strict_dev(&recv_attr)) {
|
||||
MPL_gpu_malloc((void **) &buf, COPY_BUFFER_SZ, recv_attr.device);
|
||||
} else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
} else if (MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) {
|
||||
MPL_gpu_malloc_host((void **) &buf, COPY_BUFFER_SZ);
|
||||
} else {
|
||||
MPIR_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf", MPL_MEM_BUFFER);
|
||||
@@ -179,9 +179,9 @@ static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
|
||||
}
|
||||
}
|
||||
|
||||
if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&send_attr) && MPL_gpu_attr_is_strict_dev(&recv_attr)) {
|
||||
MPL_gpu_free(buf);
|
||||
} else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
} else if (MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) {
|
||||
MPL_gpu_free_host(buf);
|
||||
}
|
||||
}
|
||||
@@ -192,9 +192,9 @@ static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
|
||||
return mpi_errno;
|
||||
fn_fail:
|
||||
if (buf) {
|
||||
if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&send_attr) && MPL_gpu_attr_is_strict_dev(&recv_attr)) {
|
||||
MPL_gpu_free(buf);
|
||||
} else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
} else if (MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) {
|
||||
MPL_gpu_free_host(buf);
|
||||
}
|
||||
}
|
||||
@@ -275,12 +275,12 @@ static int do_localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatyp
|
||||
gpu_req->type = MPIR_NULL_REQUEST;
|
||||
}
|
||||
} else {
|
||||
if (send_attr && send_attr->type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(send_attr)) {
|
||||
dev_id = MPL_gpu_get_dev_id_from_attr(send_attr);
|
||||
}
|
||||
|
||||
if (dev_id == -1) {
|
||||
if (recv_attr->type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(recv_attr)) {
|
||||
dev_id = MPL_gpu_get_dev_id_from_attr(recv_attr);
|
||||
} else {
|
||||
/* fallback to do_localcopy */
|
||||
|
||||
@@ -484,9 +484,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_am_isend_eager(int rank, MPIR_Comm * c
|
||||
|
||||
MPL_pointer_attr_t attr;
|
||||
MPIR_GPU_query_pointer_attr(buf, &attr);
|
||||
if (attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_dev(&attr)) {
|
||||
MPIDI_OFI_register_am_bufs();
|
||||
if (!MPIDI_OFI_ENABLE_HMEM) {
|
||||
if (!MPIDI_OFI_ENABLE_HMEM || !MPL_gpu_attr_is_strict_dev(&attr)) {
|
||||
/* Force packing of GPU buffer in host memory */
|
||||
need_packing = true;
|
||||
}
|
||||
@@ -641,9 +641,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_am_isend_pipeline(int rank, MPIR_Comm
|
||||
|
||||
MPL_pointer_attr_t attr;
|
||||
MPIR_GPU_query_pointer_attr(buf, &attr);
|
||||
if (attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_dev(&attr)) {
|
||||
MPIDI_OFI_register_am_bufs();
|
||||
if (!MPIDI_OFI_ENABLE_HMEM) {
|
||||
if (!MPIDI_OFI_ENABLE_HMEM || !MPL_gpu_attr_is_strict_dev(&attr)) {
|
||||
/* Force packing of GPU buffer in host memory */
|
||||
need_packing = true;
|
||||
}
|
||||
@@ -740,9 +740,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_am_isend_rdma_read(int rank, MPIR_Comm
|
||||
|
||||
MPL_pointer_attr_t attr;
|
||||
MPIR_GPU_query_pointer_attr(buf, &attr);
|
||||
if (attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_dev(&attr)) {
|
||||
MPIDI_OFI_register_am_bufs();
|
||||
if (!MPIDI_OFI_ENABLE_HMEM) {
|
||||
if (!MPIDI_OFI_ENABLE_HMEM || !MPL_gpu_attr_is_strict_dev(&attr)) {
|
||||
/* Force packing of GPU buffer in host memory */
|
||||
need_packing = true;
|
||||
}
|
||||
|
||||
@@ -705,17 +705,21 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_register_memory(char *send_buf, size_t da
|
||||
mr_attr.requested_key = rkey;
|
||||
mr_attr.offset = 0;
|
||||
mr_attr.context = NULL;
|
||||
if (MPL_gpu_attr_is_strict_dev(attr)) {
|
||||
#ifdef MPL_HAVE_CUDA
|
||||
mr_attr.iface = (attr->type != MPL_GPU_POINTER_DEV) ? FI_HMEM_SYSTEM : FI_HMEM_CUDA;
|
||||
mr_attr.device.cuda =
|
||||
(attr->type != MPL_GPU_POINTER_DEV) ? 0 : MPL_gpu_get_dev_id_from_attr(attr);
|
||||
mr_attr.iface = FI_HMEM_CUDA;
|
||||
mr_attr.device.cuda = MPL_gpu_get_dev_id_from_attr(attr);
|
||||
#elif defined MPL_HAVE_ZE
|
||||
/* OFI does not support tiles yet, need to pass the root device. */
|
||||
mr_attr.iface = (attr->type != MPL_GPU_POINTER_DEV) ? FI_HMEM_SYSTEM : FI_HMEM_ZE;
|
||||
mr_attr.device.ze =
|
||||
(attr->type !=
|
||||
MPL_GPU_POINTER_DEV) ? 0 : MPL_gpu_get_root_device(MPL_gpu_get_dev_id_from_attr(attr));
|
||||
/* OFI does not support tiles yet, need to pass the root device. */
|
||||
mr_attr.iface = FI_HMEM_ZE;
|
||||
mr_attr.device.ze = MPL_gpu_get_root_device(MPL_gpu_get_dev_id_from_attr(attr));
|
||||
#else
|
||||
/* FIXME: add support for MPL_HAVE_HIP (FI_HMEM_ROCR) */
|
||||
mr_attr.iface = FI_HMEM_SYSTEM;
|
||||
#endif
|
||||
} else {
|
||||
mr_attr.iface = FI_HMEM_SYSTEM;
|
||||
}
|
||||
MPIDI_OFI_CALL(fi_mr_regattr
|
||||
(MPIDI_OFI_global.ctx[ctx_idx].domain, &mr_attr, 0, mr), mr_regattr);
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_irecv(void *buf,
|
||||
|
||||
if (MPIDI_OFI_ENABLE_HMEM && data_sz >= MPIR_CVAR_CH4_OFI_GPU_RDMA_THRESHOLD &&
|
||||
MPIDI_OFI_ENABLE_MR_HMEM && dt_contig) {
|
||||
if (attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&attr)) {
|
||||
register_mem = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -563,7 +563,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_send(const void *buf, MPI_Aint count, MPI
|
||||
if (!MPIDI_OFI_ENABLE_HMEM) {
|
||||
/* HMEM (any kind) not supported */
|
||||
need_pack = true;
|
||||
} else if (attr.type != MPL_GPU_POINTER_DEV) {
|
||||
} else if (!MPL_gpu_attr_is_strict_dev(&attr)) {
|
||||
/* non-strict gpu ptr (ZE shared host) */
|
||||
need_pack = true;
|
||||
} else {
|
||||
|
||||
@@ -95,7 +95,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_isend(int rank,
|
||||
|
||||
MPL_pointer_attr_t attr;
|
||||
MPIR_GPU_query_pointer_attr(data, &attr);
|
||||
if (attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_dev(&attr)) {
|
||||
/* Force packing of GPU buffer in host memory */
|
||||
dt_contig = 0;
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ static void ipc_handle_free_hook(void *dptr)
|
||||
MPIR_Assert(mpl_err == MPL_SUCCESS);
|
||||
|
||||
MPIR_GPU_query_pointer_attr(pbase, &gpu_attr);
|
||||
if (gpu_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&gpu_attr)) {
|
||||
local_dev_id = MPL_gpu_get_dev_id_from_attr(&gpu_attr);
|
||||
|
||||
for (int i = 0; i < MPIR_Process.local_size; ++i) {
|
||||
|
||||
@@ -324,7 +324,7 @@ int MPIDI_GPU_get_ipc_attr(const void *buf, MPI_Aint count, MPI_Datatype datatyp
|
||||
/* if it's a device buffer, we cannot do XPMEM or CMA IPC, so set default to SKIP */
|
||||
ipc_attr->ipc_type = MPIDI_IPCI_TYPE__SKIP;
|
||||
}
|
||||
if (ipc_attr->u.gpu.gpu_attr.type != MPL_GPU_POINTER_DEV) {
|
||||
if (!MPL_gpu_attr_is_strict_dev(&ipc_attr->u.gpu.gpu_attr)) {
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
|
||||
@@ -240,15 +240,13 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast(void *buffer, MPI_Aint count,
|
||||
case MPIR_CVAR_BCAST_POSIX_INTRA_ALGORITHM_auto:
|
||||
if (MPIR_CVAR_COLL_HYBRID_MEMORY) {
|
||||
cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/* In the non-hybrid case, the local memory type can be used to select the algorithm */
|
||||
MPL_pointer_attr_t pointer_attr;
|
||||
MPIR_GPU_query_pointer_attr(buffer, &pointer_attr);
|
||||
if (pointer_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&pointer_attr)) {
|
||||
cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm_gpu), coll_sig);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -203,7 +203,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_allcomm_composition_json(void *buffer,
|
||||
/* In the non-hybrid case, the local memory type can be used to select the algorithm */
|
||||
MPL_pointer_attr_t pointer_attr;
|
||||
MPIR_GPU_query_pointer_attr(buffer, &pointer_attr);
|
||||
if (pointer_attr.type == MPL_GPU_POINTER_DEV) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&pointer_attr)) {
|
||||
cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm_gpu), coll_sig);
|
||||
} else {
|
||||
cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig);
|
||||
|
||||
@@ -271,7 +271,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_alpha(void *buffer, M
|
||||
|
||||
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
|
||||
|
||||
if (attr.type == MPL_GPU_POINTER_DEV && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&attr) && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
|
||||
MPIDU_genq_private_pool_alloc_cell(MPIDI_global.gpu_coll_pool, (void **) &host_buffer);
|
||||
if (host_buffer != NULL) {
|
||||
host_buffer = (char *) host_buffer - shift;
|
||||
@@ -327,7 +327,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_beta(void *buffer, MP
|
||||
|
||||
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
|
||||
|
||||
if (attr.type == MPL_GPU_POINTER_DEV && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&attr) && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
|
||||
MPIDU_genq_private_pool_alloc_cell(MPIDI_global.gpu_coll_pool, (void **) &host_buffer);
|
||||
if (host_buffer != NULL) {
|
||||
host_buffer = (char *) host_buffer - shift;
|
||||
@@ -396,7 +396,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_gamma(void *buffer, M
|
||||
|
||||
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
|
||||
|
||||
if (attr.type == MPL_GPU_POINTER_DEV && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
|
||||
if (MPL_gpu_attr_is_strict_dev(&attr) && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
|
||||
MPIDU_genq_private_pool_alloc_cell(MPIDI_global.gpu_coll_pool, (void **) &host_buffer);
|
||||
if (host_buffer != NULL) {
|
||||
host_buffer = (char *) host_buffer - shift;
|
||||
@@ -486,7 +486,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_delta(void *buffer, M
|
||||
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
|
||||
|
||||
/* only node leaders need to allocate a host buffer */
|
||||
if (attr.type == MPL_GPU_POINTER_DEV && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ
|
||||
if (MPL_gpu_attr_is_strict_dev(&attr) && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ
|
||||
&& comm->node_roots_comm != NULL) {
|
||||
MPIDU_genq_private_pool_alloc_cell(MPIDI_global.gpu_coll_pool, (void **) &host_buffer);
|
||||
if (host_buffer != NULL) {
|
||||
@@ -551,7 +551,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_alpha(const void
|
||||
|
||||
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
|
||||
|
||||
if ((send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) &&
|
||||
if ((MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) &&
|
||||
(size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ)) {
|
||||
MPIDI_Coll_host_buffer_genq_alloc(sendbuf, recvbuf, count, datatype, &host_sendbuf,
|
||||
&host_recvbuf, send_attr, recv_attr, shift);
|
||||
@@ -647,7 +647,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_beta(const void *
|
||||
|
||||
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
|
||||
|
||||
if ((send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) &&
|
||||
if ((MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) &&
|
||||
(size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ)) {
|
||||
MPIDI_Coll_host_buffer_genq_alloc(sendbuf, recvbuf, count, datatype, &host_sendbuf,
|
||||
&host_recvbuf, send_attr, recv_attr, shift);
|
||||
@@ -696,7 +696,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_gamma(const void
|
||||
|
||||
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
|
||||
|
||||
if ((send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) &&
|
||||
if ((MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) &&
|
||||
(size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ)) {
|
||||
MPIDI_Coll_host_buffer_genq_alloc(sendbuf, recvbuf, count, datatype, &host_sendbuf,
|
||||
&host_recvbuf, send_attr, recv_attr, shift);
|
||||
|
||||
@@ -21,11 +21,8 @@
|
||||
|
||||
MPL_STATIC_INLINE_PREFIX void MPIDIG_recv_set_buffer_attr(MPIR_Request * rreq)
|
||||
{
|
||||
MPL_pointer_attr_t attr;
|
||||
MPIR_GPU_query_pointer_attr(MPIDIG_REQUEST(rreq, buffer), &attr);
|
||||
|
||||
MPIDIG_rreq_async_t *p = &(MPIDIG_REQUEST(rreq, req->recv_async));
|
||||
p->is_device_buffer = (attr.type == MPL_GPU_POINTER_DEV);
|
||||
p->is_device_buffer = MPIR_GPU_query_pointer_is_dev(MPIDIG_REQUEST(rreq, buffer));
|
||||
}
|
||||
|
||||
MPL_STATIC_INLINE_PREFIX int MPIDIG_recv_check_rndv_cb(MPIR_Request * rreq)
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário