misc: Replace all direct usage of MPL_GPU_POINTER_DEV

Always use MPL_gpu_attr_is_{dev,strict_dev} and the MPIR_GPU_
equivalents. The subtleties of non-strict device buffers (e.g. ZE) are not
obvious. Using the attr query wrappers makes the semantics explicit.
Esse commit está contido em:
Hui Zhou
2024-09-30 17:41:06 -05:00
commit a372db3fe6
14 arquivos alterados com 46 adições e 46 exclusões
+2 -1
Ver Arquivo
@@ -41,7 +41,8 @@ MPL_STATIC_INLINE_PREFIX void *MPIR_gpu_host_alloc(const void *buf,
MPL_pointer_attr_t attr;
MPIR_GPU_query_pointer_attr(buf, &attr);
if (attr.type != MPL_GPU_POINTER_DEV) {
/* FIXME: do we allocate buffer for non-strict dev buffer? */
if (!MPL_gpu_attr_is_strict_dev(&attr)) {
return NULL;
} else {
return MPIR_alloc_buffer(count, datatype);
+1 -1
Ver Arquivo
@@ -25,7 +25,7 @@ yaksa_type_t MPII_Typerep_get_yaksa_op(MPI_Op op);
static inline yaksa_info_t MPII_yaksa_get_info(MPL_pointer_attr_t * inattr,
MPL_pointer_attr_t * outattr)
{
if (inattr->type != MPL_GPU_POINTER_DEV && outattr->type != MPL_GPU_POINTER_DEV) {
if (!MPL_gpu_attr_is_dev(inattr) && !MPL_gpu_attr_is_dev(outattr)) {
return MPII_yaksa_info_nogpu;
}
+8 -8
Ver Arquivo
@@ -134,9 +134,9 @@ static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
MPIR_GPU_query_pointer_attr(sendbuf, &send_attr);
MPIR_GPU_query_pointer_attr(recvbuf, &recv_attr);
if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(&send_attr) && MPL_gpu_attr_is_strict_dev(&recv_attr)) {
MPL_gpu_malloc((void **) &buf, COPY_BUFFER_SZ, recv_attr.device);
} else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
} else if (MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) {
MPL_gpu_malloc_host((void **) &buf, COPY_BUFFER_SZ);
} else {
MPIR_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf", MPL_MEM_BUFFER);
@@ -179,9 +179,9 @@ static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
}
}
if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(&send_attr) && MPL_gpu_attr_is_strict_dev(&recv_attr)) {
MPL_gpu_free(buf);
} else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
} else if (MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) {
MPL_gpu_free_host(buf);
}
}
@@ -192,9 +192,9 @@ static int do_localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
return mpi_errno;
fn_fail:
if (buf) {
if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(&send_attr) && MPL_gpu_attr_is_strict_dev(&recv_attr)) {
MPL_gpu_free(buf);
} else if (send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) {
} else if (MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) {
MPL_gpu_free_host(buf);
}
}
@@ -275,12 +275,12 @@ static int do_localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatyp
gpu_req->type = MPIR_NULL_REQUEST;
}
} else {
if (send_attr && send_attr->type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(send_attr)) {
dev_id = MPL_gpu_get_dev_id_from_attr(send_attr);
}
if (dev_id == -1) {
if (recv_attr->type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(recv_attr)) {
dev_id = MPL_gpu_get_dev_id_from_attr(recv_attr);
} else {
/* fallback to do_localcopy */
+6 -6
Ver Arquivo
@@ -484,9 +484,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_am_isend_eager(int rank, MPIR_Comm * c
MPL_pointer_attr_t attr;
MPIR_GPU_query_pointer_attr(buf, &attr);
if (attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_dev(&attr)) {
MPIDI_OFI_register_am_bufs();
if (!MPIDI_OFI_ENABLE_HMEM) {
if (!MPIDI_OFI_ENABLE_HMEM || !MPL_gpu_attr_is_strict_dev(&attr)) {
/* Force packing of GPU buffer in host memory */
need_packing = true;
}
@@ -641,9 +641,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_am_isend_pipeline(int rank, MPIR_Comm
MPL_pointer_attr_t attr;
MPIR_GPU_query_pointer_attr(buf, &attr);
if (attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_dev(&attr)) {
MPIDI_OFI_register_am_bufs();
if (!MPIDI_OFI_ENABLE_HMEM) {
if (!MPIDI_OFI_ENABLE_HMEM || !MPL_gpu_attr_is_strict_dev(&attr)) {
/* Force packing of GPU buffer in host memory */
need_packing = true;
}
@@ -740,9 +740,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_am_isend_rdma_read(int rank, MPIR_Comm
MPL_pointer_attr_t attr;
MPIR_GPU_query_pointer_attr(buf, &attr);
if (attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_dev(&attr)) {
MPIDI_OFI_register_am_bufs();
if (!MPIDI_OFI_ENABLE_HMEM) {
if (!MPIDI_OFI_ENABLE_HMEM || !MPL_gpu_attr_is_strict_dev(&attr)) {
/* Force packing of GPU buffer in host memory */
need_packing = true;
}
+12 -8
Ver Arquivo
@@ -705,17 +705,21 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_register_memory(char *send_buf, size_t da
mr_attr.requested_key = rkey;
mr_attr.offset = 0;
mr_attr.context = NULL;
if (MPL_gpu_attr_is_strict_dev(attr)) {
#ifdef MPL_HAVE_CUDA
mr_attr.iface = (attr->type != MPL_GPU_POINTER_DEV) ? FI_HMEM_SYSTEM : FI_HMEM_CUDA;
mr_attr.device.cuda =
(attr->type != MPL_GPU_POINTER_DEV) ? 0 : MPL_gpu_get_dev_id_from_attr(attr);
mr_attr.iface = FI_HMEM_CUDA;
mr_attr.device.cuda = MPL_gpu_get_dev_id_from_attr(attr);
#elif defined MPL_HAVE_ZE
/* OFI does not support tiles yet, need to pass the root device. */
mr_attr.iface = (attr->type != MPL_GPU_POINTER_DEV) ? FI_HMEM_SYSTEM : FI_HMEM_ZE;
mr_attr.device.ze =
(attr->type !=
MPL_GPU_POINTER_DEV) ? 0 : MPL_gpu_get_root_device(MPL_gpu_get_dev_id_from_attr(attr));
/* OFI does not support tiles yet, need to pass the root device. */
mr_attr.iface = FI_HMEM_ZE;
mr_attr.device.ze = MPL_gpu_get_root_device(MPL_gpu_get_dev_id_from_attr(attr));
#else
/* FIXME: add support for MPL_HAVE_HIP (FI_HMEM_ROCR) */
mr_attr.iface = FI_HMEM_SYSTEM;
#endif
} else {
mr_attr.iface = FI_HMEM_SYSTEM;
}
MPIDI_OFI_CALL(fi_mr_regattr
(MPIDI_OFI_global.ctx[ctx_idx].domain, &mr_attr, 0, mr), mr_regattr);
+1 -1
Ver Arquivo
@@ -171,7 +171,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_do_irecv(void *buf,
if (MPIDI_OFI_ENABLE_HMEM && data_sz >= MPIR_CVAR_CH4_OFI_GPU_RDMA_THRESHOLD &&
MPIDI_OFI_ENABLE_MR_HMEM && dt_contig) {
if (attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(&attr)) {
register_mem = true;
}
}
+1 -1
Ver Arquivo
@@ -563,7 +563,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_send(const void *buf, MPI_Aint count, MPI
if (!MPIDI_OFI_ENABLE_HMEM) {
/* HMEM (any kind) not supported */
need_pack = true;
} else if (attr.type != MPL_GPU_POINTER_DEV) {
} else if (!MPL_gpu_attr_is_strict_dev(&attr)) {
/* non-strict gpu ptr (ZE shared host) */
need_pack = true;
} else {
+1 -1
Ver Arquivo
@@ -95,7 +95,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_NM_am_isend(int rank,
MPL_pointer_attr_t attr;
MPIR_GPU_query_pointer_attr(data, &attr);
if (attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_dev(&attr)) {
/* Force packing of GPU buffer in host memory */
dt_contig = 0;
}
+1 -1
Ver Arquivo
@@ -22,7 +22,7 @@ static void ipc_handle_free_hook(void *dptr)
MPIR_Assert(mpl_err == MPL_SUCCESS);
MPIR_GPU_query_pointer_attr(pbase, &gpu_attr);
if (gpu_attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(&gpu_attr)) {
local_dev_id = MPL_gpu_get_dev_id_from_attr(&gpu_attr);
for (int i = 0; i < MPIR_Process.local_size; ++i) {
+1 -1
Ver Arquivo
@@ -324,7 +324,7 @@ int MPIDI_GPU_get_ipc_attr(const void *buf, MPI_Aint count, MPI_Datatype datatyp
/* if it's a device buffer, we cannot do XPMEM or CMA IPC, so set default to SKIP */
ipc_attr->ipc_type = MPIDI_IPCI_TYPE__SKIP;
}
if (ipc_attr->u.gpu.gpu_attr.type != MPL_GPU_POINTER_DEV) {
if (!MPL_gpu_attr_is_strict_dev(&ipc_attr->u.gpu.gpu_attr)) {
goto fn_exit;
}
+3 -5
Ver Arquivo
@@ -240,15 +240,13 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_POSIX_mpi_bcast(void *buffer, MPI_Aint count,
case MPIR_CVAR_BCAST_POSIX_INTRA_ALGORITHM_auto:
if (MPIR_CVAR_COLL_HYBRID_MEMORY) {
cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig);
}
else {
} else {
/* In no hybird case, local memory type can be used to select algorithm */
MPL_pointer_attr_t pointer_attr;
MPIR_GPU_query_pointer_attr(buffer, &pointer_attr);
if (pointer_attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(&pointer_attr)) {
cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm_gpu), coll_sig);
}
else {
} else {
cnt = MPIR_Csel_search(MPIDI_POSIX_COMM(comm, csel_comm), coll_sig);
}
}
+1 -1
Ver Arquivo
@@ -203,7 +203,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_allcomm_composition_json(void *buffer,
/* In no hybird case, local memory type can be used to select algorithm */
MPL_pointer_attr_t pointer_attr;
MPIR_GPU_query_pointer_attr(buffer, &pointer_attr);
if (pointer_attr.type == MPL_GPU_POINTER_DEV) {
if (MPL_gpu_attr_is_strict_dev(&pointer_attr)) {
cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm_gpu), coll_sig);
} else {
cnt = MPIR_Csel_search(MPIDI_COMM(comm, csel_comm), coll_sig);
+7 -7
Ver Arquivo
@@ -271,7 +271,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_alpha(void *buffer, M
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
if (attr.type == MPL_GPU_POINTER_DEV && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
if (MPL_gpu_attr_is_strict_dev(&attr) && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
MPIDU_genq_private_pool_alloc_cell(MPIDI_global.gpu_coll_pool, (void **) &host_buffer);
if (host_buffer != NULL) {
host_buffer = (char *) host_buffer - shift;
@@ -327,7 +327,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_beta(void *buffer, MP
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
if (attr.type == MPL_GPU_POINTER_DEV && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
if (MPL_gpu_attr_is_strict_dev(&attr) && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
MPIDU_genq_private_pool_alloc_cell(MPIDI_global.gpu_coll_pool, (void **) &host_buffer);
if (host_buffer != NULL) {
host_buffer = (char *) host_buffer - shift;
@@ -396,7 +396,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_gamma(void *buffer, M
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
if (attr.type == MPL_GPU_POINTER_DEV && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
if (MPL_gpu_attr_is_strict_dev(&attr) && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ) {
MPIDU_genq_private_pool_alloc_cell(MPIDI_global.gpu_coll_pool, (void **) &host_buffer);
if (host_buffer != NULL) {
host_buffer = (char *) host_buffer - shift;
@@ -486,7 +486,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Bcast_intra_composition_delta(void *buffer, M
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
/* only node leaders need to allocate a host buffer */
if (attr.type == MPL_GPU_POINTER_DEV && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ
if (MPL_gpu_attr_is_strict_dev(&attr) && size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ
&& comm->node_roots_comm != NULL) {
MPIDU_genq_private_pool_alloc_cell(MPIDI_global.gpu_coll_pool, (void **) &host_buffer);
if (host_buffer != NULL) {
@@ -551,7 +551,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_alpha(const void
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
if ((send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) &&
if ((MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) &&
(size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ)) {
MPIDI_Coll_host_buffer_genq_alloc(sendbuf, recvbuf, count, datatype, &host_sendbuf,
&host_recvbuf, send_attr, recv_attr, shift);
@@ -647,7 +647,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_beta(const void *
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
if ((send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) &&
if ((MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) &&
(size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ)) {
MPIDI_Coll_host_buffer_genq_alloc(sendbuf, recvbuf, count, datatype, &host_sendbuf,
&host_recvbuf, send_attr, recv_attr, shift);
@@ -696,7 +696,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_Allreduce_intra_composition_gamma(const void
MPIDI_Coll_calculate_size_shift(count, datatype, &size, &shift);
if ((send_attr.type == MPL_GPU_POINTER_DEV || recv_attr.type == MPL_GPU_POINTER_DEV) &&
if ((MPL_gpu_attr_is_strict_dev(&send_attr) || MPL_gpu_attr_is_strict_dev(&recv_attr)) &&
(size <= MPIR_CVAR_CH4_GPU_COLL_SWAP_BUFFER_SZ)) {
MPIDI_Coll_host_buffer_genq_alloc(sendbuf, recvbuf, count, datatype, &host_sendbuf,
&host_recvbuf, send_attr, recv_attr, shift);
+1 -4
Ver Arquivo
@@ -21,11 +21,8 @@
MPL_STATIC_INLINE_PREFIX void MPIDIG_recv_set_buffer_attr(MPIR_Request * rreq)
{
MPL_pointer_attr_t attr;
MPIR_GPU_query_pointer_attr(MPIDIG_REQUEST(rreq, buffer), &attr);
MPIDIG_rreq_async_t *p = &(MPIDIG_REQUEST(rreq, req->recv_async));
p->is_device_buffer = (attr.type == MPL_GPU_POINTER_DEV);
p->is_device_buffer = MPIR_GPU_query_pointer_is_dev(MPIDIG_REQUEST(rreq, buffer));
}
MPL_STATIC_INLINE_PREFIX int MPIDIG_recv_check_rndv_cb(MPIR_Request * rreq)