misc: add a CVAR for GPU fast copy threshold for D2H copy direction

Increase the default of MPIR_CVAR_GPU_FAST_COPY_MAX_SIZE from 1024 to
4096, since fast GPU copy has improved by using stream load/store.
Add MPIR_CVAR_GPU_FAST_COPY_MAX_SIZE_D2H (default 32768) for the D2H
direction, which is used in the send path when packing from the send
buffer into pack_buffer; this threshold can be set higher to benefit
from fast GPU copy.
Esse commit está contido em:
Gengbin Zheng
2024-10-10 09:25:03 -07:00
commit de Yanfei Guo
commit 05883b6a6c
2 arquivos alterados com 15 adições e 3 exclusões
+13 -1
Ver Arquivo
@@ -9,7 +9,7 @@ cvars:
- name : MPIR_CVAR_GPU_FAST_COPY_MAX_SIZE
category : CH4
type : int
default : 1024
default : 4096
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
@@ -29,6 +29,16 @@ cvars:
If a receive message size is less than or equal to MPIR_CVAR_GPU_FAST_COPY_MAX_SIZE_H2D (in
bytes), then enable GPU-based fast memcpy.
- name : MPIR_CVAR_GPU_FAST_COPY_MAX_SIZE_D2H
category : CH4
type : int
default : 32768
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
description : >-
If a send message size is less than or equal to MPIR_CVAR_GPU_FAST_COPY_MAX_SIZE_D2H (in
bytes), then enable GPU-based fast memcpy.
=== END_MPI_T_CVAR_INFO_BLOCK ===
*/
@@ -285,6 +295,8 @@ static int do_localcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatyp
if (dir == MPL_GPU_COPY_H2D) {
/* Used in ofi_events.h when unpacking from received pack_buffer to original device buffer */
fast_copy_threshold = MPIR_CVAR_GPU_FAST_COPY_MAX_SIZE_H2D;
} else if (dir == MPL_GPU_COPY_D2H) {
fast_copy_threshold = MPIR_CVAR_GPU_FAST_COPY_MAX_SIZE_D2H;
}
if (copy_sz <= fast_copy_threshold) {
mpl_errno = MPL_gpu_fast_memcpy(send_ptr, send_attr, recv_ptr, recv_attr, copy_sz);
+2 -2
Ver Arquivo
@@ -628,7 +628,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_send(const void *buf, MPI_Aint count, MPI
pack_buf = MPL_aligned_alloc(64, data_sz, MPL_MEM_OTHER);
mpi_errno = MPIR_Localcopy_gpu(buf, count, datatype, 0, &attr,
pack_buf, data_sz, MPI_BYTE, 0, MPIR_GPU_ATTR_HOST,
MPL_GPU_COPY_DIRECTION_NONE,
MPL_GPU_COPY_D2H,
MPIDI_OFI_gpu_get_send_engine_type(), true);
MPIR_ERR_CHECK(mpi_errno);
send_buf = pack_buf;
@@ -655,7 +655,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_send(const void *buf, MPI_Aint count, MPI
mpi_errno = MPIR_Localcopy_gpu(buf, count, datatype, 0, &attr,
pack_buf, data_sz, MPI_BYTE, 0, MPIR_GPU_ATTR_HOST,
MPL_GPU_COPY_DIRECTION_NONE,
MPL_GPU_COPY_D2H,
MPIDI_OFI_gpu_get_send_engine_type(), true);
MPIR_ERR_CHECK(mpi_errno);