@@ -1,7 +1,7 @@
|
||||
import subprocess
|
||||
from typing import TYPE_CHECKING, Dict, Union
|
||||
|
||||
from .hub import get_gpu_count_for_vllm, get_param_count_from_repo_id
|
||||
from .hub import get_gpu_count_for_vllm
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -70,12 +70,13 @@ def run_lighteval_job(
|
||||
model_revision = training_args.hub_model_revision
|
||||
# For large models >= 30b params or those running the MATH benchmark, we need to shard them across the GPUs to avoid OOM
|
||||
num_gpus = get_gpu_count_for_vllm(model_name, model_revision)
|
||||
if get_param_count_from_repo_id(model_name) >= 30_000_000_000:
|
||||
tensor_parallel = True
|
||||
# FIXME: vLLM 0.8.3 hangs with lighteval and DP > 1, so we disable it for now and use TP for all evals. See https://github.com/huggingface/lighteval/issues/670
|
||||
# if get_param_count_from_repo_id(model_name) >= 30_000_000_000:
|
||||
# tensor_parallel = True
|
||||
# else:
|
||||
# num_gpus = 8
|
||||
# tensor_parallel = False
|
||||
tensor_parallel = True
|
||||
|
||||
cmd = VLLM_SLURM_PREFIX.copy()
|
||||
cmd_args = [
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário