Fix TP once and for all :) (#613)

* Update evaluation.py

* Fix import
Esse commit está contido em:
lewtun
2025-04-17 15:25:59 +02:00
commit de GitHub
commit 4c9b0f25d9
+4 -3
Ver Arquivo
@@ -1,7 +1,7 @@
import subprocess
from typing import TYPE_CHECKING, Dict, Union
from .hub import get_gpu_count_for_vllm, get_param_count_from_repo_id
from .hub import get_gpu_count_for_vllm
if TYPE_CHECKING:
@@ -70,12 +70,13 @@ def run_lighteval_job(
model_revision = training_args.hub_model_revision
# For large models >= 30b params or those running the MATH benchmark, we need to shard them across the GPUs to avoid OOM
num_gpus = get_gpu_count_for_vllm(model_name, model_revision)
if get_param_count_from_repo_id(model_name) >= 30_000_000_000:
tensor_parallel = True
# FIXME: vLLM 0.8.3 hangs with lighteval when data parallelism (DP) > 1, so DP is disabled for now and tensor parallelism (TP) is used for all evals. See https://github.com/huggingface/lighteval/issues/670
# if get_param_count_from_repo_id(model_name) >= 30_000_000_000:
# tensor_parallel = True
# else:
# num_gpus = 8
# tensor_parallel = False
tensor_parallel = True
cmd = VLLM_SLURM_PREFIX.copy()
cmd_args = [