Fix TP once and for all :) (#613)

* Update evaluation.py
* Fix import
2025-04-17 15:25:59 +02:00
commit 4c9b0f25d9
@@ -1,7 +1,7 @@
 import subprocess
 from typing import TYPE_CHECKING, Dict, Union

-from .hub import get_gpu_count_for_vllm, get_param_count_from_repo_id
+from .hub import get_gpu_count_for_vllm


 if TYPE_CHECKING:
@@ -70,12 +70,13 @@ def run_lighteval_job(
    model_revision = training_args.hub_model_revision
    # For large models >= 30b params or those running the MATH benchmark, we need to shard them across the GPUs to avoid OOM
    num_gpus = get_gpu_count_for_vllm(model_name, model_revision)
-    if get_param_count_from_repo_id(model_name) >= 30_000_000_000:
-        tensor_parallel = True
    # FIXME: vLLM 0.8.3 hangs with lighteval and DP > 1, so we disable it for now and use TP for all evals. See https://github.com/huggingface/lighteval/issues/670
+    # if get_param_count_from_repo_id(model_name) >= 30_000_000_000:
+    #     tensor_parallel = True
    # else:
    #     num_gpus = 8
    #     tensor_parallel = False
+    tensor_parallel = True

    cmd = VLLM_SLURM_PREFIX.copy()
    cmd_args = [