@@ -437,21 +437,10 @@ def cleanup(self):
 
     def _create_auto_model(self, config: VLLMModelConfig) -> Optional[AsyncLLM]:
         """
-        Creates an instance of the async vllm model loaded from HF.
-
-        Args:
-            pretrained (str): The name or path of the pretrained model.
-            revision (str): The revision of the model.
-            subfolder (Optional[str], optional): The subfolder within the model. Defaults to None.
-            max_memory (Optional[dict], optional): The maximum memory to allocate for the model per GPU. Defaults to None.
-            device_map (Optional[dict], optional): The device mapping for the model. Defaults to None.
-            torch_dtype (Optional[Union[str, torch.dtype]], optional): The torch data type for the model. Defaults to None.
-            quantization_config (Optional[Union[BitsAndBytesConfig, GPTQConfig]], optional): The quantization configuration for the model. Defaults to None.
-            trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
-            cache_dir (str, optional): The cache directory for the model. Defaults to "/scratch".
+        Creates an instance of the async vLLM model loaded from HF. Requires vLLM v1.
 
         Returns:
-            transformers.PreTrainedModel: The created auto model instance.
+            AsyncLLM: The created async vLLM instance.
         """
         self.model_args = {
             "model": config.model_name,
@@ -519,7 +508,7 @@ async def _async_batch(self, requests: list[GreedyUntilRequest | LoglikelihoodRe
     async def greedy_until(
         self,
         requests: list[GreedyUntilRequest],
-        override_bs: Optional[int] = None,
+        **kwargs,
     ) -> list[GenerativeResponse]:
         """
         Generates responses using a greedy decoding strategy until certain ending conditions are met.
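Swapping `override_bs` for `**kwargs` keeps legacy call sites that still pass the argument from raising a `TypeError`, while making it explicit that the async path does not act on it. A small, self-contained illustration of the pattern (the function names here are hypothetical, not the wrapper's own):

```python
import asyncio
from typing import Optional


# Old-style signature: the batch-size override is an explicit parameter.
async def greedy_until_old(requests: list[str], override_bs: Optional[int] = None) -> list[str]:
    return [f"generation for {r!r}" for r in requests]


# New-style signature: unexpected keyword arguments are absorbed by **kwargs,
# so legacy callers that still pass override_bs do not break.
async def greedy_until_new(requests: list[str], **kwargs) -> list[str]:
    return [f"generation for {r!r}" for r in requests]


async def main() -> None:
    # A legacy call site keeps working against the new signature.
    print(await greedy_until_new(["2 + 2 ="], override_bs=8))


asyncio.run(main())
```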
@@ -564,9 +553,8 @@ async def greedy_until(
     async def loglikelihood(
         self,
         requests: list[LoglikelihoodRequest],
-        override_bs: Optional[int] = None,
         return_bool_score: bool = True,
-        rolling: bool = False,
+        **kwargs,
     ) -> list[LoglikelihoodResponse]:
         for request in requests:
             if request.context == "":
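`loglikelihood` gets the same treatment: `override_bs` and `rolling` are dropped from the signature and any such keywords are absorbed by `**kwargs`. If silently ignoring arguments is a concern, one option is to log them; a purely illustrative sketch (not something this diff does), using the standard `logging` module:

```python
import logging

logger = logging.getLogger(__name__)


def _warn_ignored_kwargs(method_name: str, kwargs: dict) -> None:
    """Log keyword arguments that the async backend accepts but does not use."""
    if kwargs:
        logger.warning("%s ignores unsupported arguments: %s", method_name, sorted(kwargs))


# Example: a legacy caller still passing the removed parameters.
_warn_ignored_kwargs("loglikelihood", {"override_bs": 8, "rolling": False})
```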