Commit 50acc39

updated docstring
1 parent 35a9fd5

1 file changed (+4, -16 lines)

src/lighteval/models/vllm/vllm_model.py

@@ -437,21 +437,10 @@ def cleanup(self):
 
     def _create_auto_model(self, config: VLLMModelConfig) -> Optional[AsyncLLM]:
         """
-        Creates an instance of the async vllm model loaded from HF.
-
-        Args:
-            pretrained (str): The name or path of the pretrained model.
-            revision (str): The revision of the model.
-            subfolder (Optional[str], optional): The subfolder within the model. Defaults to None.
-            max_memory (Optional[dict], optional): The maximum memory to allocate for the model per GPU. Defaults to None.
-            device_map (Optional[dict], optional): The device mapping for the model. Defaults to None.
-            torch_dtype (Optional[Union[str, torch.dtype]], optional): The torch data type for the model. Defaults to None.
-            quantization_config (Optional[Union[BitsAndBytesConfig, GPTQConfig]], optional): The quantization configuration for the model. Defaults to None.
-            trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
-            cache_dir (str, optional): The cache directory for the model. Defaults to "/scratch".
+        Creates an instance of the async vLLM model loaded from HF. Requires vLLM v1.
 
         Returns:
-            transformers.PreTrainedModel: The created auto model instance.
+            AsyncLLM: The created async vLLM instance.
         """
         self.model_args = {
             "model": config.model_name,
@@ -519,7 +508,7 @@ async def _async_batch(self, requests: list[GreedyUntilRequest | LoglikelihoodRe
     async def greedy_until(
         self,
         requests: list[GreedyUntilRequest],
-        override_bs: Optional[int] = None,
+        **kwargs,
     ) -> list[GenerativeResponse]:
         """
         Generates responses using a greedy decoding strategy until certain ending conditions are met.
@@ -564,9 +553,8 @@ async def greedy_until(
     async def loglikelihood(
         self,
         requests: list[LoglikelihoodRequest],
-        override_bs: Optional[int] = None,
         return_bool_score: bool = True,
-        rolling: bool = False,
+        **kwargs,
     ) -> list[LoglikelihoodResponse]:
         for request in requests:
             if request.context == "":
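
Together with the greedy_until hunk above, this drops the unused override_bs and rolling parameters in favor of **kwargs, so older call sites that still pass them keep working. A small self-contained sketch of that pattern (hypothetical function, not lighteval code):

import asyncio
from typing import Any


async def greedy_until(requests: list[str], **kwargs: Any) -> list[str]:
    # Leftover keyword arguments such as the removed override_bs or rolling
    # are absorbed and ignored instead of raising a TypeError.
    return [r + " ..." for r in requests]  # stand-in for real generation


out = asyncio.run(greedy_until(["2 + 2 ="], override_bs=8, rolling=False))
print(out)  # ['2 + 2 = ...']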
