Skip to content

Commit d3dddb5

Browse files
committed
updated docstring
1 parent 35a9fd5 commit d3dddb5

File tree

1 file changed

+15
-17
lines changed

1 file changed

+15
-17
lines changed

src/lighteval/models/vllm/vllm_model.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -437,21 +437,10 @@ def cleanup(self):
437437

438438
def _create_auto_model(self, config: VLLMModelConfig) -> Optional[AsyncLLM]:
439439
"""
440-
Creates an instance of the async vllm model loaded from HF.
441-
442-
Args:
443-
pretrained (str): The name or path of the pretrained model.
444-
revision (str): The revision of the model.
445-
subfolder (Optional[str], optional): The subfolder within the model. Defaults to None.
446-
max_memory (Optional[dict], optional): The maximum memory to allocate for the model per GPU. Defaults to None.
447-
device_map (Optional[dict], optional): The device mapping for the model. Defaults to None.
448-
torch_dtype (Optional[Union[str, torch.dtype]], optional): The torch data type for the model. Defaults to None.
449-
quantization_config (Optional[Union[BitsAndBytesConfig, GPTQConfig]], optional): The quantization configuration for the model. Defaults to None.
450-
trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
451-
cache_dir (str, optional): The cache directory for the model. Defaults to "/scratch".
440+
Creates an instance of the async vLLM model loaded from HF. Requires vLLM v1.
452441
453442
Returns:
454-
transformers.PreTrainedModel: The created auto model instance.
443+
AsyncLLM: The created async VLLM instance
455444
"""
456445
self.model_args = {
457446
"model": config.model_name,
@@ -519,14 +508,13 @@ async def _async_batch(self, requests: list[GreedyUntilRequest | LoglikelihoodRe
519508
async def greedy_until(
520509
self,
521510
requests: list[GreedyUntilRequest],
522-
override_bs: Optional[int] = None,
511+
**kwargs,
523512
) -> list[GenerativeResponse]:
524513
"""
525514
Generates responses using a greedy decoding strategy until certain ending conditions are met.
526515
527516
Args:
528517
requests (list[Request]): list of requests containing the context and ending conditions.
529-
override_bs (int, optional): Override the batch size for generation. Defaults to None.
530518
531519
Returns:
532520
list[GenerateReturn]: list of generated responses.
@@ -564,10 +552,20 @@ async def greedy_until(
564552
async def loglikelihood(
565553
self,
566554
requests: list[LoglikelihoodRequest],
567-
override_bs: Optional[int] = None,
568555
return_bool_score: bool = True,
569-
rolling: bool = False,
556+
**kwargs,
570557
) -> list[LoglikelihoodResponse]:
558+
"""
559+
Runs greedy decoding over the requests' contexts until the ending conditions are met and
560+
stores the per-token log-probabilities.
561+
562+
Args:
563+
requests (list[Request]): list of requests containing the context and ending conditions.
564+
565+
Returns:
566+
list[LoglikelihoodResponse]: list of generated responses.
567+
"""
568+
571569
for request in requests:
572570
if request.context == "":
573571
request.tokenized_context = [self.tokenizer.eos_token_id]

0 commit comments

Comments
 (0)