From b7ba200cfd6ec9d840a035f98cbd636fa22791ea Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Wed, 28 May 2025 05:46:05 +0000
Subject: [PATCH 1/2] Update vLLM compatibility

Signed-off-by: DarkLight1337
---
 lm_eval/models/vllm_causallms.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/lm_eval/models/vllm_causallms.py b/lm_eval/models/vllm_causallms.py
index b35dcb3ba67..72ce5a025b3 100644
--- a/lm_eval/models/vllm_causallms.py
+++ b/lm_eval/models/vllm_causallms.py
@@ -31,7 +31,7 @@
 try:
     import ray
-    from vllm import LLM, SamplingParams
+    from vllm import LLM, SamplingParams, TokensPrompt
     from vllm.lora.request import LoRARequest
     from vllm.transformers_utils.tokenizer import get_tokenizer
     from vllm.utils import get_open_port
@@ -77,7 +77,7 @@ def _vllm_mp_worker(
     try:
         llm = LLM(**model_args)
         res = llm.generate(
-            prompt_token_ids=requests,
+            [TokensPrompt(prompt_token_ids=request) for request in requests],
             sampling_params=sampling_params,
             lora_request=lora_request,
         )
@@ -219,13 +219,6 @@ def __init__(
             kwargs_resolve_hf_chat_template["model_config"] = (
                 self.model.llm_engine.model_config
             )
-
-            # https://github.com/vllm-project/vllm/pull/18259
-            if (
-                "trsut_remote_code"
-                in inspect.signature(resolve_hf_chat_template).parameters
-            ):
-                kwargs_resolve_hf_chat_template["trsut_remote_code"] = trust_remote_code
         else:
             kwargs_resolve_hf_chat_template["trust_remote_code"] = trust_remote_code

From bf94dd929976f44e73cf192247fa709918a5628e Mon Sep 17 00:00:00 2001
From: Baber Abbasi
Date: Sat, 2 Aug 2025 17:31:55 +0000
Subject: [PATCH 2/2] add TokensPrompt to all generate calls

---
 lm_eval/models/vllm_causallms.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lm_eval/models/vllm_causallms.py b/lm_eval/models/vllm_causallms.py
index 3eaf5665aee..e35cac2a079 100644
--- a/lm_eval/models/vllm_causallms.py
+++ b/lm_eval/models/vllm_causallms.py
@@ -1,6 +1,5 @@
 import copy
 import gc
-import inspect
 import logging
 import os
 from importlib.metadata import version
@@ -388,7 +387,7 @@ def run_inference_one_model(
         ):
             llm = LLM(**model_args)
             return llm.generate(
-                prompt_token_ids=requests,
+                [TokensPrompt(prompt_token_ids=request) for request in requests],
                 sampling_params=sampling_params,
                 lora_request=lora_request,
             )
@@ -477,7 +476,7 @@
         else:
             outputs = self.model.generate(
-                prompt_token_ids=requests,
+                [TokensPrompt(prompt_token_ids=request) for request in requests],
                 sampling_params=sampling_params,
                 use_tqdm=True if self.batch_size == "auto" else False,
                 lora_request=self.lora_request,
             )
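
Note for reviewers: a minimal sketch of the call pattern both patches migrate
to, assuming a vLLM version where LLM.generate no longer accepts the legacy
prompt_token_ids= keyword and pre-tokenized prompts must be wrapped in
TokensPrompt (the premise of these patches). The model name and token IDs
below are illustrative only, not taken from lm-eval:

    from vllm import LLM, SamplingParams, TokensPrompt

    llm = LLM(model="facebook/opt-125m")  # illustrative model choice
    sampling_params = SamplingParams(temperature=0.0, max_tokens=16)

    # Pre-tokenized prompts (illustrative token IDs), the form lm-eval passes in.
    requests = [[1, 15043, 29892], [1, 3439, 17632]]

    # Old style (removed upstream): llm.generate(prompt_token_ids=requests, ...)
    # New style: wrap each token-ID list in a TokensPrompt.
    outputs = llm.generate(
        [TokensPrompt(prompt_token_ids=request) for request in requests],
        sampling_params=sampling_params,
    )
    for output in outputs:
        print(output.outputs[0].text)

TokensPrompt is a TypedDict, so the wrapper is just a dict at runtime; it only
signals to generate() that the input is token IDs rather than raw text.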