diff --git a/examples/offline_inference/eagle.py b/examples/offline_inference/eagle.py
index 615f67e9f8d8..1cb44334e39c 100644
--- a/examples/offline_inference/eagle.py
+++ b/examples/offline_inference/eagle.py
@@ -3,8 +3,6 @@
 import json
 import os
 
-from transformers import AutoTokenizer
-
 from vllm import LLM, SamplingParams
 
 
@@ -67,19 +65,8 @@ def main():
 
     max_model_len = 2048
 
-    tokenizer = AutoTokenizer.from_pretrained(model_dir)
-
     prompts = load_prompts(args.dataset, args.num_prompts)
 
-    prompt_ids = [
-        tokenizer.apply_chat_template([{
-            "role": "user",
-            "content": prompt
-        }],
-                                      add_generation_prompt=True)
-        for prompt in prompts
-    ]
-
     llm = LLM(
         model=model_dir,
         trust_remote_code=True,
@@ -102,8 +89,7 @@ def main():
 
     sampling_params = SamplingParams(temperature=args.temp, max_tokens=256)
 
-    outputs = llm.generate(prompt_token_ids=prompt_ids,
-                           sampling_params=sampling_params)
+    outputs = llm.generate(prompts=prompts, sampling_params=sampling_params)
 
     # print the generated text
     for output in outputs:
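
For reference, a minimal sketch of the call pattern this diff moves to, assuming a working vLLM install; the model name and prompt below are placeholders, not values from this change:

```python
# Minimal sketch (placeholder model and prompt, not taken from this diff).
from vllm import LLM, SamplingParams

llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct", trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.0, max_tokens=256)

# Plain-text prompts go straight to generate(); vLLM tokenizes internally,
# so the manual AutoTokenizer / apply_chat_template step is no longer needed.
outputs = llm.generate(prompts=["The capital of France is"],
                       sampling_params=sampling_params)

for output in outputs:
    print(output.outputs[0].text)
```

Note that `generate()` treats these strings as raw text; if chat-template formatting is still wanted, vLLM's `LLM.chat()` entry point applies the model's chat template internally.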