[Bug]: lora is not loaded using num_scheduler_steps=8

🐛 Describe the bug
When I run a LoRA request with num_scheduler_steps > 1, vLLM does not report an error even if I pass a wrong LoRA path; it silently skips loading the adapter and returns wrong (base-model) results. With num_scheduler_steps=1 the same wrong path raises an error as expected. The bug can be reproduced with the following code.
```python
import torch
from typing import List, Optional, Tuple

from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
from vllm.lora.request import LoRARequest


def create_test_prompts(
    lora_path: str
) -> List[Tuple[str, SamplingParams, Optional[LoRARequest]]]:
    return [
        (
            "What happens when you put oil into water?",  # noqa: E501
            SamplingParams(temperature=1e-7,
                           max_tokens=128,
                           top_k=1,
                           top_p=1e-5,
                           stop_token_ids=[32003]),
            LoRARequest("sql-lora", 1, lora_path),
        ),
    ]


def process_requests(engine: LLMEngine,
                     test_prompts: List[Tuple[str, SamplingParams,
                                              Optional[LoRARequest]]]):
    """Continuously process a list of prompts and handle the outputs."""
    request_id = 0
    while test_prompts or engine.has_unfinished_requests():
        if test_prompts:
            prompt, sampling_params, lora_request = test_prompts.pop(0)
            engine.add_request(str(request_id),
                               prompt,
                               sampling_params,
                               lora_request=lora_request)
            request_id += 1

        request_outputs: List[RequestOutput] = engine.step()
        for request_output in request_outputs:
            if request_output.finished:
                print(request_output)


def initialize_engine() -> LLMEngine:
    """Initialize the LLMEngine."""
    model_path = "/data/quant_fp8/vicuna-13b-v1.5-fp8/"
    engine_args = EngineArgs(model=model_path,
                             enable_lora=True,
                             max_loras=16,
                             max_lora_rank=64,
                             max_cpu_loras=16,
                             max_num_seqs=256,
                             trust_remote_code=True,
                             enforce_eager=True,
                             num_scheduler_steps=8)
    return LLMEngine.from_engine_args(engine_args)


def main():
    engine = initialize_engine()
    # Deliberately nonexistent path: with num_scheduler_steps=8 the engine
    # neither raises an error nor loads the adapter.
    lora_path = '/the/path/does/not/exist/'
    test_prompts = create_test_prompts(lora_path)
    torch.cuda.cudart().cudaProfilerStart()
    process_requests(engine, test_prompts)
    torch.cuda.cudart().cudaProfilerStop()


if __name__ == '__main__':
    main()
```
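Until the multi-step scheduler surfaces the load failure, a client-side guard can fail fast before the request is enqueued. The sketch below is a hypothetical helper, not part of vLLM's API; it assumes a PEFT-style adapter directory containing an adapter_config.json, and that LoRARequest exposes the path as lora_path (older releases used lora_local_path):

```python
import os

from vllm.lora.request import LoRARequest


def validate_lora_request(lora_request: LoRARequest) -> None:
    """Hypothetical client-side check: raise before enqueueing a request
    whose adapter path the multi-step scheduler would otherwise ignore."""
    path = lora_request.lora_path  # `lora_local_path` on older vLLM versions
    if not os.path.isdir(path):
        raise FileNotFoundError(f"LoRA path does not exist: {path}")
    if not os.path.isfile(os.path.join(path, "adapter_config.json")):
        raise FileNotFoundError(f"No adapter_config.json under: {path}")
```

Calling validate_lora_request(lora_request) just before engine.add_request(...) in process_requests makes the repro fail immediately on /the/path/does/not/exist/ instead of silently generating base-model output.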
Model Input Dumps
No response