Your current environment
The output of `python collect_env.py`
When running with beam search, an error occurred.

The script (device: Tesla T4):
```python
import time

import librosa

from vllm import LLM, SamplingParams
from vllm.sampling_params import BeamSearchParams

# Create a Whisper encoder/decoder model instance.
llm = LLM(
    model="openai/whisper-medium",
    max_model_len=256,
    max_num_seqs=32,
    limit_mm_per_prompt={"audio": 1},
    kv_cache_dtype="fp8",
)

audio_path = "xxx.wav"
prompts = [
    {
        "prompt": "<|startoftranscript|>",
        "multi_modal_data": {
            # librosa.load returns an (audio, sample_rate) tuple,
            # which is the format vLLM expects for audio inputs.
            "audio": librosa.load(audio_path, sr=None),
        },
    }
] * 1

# Sampling params for the (commented-out) greedy generate() path below.
sampling_params = SamplingParams(
    temperature=0,
    top_p=1.0,
    max_tokens=200,
)
# Beam search takes its own params object; it does not reuse
# sampling_params above.
beam_params = BeamSearchParams(beam_width=5, max_tokens=50)

start = time.time()

# Generate output tokens from the prompts.
# outputs = llm.generate(prompts, sampling_params)
outputs = llm.beam_search(prompts, beam_params)

# Print the outputs. beam_search() returns BeamSearchOutput objects,
# whose hypotheses live in .sequences (best beam first); the
# .prompt / .encoder_prompt / .outputs fields only exist on the
# RequestOutput objects returned by generate().
for output in outputs:
    generated_text = output.sequences[0].text
    print(f"Generated text: {generated_text!r}")

duration = time.time() - start
print("Duration:", duration)
print("RPS:", len(prompts) / duration)
```