
[Usage]: Does whisper model support beam search? #17258

@sleepy-dev-bin

Description

Your current environment

The output of `python collect_env.py`

When running with beam search, an error occurred.
device: Tesla T4
The reproduction script:

import time

from vllm import LLM, SamplingParams
from vllm.sampling_params import BeamSearchParams
import librosa

# Create a Whisper encoder/decoder model instance
llm = LLM(
    model="openai/whisper-medium",
    max_model_len=256,
    max_num_seqs=32,
    limit_mm_per_prompt={"audio": 1},
    kv_cache_dtype="fp8",
)
audio_path = "xxx.wav"
prompts = [
    {
        "prompt": "<|startoftranscript|>",
        "multi_modal_data": {
            "audio": librosa.load(audio_path, sr=None),
        },
    }
] * 1  # increase the multiplier to send multiple copies of the same request

# Sampling params for llm.generate (commented out, like the generate call
# below, after switching to beam search).
# sampling_params = SamplingParams(
#     temperature=0,
#     top_p=1.0,
#     max_tokens=200,
# )

sampling_params = BeamSearchParams(beam_width=5, max_tokens=50)

start = time.time()

# Generate output tokens from the prompts. llm.generate returns
# RequestOutput objects; llm.beam_search instead returns BeamSearchOutput
# objects whose `sequences` field holds the ranked beams.
#outputs = llm.generate(prompts, sampling_params)
outputs = llm.beam_search(prompts, sampling_params)

# Print the best beam for each prompt. (BeamSearchOutput has no
# prompt/encoder_prompt/outputs fields, so a RequestOutput-style loop
# written for llm.generate would fail here.)
for output in outputs:
    best = output.sequences[0]
    print(f"Generated text: {best.text!r}")

duration = time.time() - start

print("Duration:", duration)
print("RPS:", len(prompts) / duration)

Labels: usage (How to use vllm)