Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
/vllm/entrypoints/cli @hmellor @mgoin @DarkLight1337 @russellb
/vllm/entrypoints/mcp @heheda12345
/vllm/entrypoints/openai @aarnphm @chaunceyjiang @DarkLight1337 @russellb
/vllm/entrypoints/openai/realtime @njhill
/vllm/entrypoints/openai/speech_to_text @NickLucche
/vllm/entrypoints/speech_to_text/realtime @njhill
/vllm/entrypoints/speech_to_text @NickLucche
/vllm/entrypoints/pooling @noooop
/vllm/entrypoints/sagemaker @DarkLight1337
/vllm/entrypoints/serve @njhill
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

import pytest

from vllm.entrypoints.openai.speech_to_text.protocol import TranscriptionResponse
from vllm.entrypoints.openai.speech_to_text.speech_to_text import OpenAISpeechToText
from vllm.entrypoints.speech_to_text.base.serving import OpenAISpeechToText
from vllm.entrypoints.speech_to_text.transcription.protocol import TranscriptionResponse


async def _never_finishes():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@
RequestResponseMetadata,
)
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.entrypoints.openai.speech_to_text.protocol import TranscriptionRequest
from vllm.entrypoints.openai.speech_to_text.serving import OpenAIServingTranscription
from vllm.entrypoints.openai.speech_to_text.speech_to_text import (
from vllm.entrypoints.speech_to_text.base.serving import (
OpenAISpeechToText,
asr_inter_chunk_separator,
)
from vllm.entrypoints.speech_to_text.transcription.protocol import TranscriptionRequest
from vllm.entrypoints.speech_to_text.transcription.serving import (
OpenAIServingTranscription,
)
from vllm.model_executor.models.interfaces import SupportsTranscription
from vllm.outputs import CompletionOutput, RequestOutput

Expand Down
34 changes: 10 additions & 24 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,19 +233,12 @@ def build_app(

attach_render_router(app)

if "transcription" in supported_tasks:
from vllm.entrypoints.openai.speech_to_text.api_router import (
attach_router as register_speech_to_text_api_router,
if "transcription" in supported_tasks or "realtime" in supported_tasks:
from vllm.entrypoints.speech_to_text.factories import (
register_speech_to_text_api_routers,
)

register_speech_to_text_api_router(app)

if "realtime" in supported_tasks:
from vllm.entrypoints.openai.realtime.api_router import (
attach_router as register_realtime_api_router,
)

register_realtime_api_router(app)
register_speech_to_text_api_routers(app, supported_tasks)

if any(task in POOLING_TASKS for task in supported_tasks):
from vllm.entrypoints.pooling.factories import register_pooling_api_routers
Expand Down Expand Up @@ -284,11 +277,11 @@ def build_app(

if "realtime" in supported_tasks:
# Add WebSocket metrics middleware
from vllm.entrypoints.openai.realtime.metrics import (
WebSocketMetricsMiddleware,
from vllm.entrypoints.speech_to_text.factories import (
add_websocket_metrics_middleware,
)

app.add_middleware(WebSocketMetricsMiddleware)
add_websocket_metrics_middleware(app)

if envs.VLLM_DEBUG_LOG_API_SERVER_RESPONSE:
logger.warning(
Expand Down Expand Up @@ -421,20 +414,13 @@ async def init_app_state(

await init_generative_scoring_state(engine_client, state, args, request_logger)

if "transcription" in supported_tasks:
from vllm.entrypoints.openai.speech_to_text.api_router import (
init_transcription_state,
)
if "transcription" in supported_tasks or "realtime" in supported_tasks:
from vllm.entrypoints.speech_to_text.factories import init_speech_to_text_state

init_transcription_state(
init_speech_to_text_state(
engine_client, state, args, request_logger, supported_tasks
)

if "realtime" in supported_tasks:
from vllm.entrypoints.openai.realtime.api_router import init_realtime_state

init_realtime_state(engine_client, state, args, request_logger, supported_tasks)

if any(task in POOLING_TASKS for task in supported_tasks):
from vllm.entrypoints.pooling.factories import init_pooling_state

Expand Down
10 changes: 5 additions & 5 deletions vllm/entrypoints/openai/engine/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,18 @@
)
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
from vllm.entrypoints.openai.speech_to_text.protocol import (
TranscriptionRequest,
TranscriptionResponse,
TranslationRequest,
)
from vllm.entrypoints.serve.disagg.protocol import GenerateRequest, GenerateResponse
from vllm.entrypoints.serve.tokenize.protocol import (
DetokenizeRequest,
TokenizeChatRequest,
TokenizeCompletionRequest,
TokenizeResponse,
)
from vllm.entrypoints.speech_to_text.transcription.protocol import (
TranscriptionRequest,
TranscriptionResponse,
)
from vllm.entrypoints.speech_to_text.translation.protocol import TranslationRequest
from vllm.entrypoints.utils import create_error_response
from vllm.inputs import EngineInput, PromptType
from vllm.logger import init_logger
Expand Down
18 changes: 10 additions & 8 deletions vllm/entrypoints/openai/run_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,6 @@
ErrorResponse,
OpenAIBaseModel,
)
from vllm.entrypoints.openai.speech_to_text.protocol import (
TranscriptionRequest,
TranscriptionResponse,
TranscriptionResponseVerbose,
TranslationRequest,
TranslationResponse,
TranslationResponseVerbose,
)
from vllm.entrypoints.pooling.embed.protocol import (
EmbeddingRequest,
EmbeddingResponse,
Expand All @@ -59,6 +51,16 @@
ScoreRequest,
ScoreResponse,
)
from vllm.entrypoints.speech_to_text.transcription.protocol import (
TranscriptionRequest,
TranscriptionResponse,
TranscriptionResponseVerbose,
)
from vllm.entrypoints.speech_to_text.translation.protocol import (
TranslationRequest,
TranslationResponse,
TranslationResponseVerbose,
)
from vllm.entrypoints.utils import create_error_response
from vllm.exceptions import VLLMValidationError
from vllm.logger import init_logger
Expand Down
148 changes: 0 additions & 148 deletions vllm/entrypoints/openai/speech_to_text/api_router.py

This file was deleted.

Empty file.
11 changes: 11 additions & 0 deletions vllm/entrypoints/speech_to_text/base/protocol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project


from typing import Literal, TypeAlias

import torch

## Protocols for Audio
# Closed set of string values accepted as an audio response format.
AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt"]
# Integer limits (e.g. .min / .max) of torch.long, looked up once at import time.
# NOTE(review): presumably used to bound integer request fields in the
# transcription/translation protocol modules — confirm against their imports.
_LONG_INFO = torch.iinfo(torch.long)
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,6 @@
)
from vllm.entrypoints.openai.engine.serving import OpenAIServing, SpeechToTextRequest
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.entrypoints.openai.speech_to_text.protocol import (
TranscriptionResponse,
TranscriptionResponseStreamChoice,
TranscriptionResponseVerbose,
TranscriptionSegment,
TranscriptionStreamResponse,
TranslationResponse,
TranslationResponseStreamChoice,
TranslationResponseVerbose,
TranslationSegment,
TranslationStreamResponse,
)
from vllm.entrypoints.utils import get_max_tokens
from vllm.exceptions import VLLMValidationError
from vllm.inputs import EncoderDecoderInput, EngineInput
Expand All @@ -51,6 +39,21 @@
from vllm.tokenizers import get_tokenizer
from vllm.utils.async_utils import merge_async_iterators

from ..transcription.protocol import (
TranscriptionResponse,
TranscriptionResponseStreamChoice,
TranscriptionResponseVerbose,
TranscriptionSegment,
TranscriptionStreamResponse,
)
from ..translation.protocol import (
TranslationResponse,
TranslationResponseStreamChoice,
TranslationResponseVerbose,
TranslationSegment,
TranslationStreamResponse,
)

# Either of the two plain response models: transcription or translation.
SpeechToTextResponse: TypeAlias = TranscriptionResponse | TranslationResponse
SpeechToTextResponseVerbose: TypeAlias = (
TranscriptionResponseVerbose | TranslationResponseVerbose
Expand Down
Loading
Loading