components/src/dynamo/frontend/vllm_processor.py (8 changes: 7 additions & 1 deletion)

@@ -14,7 +14,6 @@
 from typing import Any

 from vllm.config import CacheConfig, LoadConfig, ModelConfig, VllmConfig
-from vllm.inputs.data import TokensPrompt
 from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import RequestOutputKind, SamplingParams
 from vllm.tasks import GENERATION_TASKS
@@ -39,6 +38,13 @@
 from .prepost import StreamingPostProcessor, preprocess_chat_request
 from .utils import random_uuid

+# Try importing from new vLLM (https://github.com/vllm-project/vllm/pull/35182), fallback to old structure
+try:
+    from vllm.inputs.llm import TokensPrompt
+except ImportError:
+    from vllm.inputs.data import TokensPrompt
+
+
 logger = logging.getLogger(__name__)
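This try/except fallback is the heart of the PR and recurs in every touched file: prefer the module layout introduced by vllm-project/vllm#35182, fall back to the pre-refactor location on older installs. A minimal standalone sketch of the pattern (the token IDs are invented for illustration):

# Prefer the new location; fall back for older vLLM releases.
try:
    from vllm.inputs.llm import TokensPrompt  # vLLM with PR #35182
except ImportError:
    from vllm.inputs.data import TokensPrompt  # pre-refactor vLLM

# TokensPrompt is a TypedDict, so call sites look the same either way:
prompt = TokensPrompt(prompt_token_ids=[101, 2023, 2003, 102])
print(prompt["prompt_token_ids"])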
(file name not captured in this view)

@@ -20,7 +20,6 @@
 from vllm.config import ModelConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.chat_utils import ConversationMessage
-from vllm.inputs.data import TokensPrompt
 from vllm.sampling_params import SamplingParams
 from vllm.tokenizers import TokenizerLike as AnyTokenizer
@@ -46,6 +45,12 @@
     OpenAIServingModels,
 )

+# Try importing from new vLLM (https://github.com/vllm-project/vllm/pull/35182), fallback to old structure
+try:
+    from vllm.inputs.llm import TokensPrompt
+except ImportError:
+    from vllm.inputs.data import TokensPrompt
+

 class StubEngineClient:
     """
components/src/dynamo/vllm/multimodal_utils/protocol.py (10 changes: 8 additions & 2 deletions)

@@ -22,15 +22,21 @@
 from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator
 from pydantic_core import core_schema
 from typing_extensions import NotRequired
-from vllm.inputs.data import TokensPrompt
 from vllm.logprobs import PromptLogprobs
-from vllm.multimodal.inputs import MultiModalUUIDDict  # noqa: F401
 from vllm.outputs import CompletionOutput
 from vllm.sampling_params import SamplingParams
 from vllm.v1.metrics.stats import RequestStateStats

 from dynamo.common.multimodal.embedding_transfer import TransferRequest

+# Try importing from new vLLM (https://github.com/vllm-project/vllm/pull/35182), fallback to old structure
+try:
+    from vllm.inputs.llm import MultiModalUUIDDict  # noqa: F401
+    from vllm.inputs.llm import TokensPrompt
+except ImportError:
+    from vllm.inputs.data import TokensPrompt
+    from vllm.multimodal.inputs import MultiModalUUIDDict  # noqa: F401
+

 class Request(BaseModel):
     prompt: str
components/src/dynamo/vllm/tests/test_vllm_renderer_api.py (10 changes: 8 additions & 2 deletions)

@@ -23,7 +23,6 @@
     "vllm.entrypoints.openai.chat_completion.protocol"
 )
 _engine_protocol = importlib.import_module("vllm.entrypoints.openai.engine.protocol")
-_inputs_data = importlib.import_module("vllm.inputs.data")
 _reasoning = importlib.import_module("vllm.reasoning")
 _sampling_params = importlib.import_module("vllm.sampling_params")
 _tool_parsers = importlib.import_module("vllm.tool_parsers")
@@ -34,7 +33,6 @@
 ChatCompletionRequest = _chat_protocol.ChatCompletionRequest
 DeltaMessage = _engine_protocol.DeltaMessage
 DeltaToolCall = _engine_protocol.DeltaToolCall
-TokensPrompt = _inputs_data.TokensPrompt
 ReasoningParser = _reasoning.ReasoningParser
 ReasoningParserManager = _reasoning.ReasoningParserManager
 RequestOutputKind = _sampling_params.RequestOutputKind
@@ -48,6 +46,14 @@
 OutputProcessor = _output_processor_mod.OutputProcessor
 OutputProcessorOutput = _output_processor_mod.OutputProcessorOutput

+# Try importing from new vLLM (https://github.com/vllm-project/vllm/pull/35182), fallback to legacy location
+try:
+    _inputs_mod = importlib.import_module("vllm.inputs.llm")
+except (ImportError, ModuleNotFoundError):
+    _inputs_mod = importlib.import_module("vllm.inputs.data")
+
+TokensPrompt = _inputs_mod.TokensPrompt
+
 pytestmark = [
     pytest.mark.vllm,
     pytest.mark.unit,
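The test reaches the modules through importlib, so its guard catches ModuleNotFoundError alongside its parent class ImportError. If more symbols ever need this probing, it could be factored into a tiny helper; a hypothetical sketch, not part of this PR:

import importlib


def import_first(*names):
    """Return the first module in `names` that imports successfully."""
    last_exc = None
    for name in names:
        try:
            return importlib.import_module(name)
        except ImportError as exc:  # ModuleNotFoundError is a subclass
            last_exc = exc
    raise last_exc


_inputs_mod = import_first("vllm.inputs.llm", "vllm.inputs.data")
TokensPrompt = _inputs_mod.TokensPrompt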
examples/multimodal/components/worker.py (7 changes: 6 additions & 1 deletion)

@@ -17,7 +17,6 @@
 import torch
 import uvloop
 from vllm.distributed.kv_events import ZmqEventPublisher
-from vllm.inputs.data import TokensPrompt
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.v1.engine.async_llm import AsyncLLM
@@ -40,6 +39,12 @@
 from utils.model import construct_mm_data
 from utils.protocol import MyRequestOutput, vLLMMultimodalRequest

+# Try importing from new vLLM (https://github.com/vllm-project/vllm/pull/35182), fallback to old structure
+try:
+    from vllm.inputs.llm import TokensPrompt
+except ImportError:
+    from vllm.inputs.data import TokensPrompt
+
 configure_dynamo_logging()
 logger = logging.getLogger(__name__)
examples/multimodal/utils/chat_processor.py (7 changes: 6 additions & 1 deletion)

@@ -27,11 +27,16 @@
 from vllm.entrypoints.openai.engine.protocol import RequestResponseMetadata
 from vllm.entrypoints.openai.models.protocol import BaseModelPath
 from vllm.entrypoints.openai.models.serving import OpenAIServingModels
-from vllm.inputs.data import TokensPrompt
 from vllm.renderers.registry import renderer_from_config
 from vllm.sampling_params import SamplingParams
 from vllm.tokenizers import TokenizerLike as AnyTokenizer

+# Try importing from new vLLM (https://github.com/vllm-project/vllm/pull/35182), fallback to old structure
+try:
+    from vllm.inputs.llm import TokensPrompt
+except ImportError:
+    from vllm.inputs.data import TokensPrompt
+

 class StubEngineClient:
     """
examples/multimodal/utils/protocol.py (10 changes: 8 additions & 2 deletions)

@@ -21,15 +21,21 @@
 from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator
 from pydantic_core import core_schema
 from typing_extensions import NotRequired
-from vllm.inputs.data import TokensPrompt
 from vllm.logprobs import PromptLogprobs
-from vllm.multimodal.inputs import MultiModalUUIDDict  # noqa: F401
 from vllm.outputs import CompletionOutput
 from vllm.sampling_params import SamplingParams
 from vllm.v1.metrics.stats import RequestStateStats

 import dynamo.nixl_connect as connect

+# Try importing from new vLLM (https://github.com/vllm-project/vllm/pull/35182), fallback to old structure
+try:
+    from vllm.inputs.llm import MultiModalUUIDDict  # noqa: F401
+    from vllm.inputs.llm import TokensPrompt
+except ImportError:
+    from vllm.inputs.data import TokensPrompt
+    from vllm.multimodal.inputs import MultiModalUUIDDict  # noqa: F401
+

 class Request(BaseModel):
     prompt: str
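With this PR the identical fallback block lives in seven files. A natural follow-up (hypothetical, not proposed here) would centralize it in one compatibility module, say dynamo/common/vllm_compat.py, and import from there:

# dynamo/common/vllm_compat.py (hypothetical module name)
# Single home for symbols relocated by vllm-project/vllm#35182.
try:
    from vllm.inputs.llm import MultiModalUUIDDict, TokensPrompt
except ImportError:
    from vllm.inputs.data import TokensPrompt
    from vllm.multimodal.inputs import MultiModalUUIDDict

__all__ = ["MultiModalUUIDDict", "TokensPrompt"]

Call sites would then read from dynamo.common.vllm_compat import TokensPrompt, and the next upstream relocation would touch a single file.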