Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion grpc_servicer/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ classifiers = [
]

[project.optional-dependencies]
vllm = ["vllm>=0.17.0"]
vllm = ["vllm>=0.19.0"]
sglang = ["sglang>=0.5.10rc0"]

[project.urls]
Expand Down
15 changes: 6 additions & 9 deletions grpc_servicer/smg_grpc_servicer/vllm/servicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,14 @@
from transformers import BatchFeature
from vllm import PoolingParams, SamplingParams, TokensPrompt
from vllm.engine.protocol import EngineClient
from vllm.inputs import token_inputs
from vllm.inputs.engine import MultiModalInput as VllmMultiModalInput
from vllm.inputs.engine import mm_input, tokens_input
Comment thread
coderabbitai[bot] marked this conversation as resolved.
from vllm.logger import init_logger
from vllm.logprobs import PromptLogprobs, SampleLogprobs
from vllm.multimodal.inputs import (
MultiModalFieldConfig,
MultiModalKwargsItems,
PlaceholderRange,
mm_inputs,
)
from vllm.multimodal.inputs import (
MultiModalInputs as VllmMultiModalInputs,
)
from vllm.outputs import CompletionOutput, RequestOutput
from vllm.sampling_params import RequestOutputKind, StructuredOutputsParams
Expand Down Expand Up @@ -210,7 +207,7 @@ async def Embed(
if not request.HasField("tokenized"):
raise ValueError("EmbedRequest requires tokenized input")

prompt = token_inputs(
prompt = tokens_input(
prompt_token_ids=list(request.tokenized.input_ids),
prompt=request.tokenized.original_text or None,
)
Expand Down Expand Up @@ -367,8 +364,8 @@ def _build_preprocessed_mm_inputs(
self,
tokenized: vllm_engine_pb2.TokenizedInput,
mm_proto: vllm_engine_pb2.MultimodalInputs,
) -> VllmMultiModalInputs:
"""Build vLLM MultiModalInputs from preprocessed proto data.
) -> VllmMultiModalInput:
"""Build vLLM MultiModalInput from preprocessed proto data.

Comment thread
CatherineSue marked this conversation as resolved.
Bypasses HF processor entirely — pixel values and model-specific
tensors were already computed by the Rust router. Field layouts
Expand Down Expand Up @@ -450,7 +447,7 @@ def _build_preprocessed_mm_inputs(
)
mm_placeholders["image"] = placeholders

return mm_inputs(
return mm_input(
prompt_token_ids=prompt_token_ids,
mm_kwargs=mm_kwargs,
mm_hashes=mm_hashes,
Expand Down
Loading