diff --git a/grpc_servicer/pyproject.toml b/grpc_servicer/pyproject.toml index 28b0c3120..0f37d04ec 100644 --- a/grpc_servicer/pyproject.toml +++ b/grpc_servicer/pyproject.toml @@ -30,7 +30,7 @@ classifiers = [ ] [project.optional-dependencies] -vllm = ["vllm>=0.17.0"] +vllm = ["vllm>=0.19.0"] sglang = ["sglang>=0.5.10rc0"] [project.urls] diff --git a/grpc_servicer/smg_grpc_servicer/vllm/servicer.py b/grpc_servicer/smg_grpc_servicer/vllm/servicer.py index 2a4acfe01..31d1be63e 100755 --- a/grpc_servicer/smg_grpc_servicer/vllm/servicer.py +++ b/grpc_servicer/smg_grpc_servicer/vllm/servicer.py @@ -15,17 +15,14 @@ from transformers import BatchFeature from vllm import PoolingParams, SamplingParams, TokensPrompt from vllm.engine.protocol import EngineClient -from vllm.inputs import token_inputs +from vllm.inputs.engine import MultiModalInput as VllmMultiModalInput +from vllm.inputs.engine import mm_input, tokens_input from vllm.logger import init_logger from vllm.logprobs import PromptLogprobs, SampleLogprobs from vllm.multimodal.inputs import ( MultiModalFieldConfig, MultiModalKwargsItems, PlaceholderRange, - mm_inputs, -) -from vllm.multimodal.inputs import ( - MultiModalInputs as VllmMultiModalInputs, ) from vllm.outputs import CompletionOutput, RequestOutput from vllm.sampling_params import RequestOutputKind, StructuredOutputsParams @@ -210,7 +207,7 @@ async def Embed( if not request.HasField("tokenized"): raise ValueError("EmbedRequest requires tokenized input") - prompt = token_inputs( + prompt = tokens_input( prompt_token_ids=list(request.tokenized.input_ids), prompt=request.tokenized.original_text or None, ) @@ -367,8 +364,8 @@ def _build_preprocessed_mm_inputs( self, tokenized: vllm_engine_pb2.TokenizedInput, mm_proto: vllm_engine_pb2.MultimodalInputs, - ) -> VllmMultiModalInputs: - """Build vLLM MultiModalInputs from preprocessed proto data. + ) -> VllmMultiModalInput: + """Build vLLM MultiModalInput from preprocessed proto data. Bypasses HF processor entirely — pixel values and model-specific tensors were already computed by the Rust router. Field layouts @@ -450,7 +447,7 @@ def _build_preprocessed_mm_inputs( ) mm_placeholders["image"] = placeholders - return mm_inputs( + return mm_input( prompt_token_ids=prompt_token_ids, mm_kwargs=mm_kwargs, mm_hashes=mm_hashes,