Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/entrypoints/openai/test_lora_resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ async def mock_generate(*args, **kwargs):
mock_engine.add_lora.reset_mock()

mock_engine.model_config = MockModelConfig()
mock_engine.processor = MagicMock()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()

models = OpenAIServingModels(
Expand Down
14 changes: 7 additions & 7 deletions tests/entrypoints/openai/test_serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ async def test_serving_chat_returns_correct_model_name():
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = MockModelConfig()
mock_engine.processor = MagicMock()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()

serving_chat = _build_serving_chat(mock_engine)
Expand Down Expand Up @@ -459,7 +459,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = MockModelConfig()
mock_engine.processor = MagicMock()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()

serving_chat = _build_serving_chat(mock_engine)
Expand Down Expand Up @@ -492,7 +492,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = mock_model_config
mock_engine.processor = MagicMock()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()

# Initialize the serving chat
Expand Down Expand Up @@ -537,7 +537,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = mock_model_config
mock_engine.processor = MagicMock()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()

# Initialize the serving chat
Expand Down Expand Up @@ -583,7 +583,7 @@ async def test_serving_chat_could_load_correct_generation_config():
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = mock_model_config
mock_engine.processor = MagicMock()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()

# Initialize the serving chat
Expand Down Expand Up @@ -629,7 +629,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = mock_model_config
mock_engine.processor = MagicMock()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()

serving_chat = _build_serving_chat(mock_engine)
Expand Down Expand Up @@ -662,7 +662,7 @@ async def test_serving_chat_data_parallel_rank_extraction():
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = MockModelConfig()
mock_engine.processor = MagicMock()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()

# Mock the generate method to return an async generator
Expand Down
2 changes: 1 addition & 1 deletion tests/entrypoints/openai/test_serving_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def serving() -> OpenAIServing:
model_config.max_model_len = 32768
models = Mock(spec=OpenAIServingModels)
models.model_config = model_config
models.processor = Mock()
models.input_processor = Mock()
models.io_processor = Mock()

serving = OpenAIServing(
Expand Down
2 changes: 1 addition & 1 deletion tests/entrypoints/openai/test_serving_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async def _async_serving_models_init() -> OpenAIServingModels:
mock_model_config = MagicMock(spec=ModelConfig)
mock_model_config.max_model_len = 2048
mock_engine_client.model_config = mock_model_config
mock_engine_client.processor = MagicMock()
mock_engine_client.input_processor = MagicMock()
mock_engine_client.io_processor = MagicMock()

serving_models = OpenAIServingModels(
Expand Down
4 changes: 2 additions & 2 deletions tests/entrypoints/openai/test_serving_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ async def serving_responses_instance(self):
model_config.get_diff_sampling_param.return_value = {}
engine_client.model_config = model_config

engine_client.processor = MagicMock()
engine_client.input_processor = MagicMock()
engine_client.io_processor = MagicMock()

models = MagicMock()
Expand Down Expand Up @@ -213,7 +213,7 @@ async def serving_responses_instance(self):
model_config.get_diff_sampling_param.return_value = {}
engine_client.model_config = model_config

engine_client.processor = MagicMock()
engine_client.input_processor = MagicMock()
engine_client.io_processor = MagicMock()

models = MagicMock()
Expand Down
8 changes: 4 additions & 4 deletions tests/v1/engine/test_processor_multi_modal_uuids.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from vllm.assets.video import VideoAsset
from vllm.config import CacheConfig, DeviceConfig, ModelConfig, VllmConfig
from vllm.sampling_params import SamplingParams
from vllm.v1.engine import processor as processor_mod
from vllm.v1.engine.processor import Processor
from vllm.v1.engine import input_processor as processor_mod
from vllm.v1.engine.input_processor import InputProcessor

cherry_pil_image = ImageAsset("cherry_blossom").pil_image
stop_pil_image = ImageAsset("stop_sign").pil_image
Expand All @@ -18,7 +18,7 @@
# Mock processor for testing
def _mk_processor(
monkeypatch, *, mm_cache_gb: float = 4.0, enable_prefix_caching: bool = True
) -> Processor:
) -> InputProcessor:
"""
Create an InputProcessor instance with minimal configuration suitable for unit
tests without accessing external resources.
Expand Down Expand Up @@ -65,7 +65,7 @@ def __init__(self, gb: float):
device_config=DeviceConfig(device="cpu"),
)

return Processor(vllm_config, tokenizer=None)
return InputProcessor(vllm_config, tokenizer=None)


def test_multi_modal_uuids_length_mismatch_raises(monkeypatch):
Expand Down
4 changes: 2 additions & 2 deletions vllm/engine/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@
from vllm.tasks import SupportedTask
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.processor import Processor
from vllm.v1.engine.input_processor import InputProcessor


class EngineClient(ABC):
"""Protocol class for Clients to Engine"""

vllm_config: VllmConfig
model_config: ModelConfig
processor: Processor
input_processor: InputProcessor
io_processor: IOProcessor | None

@property
Expand Down
6 changes: 3 additions & 3 deletions vllm/entrypoints/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def __init__(
self.supported_tasks = supported_tasks

self.model_config = self.llm_engine.model_config
self.processor = self.llm_engine.processor
self.input_processor = self.llm_engine.input_processor
self.io_processor = self.llm_engine.io_processor

def get_tokenizer(self) -> AnyTokenizer:
Expand All @@ -364,7 +364,7 @@ def set_tokenizer(self, tokenizer: AnyTokenizer) -> None:
self.llm_engine.tokenizer = get_cached_tokenizer(tokenizer)

def reset_mm_cache(self) -> None:
self.processor.clear_mm_cache()
self.input_processor.clear_mm_cache()
self.llm_engine.reset_mm_cache()

def get_default_sampling_params(self) -> SamplingParams:
Expand Down Expand Up @@ -1674,7 +1674,7 @@ def _process_inputs(
tokenization_kwargs,
)

engine_request = self.processor.process_inputs(
engine_request = self.input_processor.process_inputs(
request_id,
engine_prompt,
params,
Expand Down
8 changes: 4 additions & 4 deletions vllm/entrypoints/openai/serving_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def __init__(
self._async_tokenizer_pool: dict[AnyTokenizer, AsyncMicrobatchTokenizer] = {}
self.log_error_stack = log_error_stack

self.processor = self.models.processor
self.input_processor = self.models.input_processor
self.io_processor = self.models.io_processor
self.model_config = self.models.model_config
self.max_model_len = self.model_config.max_model_len
Expand Down Expand Up @@ -330,7 +330,7 @@ def _get_reasoning_parser(
return parser

async def reset_mm_cache(self) -> None:
self.processor.clear_mm_cache()
self.input_processor.clear_mm_cache()
await self.engine_client.reset_mm_cache()

async def beam_search(
Expand All @@ -348,7 +348,7 @@ async def beam_search(
length_penalty = params.length_penalty
include_stop_str_in_output = params.include_stop_str_in_output

processor = self.processor
processor = self.input_processor
tokenizer = processor.tokenizer
if tokenizer is None:
raise ValueError(
Expand Down Expand Up @@ -1214,7 +1214,7 @@ async def _process_inputs(
self.max_model_len, params.truncate_prompt_tokens, tokenization_kwargs
)

engine_request = self.processor.process_inputs(
engine_request = self.input_processor.process_inputs(
request_id,
engine_prompt,
params,
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def __init__(
)
self.lora_resolver_lock: dict[str, Lock] = defaultdict(Lock)

self.processor = self.engine_client.processor
self.input_processor = self.engine_client.input_processor
self.io_processor = self.engine_client.io_processor
self.model_config = self.engine_client.model_config
self.max_model_len = self.model_config.max_model_len
Expand Down
2 changes: 1 addition & 1 deletion vllm/model_executor/models/nemotron_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from vllm.multimodal.image import convert_image_mode
from vllm.multimodal.processing import PromptUpdateDetails
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.processor import cached_image_processor_from_config
from vllm.transformers_utils import cached_image_processor_from_config
Comment thread
DarkLight1337 marked this conversation as resolved.
Outdated
from vllm.transformers_utils.tokenizer import AnyTokenizer

from .interfaces import (
Expand Down
14 changes: 7 additions & 7 deletions vllm/v1/engine/async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError
from vllm.v1.engine.input_processor import InputProcessor
from vllm.v1.engine.output_processor import OutputProcessor, RequestOutputCollector
from vllm.v1.engine.parallel_sampling import ParentRequest
from vllm.v1.engine.processor import Processor
from vllm.v1.executor import Executor
from vllm.v1.metrics.loggers import (
StatLoggerFactory,
Expand Down Expand Up @@ -112,7 +112,7 @@ def __init__(
else:
tokenizer = init_tokenizer_from_configs(self.model_config)

self.processor = Processor(self.vllm_config, tokenizer)
self.input_processor = InputProcessor(self.vllm_config, tokenizer)
self.io_processor = get_io_processor(
self.vllm_config,
self.model_config.io_processor_plugin,
Expand Down Expand Up @@ -297,7 +297,7 @@ async def add_request(
"Processor has been moved under OpenAIServing and will "
"be removed from AsyncLLM in v0.13."
)
request = self.processor.process_inputs(
request = self.input_processor.process_inputs(
request_id,
prompt,
params,
Expand Down Expand Up @@ -481,7 +481,7 @@ def _run_output_handler(self):
output_processor = self.output_processor
log_stats = self.log_stats
logger_manager = self.logger_manager
processor = self.processor
processor = self.input_processor

async def output_handler():
try:
Expand Down Expand Up @@ -699,11 +699,11 @@ async def encode(

@property
def tokenizer(self) -> AnyTokenizer | None:
return self.processor.tokenizer
return self.input_processor.tokenizer

@tokenizer.setter
def tokenizer(self, tokenizer: AnyTokenizer | None) -> None:
self.processor.tokenizer = tokenizer
self.input_processor.tokenizer = tokenizer

async def get_tokenizer(self) -> AnyTokenizer:
if self.tokenizer is None:
Expand Down Expand Up @@ -738,7 +738,7 @@ async def stop_profile(self) -> None:
await asyncio.gather(*coros)

async def reset_mm_cache(self) -> None:
self.processor.clear_mm_cache()
self.input_processor.clear_mm_cache()
await self.engine_core.reset_mm_cache_async()

async def reset_prefix_cache(self) -> None:
Expand Down
Loading
Loading