Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 0 additions & 122 deletions python/sglang/srt/managers/async_mm_data_processor.py

This file was deleted.

14 changes: 4 additions & 10 deletions python/sglang/srt/managers/tokenizer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
from sglang.srt.environ import envs
from sglang.srt.lora.lora_registry import LoRARef, LoRARegistry
from sglang.srt.managers.async_dynamic_batch_tokenizer import AsyncDynamicbatchTokenizer
from sglang.srt.managers.async_mm_data_processor import AsyncMMDataProcessor
from sglang.srt.managers.disagg_service import start_disagg_service
from sglang.srt.managers.io_struct import (
AbortReq,
Expand Down Expand Up @@ -271,11 +270,6 @@ def init_tokenizer_and_processor(self):
self.mm_processor = get_mm_processor(
self.model_config.hf_config, server_args, _processor, transport_mode
)
self.mm_data_processor = AsyncMMDataProcessor(
self.mm_processor,
max_concurrent_calls=self.server_args.mm_max_concurrent_calls,
timeout_s=self.server_args.mm_per_request_timeout,
)

if server_args.skip_tokenizer_init:
self.tokenizer = self.processor = None
Expand Down Expand Up @@ -734,10 +728,10 @@ async def _tokenize_one_request(
need_wait_for_mm_inputs=obj.need_wait_for_mm_inputs,
)
if mm_inputs is None:
mm_inputs: Dict = await self.mm_data_processor.process(
mm_inputs: Dict = await self.mm_processor.process_mm_data_async(
image_data=obj.image_data,
audio_data=obj.audio_data,
input_text_or_ids=(input_text or input_ids),
input_text=(input_text or input_ids),
request_obj=obj,
max_req_input_len=self.max_req_input_len,
)
Expand All @@ -748,10 +742,10 @@ async def _tokenize_one_request(
):
# In language_only mode with zmq_to_scheduler, if we didn't dispatch
# to encoder (e.g., only one image), process locally like non-language_only mode
mm_inputs: Dict = await self.mm_data_processor.process(
mm_inputs: Dict = await self.mm_processor.process_mm_data_async(
image_data=obj.image_data,
audio_data=obj.audio_data,
input_text_or_ids=(input_text or input_ids),
input_text=(input_text or input_ids),
request_obj=obj,
max_req_input_len=self.max_req_input_len,
)
Expand Down
5 changes: 4 additions & 1 deletion python/sglang/srt/multimodal/processors/llava.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import os
from typing import Dict, List, Optional, Union

import numpy as np
Expand Down Expand Up @@ -96,14 +97,16 @@ async def _process_single_image(
):
if self.cpu_executor is not None:
loop = asyncio.get_running_loop()
return await loop.run_in_executor(
fut = loop.run_in_executor(
self.cpu_executor,
LlavaImageProcessor._process_single_image_task,
image_data,
aspect_ratio,
grid_pinpoints,
self._processor,
)
timeout = int(os.environ.get("REQUEST_TIMEOUT", "10"))
return await asyncio.wait_for(fut, timeout=timeout)
else:
return self._process_single_image_task(
image_data,
Expand Down
14 changes: 0 additions & 14 deletions python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,8 +720,6 @@ class ServerArgs:
sm_group_num: int = 8

# For Multi-Modal
mm_max_concurrent_calls: int = 32
mm_per_request_timeout: float = 10.0
enable_broadcast_mm_inputs_process: bool = False
enable_prefix_mm_cache: bool = False
mm_enable_dp_encoder: bool = False
Expand Down Expand Up @@ -5809,18 +5807,6 @@ def add_cli_args(parser: argparse.ArgumentParser):
)

# For Multi-Modal
parser.add_argument(
"--mm-max-concurrent-calls",
type=int,
default=ServerArgs.mm_max_concurrent_calls,
help="The max concurrent calls for async mm data processing.",
)
parser.add_argument(
"--mm-per-request-timeout",
type=int,
default=ServerArgs.mm_per_request_timeout,
help="The timeout for each multi-modal request in seconds.",
)
parser.add_argument(
"--enable-broadcast-mm-inputs-process",
action="store_true",
Expand Down
Loading
Loading