Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions vllm/renderers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,17 +179,17 @@ def warmup(self, chat_params: ChatParams) -> None:
from vllm.entrypoints.chat_utils import ChatTemplateResolutionError

try:
logger.info("Warming up chat template processing...")
logger.debug("Warming up chat template processing...")
start_time = time.perf_counter()

self.render_chat([[{"role": "user", "content": "warmup"}]], chat_params)

elapsed = time.perf_counter() - start_time
logger.info("Chat template warmup completed in %.3fs", elapsed)
logger.debug("Chat template warmup completed in %.3fs", elapsed)
except ChatTemplateResolutionError:
logger.info("This model does not support chat template.")
logger.debug("This model does not support chat template.")
except Exception:
logger.exception("Chat template warmup failed")
logger.warning("Chat template warmup failed", exc_info=True)

if self.mm_processor:
from vllm.multimodal.processing import TimingContext
Expand All @@ -200,7 +200,7 @@ def warmup(self, chat_params: ChatParams) -> None:
mm_limits = processor.info.allowed_mm_limits

try:
logger.info("Warming up multi-modal processing...")
logger.debug("Warming up multi-modal processing...")
start_time = time.perf_counter()

processor_inputs = processor.dummy_inputs.get_dummy_processor_inputs(
Expand All @@ -209,14 +209,13 @@ def warmup(self, chat_params: ChatParams) -> None:
mm_options=mm_config.limit_per_prompt,
)
_ = processor.apply(
processor_inputs,
timing_ctx=TimingContext(enabled=False),
processor_inputs, timing_ctx=TimingContext(enabled=False)
)

elapsed = time.perf_counter() - start_time
logger.info("Multi-modal warmup completed in %.3fs", elapsed)
except Exception:
logger.exception("Multi-modal warmup failed")
logger.warning("Multi-modal warmup failed")
finally:
self.clear_mm_cache()

Expand Down
Loading