-
Notifications
You must be signed in to change notification settings - Fork 1.1k
[Log] Wire stat loggers into AsyncOmniEngine to match AsyncLLM #2551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
016a29f
1adf3f5
c097462
7619dee
152a8b0
ff957d9
41d5dbc
7dfe1c6
b678295
8941aca
7ac4bfe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| """Guard tests for AsyncOmniEngine.do_log_stats edge cases. | ||
|
|
||
| These are pure-Python tests that bypass __init__ and only exercise the | ||
| no-op branches of do_log_stats, so no stage cores / threads are needed. | ||
| """ | ||
|
|
||
| import asyncio | ||
|
|
||
| import pytest | ||
|
|
||
| from vllm_omni.engine.async_omni_engine import AsyncOmniEngine | ||
|
|
||
| pytestmark = [pytest.mark.core_model, pytest.mark.cpu] | ||
|
|
||
|
|
||
| def _make_bare_engine() -> AsyncOmniEngine: | ||
| # Bypass __init__ so we don't spin up stage cores; we only need the | ||
| # attributes do_log_stats touches. | ||
| return AsyncOmniEngine.__new__(AsyncOmniEngine) | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_do_log_stats_noop_when_manager_missing(): | ||
| engine = _make_bare_engine() | ||
| engine.logger_manager = None | ||
| engine.orchestrator_loop = None | ||
| await engine.do_log_stats() # should silently return | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_do_log_stats_noop_when_loop_missing(): | ||
| engine = _make_bare_engine() | ||
|
|
||
| class _Manager: | ||
| def log(self) -> None: # pragma: no cover - must not be called | ||
| raise AssertionError("log() should not be called without a loop") | ||
|
|
||
| engine.logger_manager = _Manager() | ||
| engine.orchestrator_loop = None | ||
| await engine.do_log_stats() | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_do_log_stats_noop_when_loop_not_running(): | ||
| engine = _make_bare_engine() | ||
|
|
||
| class _Manager: | ||
| def log(self) -> None: # pragma: no cover - must not be called | ||
| raise AssertionError("log() should not be called on a stopped loop") | ||
|
|
||
| dead_loop = asyncio.new_event_loop() | ||
| dead_loop.close() | ||
|
|
||
| engine.logger_manager = _Manager() | ||
| engine.orchestrator_loop = dead_loop | ||
| await engine.do_log_stats() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,8 @@ | |
| from vllm.pooling_params import PoolingParams | ||
| from vllm.sampling_params import SamplingParams | ||
| from vllm.v1.engine import EngineCoreOutputs | ||
| from vllm.v1.metrics.loggers import StatLoggerManager | ||
| from vllm.v1.metrics.stats import IterationStats | ||
|
|
||
| from vllm_omni.distributed.omni_connectors.adapter import compute_talker_prompt_ids_length | ||
| from vllm_omni.engine import ( | ||
|
|
@@ -122,6 +124,7 @@ def __init__( | |
| stage_vllm_configs: list[Any], | ||
| *, | ||
| async_chunk: bool = False, | ||
| logger_manager: StatLoggerManager | None = None, | ||
| ) -> None: | ||
| self.request_async_queue = request_async_queue | ||
| self.output_async_queue = output_async_queue | ||
|
|
@@ -133,6 +136,8 @@ def __init__( | |
| self.stage_clients: list[Any] = stage_clients | ||
| self.output_processors: list[Any] = output_processors | ||
| self.stage_vllm_configs: list[Any] = stage_vllm_configs | ||
| self.logger_manager: StatLoggerManager | None = logger_manager | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is log_stats still useful in orchestrator? Could we just |
||
| self.log_stats = self.logger_manager is not None | ||
|
|
||
| # Per-request state | ||
| self.request_states: dict[str, OrchestratorRequestState] = {} | ||
|
|
@@ -624,10 +629,13 @@ async def _process_stage_outputs(self, stage_id: int, raw_outputs: EngineCoreOut | |
| """ | ||
| processor = self.output_processors[stage_id] | ||
|
|
||
| num_outputs = len(raw_outputs.outputs) | ||
| iteration_stats = IterationStats() if (self.log_stats and num_outputs) else None | ||
|
|
||
| processed = processor.process_outputs( | ||
| raw_outputs.outputs, | ||
| raw_outputs.timestamp, | ||
| None, | ||
| iteration_stats, | ||
| ) | ||
|
|
||
| if processed.reqs_to_abort: | ||
|
|
@@ -636,6 +644,22 @@ async def _process_stage_outputs(self, stage_id: int, raw_outputs: EngineCoreOut | |
| if raw_outputs.scheduler_stats is not None: | ||
| processor.update_scheduler_stats(raw_outputs.scheduler_stats) | ||
|
|
||
| # Mirror vLLM AsyncLLM output_handler: feed stats to the logger | ||
| # manager so LoggingStatLogger can periodically print KV cache / | ||
| # prefix cache hit rate, and PrometheusStatLogger can publish. | ||
| if self.logger_manager is not None: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The diffusion engine don't go into this branch. Do we have any plan for diffusion?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently, no idea about diffusion logger. Reusing vLLM's logger makes this PR simple. But something like KV cache isn't appropriate to diffusion. |
||
| try: | ||
| self.logger_manager.record( | ||
| engine_idx=stage_id, | ||
| scheduler_stats=raw_outputs.scheduler_stats, | ||
| iteration_stats=iteration_stats, | ||
| ) | ||
| except Exception: | ||
| logger.exception( | ||
| "[Orchestrator] stat logger record failed for stage-%s", | ||
| stage_id, | ||
| ) | ||
|
|
||
| return processed.request_outputs | ||
|
|
||
| async def _handle_add_request(self, msg: dict[str, Any]) -> None: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall this looks fine to me. If the
StatLoggerManagerconcurrency issue has been properly resolved, I don't have other blockers.One small nit: this seems to rely too heavily on the
stage0configuration, which feels somewhat awkward. Probably okay for now, but worth cleaning up later. cc @yinpeiqiAlso, it may be worth taking another look at the logging/stat system for the diffusion path in a follow-up as well, since it seems not fully covered by the current branch yet. @chickeyton