Skip to content

[UX][Metrics] improving vllm-omni metrics#3042

Closed
allgather wants to merge 5 commits into
vllm-project:mainfrom
allgather:1
Closed

[UX][Metrics] improving vllm-omni metrics#3042
allgather wants to merge 5 commits into
vllm-project:mainfrom
allgather:1

Conversation

@allgather
Copy link
Copy Markdown

@allgather allgather commented Apr 22, 2026

Motivation

#3039 proposed changes for omni metrics based on pain points discovered in #2834

Changes

Changes are based off of the proposal in #3039

Testing

Unit tests passed.


from __future__ import annotations

import importlib.util
import logging
import sys
import types
from pathlib import Path

import pytest

# Mark every test in this module as a CPU-only core-model test.
pytestmark = [pytest.mark.core_model, pytest.mark.cpu]

# Repository root, resolved two directories above this test file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Source files loaded directly by path (see load_stats_module below),
# bypassing the normal import machinery.
UTILS_PATH = REPO_ROOT / "vllm_omni/metrics/utils.py"
STATS_PATH = REPO_ROOT / "vllm_omni/metrics/stats.py"


def load_stats_module():
    """Load ``vllm_omni.metrics.stats`` directly from its file path.

    Heavy dependencies (``prettytable``, ``vllm``) are replaced with minimal
    in-memory stubs so the stats module can be imported and exercised without
    the real packages installed.  The loaded module is cached in
    ``sys.modules`` so repeated calls return the same instance.

    Returns:
        The imported ``vllm_omni.metrics.stats`` module object.
    """
    # Reuse a previously loaded module so all tests share one instance.
    if "vllm_omni.metrics.stats" in sys.modules:
        return sys.modules["vllm_omni.metrics.stats"]

    prettytable_module = types.ModuleType("prettytable")

    class PrettyTable:
        # Minimal stand-in for prettytable.PrettyTable: records field names,
        # alignment settings, and rows, and renders them as pipe-joined lines.
        def __init__(self) -> None:
            self.field_names: list[str] = []
            self.align: dict[str, str] = {}
            self.rows: list[list[str]] = []

        def add_row(self, row: list[str]) -> None:
            self.rows.append(row)

        def get_string(self) -> str:
            # Header line followed by one line per row, columns joined by " | ".
            table_lines = [" | ".join(map(str, self.field_names))]
            table_lines.extend(" | ".join(map(str, row)) for row in self.rows)
            return "\n".join(table_lines)

    prettytable_module.PrettyTable = PrettyTable
    # setdefault: keep a real prettytable if one was already imported.
    sys.modules.setdefault("prettytable", prettytable_module)

    # Stub vllm.logger.init_logger with the stdlib logger factory.
    vllm_module = types.ModuleType("vllm")
    logger_module = types.ModuleType("vllm.logger")
    logger_module.init_logger = logging.getLogger
    vllm_module.logger = logger_module
    sys.modules.setdefault("vllm", vllm_module)
    # NOTE(review): unlike "vllm" above, this assignment is unconditional, so a
    # real vllm.logger already present in sys.modules would be clobbered by the
    # stub — confirm this asymmetry is intentional.
    sys.modules["vllm.logger"] = logger_module

    # Register namespace packages with __path__ so that intra-package imports
    # inside utils/stats resolve against the repo checkout.
    vllm_omni_pkg = sys.modules.setdefault("vllm_omni", types.ModuleType("vllm_omni"))
    vllm_omni_pkg.__path__ = [str(REPO_ROOT / "vllm_omni")]
    metrics_pkg = sys.modules.setdefault("vllm_omni.metrics", types.ModuleType("vllm_omni.metrics"))
    metrics_pkg.__path__ = [str(REPO_ROOT / "vllm_omni/metrics")]

    # utils is loaded (and published in sys.modules) before stats, since stats
    # imports from it.
    utils_spec = importlib.util.spec_from_file_location("vllm_omni.metrics.utils", UTILS_PATH)
    assert utils_spec is not None and utils_spec.loader is not None
    utils_module = importlib.util.module_from_spec(utils_spec)
    sys.modules["vllm_omni.metrics.utils"] = utils_module
    utils_spec.loader.exec_module(utils_module)

    stats_spec = importlib.util.spec_from_file_location("vllm_omni.metrics.stats", STATS_PATH)
    assert stats_spec is not None and stats_spec.loader is not None
    stats_module = importlib.util.module_from_spec(stats_spec)
    sys.modules["vllm_omni.metrics.stats"] = stats_module
    stats_spec.loader.exec_module(stats_module)
    return stats_module


def get_request_entry(table: list[dict], request_id: str) -> dict:
    """Return the first row of *table* whose ``request_id`` matches.

    Raises:
        AssertionError: if no row in *table* has the given ``request_id``.
    """
    match = next(
        (row for row in table if row.get("request_id") == request_id),
        None,
    )
    if match is None:
        raise AssertionError(f"request_id={request_id} not found")
    return match


def test_orchestrator_aggregator_reports_omni_timing_fields_and_line() -> None:
    """Verify OrchestratorAggregator's timing fields and the OmniTiming line.

    Seeds an aggregator with a single two-stage request, then checks the
    overall summary, the per-request e2e table entry, and the formatted
    per-request timing log line.
    """
    mod = load_stats_module()

    def make_stage_request(gen_ms: float, tokens_in: int, tokens_out: int):
        # Build a StageRequestStats with the shared batch/transfer fields fixed.
        return mod.StageRequestStats(
            batch_id=1,
            batch_size=1,
            num_tokens_in=tokens_in,
            num_tokens_out=tokens_out,
            stage_gen_time_ms=gen_ms,
            rx_transfer_bytes=0,
            rx_decode_time_ms=0.0,
            rx_in_flight_time_ms=0.0,
            stage_stats=mod.StageStats(),
        )

    aggregator = mod.OrchestratorAggregator(
        num_stages=2,
        log_stats=True,
        wall_start_ts=0.0,
        final_stage_id_for_e2e=1,
        stage_labels={0: "0:ar", 1: "1:diffusion"},
    )
    # Seed orchestrator-level timing state directly instead of driving it
    # through real requests.
    aggregator.input_preprocess_time_ms = 2796.015
    aggregator.build_add_request_message_time_ms = 3010.125
    aggregator.e2e_total_ms = 30189.544
    aggregator.e2e_total_tokens = 1281
    aggregator.e2e_count = 1
    aggregator.last_finish_ts = 32.985559
    aggregator.stage_first_ts[0] = 0.0
    aggregator.stage_last_ts[0] = 16.227201
    aggregator.stage_first_ts[1] = 16.227201
    aggregator.stage_last_ts[1] = 30.187782

    # Stage 0 produces tokens; stage 1 (diffusion) consumes them.
    aggregator.on_stage_metrics(0, "r1", make_stage_request(16227.201, 1281, 1281))
    aggregator.on_stage_metrics(1, "r1", make_stage_request(13960.581, 0, 0))
    aggregator.on_forward(0, 1, "r1", size_bytes=1024, tx_ms=0.78, used_shm=False)
    aggregator.e2e_events.append(
        mod.RequestE2EStats(
            request_id="r1",
            e2e_total_ms=30189.544,
            e2e_total_tokens=1281,
            transfers_total_time_ms=0.78,
            transfers_total_bytes=1024,
        )
    )

    summary = aggregator.build_and_log_summary()

    # The same timing fields are expected in both the overall summary and the
    # per-request e2e table entry.
    expected_timings = {
        "request_wall_time_ms": 32985.559,
        "input_preprocess_time_ms": 2796.015,
        "build_add_request_message_time_ms": 3010.125,
        "engine_pipeline_time_ms": 30189.544,
    }
    overall = summary["overall_summary"]
    for field, value in expected_timings.items():
        assert overall[field] == pytest.approx(value)

    e2e_entry = get_request_entry(summary["e2e_table"], "r1")
    for field, value in expected_timings.items():
        assert e2e_entry[field] == pytest.approx(value)

    assert aggregator.format_request_timing_line("r1") == (
        "[OmniTiming] req=r1 total=32.99s preprocess=2.80s engine=30.19s "
        "stages=[0:ar=16.23s,1:diffusion=13.96s] transfers=[0->1=0.78ms]"
    )



Signed-off-by: allgather <all2allops@gmail.com>
@chatgpt-codex-connector
Copy link
Copy Markdown

Codex usage limits have been reached for code reviews. Please check with the admins of this repo to increase the limits by adding credits.
Credits must be used to enable repository wide code reviews.

Signed-off-by: allgather <all2allops@gmail.com>
Copy link
Copy Markdown
Collaborator

@hsliuustc0106 hsliuustc0106 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BLOCKING:

  • Correctness — In async_omni_engine.py, both input_preprocess_time_ms and build_add_request_message_time_ms are set to the same variable (build_add_request_message_time_ms). RFC #3039 specifies these should be separate: input_preprocess_time_ms for InputProcessor.process_inputs() and build_add_request_message_time_ms for the full function.

  • Test Coverage — PR description states "I need someone to test bc I can't find available compute rn." For metrics changes that affect timing reporting, add a unit test verifying the timing format and field values are correct.

Signed-off-by: allgather <all2allops@gmail.com>
@allgather allgather requested a review from hsliuustc0106 April 22, 2026 21:28
@lishunyang12
Copy link
Copy Markdown
Collaborator

Thanks for your contribution! Would you mind showing log output before and after?

Signed-off-by: allgather <all2allops@gmail.com>
Signed-off-by: allgather <all2allops@gmail.com>
@allgather
Copy link
Copy Markdown
Author

allgather commented Apr 23, 2026

@lishunyang12 @hsliuustc0106

Tested GLM-Image, Qwen3 Omni, and Wan-2.2, all on 8xH100.

GLM-Image log output

Server command:

CUDA_VISIBLE_DEVICES=0,1 HF_HOME=$HF_HOME HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN HF_TOKEN=$HF_TOKEN vllm serve zai-org/GLM-Image --omni --port 8091 --log-stats

Prompt / request:

A small red robot holding a timing chart
POST /v1/chat/completions with height=1024, width=1024, num_inference_steps=20, true_cfg_scale=4.0, seed=42

OmniTiming:

(APIServer pid=10017) INFO 04-23 21:44:20 [omni_base.py:253] [OmniTiming] req=chatcmpl-a1a0742ab3ad9655 total=27.55s preprocess=2.69s engine=24.86s stages=[0:ar=19.01s,1:diffusion=5.85s]

StageTiming:

INFO 04-23 21:44:20 [diffusion_engine.py:177] [StageTiming stage=1 diffusion] total=5.83s preprocess=0.01ms exec=5.80s postprocess=28.10ms

Overall Summary:

(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] [Overall Summary]
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | Field                             |      Value |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_requests                      |          1 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | request_wall_time_ms              | 27,549.833 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | input_preprocess_time_ms          |  2,693.649 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | build_add_request_message_time_ms |  2,693.754 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | engine_pipeline_time_ms           | 24,856.184 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_wall_time_ms                  | 27,550.202 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_total_tokens                  |      1,300 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_avg_time_per_request_ms       | 27,550.202 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_avg_tokens_per_s              |     47.187 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_stage_0_wall_time_ms          | 19,006.905 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_stage_1_wall_time_ms          |  5,848.652 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] +-----------------------------------+------------+

Timing Composition:

(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] [Omni Timing Composition]
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] | Field   |              input_preprocess | engine_pipeline |                       request_wall |           request_message_build |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] | time_ms |                     2,693.649 |      24,856.184 |                         27,549.833 |                       2,693.754 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] | scope   | InputProcessor.process_inputs |  stage pipeline | input_preprocess + engine_pipeline | full _build_add_request_message |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] 

RequestE2EStats excerpt:

(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] [RequestE2EStats [request_id=chatcmpl-a1a0742ab3ad9655]]
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | Field                             |      Value |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | build_add_request_message_time_ms |  2,693.754 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | e2e_total_ms                      | 24,856.184 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | e2e_total_tokens                  |      1,300 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | engine_pipeline_time_ms           | 24,856.184 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | input_preprocess_time_ms          |  2,693.649 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | request_wall_time_ms              | 27,549.833 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:691] 
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:691] [StageRequestStats [request_id=chatcmpl-a1a0742ab3ad9655]]
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:691] +-------------------+------------+-------------+
Qwen3-Omni log output

Model: Qwen/Qwen3-Omni-30B-A3B-Instruct
Server command:

CUDA_VISIBLE_DEVICES=0,1 HF_HOME=$HF_HOME HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN HF_TOKEN=$HF_TOKEN vllm serve Qwen/Qwen3-Omni-30B-A3B-Instruct --omni --port 8091 --log-stats

Prompt / request:

Say one short sentence about timing instrumentation.
POST /v1/chat/completions with short text-only request and bounded sampling_params_list for thinker/talker/code2wav

OmniTiming:

(APIServer pid=12672) INFO 04-23 21:47:29 [omni_base.py:253] [OmniTiming] req=chatcmpl-ac59cea85b422f04 total=1.40s preprocess=0.01s engine=1.39s stages=[0:thinker=0.22s,1:talker=1.33s,2:code2wav=1.39s]

Overall Summary:

(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] [Overall Summary]
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | Field                             |     Value |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_requests                      |         1 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | request_wall_time_ms              | 1,396.428 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | input_preprocess_time_ms          |     8.002 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | build_add_request_message_time_ms |     8.120 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | engine_pipeline_time_ms           | 1,388.426 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_wall_time_ms                  | 1,396.649 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_total_tokens                  |       148 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_avg_time_per_request_ms       | 1,396.649 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_avg_tokens_per_s              |   105.968 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_stage_0_wall_time_ms          |   224.958 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_stage_1_wall_time_ms          | 1,331.344 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_stage_2_wall_time_ms          | 1,387.487 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] +-----------------------------------+-----------+

Timing Composition:

(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] [Omni Timing Composition]
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] | Field   |              input_preprocess | engine_pipeline |                       request_wall |           request_message_build |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] | time_ms |                         8.002 |       1,388.426 |                          1,396.428 |                           8.120 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] | scope   | InputProcessor.process_inputs |  stage pipeline | input_preprocess + engine_pipeline | full _build_add_request_message |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] 

RequestE2EStats excerpt:

(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] [RequestE2EStats [request_id=chatcmpl-ac59cea85b422f04]]
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | Field                             |     Value |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | build_add_request_message_time_ms |     8.120 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | e2e_total_ms                      | 1,388.426 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | e2e_total_tokens                  |       148 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | engine_pipeline_time_ms           | 1,388.426 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | input_preprocess_time_ms          |     8.002 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | request_wall_time_ms              | 1,396.428 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:691] 
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:691] [StageRequestStats [request_id=chatcmpl-ac59cea85b422f04]]
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:691] +-------------------+-----------+-----------+------------+
WAN2.2 T2V log output

Model: Wan-AI/Wan2.2-T2V-A14B-Diffusers
Artifact: final_pr_wan22.mp4
Server command:

CUDA_VISIBLE_DEVICES=0 HF_HOME=$HF_HOME HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN HF_TOKEN=$HF_TOKEN vllm serve Wan-AI/Wan2.2-T2V-A14B-Diffusers --omni --port 8091 --log-stats

Prompt / request:

A compact robot checks a timing dashboard
POST /v1/videos/sync with size=832x480, num_frames=33, fps=16, num_inference_steps=4, seed=42

StageTiming:

(APIServer pid=14495) INFO 04-23 21:50:45 [diffusion_engine.py:177] [StageTiming stage=0 diffusion] total=7.70s preprocess=0.01ms exec=7.61s postprocess=89.97ms

Overall Summary:

(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] [Overall Summary]
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | Field                             |     Value |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | e2e_requests                      |         1 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | request_wall_time_ms              | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | input_preprocess_time_ms          |     0.000 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | build_add_request_message_time_ms |     0.002 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | engine_pipeline_time_ms           | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | e2e_wall_time_ms                  | 7,712.232 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | e2e_avg_time_per_request_ms       | 7,712.232 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | e2e_stage_0_wall_time_ms          | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] +-----------------------------------+-----------+

Timing Composition:

(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] [Omni Timing Composition]
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] | Field   |              input_preprocess | engine_pipeline |                       request_wall |           request_message_build |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] | time_ms |                         0.000 |       7,712.092 |                          7,712.092 |                           0.002 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] | scope   | InputProcessor.process_inputs |  stage pipeline | input_preprocess + engine_pipeline | full _build_add_request_message |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] 

RequestE2EStats excerpt:

(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] [RequestE2EStats [request_id=video_sync-ac6b136d7c23149d]]
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | Field                             |     Value |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | build_add_request_message_time_ms |     0.002 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | e2e_total_ms                      | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | engine_pipeline_time_ms           | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | request_wall_time_ms              | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] 
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] [StageRequestStats [request_id=video_sync-ac6b136d7c23149d]]
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] +-------------------+-------------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] | Field             | 0:diffusion |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] +-------------------+-------------+

@hsliuustc0106
Copy link
Copy Markdown
Collaborator

can we follow the style of #3149 for metrics?

@hsliuustc0106
Copy link
Copy Markdown
Collaborator

can you coordinate with @bjf-frz' PR #3069 ? I think you are working on the same issue

@allgather
Copy link
Copy Markdown
Author

@hsliuustc0106 this topic has gotten very competitive. I think #3069 is on its way to getting merged, so I'll close this one — that's fine. Thanks for the review effort; I appreciate your suggestions as well.

@allgather allgather closed this Apr 28, 2026
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants