Skip to content

[UX][Metrics] improving vllm-omni metrics#3042

Closed
allgather wants to merge 5 commits into
vllm-project:mainfrom
allgather:1
Closed

[UX][Metrics] improving vllm-omni metrics#3042
allgather wants to merge 5 commits into
vllm-project:mainfrom
allgather:1

Conversation

@allgather
Copy link
Copy Markdown

@allgather allgather commented Apr 22, 2026

Motivation

#3039 proposed changes for omni metrics based on pain points discovered in #2834

Changes

Changes are based off of the proposal in #3039

Testing

Unit tests passed.


from __future__ import annotations

import importlib.util
import logging
import sys
import types
from pathlib import Path

import pytest

# Mark every test in this module as a CPU-only core-model test.
pytestmark = [pytest.mark.core_model, pytest.mark.cpu]

# Repository root, resolved two directories above this test file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Source files loaded directly by path (see load_stats_module below),
# bypassing the normal import machinery.
UTILS_PATH = REPO_ROOT / "vllm_omni/metrics/utils.py"
STATS_PATH = REPO_ROOT / "vllm_omni/metrics/stats.py"


def load_stats_module():
    """Load ``vllm_omni.metrics.stats`` directly from its file path.

    Heavy dependencies (``prettytable``, ``vllm``) are replaced with minimal
    in-memory stubs so the stats module can be imported and exercised without
    the real packages installed.  The loaded module is cached in
    ``sys.modules`` so repeated calls return the same instance.

    Returns:
        The imported ``vllm_omni.metrics.stats`` module object.
    """
    # Reuse a previously loaded module so all tests share one instance.
    if "vllm_omni.metrics.stats" in sys.modules:
        return sys.modules["vllm_omni.metrics.stats"]

    prettytable_module = types.ModuleType("prettytable")

    class PrettyTable:
        # Minimal stand-in for prettytable.PrettyTable: records field names,
        # alignment settings, and rows, and renders them as pipe-joined lines.
        def __init__(self) -> None:
            self.field_names: list[str] = []
            self.align: dict[str, str] = {}
            self.rows: list[list[str]] = []

        def add_row(self, row: list[str]) -> None:
            self.rows.append(row)

        def get_string(self) -> str:
            # Header line followed by one line per row, columns joined by " | ".
            table_lines = [" | ".join(map(str, self.field_names))]
            table_lines.extend(" | ".join(map(str, row)) for row in self.rows)
            return "\n".join(table_lines)

    prettytable_module.PrettyTable = PrettyTable
    # setdefault: keep a real prettytable if one was already imported.
    sys.modules.setdefault("prettytable", prettytable_module)

    # Stub vllm.logger.init_logger with the stdlib logger factory.
    vllm_module = types.ModuleType("vllm")
    logger_module = types.ModuleType("vllm.logger")
    logger_module.init_logger = logging.getLogger
    vllm_module.logger = logger_module
    sys.modules.setdefault("vllm", vllm_module)
    # NOTE(review): unlike "vllm" above, this assignment is unconditional, so a
    # real vllm.logger already present in sys.modules would be clobbered by the
    # stub — confirm this asymmetry is intentional.
    sys.modules["vllm.logger"] = logger_module

    # Register namespace packages with __path__ so that intra-package imports
    # inside utils/stats resolve against the repo checkout.
    vllm_omni_pkg = sys.modules.setdefault("vllm_omni", types.ModuleType("vllm_omni"))
    vllm_omni_pkg.__path__ = [str(REPO_ROOT / "vllm_omni")]
    metrics_pkg = sys.modules.setdefault("vllm_omni.metrics", types.ModuleType("vllm_omni.metrics"))
    metrics_pkg.__path__ = [str(REPO_ROOT / "vllm_omni/metrics")]

    # utils is loaded (and published in sys.modules) before stats, since stats
    # imports from it.
    utils_spec = importlib.util.spec_from_file_location("vllm_omni.metrics.utils", UTILS_PATH)
    assert utils_spec is not None and utils_spec.loader is not None
    utils_module = importlib.util.module_from_spec(utils_spec)
    sys.modules["vllm_omni.metrics.utils"] = utils_module
    utils_spec.loader.exec_module(utils_module)

    stats_spec = importlib.util.spec_from_file_location("vllm_omni.metrics.stats", STATS_PATH)
    assert stats_spec is not None and stats_spec.loader is not None
    stats_module = importlib.util.module_from_spec(stats_spec)
    sys.modules["vllm_omni.metrics.stats"] = stats_module
    stats_spec.loader.exec_module(stats_module)
    return stats_module


def get_request_entry(table: list[dict], request_id: str) -> dict:
    """Return the first row of *table* whose ``request_id`` matches.

    Raises:
        AssertionError: if no row in *table* has the given ``request_id``.
    """
    match = next(
        (row for row in table if row.get("request_id") == request_id),
        None,
    )
    if match is None:
        raise AssertionError(f"request_id={request_id} not found")
    return match


def test_orchestrator_aggregator_reports_omni_timing_fields_and_line() -> None:
    """Verify OrchestratorAggregator's timing fields and the OmniTiming line.

    Seeds an aggregator with a single two-stage request, then checks the
    overall summary, the per-request e2e table entry, and the formatted
    per-request timing log line.
    """
    mod = load_stats_module()

    def make_stage_request(gen_ms: float, tokens_in: int, tokens_out: int):
        # Build a StageRequestStats with the shared batch/transfer fields fixed.
        return mod.StageRequestStats(
            batch_id=1,
            batch_size=1,
            num_tokens_in=tokens_in,
            num_tokens_out=tokens_out,
            stage_gen_time_ms=gen_ms,
            rx_transfer_bytes=0,
            rx_decode_time_ms=0.0,
            rx_in_flight_time_ms=0.0,
            stage_stats=mod.StageStats(),
        )

    aggregator = mod.OrchestratorAggregator(
        num_stages=2,
        log_stats=True,
        wall_start_ts=0.0,
        final_stage_id_for_e2e=1,
        stage_labels={0: "0:ar", 1: "1:diffusion"},
    )
    # Seed orchestrator-level timing state directly instead of driving it
    # through real requests.
    aggregator.input_preprocess_time_ms = 2796.015
    aggregator.build_add_request_message_time_ms = 3010.125
    aggregator.e2e_total_ms = 30189.544
    aggregator.e2e_total_tokens = 1281
    aggregator.e2e_count = 1
    aggregator.last_finish_ts = 32.985559
    aggregator.stage_first_ts[0] = 0.0
    aggregator.stage_last_ts[0] = 16.227201
    aggregator.stage_first_ts[1] = 16.227201
    aggregator.stage_last_ts[1] = 30.187782

    # Stage 0 produces tokens; stage 1 (diffusion) consumes them.
    aggregator.on_stage_metrics(0, "r1", make_stage_request(16227.201, 1281, 1281))
    aggregator.on_stage_metrics(1, "r1", make_stage_request(13960.581, 0, 0))
    aggregator.on_forward(0, 1, "r1", size_bytes=1024, tx_ms=0.78, used_shm=False)
    aggregator.e2e_events.append(
        mod.RequestE2EStats(
            request_id="r1",
            e2e_total_ms=30189.544,
            e2e_total_tokens=1281,
            transfers_total_time_ms=0.78,
            transfers_total_bytes=1024,
        )
    )

    summary = aggregator.build_and_log_summary()

    # The same timing fields are expected in both the overall summary and the
    # per-request e2e table entry.
    expected_timings = {
        "request_wall_time_ms": 32985.559,
        "input_preprocess_time_ms": 2796.015,
        "build_add_request_message_time_ms": 3010.125,
        "engine_pipeline_time_ms": 30189.544,
    }
    overall = summary["overall_summary"]
    for field, value in expected_timings.items():
        assert overall[field] == pytest.approx(value)

    e2e_entry = get_request_entry(summary["e2e_table"], "r1")
    for field, value in expected_timings.items():
        assert e2e_entry[field] == pytest.approx(value)

    assert aggregator.format_request_timing_line("r1") == (
        "[OmniTiming] req=r1 total=32.99s preprocess=2.80s engine=30.19s "
        "stages=[0:ar=16.23s,1:diffusion=13.96s] transfers=[0->1=0.78ms]"
    )



Signed-off-by: allgather <all2allops@gmail.com>
@chatgpt-codex-connector
Copy link
Copy Markdown

Codex usage limits have been reached for code reviews. Please check with the admins of this repo to increase the limits by adding credits.
Credits must be used to enable repository wide code reviews.

Signed-off-by: allgather <all2allops@gmail.com>
Copy link
Copy Markdown
Collaborator

@hsliuustc0106 hsliuustc0106 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BLOCKING:

  • Correctness — In async_omni_engine.py, both input_preprocess_time_ms and build_add_request_message_time_ms are set to the same variable (build_add_request_message_time_ms). RFC #3039 specifies these should be separate: input_preprocess_time_ms for InputProcessor.process_inputs() and build_add_request_message_time_ms for the full function.

  • Test Coverage — PR description states "I need someone to test bc I can't find available compute rn." For metrics changes that affect timing reporting, add a unit test verifying the timing format and field values are correct.

Signed-off-by: allgather <all2allops@gmail.com>
@allgather allgather requested a review from hsliuustc0106 April 22, 2026 21:28
@lishunyang12
Copy link
Copy Markdown
Collaborator

Thanks for your contribution! Would you mind showing log output before and after?

Signed-off-by: allgather <all2allops@gmail.com>
Signed-off-by: allgather <all2allops@gmail.com>
@allgather
Copy link
Copy Markdown
Author

allgather commented Apr 23, 2026

@lishunyang12 @hsliuustc0106

Tested GLM-Image, Qwen3 Omni, and Wan-2.2, all on 8xH100.

GLM-Image log output

Server command:

CUDA_VISIBLE_DEVICES=0,1 HF_HOME=$HF_HOME HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN HF_TOKEN=$HF_TOKEN vllm serve zai-org/GLM-Image --omni --port 8091 --log-stats

Prompt / request:

A small red robot holding a timing chart
POST /v1/chat/completions with height=1024, width=1024, num_inference_steps=20, true_cfg_scale=4.0, seed=42

OmniTiming:

(APIServer pid=10017) INFO 04-23 21:44:20 [omni_base.py:253] [OmniTiming] req=chatcmpl-a1a0742ab3ad9655 total=27.55s preprocess=2.69s engine=24.86s stages=[0:ar=19.01s,1:diffusion=5.85s]

StageTiming:

INFO 04-23 21:44:20 [diffusion_engine.py:177] [StageTiming stage=1 diffusion] total=5.83s preprocess=0.01ms exec=5.80s postprocess=28.10ms

Overall Summary:

(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] [Overall Summary]
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | Field                             |      Value |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_requests                      |          1 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | request_wall_time_ms              | 27,549.833 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | input_preprocess_time_ms          |  2,693.649 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | build_add_request_message_time_ms |  2,693.754 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | engine_pipeline_time_ms           | 24,856.184 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_wall_time_ms                  | 27,550.202 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_total_tokens                  |      1,300 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_avg_time_per_request_ms       | 27,550.202 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_avg_tokens_per_s              |     47.187 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_stage_0_wall_time_ms          | 19,006.905 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] | e2e_stage_1_wall_time_ms          |  5,848.652 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:588] +-----------------------------------+------------+

Timing Composition:

(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] [Omni Timing Composition]
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] | Field   |              input_preprocess | engine_pipeline |                       request_wall |           request_message_build |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] | time_ms |                     2,693.649 |      24,856.184 |                         27,549.833 |                       2,693.754 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] | scope   | InputProcessor.process_inputs |  stage pipeline | input_preprocess + engine_pipeline | full _build_add_request_message |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] 

RequestE2EStats excerpt:

(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] [RequestE2EStats [request_id=chatcmpl-a1a0742ab3ad9655]]
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | Field                             |      Value |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | build_add_request_message_time_ms |  2,693.754 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | e2e_total_ms                      | 24,856.184 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | e2e_total_tokens                  |      1,300 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | engine_pipeline_time_ms           | 24,856.184 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | input_preprocess_time_ms          |  2,693.649 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] | request_wall_time_ms              | 27,549.833 |
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:634] +-----------------------------------+------------+
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:691] 
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:691] [StageRequestStats [request_id=chatcmpl-a1a0742ab3ad9655]]
(APIServer pid=10017) INFO 04-23 21:44:20 [stats.py:691] +-------------------+------------+-------------+
Qwen3-Omni log output

Model: Qwen/Qwen3-Omni-30B-A3B-Instruct
Server command:

CUDA_VISIBLE_DEVICES=0,1 HF_HOME=$HF_HOME HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN HF_TOKEN=$HF_TOKEN vllm serve Qwen/Qwen3-Omni-30B-A3B-Instruct --omni --port 8091 --log-stats

Prompt / request:

Say one short sentence about timing instrumentation.
POST /v1/chat/completions with short text-only request and bounded sampling_params_list for thinker/talker/code2wav

OmniTiming:

(APIServer pid=12672) INFO 04-23 21:47:29 [omni_base.py:253] [OmniTiming] req=chatcmpl-ac59cea85b422f04 total=1.40s preprocess=0.01s engine=1.39s stages=[0:thinker=0.22s,1:talker=1.33s,2:code2wav=1.39s]

Overall Summary:

(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] [Overall Summary]
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | Field                             |     Value |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_requests                      |         1 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | request_wall_time_ms              | 1,396.428 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | input_preprocess_time_ms          |     8.002 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | build_add_request_message_time_ms |     8.120 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | engine_pipeline_time_ms           | 1,388.426 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_wall_time_ms                  | 1,396.649 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_total_tokens                  |       148 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_avg_time_per_request_ms       | 1,396.649 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_avg_tokens_per_s              |   105.968 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_stage_0_wall_time_ms          |   224.958 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_stage_1_wall_time_ms          | 1,331.344 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] | e2e_stage_2_wall_time_ms          | 1,387.487 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:588] +-----------------------------------+-----------+

Timing Composition:

(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] [Omni Timing Composition]
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] | Field   |              input_preprocess | engine_pipeline |                       request_wall |           request_message_build |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] | time_ms |                         8.002 |       1,388.426 |                          1,396.428 |                           8.120 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] | scope   | InputProcessor.process_inputs |  stage pipeline | input_preprocess + engine_pipeline | full _build_add_request_message |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] 

RequestE2EStats excerpt:

(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] [RequestE2EStats [request_id=chatcmpl-ac59cea85b422f04]]
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | Field                             |     Value |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | build_add_request_message_time_ms |     8.120 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | e2e_total_ms                      | 1,388.426 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | e2e_total_tokens                  |       148 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | engine_pipeline_time_ms           | 1,388.426 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | input_preprocess_time_ms          |     8.002 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] | request_wall_time_ms              | 1,396.428 |
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:691] 
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:691] [StageRequestStats [request_id=chatcmpl-ac59cea85b422f04]]
(APIServer pid=12672) INFO 04-23 21:47:29 [stats.py:691] +-------------------+-----------+-----------+------------+
WAN2.2 T2V log output

Model: Wan-AI/Wan2.2-T2V-A14B-Diffusers
Artifact: final_pr_wan22.mp4
Server command:

CUDA_VISIBLE_DEVICES=0 HF_HOME=$HF_HOME HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN HF_TOKEN=$HF_TOKEN vllm serve Wan-AI/Wan2.2-T2V-A14B-Diffusers --omni --port 8091 --log-stats

Prompt / request:

A compact robot checks a timing dashboard
POST /v1/videos/sync with size=832x480, num_frames=33, fps=16, num_inference_steps=4, seed=42

StageTiming:

(APIServer pid=14495) INFO 04-23 21:50:45 [diffusion_engine.py:177] [StageTiming stage=0 diffusion] total=7.70s preprocess=0.01ms exec=7.61s postprocess=89.97ms

Overall Summary:

(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] [Overall Summary]
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | Field                             |     Value |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | e2e_requests                      |         1 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | request_wall_time_ms              | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | input_preprocess_time_ms          |     0.000 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | build_add_request_message_time_ms |     0.002 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | engine_pipeline_time_ms           | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | e2e_wall_time_ms                  | 7,712.232 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | e2e_avg_time_per_request_ms       | 7,712.232 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] | e2e_stage_0_wall_time_ms          | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:588] +-----------------------------------+-----------+

Timing Composition:

(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] [Omni Timing Composition]
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] | Field   |              input_preprocess | engine_pipeline |                       request_wall |           request_message_build |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] | time_ms |                         0.000 |       7,712.092 |                          7,712.092 |                           0.002 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] | scope   | InputProcessor.process_inputs |  stage pipeline | input_preprocess + engine_pipeline | full _build_add_request_message |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:592] +---------+-------------------------------+-----------------+------------------------------------+---------------------------------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] 

RequestE2EStats excerpt:

(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] [RequestE2EStats [request_id=video_sync-ac6b136d7c23149d]]
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | Field                             |     Value |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | build_add_request_message_time_ms |     0.002 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | e2e_total_ms                      | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | engine_pipeline_time_ms           | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] | request_wall_time_ms              | 7,712.092 |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:634] +-----------------------------------+-----------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] 
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] [StageRequestStats [request_id=video_sync-ac6b136d7c23149d]]
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] +-------------------+-------------+
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] | Field             | 0:diffusion |
(APIServer pid=14495) INFO 04-23 21:50:45 [stats.py:691] +-------------------+-------------+

@hsliuustc0106
Copy link
Copy Markdown
Collaborator

can we follow the style of #3149 for metrics?

@hsliuustc0106
Copy link
Copy Markdown
Collaborator

can you coordinate with @bjf-frz' PR #3069 ? I think you are working on the same issue

@allgather
Copy link
Copy Markdown
Author

@hsliuustc0106 this topic has gotten very competitive. I think #3069 is on its way to getting merged, so I'll close this one — that's fine. Thanks for the review effort; I appreciate your suggestions as well.

@allgather allgather closed this Apr 28, 2026
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants