From 8b3bcd60f63bf6e5de387d5264cfd33affbba59a Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Tue, 6 Jan 2026 06:53:00 +0000 Subject: [PATCH] [Chore] Remove more V0 dead code Signed-off-by: DarkLight1337 --- vllm/outputs.py | 3 +-- vllm/sequence.py | 34 ---------------------------------- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/vllm/outputs.py b/vllm/outputs.py index cdfe06f1c7fa..74e534ef0c07 100644 --- a/vllm/outputs.py +++ b/vllm/outputs.py @@ -13,7 +13,6 @@ from vllm.logprobs import PromptLogprobs, SampleLogprobs from vllm.lora.request import LoRARequest from vllm.multimodal.inputs import MultiModalPlaceholderDict -from vllm.sequence import RequestMetrics from vllm.v1.metrics.stats import RequestStateStats logger = init_logger(__name__) @@ -113,7 +112,7 @@ def __init__( prompt_logprobs: PromptLogprobs | None, outputs: list[CompletionOutput], finished: bool, - metrics: RequestMetrics | RequestStateStats | None = None, + metrics: RequestStateStats | None = None, lora_request: LoRARequest | None = None, encoder_prompt: str | None = None, encoder_prompt_token_ids: list[int] | None = None, diff --git a/vllm/sequence.py b/vllm/sequence.py index 6d20ca9aac22..3e12f148b22e 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -12,40 +12,6 @@ else: KVConnectorOutput = Any -VLLM_TOKEN_ID_ARRAY_TYPE = "l" - -VLLM_INVALID_TOKEN_ID = -1 - - -@dataclass -class RequestMetrics: - """Metrics associated with a request. - - Attributes: - arrival_time: The time when the request arrived. - first_scheduled_time: The time when the request was first scheduled. - first_token_time: The time when the first token was generated. - time_in_queue: The time the request spent in the queue. - finished_time: The time when the request was finished. - scheduler_time: The time spent in the scheduler when this request was - being considered by the scheduler. - model_forward_time: The time spent in the model forward pass when this - request was in the batch. - model_execute_time: The time spent in the model execute function. This - will include model forward, block/sync across - workers, cpu-gpu sync time and sampling time. - """ - - arrival_time: float - last_token_time: float - first_scheduled_time: float | None - first_token_time: float | None - time_in_queue: float | None - finished_time: float | None = None - scheduler_time: float | None = None - model_forward_time: float | None = None - model_execute_time: float | None = None - # cannot use msgspec.Struct here because Dynamo does not support it @dataclass