vllm-project · DarkLight1337 · Jan 6, 2026 · Jan 6, 2026
diff --git a/vllm/outputs.py b/vllm/outputs.py
@@ -13,7 +13,6 @@
 from vllm.logprobs import PromptLogprobs, SampleLogprobs
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.inputs import MultiModalPlaceholderDict
-from vllm.sequence import RequestMetrics
 from vllm.v1.metrics.stats import RequestStateStats
 
 logger = init_logger(__name__)
@@ -113,7 +112,7 @@ def __init__(
         prompt_logprobs: PromptLogprobs | None,
         outputs: list[CompletionOutput],
         finished: bool,
-        metrics: RequestMetrics | RequestStateStats | None = None,
+        metrics: RequestStateStats | None = None,
         lora_request: LoRARequest | None = None,
         encoder_prompt: str | None = None,
         encoder_prompt_token_ids: list[int] | None = None,

diff --git a/vllm/sequence.py b/vllm/sequence.py
@@ -12,40 +12,6 @@
 else:
     KVConnectorOutput = Any
 
-VLLM_TOKEN_ID_ARRAY_TYPE = "l"
-
-VLLM_INVALID_TOKEN_ID = -1
-
-
-@dataclass
-class RequestMetrics:
-    """Metrics associated with a request.
-
-    Attributes:
-        arrival_time: The time when the request arrived.
-        first_scheduled_time: The time when the request was first scheduled.
-        first_token_time: The time when the first token was generated.
-        time_in_queue: The time the request spent in the queue.
-        finished_time: The time when the request was finished.
-        scheduler_time: The time spent in the scheduler when this request was
-                        being considered by the scheduler.
-        model_forward_time: The time spent in the model forward pass when this
-                            request was in the batch.
-        model_execute_time: The time spent in the model execute function. This
-                            will include model forward, block/sync across
-                            workers, cpu-gpu sync time and sampling time.
-    """
-
-    arrival_time: float
-    last_token_time: float
-    first_scheduled_time: float | None
-    first_token_time: float | None
-    time_in_queue: float | None
-    finished_time: float | None = None
-    scheduler_time: float | None = None
-    model_forward_time: float | None = None
-    model_execute_time: float | None = None
-
 
 # cannot use msgspec.Struct here because Dynamo does not support it
 @dataclass