Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions vllm/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from vllm.logprobs import PromptLogprobs, SampleLogprobs
from vllm.lora.request import LoRARequest
from vllm.multimodal.inputs import MultiModalPlaceholderDict
from vllm.sequence import RequestMetrics
from vllm.v1.metrics.stats import RequestStateStats

logger = init_logger(__name__)
Expand Down Expand Up @@ -113,7 +112,7 @@ def __init__(
prompt_logprobs: PromptLogprobs | None,
outputs: list[CompletionOutput],
finished: bool,
metrics: RequestMetrics | RequestStateStats | None = None,
metrics: RequestStateStats | None = None,
lora_request: LoRARequest | None = None,
encoder_prompt: str | None = None,
encoder_prompt_token_ids: list[int] | None = None,
Expand Down
34 changes: 0 additions & 34 deletions vllm/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,6 @@
else:
KVConnectorOutput = Any

# Type code for token-id arrays. "l" is the signed-long type code in the
# standard `array` module — presumably this is passed to `array.array`;
# TODO confirm against callers.
VLLM_TOKEN_ID_ARRAY_TYPE = "l"

# Placeholder for an invalid token id. NOTE(review): presumably chosen
# because -1 is never a real vocabulary index — confirm against callers.
VLLM_INVALID_TOKEN_ID = -1


@dataclass
class RequestMetrics:
"""Metrics associated with a request.

Attributes:
arrival_time: The time when the request arrived.
first_scheduled_time: The time when the request was first scheduled.
first_token_time: The time when the first token was generated.
time_in_queue: The time the request spent in the queue.
finished_time: The time when the request was finished.
scheduler_time: The time spent in the scheduler when this request was
being considered by the scheduler.
model_forward_time: The time spent in the model forward pass when this
request was in the batch.
model_execute_time: The time spent in the model execute function. This
will include model forward, block/sync across
workers, cpu-gpu sync time and sampling time.
"""

arrival_time: float
last_token_time: float
first_scheduled_time: float | None
first_token_time: float | None
time_in_queue: float | None
finished_time: float | None = None
scheduler_time: float | None = None
model_forward_time: float | None = None
model_execute_time: float | None = None


# cannot use msgspec.Struct here because Dynamo does not support it
@dataclass
Expand Down