Skip to content

Commit 1244cd3

Browse files
committed
refactor: move metrics parsing options and rename HTTP queue metric
- Move get_parsing_options from grpc service to http metrics module
- Rename HTTP_QUEUE to HTTP_QUEUED_REQUESTS for clarity
- Update comment to specify 'first response' timing

Signed-off-by: Keiven Chang <[email protected]>
1 parent 217afc4 commit 1244cd3

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

lib/llm/src/grpc/service/kserve.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ use tokio::task::JoinHandle;
2121
use tokio_stream::{Stream, StreamExt};
2222
use tokio_util::sync::CancellationToken;
2323

24-
use crate::grpc::service::openai::{completion_response_stream, get_parsing_options};
24+
use crate::grpc::service::openai::completion_response_stream;
25+
use crate::http::service::metrics::get_parsing_options;
2526
use tonic::{Request, Response, Status, transport::Server};
2627

2728
use crate::protocols::openai::completions::{

lib/runtime/src/metrics/prometheus_names.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,8 @@ pub mod frontend_service {
4545
/// Total number of LLM requests processed
4646
pub const REQUESTS_TOTAL: &str = "requests_total";
4747

48-
/// Number of requests waiting in HTTP queue before receiving a response.
49-
/// This can measure the engine pool exhaustion if the engine is not able to process requests fast enough.
50-
pub const HTTP_QUEUE: &str = "http_queue";
48+
/// Number of requests waiting in HTTP queue before receiving the first response.
49+
pub const HTTP_QUEUED_REQUESTS: &str = "http_queued_requests";
5150

5251
/// Number of inflight requests going to the engine (vLLM, SGLang, ...)
5352
pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

0 commit comments

Comments
 (0)