Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion sgl-model-gateway/src/middleware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,12 @@ impl Default for ResponseLogger {
impl<B> OnResponse<B> for ResponseLogger {
fn on_response(self, response: &Response<B>, latency: Duration, span: &Span) {
let status = response.status();
let status_code = status.as_u16();

RouterMetrics::record_http_status_code(status_code);

// Record these in the span for structured logging/observability tools
span.record("status_code", status.as_u16());
span.record("status_code", status_code);
// Use microseconds as integer to avoid format! string allocation
span.record("latency", latency.as_micros() as u64);

Expand Down
12 changes: 12 additions & 0 deletions sgl-model-gateway/src/observability/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,11 @@ pub fn init_metrics() {
"sgl_tokenizer_factory_load_duration_seconds",
"Time to load and initialize tokenizer"
);

describe_counter!(
"sgl_router_http_responses_total",
"Total number of HTTP responses by status code"
);
}

pub fn start_prometheus(config: PrometheusConfig) {
Expand Down Expand Up @@ -563,6 +568,13 @@ impl RouterMetrics {
/// Bumps the `sgl_router_job_shutdown_rejected_total` counter by one.
pub fn record_job_shutdown_rejected() {
    let rejected_total = counter!("sgl_router_job_shutdown_rejected_total");
    rejected_total.increment(1);
}

/// Counts one HTTP response on `sgl_router_http_responses_total`.
///
/// The counter carries a `status_code` label whose value is the decimal
/// string form of `status_code`.
pub fn record_http_status_code(status_code: u16) {
    // Build the label value first so the macro invocation stays on one line.
    let code_label = status_code.to_string();
    counter!("sgl_router_http_responses_total", "status_code" => code_label).increment(1);
}
}

impl TokenizerMetrics {
Expand Down
Loading