Count HTTP responses by status code.

ResponseLogger::on_response reads the numeric status code once, passes it to
RouterMetrics::record_http_status_code, and reuses the same value for the
span's "status_code" field. The new counter "sgl_router_http_responses_total"
carries a "status_code" label and is described in init_metrics.

diff --git a/sgl-model-gateway/src/middleware.rs b/sgl-model-gateway/src/middleware.rs
index b77a896e4800..3b5c5321f699 100644
--- a/sgl-model-gateway/src/middleware.rs
+++ b/sgl-model-gateway/src/middleware.rs
@@ -332,9 +332,12 @@ impl Default for ResponseLogger {
 impl OnResponse for ResponseLogger {
     fn on_response(self, response: &Response, latency: Duration, span: &Span) {
         let status = response.status();
+        let status_code = status.as_u16();
+
+        RouterMetrics::record_http_status_code(status_code);
 
         // Record these in the span for structured logging/observability tools
-        span.record("status_code", status.as_u16());
+        span.record("status_code", status_code);
 
         // Use microseconds as integer to avoid format! string allocation
         span.record("latency", latency.as_micros() as u64);
diff --git a/sgl-model-gateway/src/observability/metrics.rs b/sgl-model-gateway/src/observability/metrics.rs
index 3f1d4b09e234..2fa4fcb6641b 100644
--- a/sgl-model-gateway/src/observability/metrics.rs
+++ b/sgl-model-gateway/src/observability/metrics.rs
@@ -263,6 +263,11 @@ pub fn init_metrics() {
         "sgl_tokenizer_factory_load_duration_seconds",
         "Time to load and initialize tokenizer"
     );
+
+    describe_counter!(
+        "sgl_router_http_responses_total",
+        "Total number of HTTP responses by status code"
+    );
 }
 
 pub fn start_prometheus(config: PrometheusConfig) {
@@ -563,6 +568,13 @@ impl RouterMetrics {
     pub fn record_job_shutdown_rejected() {
         counter!("sgl_router_job_shutdown_rejected_total").increment(1);
     }
+
+    pub fn record_http_status_code(status_code: u16) {
+        counter!("sgl_router_http_responses_total",
+            "status_code" => status_code.to_string()
+        )
+        .increment(1);
+    }
 }
 
 impl TokenizerMetrics {