diff --git a/lib/llm/src/http/service/health.rs b/lib/llm/src/http/service/health.rs index 84ed629056..c3dfdfcd63 100644 --- a/lib/llm/src/http/service/health.rs +++ b/lib/llm/src/http/service/health.rs @@ -37,17 +37,44 @@ pub fn health_check_router( state: Arc, path: Option, ) -> (Vec, Router) { - let path = path.unwrap_or_else(|| "/health".to_string()); + let health_path = path.unwrap_or_else(|| "/health".to_string()); - let docs: Vec = vec![RouteDoc::new(Method::GET, &path)]; + let docs: Vec = vec![RouteDoc::new(Method::GET, &health_path)]; let router = Router::new() - .route(&path, get(health_handler)) + .route(&health_path, get(health_handler)) .with_state(state); (docs, router) } +pub fn live_check_router( + state: Arc, + path: Option, +) -> (Vec, Router) { + let live_path = path.unwrap_or_else(|| "/live".to_string()); + + let docs: Vec = vec![RouteDoc::new(Method::GET, &live_path)]; + + let router = Router::new() + .route(&live_path, get(live_handler)) + .with_state(state); + + (docs, router) +} + +async fn live_handler( + axum::extract::State(_state): axum::extract::State>, +) -> impl IntoResponse { + ( + StatusCode::OK, + Json(json!({ + "status": "live", + "message": "Service is live" + })), + ) +} + async fn health_handler( axum::extract::State(state): axum::extract::State>, ) -> impl IntoResponse { diff --git a/lib/llm/src/http/service/service_v2.rs b/lib/llm/src/http/service/service_v2.rs index 71524db363..0b2af7763c 100644 --- a/lib/llm/src/http/service/service_v2.rs +++ b/lib/llm/src/http/service/service_v2.rs @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +use std::env::var; use std::sync::Arc; use std::time::Duration; @@ -132,6 +133,23 @@ impl HttpService { } } +/// Environment variable to set the metrics endpoint path (default: `/metrics`) +static HTTP_SVC_METRICS_PATH_ENV: &str = "DYN_HTTP_SVC_METRICS_PATH"; +/// Environment variable to set the models endpoint path (default: `/v1/models`) +static HTTP_SVC_MODELS_PATH_ENV: &str = "DYN_HTTP_SVC_MODELS_PATH"; +/// Environment variable to set the health endpoint path (default: `/health`) +static HTTP_SVC_HEALTH_PATH_ENV: &str = "DYN_HTTP_SVC_HEALTH_PATH"; +/// Environment variable to set the live endpoint path (default: `/live`) +static HTTP_SVC_LIVE_PATH_ENV: &str = "DYN_HTTP_SVC_LIVE_PATH"; +/// Environment variable to set the chat completions endpoint path (default: `/v1/chat/completions`) +static HTTP_SVC_CHAT_PATH_ENV: &str = "DYN_HTTP_SVC_CHAT_PATH"; +/// Environment variable to set the completions endpoint path (default: `/v1/completions`) +static HTTP_SVC_CMP_PATH_ENV: &str = "DYN_HTTP_SVC_CMP_PATH"; +/// Environment variable to set the embeddings endpoint path (default: `/v1/embeddings`) +static HTTP_SVC_EMB_PATH_ENV: &str = "DYN_HTTP_SVC_EMB_PATH"; +/// Environment variable to set the responses endpoint path (default: `/v1/responses`) +static HTTP_SVC_RESPONSES_PATH_ENV: &str = "DYN_HTTP_SVC_RESPONSES_PATH"; + impl HttpServiceConfigBuilder { pub fn build(self) -> Result { let config: HttpServiceConfig = self.build_internal()?; @@ -148,32 +166,39 @@ impl HttpServiceConfigBuilder { let mut all_docs = Vec::new(); let mut routes = vec![ - metrics::router(registry, None), - super::openai::list_models_router(state.clone(), None), - super::health::health_check_router(state.clone(), None), + metrics::router(registry, var(HTTP_SVC_METRICS_PATH_ENV).ok()), + super::openai::list_models_router(state.clone(), var(HTTP_SVC_MODELS_PATH_ENV).ok()), + super::health::health_check_router(state.clone(), var(HTTP_SVC_HEALTH_PATH_ENV).ok()), + super::health::live_check_router(state.clone(), var(HTTP_SVC_LIVE_PATH_ENV).ok()), ]; if config.enable_chat_endpoints { routes.push(super::openai::chat_completions_router( state.clone(), config.request_template.clone(), // TODO clone()? reference? - None, + var(HTTP_SVC_CHAT_PATH_ENV).ok(), )); } if config.enable_cmpl_endpoints { - routes.push(super::openai::completions_router(state.clone(), None)); + routes.push(super::openai::completions_router( + state.clone(), + var(HTTP_SVC_CMP_PATH_ENV).ok(), + )); } if config.enable_embeddings_endpoints { - routes.push(super::openai::embeddings_router(state.clone(), None)); + routes.push(super::openai::embeddings_router( + state.clone(), + var(HTTP_SVC_EMB_PATH_ENV).ok(), + )); } if config.enable_responses_endpoints { routes.push(super::openai::responses_router( state.clone(), config.request_template, - None, + var(HTTP_SVC_RESPONSES_PATH_ENV).ok(), )); }