Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions lib/llm/src/http/service/health.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,44 @@ pub fn health_check_router(
state: Arc<service_v2::State>,
path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
let path = path.unwrap_or_else(|| "/health".to_string());
let health_path = path.unwrap_or_else(|| "/health".to_string());

let docs: Vec<RouteDoc> = vec![RouteDoc::new(Method::GET, &path)];
let docs: Vec<RouteDoc> = vec![RouteDoc::new(Method::GET, &health_path)];

let router = Router::new()
.route(&path, get(health_handler))
.route(&health_path, get(health_handler))
.with_state(state);

(docs, router)
}

/// Builds the router that serves the liveness endpoint.
///
/// The endpoint answers `GET` requests on `path`; when `path` is `None`
/// it is mounted at `/live`.
///
/// Returns the route documentation (a single `GET` entry for the chosen
/// path) together with the router, which has `state` attached.
pub fn live_check_router(
    state: Arc<service_v2::State>,
    path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
    // Fall back to the default mount point when the caller supplies none.
    let live_path = match path {
        Some(custom) => custom,
        None => "/live".to_string(),
    };

    let router = Router::new()
        .route(&live_path, get(live_handler))
        .with_state(state);

    let docs: Vec<RouteDoc> = vec![RouteDoc::new(Method::GET, &live_path)];

    (docs, router)
}

/// Handler for the liveness endpoint.
///
/// Always replies `200 OK` with a fixed JSON body; the shared service state
/// is extracted but not consulted.
async fn live_handler(
    axum::extract::State(_state): axum::extract::State<Arc<service_v2::State>>,
) -> impl IntoResponse {
    let body = json!({
        "status": "live",
        "message": "Service is live"
    });

    (StatusCode::OK, Json(body))
}

async fn health_handler(
axum::extract::State(state): axum::extract::State<Arc<service_v2::State>>,
) -> impl IntoResponse {
Expand Down
39 changes: 32 additions & 7 deletions lib/llm/src/http/service/service_v2.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use std::env::var;
use std::sync::Arc;
use std::time::Duration;

Expand Down Expand Up @@ -132,6 +133,23 @@ impl HttpService {
}
}

/// Name of the environment variable that overrides the metrics endpoint path (default: `/metrics`).
const HTTP_SVC_METRICS_PATH_ENV: &str = "DYN_HTTP_SVC_METRICS_PATH";
/// Name of the environment variable that overrides the models endpoint path (default: `/v1/models`).
const HTTP_SVC_MODELS_PATH_ENV: &str = "DYN_HTTP_SVC_MODELS_PATH";
/// Name of the environment variable that overrides the health endpoint path (default: `/health`).
const HTTP_SVC_HEALTH_PATH_ENV: &str = "DYN_HTTP_SVC_HEALTH_PATH";
/// Name of the environment variable that overrides the live endpoint path (default: `/live`).
const HTTP_SVC_LIVE_PATH_ENV: &str = "DYN_HTTP_SVC_LIVE_PATH";
/// Name of the environment variable that overrides the chat completions endpoint path
/// (default: `/v1/chat/completions`).
const HTTP_SVC_CHAT_PATH_ENV: &str = "DYN_HTTP_SVC_CHAT_PATH";
/// Name of the environment variable that overrides the completions endpoint path
/// (default: `/v1/completions`).
const HTTP_SVC_CMP_PATH_ENV: &str = "DYN_HTTP_SVC_CMP_PATH";
/// Name of the environment variable that overrides the embeddings endpoint path
/// (default: `/v1/embeddings`).
const HTTP_SVC_EMB_PATH_ENV: &str = "DYN_HTTP_SVC_EMB_PATH";
/// Name of the environment variable that overrides the responses endpoint path
/// (default: `/v1/responses`).
const HTTP_SVC_RESPONSES_PATH_ENV: &str = "DYN_HTTP_SVC_RESPONSES_PATH";

impl HttpServiceConfigBuilder {
pub fn build(self) -> Result<HttpService, anyhow::Error> {
let config: HttpServiceConfig = self.build_internal()?;
Expand All @@ -148,32 +166,39 @@ impl HttpServiceConfigBuilder {
let mut all_docs = Vec::new();

let mut routes = vec![
metrics::router(registry, None),
super::openai::list_models_router(state.clone(), None),
super::health::health_check_router(state.clone(), None),
metrics::router(registry, var(HTTP_SVC_METRICS_PATH_ENV).ok()),
super::openai::list_models_router(state.clone(), var(HTTP_SVC_MODELS_PATH_ENV).ok()),
super::health::health_check_router(state.clone(), var(HTTP_SVC_HEALTH_PATH_ENV).ok()),
super::health::live_check_router(state.clone(), var(HTTP_SVC_LIVE_PATH_ENV).ok()),
];

if config.enable_chat_endpoints {
routes.push(super::openai::chat_completions_router(
state.clone(),
config.request_template.clone(), // TODO clone()? reference?
None,
var(HTTP_SVC_CHAT_PATH_ENV).ok(),
));
}

if config.enable_cmpl_endpoints {
routes.push(super::openai::completions_router(state.clone(), None));
routes.push(super::openai::completions_router(
state.clone(),
var(HTTP_SVC_CMP_PATH_ENV).ok(),
));
}

if config.enable_embeddings_endpoints {
routes.push(super::openai::embeddings_router(state.clone(), None));
routes.push(super::openai::embeddings_router(
state.clone(),
var(HTTP_SVC_EMB_PATH_ENV).ok(),
));
}

if config.enable_responses_endpoints {
routes.push(super::openai::responses_router(
state.clone(),
config.request_template,
None,
var(HTTP_SVC_RESPONSES_PATH_ENV).ok(),
));
}

Expand Down
Loading