diff --git a/sgl-model-gateway/src/routers/grpc/pd_router.rs b/sgl-model-gateway/src/routers/grpc/pd_router.rs index c9c9afdc3963..35dd222df0dc 100644 --- a/sgl-model-gateway/src/routers/grpc/pd_router.rs +++ b/sgl-model-gateway/src/routers/grpc/pd_router.rs @@ -1,27 +1,14 @@ use std::sync::Arc; use async_trait::async_trait; -use axum::{ - body::Body, - extract::Request, - http::{HeaderMap, StatusCode}, - response::{IntoResponse, Response}, -}; +use axum::{http::HeaderMap, response::Response}; use tracing::debug; use super::{context::SharedComponents, pipeline::RequestPipeline}; use crate::{ app_context::AppContext, core::{ConnectionMode, WorkerRegistry, WorkerType}, - protocols::{ - chat::ChatCompletionRequest, - classify::ClassifyRequest, - completion::CompletionRequest, - embedding::EmbeddingRequest, - generate::GenerateRequest, - rerank::RerankRequest, - responses::{ResponsesGetParams, ResponsesRequest}, - }, + protocols::{chat::ChatCompletionRequest, generate::GenerateRequest}, routers::RouterTrait, }; @@ -160,26 +147,6 @@ impl RouterTrait for GrpcPDRouter { self } - async fn health_generate(&self, _req: Request) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Health generate not yet implemented for gRPC PD", - ) - .into_response() - } - - async fn get_server_info(&self, _req: Request) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn get_models(&self, _req: Request) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn get_model_info(&self, _req: Request) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - async fn route_generate( &self, headers: Option<&HeaderMap>, @@ -198,64 +165,6 @@ impl RouterTrait for GrpcPDRouter { self.route_chat_impl(headers, body, model_id).await } - async fn route_completion( - &self, - _headers: Option<&HeaderMap>, - _body: &CompletionRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn route_responses( - &self, - _headers: Option<&HeaderMap>, - _body: &ResponsesRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn get_response( - &self, - _headers: Option<&HeaderMap>, - _response_id: &str, - _params: &ResponsesGetParams, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn cancel_response(&self, _headers: Option<&HeaderMap>, _response_id: &str) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn route_embeddings( - &self, - _headers: Option<&HeaderMap>, - _body: &EmbeddingRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn route_classify( - &self, - _headers: Option<&HeaderMap>, - _body: &ClassifyRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn route_rerank( - &self, - _headers: Option<&HeaderMap>, - _body: &RerankRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - fn router_type(&self) -> &'static str { "grpc_pd" } diff --git a/sgl-model-gateway/src/routers/grpc/router.rs b/sgl-model-gateway/src/routers/grpc/router.rs index ad0bcef0bcf9..9c5af2a470c1 100644 --- a/sgl-model-gateway/src/routers/grpc/router.rs +++ b/sgl-model-gateway/src/routers/grpc/router.rs @@ -2,9 +2,7 @@ use std::sync::Arc; use async_trait::async_trait; use axum::{ - body::Body, - extract::Request, - http::{HeaderMap, StatusCode}, + http::HeaderMap, response::{IntoResponse, Response}, }; use tracing::debug; @@ -27,11 +25,7 @@ use crate::{ core::WorkerRegistry, protocols::{ chat::ChatCompletionRequest, - classify::ClassifyRequest, - completion::CompletionRequest, - embedding::EmbeddingRequest, generate::GenerateRequest, - rerank::RerankRequest, responses::{ResponsesGetParams, ResponsesRequest}, }, routers::RouterTrait, @@ -259,26 +253,6 @@ impl RouterTrait for GrpcRouter { self } - async fn health_generate(&self, _req: Request) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Health generate not yet implemented for gRPC", - ) - .into_response() - } - - async fn get_server_info(&self, _req: Request) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn get_models(&self, _req: Request) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn get_model_info(&self, _req: Request) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - async fn route_generate( &self, headers: Option<&HeaderMap>, @@ -297,15 +271,6 @@ impl RouterTrait for GrpcRouter { self.route_chat_impl(headers, body, model_id).await } - async fn route_completion( - &self, - _headers: Option<&HeaderMap>, - _body: &CompletionRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - async fn route_responses( &self, headers: Option<&HeaderMap>, @@ -328,33 +293,6 @@ impl RouterTrait for GrpcRouter { cancel_response_impl(&self.responses_context, response_id).await } - async fn route_embeddings( - &self, - _headers: Option<&HeaderMap>, - _body: &EmbeddingRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn route_classify( - &self, - _headers: Option<&HeaderMap>, - _body: &ClassifyRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - - async fn route_rerank( - &self, - _headers: Option<&HeaderMap>, - _body: &RerankRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED).into_response() - } - fn router_type(&self) -> &'static str { "grpc" } diff --git a/sgl-model-gateway/src/routers/http/pd_router.rs b/sgl-model-gateway/src/routers/http/pd_router.rs index dcaede54ebad..09e5b65d3b46 100644 --- a/sgl-model-gateway/src/routers/http/pd_router.rs +++ b/sgl-model-gateway/src/routers/http/pd_router.rs @@ -28,13 +28,10 @@ use crate::{ policies::{LoadBalancingPolicy, PolicyRegistry}, protocols::{ chat::{ChatCompletionRequest, ChatMessage, MessageContent}, - classify::ClassifyRequest, common::{InputIds, StringOrArray}, completion::CompletionRequest, - embedding::EmbeddingRequest, generate::GenerateRequest, rerank::RerankRequest, - responses::{ResponsesGetParams, ResponsesRequest}, }, routers::{header_utils, RouterTrait}, }; @@ -1190,66 +1187,6 @@ impl RouterTrait for PDRouter { self.execute_dual_dispatch(headers, body, context).await } - async fn route_responses( - &self, - _headers: Option<&HeaderMap>, - _body: &ResponsesRequest, - _model_id: Option<&str>, - ) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Responses endpoint not implemented for PD router", - ) - .into_response() - } - - async fn get_response( - &self, - _headers: Option<&HeaderMap>, - _response_id: &str, - _params: &ResponsesGetParams, - ) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Responses retrieve endpoint not implemented for PD router", - ) - .into_response() - } - - async fn cancel_response(&self, _headers: Option<&HeaderMap>, _response_id: &str) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Responses cancel endpoint not implemented for PD router", - ) - .into_response() - } - - async fn route_classify( - &self, - _headers: Option<&HeaderMap>, - _body: &ClassifyRequest, - _model_id: Option<&str>, - ) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Classify endpoint not implemented for PD router", - ) - .into_response() - } - - async fn route_embeddings( - &self, - _headers: Option<&HeaderMap>, - _body: &EmbeddingRequest, - _model_id: Option<&str>, - ) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Embeddings endpoint not implemented for PD router", - ) - .into_response() - } - async fn route_rerank( &self, headers: Option<&HeaderMap>, diff --git a/sgl-model-gateway/src/routers/mod.rs b/sgl-model-gateway/src/routers/mod.rs index 41fb34dc66de..fa2ba4084921 100644 --- a/sgl-model-gateway/src/routers/mod.rs +++ b/sgl-model-gateway/src/routers/mod.rs @@ -42,24 +42,46 @@ pub trait RouterTrait: Send + Sync + Debug { fn as_any(&self) -> &dyn std::any::Any; /// Route a health generate request - async fn health_generate(&self, req: Request) -> Response; + async fn health_generate(&self, _req: Request) -> Response { + ( + StatusCode::NOT_IMPLEMENTED, + "Health generate not implemented", + ) + .into_response() + } /// Get server information - async fn get_server_info(&self, req: Request) -> Response; + async fn get_server_info(&self, _req: Request) -> Response { + (StatusCode::NOT_IMPLEMENTED, "Server info not implemented").into_response() + } /// Get available models - async fn get_models(&self, req: Request) -> Response; + async fn get_models(&self, _req: Request) -> Response { + (StatusCode::NOT_IMPLEMENTED, "Get models not implemented").into_response() + } /// Get model information - async fn get_model_info(&self, req: Request) -> Response; + async fn get_model_info(&self, _req: Request) -> Response { + ( + StatusCode::NOT_IMPLEMENTED, + "Get model info not implemented", + ) + .into_response() + } /// Route a generate request async fn route_generate( &self, - headers: Option<&HeaderMap>, - body: &GenerateRequest, - model_id: Option<&str>, - ) -> Response; + _headers: Option<&HeaderMap>, + _body: &GenerateRequest, + _model_id: Option<&str>, + ) -> Response { + ( + StatusCode::NOT_IMPLEMENTED, + "Generate endpoint not implemented", + ) + .into_response() + } /// Route a chat completion request async fn route_chat( @@ -72,29 +94,49 @@ pub trait RouterTrait: Send + Sync + Debug { /// Route a completion request async fn route_completion( &self, - headers: Option<&HeaderMap>, - body: &CompletionRequest, - model_id: Option<&str>, - ) -> Response; + _headers: Option<&HeaderMap>, + _body: &CompletionRequest, + _model_id: Option<&str>, + ) -> Response { + ( + StatusCode::NOT_IMPLEMENTED, + "Completion endpoint not implemented", + ) + .into_response() + } /// Route a responses request async fn route_responses( &self, - headers: Option<&HeaderMap>, - body: &ResponsesRequest, - model_id: Option<&str>, - ) -> Response; + _headers: Option<&HeaderMap>, + _body: &ResponsesRequest, + _model_id: Option<&str>, + ) -> Response { + ( + StatusCode::NOT_IMPLEMENTED, + "Responses endpoint not implemented", + ) + .into_response() + } /// Retrieve a stored/background response by id async fn get_response( &self, - headers: Option<&HeaderMap>, - response_id: &str, - params: &ResponsesGetParams, - ) -> Response; + _headers: Option<&HeaderMap>, + _response_id: &str, + _params: &ResponsesGetParams, + ) -> Response { + (StatusCode::NOT_IMPLEMENTED, "Get response not implemented").into_response() + } /// Cancel a background response by id - async fn cancel_response(&self, headers: Option<&HeaderMap>, response_id: &str) -> Response; + async fn cancel_response(&self, _headers: Option<&HeaderMap>, _response_id: &str) -> Response { + ( + StatusCode::NOT_IMPLEMENTED, + "Cancel response not implemented", + ) + .into_response() + } /// Delete a response by id async fn delete_response(&self, _headers: Option<&HeaderMap>, _response_id: &str) -> Response { @@ -121,25 +163,32 @@ pub trait RouterTrait: Send + Sync + Debug { /// Route embedding requests (OpenAI-compatible /v1/embeddings) async fn route_embeddings( &self, - headers: Option<&HeaderMap>, - body: &EmbeddingRequest, - model_id: Option<&str>, - ) -> Response; + _headers: Option<&HeaderMap>, + _body: &EmbeddingRequest, + _model_id: Option<&str>, + ) -> Response { + (StatusCode::NOT_IMPLEMENTED, "Embeddings not implemented").into_response() + } /// Route classification requests (OpenAI-compatible /v1/classify) async fn route_classify( &self, - headers: Option<&HeaderMap>, - body: &ClassifyRequest, - model_id: Option<&str>, - ) -> Response; + _headers: Option<&HeaderMap>, + _body: &ClassifyRequest, + _model_id: Option<&str>, + ) -> Response { + (StatusCode::NOT_IMPLEMENTED, "Classify not implemented").into_response() + } + /// Route rerank requests async fn route_rerank( &self, - headers: Option<&HeaderMap>, - body: &RerankRequest, - model_id: Option<&str>, - ) -> Response; + _headers: Option<&HeaderMap>, + _body: &RerankRequest, + _model_id: Option<&str>, + ) -> Response { + (StatusCode::NOT_IMPLEMENTED, "Rerank not implemented").into_response() + } /// Get router type name fn router_type(&self) -> &'static str; diff --git a/sgl-model-gateway/src/routers/openai/router.rs b/sgl-model-gateway/src/routers/openai/router.rs index 6fc878b8facd..b45ab7da3e79 100644 --- a/sgl-model-gateway/src/routers/openai/router.rs +++ b/sgl-model-gateway/src/routers/openai/router.rs @@ -37,11 +37,6 @@ use crate::{ data_connector::{ConversationId, ListParams, ResponseId, SortOrder}, protocols::{ chat::ChatCompletionRequest, - classify::ClassifyRequest, - completion::CompletionRequest, - embedding::EmbeddingRequest, - generate::GenerateRequest, - rerank::RerankRequest, responses::{ generate_id, ResponseContentPart, ResponseInput, ResponseInputOutputItem, ResponsesGetParams, ResponsesRequest, @@ -522,27 +517,6 @@ impl crate::routers::RouterTrait for OpenAIRouter { (StatusCode::OK, Json(response_json)).into_response() } - async fn get_model_info(&self, _req: Request) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "get_model_info not implemented for OpenAI router", - ) - .into_response() - } - - async fn route_generate( - &self, - _headers: Option<&HeaderMap>, - _body: &GenerateRequest, - _model_id: Option<&str>, - ) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Generate endpoint not supported for OpenAI backend", - ) - .into_response() - } - async fn route_chat( &self, headers: Option<&HeaderMap>, @@ -671,19 +645,6 @@ impl crate::routers::RouterTrait for OpenAIRouter { } } - async fn route_completion( - &self, - _headers: Option<&HeaderMap>, - _body: &CompletionRequest, - _model_id: Option<&str>, - ) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Completion endpoint not implemented", - ) - .into_response() - } - async fn route_responses( &self, headers: Option<&HeaderMap>, @@ -1001,14 +962,6 @@ impl crate::routers::RouterTrait for OpenAIRouter { } } - async fn cancel_response(&self, _headers: Option<&HeaderMap>, _response_id: &str) -> Response { - ( - StatusCode::NOT_IMPLEMENTED, - "Cancel response not implemented for OpenAI router", - ) - .into_response() - } - async fn list_response_input_items( &self, _headers: Option<&HeaderMap>, @@ -1080,33 +1033,6 @@ impl crate::routers::RouterTrait for OpenAIRouter { } } - async fn route_embeddings( - &self, - _headers: Option<&HeaderMap>, - _body: &EmbeddingRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED, "Embeddings not supported").into_response() - } - - async fn route_classify( - &self, - _headers: Option<&HeaderMap>, - _body: &ClassifyRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED, "Classify not supported").into_response() - } - - async fn route_rerank( - &self, - _headers: Option<&HeaderMap>, - _body: &RerankRequest, - _model_id: Option<&str>, - ) -> Response { - (StatusCode::NOT_IMPLEMENTED, "Rerank not supported").into_response() - } - fn router_type(&self) -> &'static str { "openai" }