Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 103 additions & 35 deletions sgl-model-gateway/src/routers/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,88 +5,156 @@ use axum::{
};
use serde_json::json;

pub fn internal_error(message: impl Into<String>) -> Response {
create_error(StatusCode::INTERNAL_SERVER_ERROR, "internal_error", message)
pub fn internal_error(code: impl Into<String>, message: impl Into<String>) -> Response {
create_error(StatusCode::INTERNAL_SERVER_ERROR, code, message)
}

pub fn bad_request(message: impl Into<String>) -> Response {
create_error(StatusCode::BAD_REQUEST, "invalid_request_error", message)
pub fn bad_request(code: impl Into<String>, message: impl Into<String>) -> Response {
create_error(StatusCode::BAD_REQUEST, code, message)
}

pub fn not_found(message: impl Into<String>) -> Response {
create_error(StatusCode::NOT_FOUND, "invalid_request_error", message)
pub fn not_found(code: impl Into<String>, message: impl Into<String>) -> Response {
create_error(StatusCode::NOT_FOUND, code, message)
}

pub fn service_unavailable(message: impl Into<String>) -> Response {
create_error(
StatusCode::SERVICE_UNAVAILABLE,
"service_unavailable",
message,
)
pub fn service_unavailable(code: impl Into<String>, message: impl Into<String>) -> Response {
create_error(StatusCode::SERVICE_UNAVAILABLE, code, message)
}

pub fn failed_dependency(message: impl Into<String>) -> Response {
create_error(
StatusCode::FAILED_DEPENDENCY,
"external_connector_error",
message,
)
pub fn failed_dependency(code: impl Into<String>, message: impl Into<String>) -> Response {
create_error(StatusCode::FAILED_DEPENDENCY, code, message)
}

pub fn not_implemented(message: impl Into<String>) -> Response {
create_error(
StatusCode::NOT_IMPLEMENTED,
"not_implemented_error",
message,
)
pub fn not_implemented(code: impl Into<String>, message: impl Into<String>) -> Response {
create_error(StatusCode::NOT_IMPLEMENTED, code, message)
}

fn create_error(status_code: StatusCode, error_type: &str, message: impl Into<String>) -> Response {
let msg = message.into();
fn create_error(
status: StatusCode,
code: impl Into<String>,
message: impl Into<String>,
) -> Response {
(
status_code,
status,
Json(json!({
"error": {
"message": msg,
"type": error_type,
"code": status_code.as_u16()
"message": message.into(),
"type": status_code_to_str(status),
"code": code.into(),
}
})),
)
.into_response()
}

fn status_code_to_str(status_code: StatusCode) -> &'static str {
match status_code {
// 1xx
StatusCode::CONTINUE => "continue",
StatusCode::SWITCHING_PROTOCOLS => "switching_protocols",
StatusCode::PROCESSING => "processing",
StatusCode::EARLY_HINTS => "early_hints",

// 2xx
StatusCode::OK => "ok",
StatusCode::CREATED => "created",
StatusCode::ACCEPTED => "accepted",
StatusCode::NON_AUTHORITATIVE_INFORMATION => "non_authoritative_information",
StatusCode::NO_CONTENT => "no_content",
StatusCode::RESET_CONTENT => "reset_content",
StatusCode::PARTIAL_CONTENT => "partial_content",
StatusCode::MULTI_STATUS => "multi_status",
StatusCode::ALREADY_REPORTED => "already_reported",
StatusCode::IM_USED => "im_used",

// 3xx
StatusCode::MULTIPLE_CHOICES => "multiple_choices",
StatusCode::MOVED_PERMANENTLY => "moved_permanently",
StatusCode::FOUND => "found",
StatusCode::SEE_OTHER => "see_other",
StatusCode::NOT_MODIFIED => "not_modified",
StatusCode::USE_PROXY => "use_proxy",
StatusCode::TEMPORARY_REDIRECT => "temporary_redirect",
StatusCode::PERMANENT_REDIRECT => "permanent_redirect",

// 4xx
StatusCode::BAD_REQUEST => "bad_request",
StatusCode::UNAUTHORIZED => "unauthorized",
StatusCode::PAYMENT_REQUIRED => "payment_required",
StatusCode::FORBIDDEN => "forbidden",
StatusCode::NOT_FOUND => "not_found",
StatusCode::METHOD_NOT_ALLOWED => "method_not_allowed",
StatusCode::NOT_ACCEPTABLE => "not_acceptable",
StatusCode::PROXY_AUTHENTICATION_REQUIRED => "proxy_authentication_required",
StatusCode::REQUEST_TIMEOUT => "request_timeout",
StatusCode::CONFLICT => "conflict",
StatusCode::GONE => "gone",
StatusCode::LENGTH_REQUIRED => "length_required",
StatusCode::PRECONDITION_FAILED => "precondition_failed",
StatusCode::PAYLOAD_TOO_LARGE => "payload_too_large",
StatusCode::URI_TOO_LONG => "uri_too_long",
StatusCode::UNSUPPORTED_MEDIA_TYPE => "unsupported_media_type",
StatusCode::RANGE_NOT_SATISFIABLE => "range_not_satisfiable",
StatusCode::EXPECTATION_FAILED => "expectation_failed",
StatusCode::IM_A_TEAPOT => "im_a_teapot",
StatusCode::MISDIRECTED_REQUEST => "misdirected_request",
StatusCode::UNPROCESSABLE_ENTITY => "unprocessable_entity",
StatusCode::LOCKED => "locked",
StatusCode::FAILED_DEPENDENCY => "failed_dependency",
StatusCode::UPGRADE_REQUIRED => "upgrade_required",
StatusCode::PRECONDITION_REQUIRED => "precondition_required",
StatusCode::TOO_MANY_REQUESTS => "too_many_requests",
StatusCode::REQUEST_HEADER_FIELDS_TOO_LARGE => "request_header_fields_too_large",
StatusCode::UNAVAILABLE_FOR_LEGAL_REASONS => "unavailable_for_legal_reasons",

// 5xx
StatusCode::INTERNAL_SERVER_ERROR => "internal_server_error",
StatusCode::NOT_IMPLEMENTED => "not_implemented",
StatusCode::BAD_GATEWAY => "bad_gateway",
StatusCode::SERVICE_UNAVAILABLE => "service_unavailable",
StatusCode::GATEWAY_TIMEOUT => "gateway_timeout",
StatusCode::HTTP_VERSION_NOT_SUPPORTED => "http_version_not_supported",
StatusCode::VARIANT_ALSO_NEGOTIATES => "variant_also_negotiates",
StatusCode::INSUFFICIENT_STORAGE => "insufficient_storage",
StatusCode::LOOP_DETECTED => "loop_detected",
StatusCode::NOT_EXTENDED => "not_extended",
StatusCode::NETWORK_AUTHENTICATION_REQUIRED => "network_authentication_required",

_ => "unknown_status_code",
}
}

#[cfg(test)]
mod tests {
    use super::*;

    // Each test checks that a helper stamps the expected HTTP status on the
    // response it builds.

    #[test]
    fn test_internal_error_string() {
        let response = internal_error("test_error", "Test error");
        assert_eq!(response.status(), StatusCode::INTERNAL_SERVER_ERROR);
    }

    #[test]
    fn test_internal_error_format() {
        // An owned `String` message must be accepted as well as a `&str`.
        let response = internal_error("test_error", format!("Error: {}", 42));
        assert_eq!(response.status(), StatusCode::INTERNAL_SERVER_ERROR);
    }

    #[test]
    fn test_bad_request() {
        let response = bad_request("invalid_input", "Invalid input");
        assert_eq!(response.status(), StatusCode::BAD_REQUEST);
    }

    #[test]
    fn test_not_found() {
        let response = not_found("resource_not_found", "Resource not found");
        assert_eq!(response.status(), StatusCode::NOT_FOUND);
    }

    #[test]
    fn test_service_unavailable() {
        let response = service_unavailable("no_workers", "No workers");
        assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE);
    }

    #[test]
    fn test_failed_dependency() {
        let response = failed_dependency("dependency_failed", "Dependency failed");
        assert_eq!(response.status(), StatusCode::FAILED_DEPENDENCY);
    }

    #[test]
    fn test_not_implemented() {
        let response = not_implemented("not_supported", "Not supported");
        assert_eq!(response.status(), StatusCode::NOT_IMPLEMENTED);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ pub async fn collect_responses(
};

if all_responses.is_empty() {
return Err(error::internal_error("No responses from server"));
return Err(error::internal_error(
"no_responses_from_server",
"No responses from server",
));
}

Ok(all_responses)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ pub async fn ensure_mcp_connection(
"Failed to connect to MCP server"
);
return Err(error::failed_dependency(
"connect_mcp_server_failed",
"Failed to connect to MCP server. Check server_url and authorization.",
));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ impl PipelineStage for ClientAcquisitionStage {
function = "ClientAcquisitionStage::execute",
"Worker selection stage not completed"
);
error::internal_error("Worker selection not completed")
error::internal_error(
"worker_selection_not_completed",
"Worker selection not completed",
)
})?;

let clients = match workers {
Expand All @@ -43,6 +46,7 @@ impl PipelineStage for ClientAcquisitionStage {
"vLLM backend does not support dual (PD disaggregated) mode"
);
return Err(error::bad_request(
"vllm_pd_mode_not_supported",
"vLLM backend does not support prefill/decode disaggregated mode. \
Please use runtime_type: sglang for PD mode, or use a regular (non-PD) worker configuration."
));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl PipelineStage for DispatchMetadataStage {
function = "DispatchMetadataStage::execute",
"Proto request not built"
);
error::internal_error("Proto request not built")
error::internal_error("proto_request_not_built", "Proto request not built")
})?;

let request_id = proto_request.request_id().to_string();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,18 @@ impl PipelineStage for RequestExecutionStage {
function = "RequestExecutionStage::execute",
"Proto request not built"
);
error::internal_error("Proto request not built")
error::internal_error("proto_request_not_built", "Proto request not built")
})?;

let clients = ctx.state.clients.as_mut().ok_or_else(|| {
error!(
function = "RequestExecutionStage::execute",
"Client acquisition not completed"
);
error::internal_error("Client acquisition not completed")
error::internal_error(
"client_acquisition_not_completed",
"Client acquisition not completed",
)
})?;

// Extract dispatch metadata for tracing span
Expand Down Expand Up @@ -108,7 +111,10 @@ impl RequestExecutionStage {
function = "execute_single",
"Expected single client but got dual"
);
error::internal_error("Expected single client but got dual")
error::internal_error(
"expected_single_client_got_dual",
"Expected single client but got dual",
)
})?;

let stream = client.generate(proto_request).await.map_err(|e| {
Expand All @@ -117,7 +123,10 @@ impl RequestExecutionStage {
error = %e,
"Failed to start generation"
);
error::internal_error(format!("Failed to start generation: {}", e))
error::internal_error(
"start_generation_failed",
format!("Failed to start generation: {}", e),
)
})?;

Ok(ExecutionResult::Single { stream })
Expand All @@ -133,7 +142,10 @@ impl RequestExecutionStage {
function = "execute_dual_dispatch",
"Expected dual clients but got single"
);
error::internal_error("Expected dual clients but got single")
error::internal_error(
"expected_dual_clients_got_single",
"Expected dual clients but got single",
)
})?;

let prefill_request = proto_request.clone_inner();
Expand All @@ -153,10 +165,10 @@ impl RequestExecutionStage {
error = %e,
"Prefill worker failed to start"
);
return Err(error::internal_error(format!(
"Prefill worker failed to start: {}",
e
)));
return Err(error::internal_error(
"prefill_worker_failed_to_start",
format!("Prefill worker failed to start: {}", e),
));
}
};

Expand All @@ -169,10 +181,10 @@ impl RequestExecutionStage {
error = %e,
"Decode worker failed to start"
);
return Err(error::internal_error(format!(
"Decode worker failed to start: {}",
e
)));
return Err(error::internal_error(
"decode_worker_failed_to_start",
format!("Decode worker failed to start: {}", e),
));
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ impl PipelineStage for WorkerSelectionStage {
function = "WorkerSelectionStage::execute",
"Preparation stage not completed"
);
error::internal_error("Preparation stage not completed")
error::internal_error(
"preparation_stage_not_completed",
"Preparation stage not completed",
)
})?;

// For Harmony, use selection_text produced during Harmony encoding
Expand All @@ -74,10 +77,10 @@ impl PipelineStage for WorkerSelectionStage {
model_id = ?ctx.input.model_id,
"No available workers for model"
);
return Err(error::service_unavailable(format!(
"No available workers for model: {:?}",
ctx.input.model_id
)));
return Err(error::service_unavailable(
"no_available_workers",
format!("No available workers for model: {:?}", ctx.input.model_id),
));
}
}
}
Expand All @@ -91,10 +94,13 @@ impl PipelineStage for WorkerSelectionStage {
model_id = ?ctx.input.model_id,
"No available PD worker pairs for model"
);
return Err(error::service_unavailable(format!(
"No available PD worker pairs for model: {:?}",
ctx.input.model_id
)));
return Err(error::service_unavailable(
"no_available_pd_worker_pairs",
format!(
"No available PD worker pairs for model: {:?}",
ctx.input.model_id
),
));
}
}
}
Expand Down
Loading
Loading