From f326303f13871ded9dcda6b19855491c87d35f73 Mon Sep 17 00:00:00 2001 From: Revanth Reddy Airre Date: Sun, 3 May 2026 17:07:25 -0700 Subject: [PATCH 1/2] fix(router): configure HTTP pool idle timeout Signed-off-by: Revanth Reddy Airre --- docs/advanced_features/sgl_model_gateway.md | 8 +++++ .../advanced_features/sgl_model_gateway.mdx | 26 +++++++++++++++++ sgl-model-gateway/README.md | 1 + sgl-model-gateway/src/app_context.rs | 2 +- sgl-model-gateway/src/config/builder.rs | 5 ++++ sgl-model-gateway/src/config/types.rs | 29 +++++++++++++++++++ sgl-model-gateway/src/main.rs | 19 +++++++++--- 7 files changed, 85 insertions(+), 5 deletions(-) diff --git a/docs/advanced_features/sgl_model_gateway.md b/docs/advanced_features/sgl_model_gateway.md index 5f965b9527c8..8aa1b2d1b71e 100644 --- a/docs/advanced_features/sgl_model_gateway.md +++ b/docs/advanced_features/sgl_model_gateway.md @@ -593,6 +593,14 @@ Response: ## Reliability and Flow Control +### HTTP Client Pool + +Configure the idle timeout for pooled upstream HTTP connections: + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--pool-idle-timeout-secs` | 50 | Idle timeout in seconds for pooled upstream HTTP connections. Can also be set with `SMG_POOL_IDLE_TIMEOUT_SECS`. | + ### Retries Configure exponential backoff retries: diff --git a/docs_new/docs/advanced_features/sgl_model_gateway.mdx b/docs_new/docs/advanced_features/sgl_model_gateway.mdx index 1789442dee27..f6867cda2edf 100644 --- a/docs_new/docs/advanced_features/sgl_model_gateway.mdx +++ b/docs_new/docs/advanced_features/sgl_model_gateway.mdx @@ -944,6 +944,32 @@ Response: *** ## Reliability and Flow Control +### HTTP Client Pool + +Configure the idle timeout for pooled upstream HTTP connections: + + + + + + + + + + + + + + + + + + + + + +
| Parameter | Default | Description |
|-----------|---------|-------------|
| `--pool-idle-timeout-secs` | 50 | Idle timeout in seconds for pooled upstream HTTP connections. Can also be set with `SMG_POOL_IDLE_TIMEOUT_SECS`. |
+ ### Retries Configure exponential backoff retries: diff --git a/sgl-model-gateway/README.md b/sgl-model-gateway/README.md index 046cf352a14e..c221a8f9052f 100644 --- a/sgl-model-gateway/README.md +++ b/sgl-model-gateway/README.md @@ -726,6 +726,7 @@ Router flags map to these values: - `--redis-retention-days` (env: `REDIS_RETENTION_DAYS`). Set to `-1` for persistent storage (default: 30 days). ## Reliability & Flow Control +- **HTTP Client Pool**: Upstream HTTP connection pool idle timeout defaults to 50 seconds. Configure via `--pool-idle-timeout-secs` or `SMG_POOL_IDLE_TIMEOUT_SECS`. - **Retries**: Default max retries = 5 with exponential backoff (`--retry-max-retries`, `--retry-initial-backoff-ms`, `--retry-max-backoff-ms`, `--retry-backoff-multiplier`, `--retry-jitter-factor`). Retries trigger on 408/429/500/502/503/504. - **Circuit Breakers**: Per worker thresholds (`--cb-failure-threshold`, `--cb-success-threshold`, `--cb-timeout-duration-secs`, `--cb-window-duration-secs`). Disable via `--disable-circuit-breaker`. - **Rate Limiting**: Token bucket driven by `--max-concurrent-requests`. Set `--rate-limit-tokens-per-second` to override refill rate. Configure request queue via `--queue-size` and `--queue-timeout-secs`; queued requests observe FIFO order and respect cancellation. 
diff --git a/sgl-model-gateway/src/app_context.rs b/sgl-model-gateway/src/app_context.rs index cbb47c1e14f4..0254ff222c7c 100644 --- a/sgl-model-gateway/src/app_context.rs +++ b/sgl-model-gateway/src/app_context.rs @@ -329,7 +329,7 @@ impl AppContextBuilder { let has_tls_config = config.client_identity.is_some() || !config.ca_certificates.is_empty(); let mut client_builder = Client::builder() - .pool_idle_timeout(Some(Duration::from_secs(50))) + .pool_idle_timeout(Some(Duration::from_secs(config.pool_idle_timeout_secs))) .pool_max_idle_per_host(500) .timeout(Duration::from_secs(timeout_secs)) .connect_timeout(Duration::from_secs(10)) diff --git a/sgl-model-gateway/src/config/builder.rs b/sgl-model-gateway/src/config/builder.rs index b103d8a672f5..70091180ab8c 100644 --- a/sgl-model-gateway/src/config/builder.rs +++ b/sgl-model-gateway/src/config/builder.rs @@ -187,6 +187,11 @@ impl RouterConfigBuilder { self } + pub fn pool_idle_timeout_secs(mut self, timeout: u64) -> Self { + self.config.pool_idle_timeout_secs = timeout; + self + } + // ==================== Rate Limiting ==================== pub fn max_concurrent_requests(mut self, max: i32) -> Self { diff --git a/sgl-model-gateway/src/config/types.rs b/sgl-model-gateway/src/config/types.rs index 39e0a1df4ad5..a8f93a1e7627 100644 --- a/sgl-model-gateway/src/config/types.rs +++ b/sgl-model-gateway/src/config/types.rs @@ -7,6 +7,8 @@ use serde::{Deserialize, Serialize}; use super::ConfigResult; use crate::core::ConnectionMode; +pub const DEFAULT_POOL_IDLE_TIMEOUT_SECS: u64 = 50; + /// Main router configuration #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RouterConfig { @@ -28,6 +30,8 @@ pub struct RouterConfig { pub log_dir: Option, pub log_level: Option, pub request_id_headers: Option>, + #[serde(default = "default_pool_idle_timeout_secs")] + pub pool_idle_timeout_secs: u64, /// Set to -1 to disable rate limiting pub max_concurrent_requests: i32, pub queue_size: usize, @@ -119,6 +123,10 @@ fn 
default_l1_max_memory() -> usize { 50 * 1024 * 1024 // 50MB } +fn default_pool_idle_timeout_secs() -> u64 { + DEFAULT_POOL_IDLE_TIMEOUT_SECS +} + impl TokenizerCacheConfig { /// Returns Some(self) if any caching is enabled, None otherwise. /// Use this when passing cache config to tokenizer registration workflow. @@ -492,6 +500,7 @@ impl Default for RouterConfig { log_dir: None, log_level: None, request_id_headers: None, + pool_idle_timeout_secs: default_pool_idle_timeout_secs(), max_concurrent_requests: -1, queue_size: 100, queue_timeout_secs: 60, @@ -613,6 +622,10 @@ mod tests { assert!(config.trace_config.is_none()); assert!(config.log_dir.is_none()); assert!(config.log_level.is_none()); + assert_eq!( + config.pool_idle_timeout_secs, + DEFAULT_POOL_IDLE_TIMEOUT_SECS + ); } #[test] @@ -662,6 +675,22 @@ mod tests { assert!(deserialized.trace_config.is_none()); } + #[test] + fn test_router_config_pool_idle_timeout_deserialization_default() { + let config = RouterConfig::default(); + let mut json = serde_json::to_value(&config).unwrap(); + json.as_object_mut() + .unwrap() + .remove("pool_idle_timeout_secs"); + + let deserialized: RouterConfig = serde_json::from_value(json).unwrap(); + + assert_eq!( + deserialized.pool_idle_timeout_secs, + default_pool_idle_timeout_secs() + ); + } + #[test] fn test_routing_mode_is_pd_mode() { let regular = RoutingMode::Regular { diff --git a/sgl-model-gateway/src/main.rs b/sgl-model-gateway/src/main.rs index 3d8b9842f56a..82b556cc7117 100644 --- a/sgl-model-gateway/src/main.rs +++ b/sgl-model-gateway/src/main.rs @@ -5,10 +5,10 @@ use rand::{distr::Alphanumeric, Rng}; use smg::{ auth::{ApiKeyEntry, ControlPlaneAuthConfig, JwtConfig, Role}, config::{ - CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, HealthCheckConfig, - HistoryBackend, ManualAssignmentMode, MetricsConfig, OracleConfig, PolicyConfig, - PostgresConfig, RedisConfig, RetryConfig, RouterConfig, RoutingMode, TokenizerCacheConfig, - TraceConfig, + 
CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, + DEFAULT_POOL_IDLE_TIMEOUT_SECS, HealthCheckConfig, HistoryBackend, ManualAssignmentMode, + MetricsConfig, OracleConfig, PolicyConfig, PostgresConfig, RedisConfig, RetryConfig, + RouterConfig, RoutingMode, TokenizerCacheConfig, TraceConfig, }, core::ConnectionMode, observability::{ @@ -298,6 +298,16 @@ struct CliArgs { #[arg(long, num_args = 0.., help_heading = "Request Handling")] cors_allowed_origins: Vec, + // ==================== HTTP Client Pool ==================== + /// Idle timeout in seconds for pooled upstream HTTP connections + #[arg( + long, + env = "SMG_POOL_IDLE_TIMEOUT_SECS", + default_value_t = DEFAULT_POOL_IDLE_TIMEOUT_SECS, + help_heading = "HTTP Client Pool" + )] + pool_idle_timeout_secs: u64, + // ==================== Rate Limiting ==================== /// Maximum concurrent requests (-1 to disable) #[arg(long, default_value_t = -1, help_heading = "Rate Limiting")] @@ -972,6 +982,7 @@ impl CliArgs { .request_timeout_secs(self.request_timeout_secs) .worker_startup_timeout_secs(self.worker_startup_timeout_secs) .worker_startup_check_interval_secs(self.worker_startup_check_interval) + .pool_idle_timeout_secs(self.pool_idle_timeout_secs) .max_concurrent_requests(self.max_concurrent_requests) .queue_size(self.queue_size) .queue_timeout_secs(self.queue_timeout_secs) From 9a1c47d71f11718213823a0af3a5b06dd91edafc Mon Sep 17 00:00:00 2001 From: Revanth Reddy Airre Date: Wed, 6 May 2026 16:16:16 -0700 Subject: [PATCH 2/2] style(router): run rustfmt --- sgl-model-gateway/src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sgl-model-gateway/src/main.rs b/sgl-model-gateway/src/main.rs index 82b556cc7117..6d61264031de 100644 --- a/sgl-model-gateway/src/main.rs +++ b/sgl-model-gateway/src/main.rs @@ -5,10 +5,10 @@ use rand::{distr::Alphanumeric, Rng}; use smg::{ auth::{ApiKeyEntry, ControlPlaneAuthConfig, JwtConfig, Role}, config::{ - CircuitBreakerConfig, 
ConfigError, ConfigResult, DiscoveryConfig, - DEFAULT_POOL_IDLE_TIMEOUT_SECS, HealthCheckConfig, HistoryBackend, ManualAssignmentMode, - MetricsConfig, OracleConfig, PolicyConfig, PostgresConfig, RedisConfig, RetryConfig, - RouterConfig, RoutingMode, TokenizerCacheConfig, TraceConfig, + CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, HealthCheckConfig, + HistoryBackend, ManualAssignmentMode, MetricsConfig, OracleConfig, PolicyConfig, + PostgresConfig, RedisConfig, RetryConfig, RouterConfig, RoutingMode, TokenizerCacheConfig, + TraceConfig, DEFAULT_POOL_IDLE_TIMEOUT_SECS, }, core::ConnectionMode, observability::{