Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
3d5e744
feat: add the dp-aware scheduling sgl-router side Rust implementation
oldsharp Jun 4, 2025
405b6ac
test: add sgl-router dp aware unit tests
oldsharp Jun 9, 2025
02514f9
fix: sgl-router add api key
oldsharp Jun 10, 2025
4d5f2cc
fix: refine error handling
oldsharp Jun 11, 2025
2d413d6
fix: check worker existence when alter tree and queue
oldsharp Jun 12, 2025
41f082f
fix: incorporate the data_parallel_rank field in the main branch
oldsharp Jun 12, 2025
a9f21f1
fix: typo and style fix
oldsharp Jun 13, 2025
24dcd7c
Merge commit '09ae5b20f3123487f36097d284a1f535cd267e7b' into rc/dp-aw…
oldsharp Jun 23, 2025
d797afa
fix: fixup dp-aware routing issues after merge with pdlb
oldsharp Jun 23, 2025
af24684
fixup
oldsharp Jun 23, 2025
7d99151
Merge branch 'main' into rc/dp-aware-sgl-router
oldsharp Jun 23, 2025
837a966
fixup
oldsharp Jun 23, 2025
3ffdf51
minor refining
oldsharp Jun 23, 2025
ea303f8
Merge branch 'main' into feature/dp-aware-sgl-router
slin1237 Jun 23, 2025
e4a9f53
fix: address review comments
oldsharp Jun 24, 2025
4c9fc4e
fix: address review comments
oldsharp Jun 24, 2025
234ba29
fix: serialise the serde_json::Value directly without extra lines of …
oldsharp Jun 24, 2025
e10202e
Merge branch 'main' into rc/dp-aware-sgl-router
oldsharp Jul 21, 2025
8814b67
Merge branch 'main' into rc/dp-aware-sgl-router
oldsharp Jul 22, 2025
358f4c2
Merge branch 'main' into rc/dp-aware-sgl-router
oldsharp Jul 23, 2025
c148715
fix: address async health checker URL parsing issue
oldsharp Jul 24, 2025
3f27a12
fix: address tree del issue during worker removing
oldsharp Jul 24, 2025
4f52232
add TODO for the extract_dp_rank()
oldsharp Jul 24, 2025
17e19d5
misc: var rename
oldsharp Jul 24, 2025
094b723
misc: rename the option dp_awareness -> dp_aware
oldsharp Jul 24, 2025
bd5f04e
fix: address read lock releasing issue
oldsharp Jul 24, 2025
ae26b24
misc: remove unnecessary parentheses
oldsharp Jul 24, 2025
7b5f5c8
validate the service-discovery and dp-aware config conflict
oldsharp Jul 24, 2025
8971f27
test: verify api key for the /get_server_info
oldsharp Jul 24, 2025
0a125d6
test api key
oldsharp Jul 24, 2025
09d810e
Merge branch 'main' into rc/dp-aware-sgl-router
oldsharp Jul 24, 2025
e698a63
fix: address typo found by linter
oldsharp Jul 24, 2025
3851e07
fix: address style issues found by Rust linter
oldsharp Jul 24, 2025
ad6b93d
fix: address test cases syntax issue
oldsharp Jul 24, 2025
ada2d9d
fix: update test case for both dp-aware and service-discovery enabled
oldsharp Jul 24, 2025
f3d8a04
Merge branch 'main' into rc/dp-aware-sgl-router
oldsharp Jul 28, 2025
906b9a7
fix: resolve the build errors in theirs after the merging
oldsharp Jul 28, 2025
c821ff1
Merge branch 'main' into rc/dp-aware-sgl-router
oldsharp Jul 28, 2025
4e3edb4
Merge branch 'main' into feature/dp-aware-sgl-router
slin1237 Jul 30, 2025
80b2bc2
Merge branch 'main' into feature/dp-aware-sgl-router
slin1237 Jul 30, 2025
a4fa0b8
test: enlarge the default timeout value in sgl-router py_test
oldsharp Jul 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/router/router.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ Process:

For unbalanced systems, this strategy tracks pending request counts per worker and routes new requests to the least busy worker. This helps maintain optimal load distribution across workers.

***Data-Parallelism Aware Routing***

An additional DP-aware routing strategy can be enabled on top of the sgl-router’s hybrid cache-aware load-balancing strategy by setting the `--dp-aware` flag when starting the router.

When this flag is enabled, the router attempts to contact the workers to retrieve the `dp_size` of each one and registers the new workers at the DP-rank level. In this mode, the router applies the cache-aware routing strategy in a more fine-grained manner, with assistance from the DP controller on the SRT side.

By default (when the flag is not set), the SRT’s DP controller distributes incoming requests across DP ranks in a round-robin fashion.

## Configuration Parameters

1. `cache_threshold`: (float, 0.0 to 1.0, default: 0.5)
Expand Down
17 changes: 17 additions & 0 deletions sgl-router/py_src/sglang_router/launch_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class RouterArgs:
eviction_interval: int = 60
max_tree_size: int = 2**24
max_payload_size: int = 256 * 1024 * 1024 # 256MB default for large batches
dp_aware: bool = False
api_key: Optional[str] = None
log_dir: Optional[str] = None
log_level: Optional[str] = None
# Service discovery configuration
Expand Down Expand Up @@ -197,6 +199,17 @@ def add_cli_args(
default=RouterArgs.max_payload_size,
help="Maximum payload size in bytes",
)
parser.add_argument(
f"--{prefix}dp-aware",
action="store_true",
help="Enable data parallelism aware schedule",
)
# Optional credential forwarded to the router for authorizing against workers.
parser.add_argument(
    f"--{prefix}api-key",
    type=str,
    default=None,
    help="The API key used for the authorization with the worker. Useful when the DP-aware scheduling strategy is enabled.",
)
parser.add_argument(
f"--{prefix}log-dir",
type=str,
Expand Down Expand Up @@ -304,6 +317,8 @@ def from_cli_args(
eviction_interval=getattr(args, f"{prefix}eviction_interval"),
max_tree_size=getattr(args, f"{prefix}max_tree_size"),
max_payload_size=getattr(args, f"{prefix}max_payload_size"),
dp_aware=getattr(args, f"{prefix}dp_aware", False),
api_key=getattr(args, f"{prefix}api_key", None),
log_dir=getattr(args, f"{prefix}log_dir", None),
log_level=getattr(args, f"{prefix}log_level", None),
service_discovery=getattr(args, f"{prefix}service_discovery", False),
Expand Down Expand Up @@ -463,6 +478,8 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
eviction_interval_secs=router_args.eviction_interval,
max_tree_size=router_args.max_tree_size,
max_payload_size=router_args.max_payload_size,
dp_aware=router_args.dp_aware,
api_key=router_args.api_key,
log_dir=router_args.log_dir,
log_level=router_args.log_level,
service_discovery=router_args.service_discovery,
Expand Down
8 changes: 8 additions & 0 deletions sgl-router/py_src/sglang_router/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ class Router:
routing. Default: 60
max_payload_size: Maximum payload size in bytes. Default: 256MB
max_tree_size: Maximum size of the approximation tree for cache-aware routing. Default: 2^24
dp_aware: Enable data parallelism aware schedule. Default: False
api_key: The api key used for the authorization with the worker.
Useful when the dp aware scheduling strategy is enabled.
Default: None
log_dir: Directory to store log files. If None, logs are only output to console. Default: None
log_level: Logging level. Options: 'debug', 'info', 'warning', 'error', 'critical'.
service_discovery: Enable Kubernetes service discovery. When enabled, the router will
Expand Down Expand Up @@ -73,6 +77,8 @@ def __init__(
eviction_interval_secs: int = 60,
max_tree_size: int = 2**24,
max_payload_size: int = 256 * 1024 * 1024, # 256MB
dp_aware: bool = False,
api_key: Optional[str] = None,
log_dir: Optional[str] = None,
log_level: Optional[str] = None,
service_discovery: bool = False,
Expand Down Expand Up @@ -110,6 +116,8 @@ def __init__(
eviction_interval_secs=eviction_interval_secs,
max_tree_size=max_tree_size,
max_payload_size=max_payload_size,
dp_aware=dp_aware,
api_key=api_key,
log_dir=log_dir,
log_level=log_level,
service_discovery=service_discovery,
Expand Down
2 changes: 1 addition & 1 deletion sgl-router/py_test/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
arg_parser.add_argument(
"--timeout-per-file",
type=int,
default=1000,
default=2000,
help="The time limit for running one file in seconds.",
)
args = arg_parser.parse_args()
Expand Down
47 changes: 47 additions & 0 deletions sgl-router/py_test/test_launch_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def setUp(self):
selector=None,
service_discovery_port=80,
service_discovery_namespace=None,
dp_aware=False,
prometheus_port=None,
prometheus_host=None,
# PD-specific attributes
Expand Down Expand Up @@ -111,6 +112,52 @@ def test_launch_router_with_service_discovery_namespace(self):
)
self.run_router_process(args)

def test_launch_router_common_with_dp_aware(self):
    """Run the router process with a single worker URL and ``dp_aware`` enabled."""
    self.run_router_process(
        self.create_router_args(
            worker_urls=["http://localhost:8000"],
            dp_aware=True,
        )
    )

def test_launch_router_with_empty_worker_urls_with_dp_aware(self):
    """Run the router process with no worker URLs and ``dp_aware`` enabled."""
    self.run_router_process(
        self.create_router_args(
            worker_urls=[],
            dp_aware=True,
        )
    )

def test_launch_router_common_with_dp_aware_service_discovery(self):
    """Check that enabling both ``dp_aware`` and ``service_discovery`` fails.

    ``launch_router`` is invoked in a child process with both options set;
    the test asserts that the child is no longer alive within 3 seconds.
    """
    # Launch the router with both service_discovery and dp_aware enabled.
    # Should fail since service_discovery and dp_aware conflict.
    args = self.create_router_args(
        worker_urls=["http://localhost:8000"],
        dp_aware=True,
        service_discovery=True,
        selector=["app=test-worker"],
    )

    def run_router():
        # Child-process entry point: a ``None`` router or any exception is
        # treated as a failed launch.
        try:
            from sglang_router.launch_router import launch_router

            router = launch_router(args)
            if router is None:
                return 1
            return 0
        except Exception as e:
            print(e)
            return 1

    process = multiprocessing.Process(target=run_router)
    try:
        process.start()
        # Wait at most 3 seconds; join() returns as soon as the child exits,
        # so the expected (failing) launch does not cost the full timeout.
        process.join(timeout=3)
        # Should fail since service_discovery and dp_aware conflict.
        self.assertFalse(process.is_alive())
    finally:
        terminate_process(process)

def test_launch_router_pd_mode_basic(self):
"""Test basic PD router functionality without actually starting servers."""
# This test just verifies the PD router can be created and configured
Expand Down
Loading
Loading