Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sgl-router/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ crate-type = ["rlib"]
name = "sglang-router"
path = "src/main.rs"

[[bin]]
name = "smg"
path = "src/main.rs"

[[bin]]
name = "amg"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯

path = "src/main.rs"

[dependencies]
clap = { version = "4", features = ["derive", "env"] }
axum = { version = "0.8.4", features = ["macros", "ws", "tracing"] }
Expand Down
5 changes: 4 additions & 1 deletion sgl-router/bindings/python/.coveragerc
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[run]
source = sglang_router
omit = */mini_lb.py
omit =
*/mini_lb.py
*/cli.py
*/__main__.py

[report]
fail_under = 80
5 changes: 5 additions & 0 deletions sgl-router/bindings/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ dev = [
"requests>=2.25.0",
]

[project.scripts]
smg = "sglang_router.cli:main"
amg = "sglang_router.cli:main"
sglang-router = "sglang_router.cli:main"


[tool.maturin]
python-source = "."
Expand Down
8 changes: 8 additions & 0 deletions sgl-router/bindings/python/sglang_router/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""
Allow running the CLI via: python -m sglang_router
"""

from sglang_router.cli import main

# Entry point for `python -m sglang_router`: delegate to the CLI's main().
if __name__ == "__main__":
    main()
104 changes: 104 additions & 0 deletions sgl-router/bindings/python/sglang_router/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""
SGLang Model Gateway CLI

Provides a convenient command-line interface for launching the router and server.

Usage:
smg launch [args] # Launch router only
smg server [args] # Launch router + server
smg --help # Show help
"""

import argparse
import os
import sys
from typing import List, Optional


def create_parser() -> argparse.ArgumentParser:
    """Create the main CLI parser with subcommands.

    The parser exposes two subcommands, ``launch`` and ``server``. Each one
    collects its remaining positional tokens into ``router_args`` or
    ``server_args`` respectively; the chosen subcommand name is stored in
    ``command`` (``None`` when no subcommand was given).

    Returns:
        The configured ``argparse.ArgumentParser``. Its ``prog`` is the basename
        of ``sys.argv[0]``, or ``"smg"`` when that is missing or empty.
    """
    # Mirror the name the user invoked; an absent or empty argv[0] would
    # otherwise produce a blank program name in usage/help output.
    invoked_as = os.path.basename(sys.argv[0]) if sys.argv else ""
    prog_name = invoked_as or "smg"

    parser = argparse.ArgumentParser(
        prog=prog_name,
        description="SGLang Model Gateway - High-performance inference router",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Launch router subcommand
    launch_parser = subparsers.add_parser(
        "launch",
        help="Launch router only (requires existing worker URLs)",
        description="Launch the SGLang router with existing worker instances",
    )
    launch_parser.add_argument(
        "router_args",
        nargs="*",
        help="Arguments to pass to the router (use -- to separate)",
    )

    # Launch server + router subcommand
    server_parser = subparsers.add_parser(
        "server",
        help="Launch router and server processes together",
        description="Launch both SGLang router and server processes",
    )
    server_parser.add_argument(
        "server_args",
        nargs="*",
        help="Arguments to pass to server/router (use -- to separate)",
    )

    return parser


def main(argv: Optional[List[str]] = None) -> None:
    """Main CLI entry point.

    Dispatches ``launch`` to the router launcher and ``server`` to the combined
    router + server launcher, forwarding every token that follows the
    subcommand name.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 after printing help when ``argv`` is empty or
            does not start with ``launch``, ``server``, ``-h`` or ``--help``.
            argparse itself exits when it handles ``-h``/``--help``.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = create_parser()

    # Handle empty or unrecognised command - show help
    if not argv or argv[0] not in ("launch", "server", "-h", "--help"):
        parser.print_help()
        sys.exit(1)

    # argparse is used only to select the subcommand and to serve -h/--help.
    args, _ = parser.parse_known_args(argv)

    # Forward the tokens after the subcommand verbatim. Rebuilding them as
    # "positionals + unknown options" may reorder them (how argparse splits
    # tokens between a "*" positional and the unknown list varies by Python
    # version), which can detach an option from its values.
    forwarded = list(argv[1:])
    # Drop a single leading "--" separator so downstream parsers still see the
    # following tokens as options.
    if forwarded[:1] == ["--"]:
        forwarded = forwarded[1:]

    if args.command == "launch":
        # Import and call launch_router functions directly
        from sglang_router.launch_router import launch_router, parse_router_args

        launch_router(parse_router_args(forwarded))

    elif args.command == "server":
        # Import and call launch_server main with proper argv
        # Note: launch_server.main() uses argparse internally which reads sys.argv
        # We need to temporarily set sys.argv for compatibility
        import sglang_router.launch_server as launch_server_module

        # Preserve original sys.argv
        original_argv = sys.argv
        try:
            prog_name = os.path.basename(sys.argv[0]) if sys.argv else "smg"
            sys.argv = [f"{prog_name} server"] + forwarded
            launch_server_module.main()
        finally:
            # Restore original sys.argv
            sys.argv = original_argv

    else:
        parser.print_help()
        sys.exit(1)


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
51 changes: 39 additions & 12 deletions sgl-router/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::collections::HashMap;

use clap::{ArgAction, Parser, ValueEnum};
use clap::{ArgAction, Parser, Subcommand, ValueEnum};
use sglang_router_rs::{
config::{
CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, HealthCheckConfig,
Expand Down Expand Up @@ -71,36 +71,57 @@ impl std::fmt::Display for Backend {
}

#[derive(Parser, Debug)]
#[command(name = "sglang-router")]
#[command(about = "SGLang Router - High-performance request distribution across worker nodes")]
#[command(name = "sglang-router", alias = "smg", alias = "amg")]
#[command(about = "SGLang Model Gateway - High-performance inference gateway")]
#[command(args_conflicts_with_subcommands = true)]
#[command(long_about = r#"
SGLang Router - High-performance request distribution across worker nodes
SGLang Model Gateway - Rust-based inference gateway

Usage:
This launcher enables starting a router with individual worker instances. It is useful for
multi-node setups or when you want to start workers and router separately.
smg launch [OPTIONS] Launch router (short command)
amg launch [OPTIONS] Launch router (alternative)
sglang-router [OPTIONS] Launch router (full name)

Examples:
# Regular mode
sglang-router --worker-urls http://worker1:8000 http://worker2:8000
smg launch --worker-urls http://worker1:8000 http://worker2:8000

# PD disaggregated mode with same policy for both
sglang-router --pd-disaggregation \
# PD disaggregated mode
smg launch --pd-disaggregation \
--prefill http://127.0.0.1:30001 9001 \
--prefill http://127.0.0.2:30002 9002 \
--decode http://127.0.0.3:30003 \
--decode http://127.0.0.4:30004 \
--policy cache_aware

# PD mode with different policies for prefill and decode
sglang-router --pd-disaggregation \
# With different policies
smg launch --pd-disaggregation \
--prefill http://127.0.0.1:30001 9001 \
--prefill http://127.0.0.2:30002 \
--decode http://127.0.0.3:30003 \
--decode http://127.0.0.4:30004 \
--prefill-policy cache_aware --decode-policy power_of_two

"#)]
// Top-level command line: an optional subcommand plus the router options
// flattened directly onto the top-level command. `main` uses the subcommand's
// arguments when one is given and falls back to `router_args` otherwise.
// (Plain `//` comments are used here so clap's derive does not pick them up
// as help text.)
struct Cli {
// Subcommand, if any; `None` means the options were given directly.
#[command(subcommand)]
command: Option<Commands>,

// Router options parsed when no subcommand is used.
#[command(flatten)]
router_args: CliArgs,
}

// Subcommands accepted by the CLI. Only `Launch` exists; it carries its own
// flattened copy of the router options.
#[derive(Subcommand, Debug)]
enum Commands {
/// Launch the router (same as running without subcommand)
#[command(visible_alias = "start")]
Launch {
// Same option set as the top-level command, parsed after the subcommand name.
#[command(flatten)]
args: CliArgs,
},
}

#[derive(Parser, Debug)]
struct CliArgs {
#[arg(long, default_value = "0.0.0.0")]
host: String,
Expand Down Expand Up @@ -683,7 +704,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
}

let cli_args = CliArgs::parse_from(filtered_args);
let cli = Cli::parse_from(filtered_args);

// Handle subcommands or use direct args
let cli_args = match cli.command {
Some(Commands::Launch { args }) => args,
None => cli.router_args,
};

println!("SGLang Router starting...");
println!("Host: {}:{}", cli_args.host, cli_args.port);
Expand Down
Loading