diff --git a/sgl-router/Cargo.toml b/sgl-router/Cargo.toml
index ddfc88119e9b..4fcf4a1c5957 100644
--- a/sgl-router/Cargo.toml
+++ b/sgl-router/Cargo.toml
@@ -19,6 +19,14 @@ crate-type = ["rlib"]
 name = "sglang-router"
 path = "src/main.rs"
 
+[[bin]]
+name = "smg"
+path = "src/main.rs"
+
+[[bin]]
+name = "amg"
+path = "src/main.rs"
+
 [dependencies]
 clap = { version = "4", features = ["derive", "env"] }
 axum = { version = "0.8.4", features = ["macros", "ws", "tracing"] }
diff --git a/sgl-router/bindings/python/.coveragerc b/sgl-router/bindings/python/.coveragerc
index 4a066e7b2e0b..cdadb9b5cdd7 100644
--- a/sgl-router/bindings/python/.coveragerc
+++ b/sgl-router/bindings/python/.coveragerc
@@ -1,6 +1,9 @@
 [run]
 source = sglang_router
-omit = */mini_lb.py
+omit =
+    */mini_lb.py
+    */cli.py
+    */__main__.py
 
 [report]
 fail_under = 80
diff --git a/sgl-router/bindings/python/pyproject.toml b/sgl-router/bindings/python/pyproject.toml
index ced74c3ee43e..f369c14bafe5 100644
--- a/sgl-router/bindings/python/pyproject.toml
+++ b/sgl-router/bindings/python/pyproject.toml
@@ -40,6 +40,11 @@ dev = [
     "requests>=2.25.0",
 ]
 
+[project.scripts]
+smg = "sglang_router.cli:main"
+amg = "sglang_router.cli:main"
+sglang-router = "sglang_router.cli:main"
+
 [tool.maturin]
 python-source = "."
 
diff --git a/sgl-router/bindings/python/sglang_router/__main__.py b/sgl-router/bindings/python/sglang_router/__main__.py
new file mode 100644
index 000000000000..02211dc69c4d
--- /dev/null
+++ b/sgl-router/bindings/python/sglang_router/__main__.py
@@ -0,0 +1,8 @@
+"""
+Allow running the CLI via: python -m sglang_router
+"""
+
+from sglang_router.cli import main
+
+if __name__ == "__main__":
+    main()
diff --git a/sgl-router/bindings/python/sglang_router/cli.py b/sgl-router/bindings/python/sglang_router/cli.py
new file mode 100755
index 000000000000..4d69a35658aa
--- /dev/null
+++ b/sgl-router/bindings/python/sglang_router/cli.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""
+SGLang Model Gateway CLI
+
+Provides convenient command-line interface for launching the router and server.
+
+Usage:
+    smg launch [args]    # Launch router only
+    smg server [args]    # Launch router + server
+    smg --help           # Show help
+"""
+
+import argparse
+import os
+import sys
+from typing import List, Optional
+
+
+def create_parser() -> argparse.ArgumentParser:
+    """Create the main CLI parser with subcommands.
+
+    ``main`` uses this parser to render the top-level help text; arguments
+    that follow a subcommand are forwarded to the launchers without being
+    parsed here.
+    """
+    prog_name = os.path.basename(sys.argv[0]) if sys.argv else "smg"
+    parser = argparse.ArgumentParser(
+        prog=prog_name,
+        description="SGLang Model Gateway - High-performance inference router",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    subparsers = parser.add_subparsers(dest="command", help="Available commands")
+
+    # Launch router subcommand
+    launch_parser = subparsers.add_parser(
+        "launch",
+        help="Launch router only (requires existing worker URLs)",
+        description="Launch the SGLang router with existing worker instances",
+    )
+    launch_parser.add_argument(
+        "router_args",
+        nargs="*",
+        help="Arguments to pass to the router (use -- to separate)",
+    )
+
+    # Launch server + router subcommand
+    server_parser = subparsers.add_parser(
+        "server",
+        help="Launch router and server processes together",
+        description="Launch both SGLang router and server processes",
+    )
+    server_parser.add_argument(
+        "server_args",
+        nargs="*",
+        help="Arguments to pass to server/router (use -- to separate)",
+    )
+
+    return parser
+
+
+def main(argv: Optional[List[str]] = None) -> None:
+    """Main CLI entry point.
+
+    Dispatches ``launch`` to the router launcher and ``server`` to the
+    combined router + server launcher. Everything after the subcommand is
+    forwarded verbatim and in its original order (including ``-h``/``--help``,
+    which the launcher's own parser is expected to answer); a single leading
+    ``--`` separator is accepted and dropped.
+
+    Args:
+        argv: Arguments without the program name. Defaults to ``sys.argv[1:]``.
+
+    Raises:
+        SystemExit: With status 1 when no known subcommand is given, and with
+            status 0 for top-level ``-h``/``--help``.
+    """
+    if argv is None:
+        argv = sys.argv[1:]
+
+    parser = create_parser()
+
+    # Handle empty or unknown command - show help
+    if not argv or argv[0] not in ["launch", "server", "-h", "--help"]:
+        parser.print_help()
+        sys.exit(1)
+
+    command = argv[0]
+    if command in ("-h", "--help"):
+        parser.print_help()
+        sys.exit(0)
+
+    # Forward the remaining arguments untouched. Routing them through the
+    # subparser's nargs="*" positional splits flags from their values (flags
+    # land in the "unknown" list, values in the positional), so
+    # "--worker-urls a b" would reach the launcher as "a b --worker-urls".
+    passthrough = argv[1:]
+    if passthrough and passthrough[0] == "--":
+        passthrough = passthrough[1:]
+
+    if command == "launch":
+        # Imported only on this path
+        from sglang_router.launch_router import launch_router, parse_router_args
+
+        launch_router(parse_router_args(passthrough))
+        return
+
+    # command == "server"
+    # Note: launch_server.main() uses argparse internally which reads sys.argv,
+    # so sys.argv is swapped for the duration of the call.
+    import sglang_router.launch_server as launch_server_module
+
+    original_argv = sys.argv
+    try:
+        prog_name = os.path.basename(sys.argv[0]) if sys.argv else "smg"
+        sys.argv = [f"{prog_name} server"] + passthrough
+        launch_server_module.main()
+    finally:
+        # Restore original sys.argv
+        sys.argv = original_argv
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sgl-router/src/main.rs b/sgl-router/src/main.rs
index b8e7839bea82..724d643df6fa 100644
--- a/sgl-router/src/main.rs
+++ b/sgl-router/src/main.rs
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
 
-use clap::{ArgAction, Parser, ValueEnum};
+use clap::{ArgAction, Parser, Subcommand, ValueEnum};
 use sglang_router_rs::{
     config::{
         CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, HealthCheckConfig,
@@ -71,29 +71,31 @@ impl std::fmt::Display for Backend {
 }
 
 #[derive(Parser, Debug)]
-#[command(name = "sglang-router")]
-#[command(about = "SGLang Router - High-performance request distribution across worker nodes")]
+#[command(name = "sglang-router", alias = "smg", alias = "amg")]
+#[command(about = "SGLang Model Gateway - High-performance inference gateway")]
+#[command(args_conflicts_with_subcommands = true)]
 #[command(long_about = r#"
-SGLang Router - High-performance request distribution across worker nodes
+SGLang Model Gateway - Rust-based inference gateway
 
 Usage:
-This launcher enables starting a router with individual worker instances. It is useful for
-multi-node setups or when you want to start workers and router separately.
+  smg launch [OPTIONS]     Launch router (short command)
+  amg launch [OPTIONS]     Launch router (alternative)
+  sglang-router [OPTIONS]  Launch router (full name)
 
 Examples:
   # Regular mode
-  sglang-router --worker-urls http://worker1:8000 http://worker2:8000
+  smg launch --worker-urls http://worker1:8000 http://worker2:8000
 
-  # PD disaggregated mode with same policy for both
-  sglang-router --pd-disaggregation \
+  # PD disaggregated mode
+  smg launch --pd-disaggregation \
     --prefill http://127.0.0.1:30001 9001 \
     --prefill http://127.0.0.2:30002 9002 \
     --decode http://127.0.0.3:30003 \
     --decode http://127.0.0.4:30004 \
     --policy cache_aware
 
-  # PD mode with different policies for prefill and decode
-  sglang-router --pd-disaggregation \
+  # With different policies
+  smg launch --pd-disaggregation \
     --prefill http://127.0.0.1:30001 9001 \
     --prefill http://127.0.0.2:30002 \
     --decode http://127.0.0.3:30003 \
@@ -101,6 +103,29 @@ Examples:
     --prefill-policy cache_aware --decode-policy power_of_two
 
 "#)]
+// Top-level CLI. Accepts either a subcommand (`smg launch ...`) or, for
+// backward compatibility, the router flags directly (`sglang-router ...`);
+// `args_conflicts_with_subcommands` above keeps the two forms exclusive.
+struct Cli {
+    #[command(subcommand)]
+    command: Option<Commands>,
+
+    // Router flags given without a subcommand (legacy invocation style).
+    #[command(flatten)]
+    router_args: CliArgs,
+}
+
+#[derive(Subcommand, Debug)]
+enum Commands {
+    /// Launch the router (same as running without subcommand)
+    #[command(visible_alias = "start")]
+    Launch {
+        #[command(flatten)]
+        args: CliArgs,
+    },
+}
+
+#[derive(Parser, Debug)]
 struct CliArgs {
     #[arg(long, default_value = "0.0.0.0")]
     host: String,
@@ -683,7 +708,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
     }
 
-    let cli_args = CliArgs::parse_from(filtered_args);
+    let cli = Cli::parse_from(filtered_args);
+
+    // Handle subcommands or use direct args
+    let cli_args = match cli.command {
+        Some(Commands::Launch { args }) => args,
+        None => cli.router_args,
+    };
 
     println!("SGLang Router starting...");
     println!("Host: {}:{}", cli_args.host, cli_args.port);