-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Init PD Rust LB (PO2) #6437
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Init PD Rust LB (PO2) #6437
Changes from 28 commits
Commits
Show all changes
39 commits
Select commit
Hold shift + click to select a range
e92d0cc
rust lb init
hnyls2002 6b65b4c
add timeout configurable
hnyls2002 6a2b515
udpate lb args
hnyls2002 64c4aef
optimize error handling
hnyls2002 25e0eca
Merge branch 'main' into lsyin-rust-lb
hnyls2002 d3b3da4
fake load report server
hnyls2002 d76b846
move code
hnyls2002 c0ea969
merge code
hnyls2002 a81dc9e
add post init check
hnyls2002 a7d0703
tmp fix for get_model_info
hnyls2002 d16248f
support batch in /generate
hnyls2002 be7cb59
fix
hnyls2002 417f35b
fix request type check
hnyls2002 94fedb3
fix get load
hnyls2002 807892d
split func
hnyls2002 b4331e7
fix launch_lb type hint
hnyls2002 41f4c56
update toml
hnyls2002 dd479ea
use anyhow server to handle error
hnyls2002 c2f8bb4
merge route and generate func
hnyls2002 f736a59
route one for get model info
hnyls2002 2b7bdf3
add proxy response struct
hnyls2002 95d23e1
fix token_ids is a list of list
hnyls2002 2975ce2
dispatch static req struct
hnyls2002 c726cdc
optimize error and collect
hnyls2002 0161989
remove duplicate clone() on server end
hnyls2002 b647b2a
add fixme
hnyls2002 74129a6
remove scripts
hnyls2002 55d2831
Merge branch 'main' into lsyin-rust-lb
hnyls2002 b86c21c
Merge branch 'main' into lsyin-rust-lb
zhyncs 0c6440f
fix typo
hnyls2002 72310fd
add missing service
hnyls2002 a4ec1e1
use generics
hnyls2002 9c2c1fb
simplify code
hnyls2002 09a2ded
rename: ProxyResponseType -> ProxyResponseBody
hnyls2002 fba68af
optimize build method
hnyls2002 ca49a1a
use reqwest::Method
hnyls2002 2ec0af8
nits optimize
hnyls2002 6dbbb1a
move lb_state out of server.rs
hnyls2002 b501141
Merge branch 'main' into lsyin-rust-lb
hnyls2002 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,140 @@ | ||
| import argparse | ||
| import dataclasses | ||
|
|
||
|
|
||
@dataclasses.dataclass
class LBArgs:
    """Command-line configuration for the PD disaggregation load balancer.

    Instances are normally built from parsed CLI arguments via
    :meth:`from_cli_args`; :meth:`add_cli_args` registers the matching
    options on an ``argparse`` parser.

    Raises:
        ValueError: On construction, if a policy other than ``"random"`` is
            requested without ``rust_lb`` being enabled.
    """

    # Selects the Rust load balancer instead of the Python one.
    rust_lb: bool = False
    host: str = "0.0.0.0"
    port: int = 8000
    policy: str = "random"
    # List of (url, bootstrap_port) tuples; bootstrap_port is None when no
    # bootstrap ports were given on the command line.
    prefill_infos: list = dataclasses.field(default_factory=list)
    # List of decode server URLs.
    decode_infos: list = dataclasses.field(default_factory=list)
    # Seconds, per the --log-interval help text.
    log_interval: int = 5
    # Seconds, per the --timeout help text.
    timeout: int = 600

    @staticmethod
    def add_cli_args(parser: argparse.ArgumentParser):
        """Register the load balancer options on ``parser``."""
        parser.add_argument(
            "--rust-lb",
            action="store_true",
            help="Use Rust load balancer",
        )
        parser.add_argument(
            "--host",
            type=str,
            default=LBArgs.host,
            help=f"Host to bind the server (default: {LBArgs.host})",
        )
        parser.add_argument(
            "--port",
            type=int,
            default=LBArgs.port,
            help=f"Port to bind the server (default: {LBArgs.port})",
        )
        parser.add_argument(
            "--policy",
            type=str,
            default=LBArgs.policy,
            choices=["random", "po2"],
            help=f"Policy to use for load balancing (default: {LBArgs.policy})",
        )
        parser.add_argument(
            "--prefill",
            type=str,
            default=[],
            nargs="+",
            help="URLs for prefill servers",
        )
        parser.add_argument(
            "--decode",
            type=str,
            default=[],
            nargs="+",
            help="URLs for decode servers",
        )
        parser.add_argument(
            "--prefill-bootstrap-ports",
            type=int,
            nargs="+",
            help="Bootstrap ports for prefill servers",
        )
        parser.add_argument(
            "--log-interval",
            type=int,
            default=LBArgs.log_interval,
            help=f"Log interval in seconds (default: {LBArgs.log_interval})",
        )
        parser.add_argument(
            "--timeout",
            type=int,
            default=LBArgs.timeout,
            help=f"Timeout in seconds (default: {LBArgs.timeout})",
        )

    @classmethod
    def from_cli_args(cls, args: argparse.Namespace) -> "LBArgs":
        """Build an ``LBArgs`` from a namespace produced by ``add_cli_args``.

        Each prefill URL is paired with a bootstrap port:

        * no ports given: every URL is paired with ``None``;
        * exactly one port given: that port is reused for every URL;
        * otherwise the number of ports must equal the number of URLs.

        Raises:
            ValueError: If more than one bootstrap port is given and the
                count differs from the number of prefill URLs, or if the
                resulting configuration is rejected by ``__post_init__``.
        """
        bootstrap_ports = args.prefill_bootstrap_ports
        if bootstrap_ports is None:
            bootstrap_ports = [None] * len(args.prefill)
        elif len(bootstrap_ports) == 1:
            # A single port is broadcast to all prefill servers.
            bootstrap_ports = bootstrap_ports * len(args.prefill)
        elif len(bootstrap_ports) != len(args.prefill):
            raise ValueError(
                "Number of prefill URLs must match number of bootstrap ports"
            )

        prefill_infos = list(zip(args.prefill, bootstrap_ports))

        return cls(
            rust_lb=args.rust_lb,
            host=args.host,
            port=args.port,
            policy=args.policy,
            prefill_infos=prefill_infos,
            decode_infos=args.decode,
            log_interval=args.log_interval,
            timeout=args.timeout,
        )

    def __post_init__(self):
        """Reject policies that need the Rust balancer when it is disabled."""
        # Raised explicitly rather than asserted: assert statements are
        # removed under ``python -O``, which would let an unsupported policy
        # through silently.
        if not self.rust_lb and self.policy != "random":
            raise ValueError(
                "Only random policy is supported for Python load balancer"
            )
|
|
||
|
|
||
def main():
    """Parse the command line and start the selected load balancer.

    With ``--rust-lb`` the Rust ``LoadBalancer`` from ``sgl_pdlb._rust`` is
    constructed and started; otherwise the Python ``run`` entry point from
    ``sglang.srt.disaggregation.mini_lb`` is used. Each backend is imported
    only on the branch that uses it.
    """
    cli = argparse.ArgumentParser(
        description="PD Disaggregation Load Balancer Server"
    )
    LBArgs.add_cli_args(cli)
    lb_args = LBArgs.from_cli_args(cli.parse_args())

    if not lb_args.rust_lb:
        from sglang.srt.disaggregation.mini_lb import PrefillConfig, run

        prefill_configs = []
        for prefill_url, bootstrap_port in lb_args.prefill_infos:
            prefill_configs.append(PrefillConfig(prefill_url, bootstrap_port))
        run(prefill_configs, lb_args.decode_infos, lb_args.host, lb_args.port)
        return

    from sgl_pdlb._rust import LoadBalancer as RustLB

    balancer = RustLB(
        host=lb_args.host,
        port=lb_args.port,
        policy=lb_args.policy,
        prefill_infos=lb_args.prefill_infos,
        decode_infos=lb_args.decode_infos,
        log_interval=lb_args.log_interval,
        timeout=lb_args.timeout,
    )
    balancer.start()


# Run the launcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| reorder_imports = true | ||
| reorder_modules = true |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| [package] | ||
| edition = "2024" | ||
| name = "sgl-pdlb" | ||
| version = "0.1.0" | ||
|
|
||
| [lib] | ||
| crate-type = ["cdylib", "rlib"] | ||
| name = "sgl_pdlb_rs" | ||
|
|
||
| [dependencies] | ||
| actix-web = "4.11" | ||
| bytes = "1.8.0" | ||
| chrono = "0.4.38" | ||
| clap = { version = "4.4", features = ["derive"] } | ||
| dashmap = "6.1.0" | ||
| env_logger = "0.11.5" | ||
| futures = "0.3" | ||
| futures-util = "0.3" | ||
| http = "1.3.1" | ||
| log = "0.4.22" | ||
| pyo3 = { version = "0.25.0", features = ["extension-module"] } | ||
| rand = "0.9.0" | ||
| reqwest = { version = "0.12.8", features = ["stream", "blocking", "json"] } | ||
| serde = { version = "1.0", features = ["derive"] } | ||
| serde_json = "1.0" | ||
| tokio = { version = "1.34", features = ["full"] } | ||
| anyhow = "1.0.98" | ||
| typetag = "0.2.20" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| ### Install dependencies | ||
|
|
||
| ```bash | ||
| pip install "maturin[patchelf]" | ||
| ``` | ||
|
|
||
| ### Build and install | ||
|
|
||
| ```bash | ||
| maturin develop | ||
| pip install -e . | ||
| ``` |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| __version__ = "0.0.1" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| [build-system] | ||
| requires = ["maturin>=1.8.0"] | ||
| build-backend = "maturin" | ||
|
|
||
| [project] | ||
| name = "sgl_pdlb" | ||
| version = "0.0.1" | ||
|
|
||
| [tool.maturin] | ||
| python-source = "py_src" | ||
| module-name = "sgl_pdlb._rust" | ||
|
|
||
| [tool.maturin.build-backend] | ||
| features = ["pyo3/extension-module"] |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.