From 78e78ec44db5c45473b0e85c5da9b322a4148374 Mon Sep 17 00:00:00 2001 From: Paul Saab Date: Tue, 10 Mar 2026 22:55:45 -0700 Subject: [PATCH 1/2] Wrap IPv6 addresses in gRPC, bench_serving, and log messages Use NetworkAddress.to_host_port_str() for proper IPv6 bracket wrapping in host:port strings across: - grpc_server.py (listen address, warmup URL, bootstrap log) - encode_grpc_server.py (listen address) - bench_serving.py (all URL construction) - multimodal_gen/bench_serving.py (base URL) - decode.py (bootstrap_addr construction) - encode_receiver.py (receive_url) - common.py (health check server log messages) - mooncake/conn.py (failure log message) --- python/sglang/bench_serving.py | 27 +++++++++---------- .../benchmarks/bench_serving.py | 3 ++- python/sglang/srt/disaggregation/decode.py | 3 ++- .../srt/disaggregation/encode_grpc_server.py | 2 +- .../srt/disaggregation/encode_receiver.py | 10 +++++-- .../srt/disaggregation/mooncake/conn.py | 3 ++- python/sglang/srt/utils/common.py | 8 ++++-- 7 files changed, 34 insertions(+), 22 deletions(-) diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py index 4e79ece7e3bc..dc88ff685b42 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -44,6 +44,7 @@ remove_prefix, set_ulimit, ) +from sglang.srt.utils.network import NetworkAddress _ROUTING_KEY_HEADER = "X-SMG-Routing-Key" @@ -1726,10 +1727,12 @@ def run_benchmark(args_: argparse.Namespace): "truss": 8080, }.get(args.backend, 30000) + # Build base URL with proper IPv6 bracket wrapping + _na = NetworkAddress(args.host, args.port) + _host_base = _na.to_url() + model_url = ( - f"{args.base_url}/v1/models" - if args.base_url - else f"http://{args.host}:{args.port}/v1/models" + f"{args.base_url}/v1/models" if args.base_url else f"{_host_base}/v1/models" ) if args.backend == "sglang-embedding": @@ -1740,43 +1743,39 @@ def run_benchmark(args_: argparse.Namespace): ) elif args.backend in ["sglang", "sglang-native"]: api_url = ( - f"{args.base_url}/generate" - if args.base_url - else f"http://{args.host}:{args.port}/generate" + f"{args.base_url}/generate" if args.base_url else f"{_host_base}/generate" ) elif args.backend in ["sglang-oai", "vllm", "lmdeploy"]: api_url = ( f"{args.base_url}/v1/completions" if args.base_url - else f"http://{args.host}:{args.port}/v1/completions" + else f"{_host_base}/v1/completions" ) elif args.backend in ["sglang-oai-chat", "vllm-chat", "lmdeploy-chat"]: api_url = ( f"{args.base_url}/v1/chat/completions" if args.base_url - else f"http://{args.host}:{args.port}/v1/chat/completions" + else f"{_host_base}/v1/chat/completions" ) elif args.backend == "trt": api_url = ( f"{args.base_url}/v2/models/ensemble/generate_stream" if args.base_url - else f"http://{args.host}:{args.port}/v2/models/ensemble/generate_stream" + else f"{_host_base}/v2/models/ensemble/generate_stream" ) if args.model is None: print("Please provide a model using `--model` when using `trt` backend.") sys.exit(1) elif args.backend == "gserver": - api_url = args.base_url if args.base_url else f"{args.host}:{args.port}" + api_url = args.base_url if args.base_url else _na.to_host_port_str() args.model = args.model or "default" elif args.backend == "truss": api_url = ( f"{args.base_url}/v1/models/model:predict" if args.base_url - else f"http://{args.host}:{args.port}/v1/models/model:predict" + else f"{_host_base}/v1/models/model:predict" ) - base_url = ( - f"http://{args.host}:{args.port}" if args.base_url is None else args.base_url - ) + base_url = _host_base if args.base_url is None else args.base_url # Wait for server to be ready if args.ready_check_timeout_sec > 0: diff --git a/python/sglang/multimodal_gen/benchmarks/bench_serving.py b/python/sglang/multimodal_gen/benchmarks/bench_serving.py index d4910473d77f..7baa0e129dd0 100644 --- a/python/sglang/multimodal_gen/benchmarks/bench_serving.py +++ b/python/sglang/multimodal_gen/benchmarks/bench_serving.py @@ -39,6 +39,7 @@ init_logger, ) from sglang.multimodal_gen.test.test_utils import print_divider, print_value_formatted +from sglang.srt.utils.network import NetworkAddress logger = init_logger(__name__) @@ -457,7 +458,7 @@ async def benchmark(args): # Construct base_url if not provided if args.base_url is None: - args.base_url = f"http://{args.host}:{args.port}" + args.base_url = NetworkAddress(args.host, args.port).to_url() # Wait for service wait_for_service(args.base_url) diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py index b64fa4ff926f..360e87d80715 100644 --- a/python/sglang/srt/disaggregation/decode.py +++ b/python/sglang/srt/disaggregation/decode.py @@ -66,6 +66,7 @@ set_schedule_time_batch, set_time_batch, ) +from sglang.srt.utils.network import NetworkAddress from sglang.srt.utils.torch_memory_saver_adapter import TorchMemorySaverAdapter logger = logging.getLogger(__name__) @@ -87,7 +88,7 @@ def _is_fake_transfer(req: Req, server_args: ServerArgs) -> bool: def _bootstrap_addr(req: Req) -> str: # FIXME: make a property of a req - return f"{req.bootstrap_host}:{req.bootstrap_port}" + return NetworkAddress(req.bootstrap_host, req.bootstrap_port).to_host_port_str() class DecodeReqToTokenPool: diff --git a/python/sglang/srt/disaggregation/encode_grpc_server.py b/python/sglang/srt/disaggregation/encode_grpc_server.py index b7d2e5f03919..033520093b58 100644 --- a/python/sglang/srt/disaggregation/encode_grpc_server.py +++ b/python/sglang/srt/disaggregation/encode_grpc_server.py @@ -258,7 +258,7 @@ async def serve_grpc_encoder(server_args: ServerArgs): ) reflection.enable_server_reflection(SERVICE_NAMES, server) - listen_addr = f"{server_args.host}:{server_args.port}" + listen_addr = NetworkAddress(server_args.host, server_args.port).to_host_port_str() server.add_insecure_port(listen_addr) await server.start() diff --git a/python/sglang/srt/disaggregation/encode_receiver.py b/python/sglang/srt/disaggregation/encode_receiver.py index fa1ea0d4b83b..bb9fbf4aa487 100644 --- a/python/sglang/srt/disaggregation/encode_receiver.py +++ b/python/sglang/srt/disaggregation/encode_receiver.py @@ -29,7 +29,11 @@ from sglang.srt.server_args import ServerArgs from sglang.srt.utils import ImageData from sglang.srt.utils.hf_transformers_utils import get_processor -from sglang.srt.utils.network import get_local_ip_auto, get_zmq_socket_on_host +from sglang.srt.utils.network import ( + NetworkAddress, + get_local_ip_auto, + get_zmq_socket_on_host, +) logger = logging.getLogger(__name__) @@ -447,7 +451,9 @@ async def send_embedding_port(req_id, receive_count, host_name, embedding_port): payload = { "req_id": part_req_id, # use part_req_id to match encode request "receive_count": receive_count, - "receive_url": f"{host_name}:{embedding_port}", + "receive_url": NetworkAddress( + host_name, embedding_port + ).to_host_port_str(), "modality": modality.name, } logger.info( diff --git a/python/sglang/srt/disaggregation/mooncake/conn.py b/python/sglang/srt/disaggregation/mooncake/conn.py index 15e815e696c2..fd7090b2ecb8 100644 --- a/python/sglang/srt/disaggregation/mooncake/conn.py +++ b/python/sglang/srt/disaggregation/mooncake/conn.py @@ -857,7 +857,8 @@ def transfer_worker( ) self.record_failure( kv_chunk.room, - f"Failed to send kv chunk of {kv_chunk.room} to {req.endpoint}:{req.dst_port}", + f"Failed to send kv chunk of {kv_chunk.room} to " + f"{NetworkAddress(req.endpoint, req.dst_port).to_host_port_str()}", ) self.update_status(kv_chunk.room, KVPoll.Failed) self.sync_status_to_decode_endpoint( diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py index a4a90b5b1d03..ab2e9f94aa57 100644 --- a/python/sglang/srt/utils/common.py +++ b/python/sglang/srt/utils/common.py @@ -2371,6 +2371,8 @@ def launch_dummy_health_check_server(host, port, enable_metrics): import uvicorn from fastapi import FastAPI, Response + from sglang.srt.utils.network import NetworkAddress + app = FastAPI() @app.get("/ping") @@ -2413,14 +2415,16 @@ def run_server(): logger.error(f"Dummy health check server failed to start: {e}") raise finally: - logger.info(f"Dummy health check server stopped at {host}:{port}") + logger.info( + f"Dummy health check server stopped at {NetworkAddress(host, port).to_host_port_str()}" + ) thread = threading.Thread( target=run_server, daemon=True, name="health-check-server" ) thread.start() logger.info( - f"Dummy health check server started in background thread at {host}:{port}" + f"Dummy health check server started in background thread at {NetworkAddress(host, port).to_host_port_str()}" ) From 6d4bf7e8aec1b49ab75409895491081acb8dc72b Mon Sep 17 00:00:00 2001 From: hnyls2002 Date: Mon, 23 Mar 2026 17:26:26 -0700 Subject: [PATCH 2/2] Fix NetworkAddress crash when host is None (base_url provided) Only construct NetworkAddress from host/port when base_url is not provided. Tests pass base_url directly with host=None, which caused AttributeError in NetworkAddress.__post_init__. --- python/sglang/bench_serving.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py index dc88ff685b42..ac3798f7b65d 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -1727,9 +1727,13 @@ def run_benchmark(args_: argparse.Namespace): "truss": 8080, }.get(args.backend, 30000) - # Build base URL with proper IPv6 bracket wrapping - _na = NetworkAddress(args.host, args.port) - _host_base = _na.to_url() + # Build base URL with proper IPv6 bracket wrapping (only when base_url is not provided) + if not args.base_url: + _na = NetworkAddress(args.host, args.port) + _host_base = _na.to_url() + else: + _na = None + _host_base = None model_url = ( f"{args.base_url}/v1/models" if args.base_url else f"{_host_base}/v1/models"