From e78792b9bcfd3421d7df255e1f191b91c7194ae3 Mon Sep 17 00:00:00 2001 From: Kangyan Zhou Date: Sat, 28 Feb 2026 21:11:41 -0800 Subject: [PATCH] [Bugfix] Add missing auto_create_handle_loop to communicator methods The handle_loop asyncio task in TokenizerManager is responsible for receiving responses from schedulers via ZMQ and dispatching them to the appropriate _Communicator. However, handle_loop is lazily started by auto_create_handle_loop(), and several communicator methods were missing this call. This caused /server_info (and other endpoints like /flush_cache, /get_loads) to hang indefinitely when called on freshly-started servers that had not yet processed any inference request. Because no inference request had triggered auto_create_handle_loop() yet, the scheduler responses were never received. This is particularly critical for PD disaggregation setups where the sglang router's service discovery calls /server_info as the very first interaction with worker pods during the discover_metadata step, before any generate request is sent. 
Co-Authored-By: Claude Opus 4.6 --- .../sglang/srt/managers/tokenizer_communicator_mixin.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/sglang/srt/managers/tokenizer_communicator_mixin.py b/python/sglang/srt/managers/tokenizer_communicator_mixin.py index d6892431ed88..e08e0955bc2f 100644 --- a/python/sglang/srt/managers/tokenizer_communicator_mixin.py +++ b/python/sglang/srt/managers/tokenizer_communicator_mixin.py @@ -344,10 +344,12 @@ def _get_communicator_dispatcher(self: TokenizerManager): ) async def flush_cache(self: TokenizerManager) -> FlushCacheReqOutput: + self.auto_create_handle_loop() return (await self.flush_cache_communicator(FlushCacheReqInput()))[0] async def clear_hicache_storage(self: TokenizerManager) -> ClearHiCacheReqOutput: """Clear the hierarchical cache storage.""" + self.auto_create_handle_loop() # Delegate to the scheduler to handle HiCacheStorage clearing return (await self.clear_hicache_storage_communicator(ClearHiCacheReqInput()))[ 0 @@ -361,6 +363,7 @@ async def attach_hicache_storage( hicache_write_policy: Optional[str] = None, ) -> AttachHiCacheStorageReqOutput: """Attach (enable) HiCache storage backend at runtime.""" + self.auto_create_handle_loop() results = await self.attach_hicache_storage_communicator( AttachHiCacheStorageReqInput( hicache_storage_backend=hicache_storage_backend, @@ -392,6 +395,7 @@ async def detach_hicache_storage( self: TokenizerManager, ) -> DetachHiCacheStorageReqOutput: """Detach (disable) HiCache storage backend at runtime.""" + self.auto_create_handle_loop() results = await self.detach_hicache_storage_communicator( DetachHiCacheStorageReqInput() ) @@ -855,6 +859,7 @@ async def slow_down( await self.slow_down_communicator(obj) async def get_internal_state(self: TokenizerManager) -> List[Dict[Any, Any]]: + self.auto_create_handle_loop() req = GetInternalStateReq() responses: List[GetInternalStateReqOutput] = ( await self.get_internal_state_communicator(req) @@ -865,6 +870,7 @@ async 
def get_internal_state(self: TokenizerManager) -> List[Dict[Any, Any]]: async def set_internal_state( self: TokenizerManager, obj: SetInternalStateReq ) -> List[bool]: + self.auto_create_handle_loop() responses: List[SetInternalStateReqOutput] = ( await self.set_internal_state_communicator(obj) ) @@ -873,9 +879,11 @@ async def set_internal_state( async def dumper_control( self: TokenizerManager, obj: DumperControlReqInput ) -> List[DumperControlReqOutput]: + self.auto_create_handle_loop() return await self.dumper_control_communicator(obj) async def get_load(self: TokenizerManager) -> List[GetLoadReqOutput]: + self.auto_create_handle_loop() req = GetLoadReqInput() return await self.get_load_communicator(req) @@ -894,6 +902,7 @@ async def get_loads( Returns: List of GetLoadsReqOutput, one per scheduler (filtered by dp_rank if specified) """ + self.auto_create_handle_loop() req = GetLoadsReqInput( include=include if include else ["all"], dp_rank=dp_rank,