diff --git a/python/sglang/srt/mem_cache/memory_pool_host.py b/python/sglang/srt/mem_cache/memory_pool_host.py index 276070b51897..726a419c73a8 100644 --- a/python/sglang/srt/mem_cache/memory_pool_host.py +++ b/python/sglang/srt/mem_cache/memory_pool_host.py @@ -1208,7 +1208,7 @@ def _load_indexer_to_device_per_layer( ) else: raise ValueError(f"Unsupported layout: {self.layout}") - else: + elif io_backend == "direct": if self.layout == "layer_first": transfer_kv_direct( src_layers=[self.index_k_with_scale_buffer[layer_id]], @@ -1228,6 +1228,8 @@ def _load_indexer_to_device_per_layer( ) else: raise ValueError(f"Unsupported layout: {self.layout}") + else: + raise ValueError(f"Unsupported IO backend: {io_backend}") def _backup_indexer_from_device_all_layer( self, device_pool, host_indices, device_indices, io_backend @@ -1258,7 +1260,7 @@ def _backup_indexer_from_device_all_layer( ) else: raise ValueError(f"Unsupported layout: {self.layout}") - else: + elif io_backend == "direct": if self.layout == "layer_first": transfer_kv_direct( src_layers=device_pool.index_k_with_scale_buffer, @@ -1277,6 +1279,8 @@ def _backup_indexer_from_device_all_layer( ) else: raise ValueError(f"Unsupported layout: {self.layout}") + else: + raise ValueError(f"Unsupported IO backend: {io_backend}") def load_to_device_per_layer( self,