diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 2c52825a47ad..7d2377227392 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -2360,6 +2360,13 @@ def _get_new_batch_prefill_raw( ) > 0 or (not self.running_batch.is_empty()) else: self.running_batch.batch_is_full = True + # revert matched mamba idx to avoid memory leak, if req is not added + added = len(adder.can_run_list) > 0 and req is adder.can_run_list[-1] + if not added and req.mamba_pool_idx is not None: + self.tree_cache.req_to_token_pool.mamba_pool.free( + req.mamba_pool_idx.unsqueeze(-1) + ) + req.mamba_pool_idx = None break # Update waiting queue