Commit 4ea8dff

Enlarge scheduler and slot manager capacity under disagg bs == 1
1 parent: fcd5706

File tree

1 file changed: +8 −0 lines changed

tensorrt_llm/_torch/pyexecutor/_util.py

@@ -506,6 +506,10 @@ def create_py_executor_instance(
             lora_config.trtllm_modules_to_hf_modules)
 
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_batch_size == 1, attention dp dummy request will prevent the scheduling of DISAGG_GENERATION_INIT.
+    # Enlarge slot and scheduler capacity to avoid starvation.
+    if executor_config.max_batch_size == 1:
+        max_num_sequences += mapping.pp_size
 
     resources[ResourceManagerType.SEQ_SLOT_MANAGER] = SeqSlotManager(
         max_num_sequences)
@@ -558,6 +562,10 @@ def create_py_executor_instance(
 def create_torch_sampler_args(executor_config: ExecutorConfig, mapping: Mapping,
                               *, max_seq_len: int, enable_mixed_sampler: bool):
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_batch_size == 1, attention dp dummy request will prevent the scheduling of DISAGG_GENERATION_INIT.
+    # Enlarge sampler size to align with slot and scheduler capacity.
+    if executor_config.max_batch_size == 1:
+        max_num_sequences += mapping.pp_size
     max_draft_len = (0 if executor_config.speculative_config is None else
                      executor_config.speculative_config.max_draft_len)
     return TorchSampler.Args(
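For context, the bump appears to be sized as pp_size because capacity is allocated per pipeline stage, so a one-slot reserve has to be replicated across stages. Below is a minimal, self-contained sketch of the resulting computation. ExecutorConfig and Mapping here are simplified stand-ins for the real TensorRT-LLM types, and effective_max_num_sequences is a hypothetical helper added for illustration, not a function in the repository.

    from dataclasses import dataclass

    @dataclass
    class ExecutorConfig:  # stand-in: only the field the computation reads
        max_batch_size: int

    @dataclass
    class Mapping:  # stand-in: only the field the computation reads
        pp_size: int

    def effective_max_num_sequences(executor_config: ExecutorConfig,
                                    mapping: Mapping) -> int:
        # Base capacity: one sequence slot per batch entry per pipeline stage.
        max_num_sequences = executor_config.max_batch_size * mapping.pp_size
        # With max_batch_size == 1, the attention dp dummy request would occupy
        # the only slot and starve DISAGG_GENERATION_INIT, so reserve one extra
        # slot per pipeline stage (mirrors both hunks in the diff above).
        if executor_config.max_batch_size == 1:
            max_num_sequences += mapping.pp_size
        return max_num_sequences

    # bs == 1 with a 2-stage pipeline: 1 * 2 = 2 slots, enlarged to 4.
    assert effective_max_num_sequences(ExecutorConfig(max_batch_size=1),
                                       Mapping(pp_size=2)) == 4
    # Larger batch sizes are unaffected: 8 * 2 = 16 slots.
    assert effective_max_num_sequences(ExecutorConfig(max_batch_size=8),
                                       Mapping(pp_size=2)) == 16

The same value feeds both SeqSlotManager and TorchSampler.Args, which is why the commit applies the identical adjustment in both hunks: a sampler sized smaller than the slot manager could still reject the extra request.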
