Commit 4ea8dff

Enlarge scheduler and slot manager capacity under disagg bs == 1
1 parent: fcd5706

File tree

1 file changed: +8 −0 lines changed

tensorrt_llm/_torch/pyexecutor/_util.py

@@ -506,6 +506,10 @@ def create_py_executor_instance(
             lora_config.trtllm_modules_to_hf_modules)
 
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_batch_size == 1, attention dp dummy request will prevent the scheduling of DISAGG_GENERATION_INIT.
+    # Enlarge slot and scheduler capacity to avoid starvation.
+    if executor_config.max_batch_size == 1:
+        max_num_sequences += mapping.pp_size
 
     resources[ResourceManagerType.SEQ_SLOT_MANAGER] = SeqSlotManager(
         max_num_sequences)
@@ -558,6 +562,10 @@ def create_py_executor_instance(
 def create_torch_sampler_args(executor_config: ExecutorConfig, mapping: Mapping,
                               *, max_seq_len: int, enable_mixed_sampler: bool):
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_batch_size == 1, attention dp dummy request will prevent the scheduling of DISAGG_GENERATION_INIT.
+    # Enlarge sampler size to align with slot and scheduler capacity.
+    if executor_config.max_batch_size == 1:
+        max_num_sequences += mapping.pp_size
     max_draft_len = (0 if executor_config.speculative_config is None else
                      executor_config.speculative_config.max_draft_len)
     return TorchSampler.Args(
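For context, the bump appears to be sized as pp_size because capacity is allocated per pipeline stage, so a one-slot reserve has to be replicated across stages. Below is a minimal, self-contained sketch of the resulting computation. ExecutorConfig and Mapping here are simplified stand-ins for the real TensorRT-LLM types, and effective_max_num_sequences is a hypothetical helper added for illustration, not a function in the repository.

    from dataclasses import dataclass

    @dataclass
    class ExecutorConfig:  # stand-in: only the field the computation reads
        max_batch_size: int

    @dataclass
    class Mapping:  # stand-in: only the field the computation reads
        pp_size: int

    def effective_max_num_sequences(executor_config: ExecutorConfig,
                                    mapping: Mapping) -> int:
        # Base capacity: one sequence slot per batch entry per pipeline stage.
        max_num_sequences = executor_config.max_batch_size * mapping.pp_size
        # With max_batch_size == 1, the attention dp dummy request would occupy
        # the only slot and starve DISAGG_GENERATION_INIT, so reserve one extra
        # slot per pipeline stage (mirrors both hunks in the diff above).
        if executor_config.max_batch_size == 1:
            max_num_sequences += mapping.pp_size
        return max_num_sequences

    # bs == 1 with a 2-stage pipeline: 1 * 2 = 2 slots, enlarged to 4.
    assert effective_max_num_sequences(ExecutorConfig(max_batch_size=1),
                                       Mapping(pp_size=2)) == 4
    # Larger batch sizes are unaffected: 8 * 2 = 16 slots.
    assert effective_max_num_sequences(ExecutorConfig(max_batch_size=8),
                                       Mapping(pp_size=2)) == 16

The same value feeds both SeqSlotManager and TorchSampler.Args, which is why the commit applies the identical adjustment in both hunks: a sampler sized smaller than the slot manager could still reject the extra request.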
