1 file changed, +8 −0 lines: tensorrt_llm/_torch/pyexecutor

@@ -506,6 +506,10 @@ def create_py_executor_instance(
             lora_config.trtllm_modules_to_hf_modules)
 
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_batch_size == 1, the attention DP dummy request will prevent DISAGG_GENERATION_INIT from being scheduled.
+    # Enlarge slot and scheduler capacity to avoid starvation.
+    if executor_config.max_batch_size == 1:
+        max_num_sequences += mapping.pp_size
 
     resources[ResourceManagerType.SEQ_SLOT_MANAGER] = SeqSlotManager(
         max_num_sequences)
@@ -558,6 +562,10 @@ def create_py_executor_instance(
 def create_torch_sampler_args(executor_config: ExecutorConfig, mapping: Mapping,
                               *, max_seq_len: int, enable_mixed_sampler: bool):
     max_num_sequences = executor_config.max_batch_size * mapping.pp_size
+    # When max_batch_size == 1, the attention DP dummy request will prevent DISAGG_GENERATION_INIT from being scheduled.
+    # Enlarge the sampler size to align with slot and scheduler capacity.
+    if executor_config.max_batch_size == 1:
+        max_num_sequences += mapping.pp_size
     max_draft_len = (0 if executor_config.speculative_config is None else
                      executor_config.speculative_config.max_draft_len)
     return TorchSampler.Args(