diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
index 3ca099a213d..df4a29e8213 100644
--- a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
+++ b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
@@ -14,9 +14,7 @@
 from tensorrt_llm._utils import get_sm_version
 from tensorrt_llm.bindings.executor import (CapacitySchedulerPolicy,
                                             ContextChunkingPolicy,
-                                            ExecutorConfig,
-                                            LogitsPostProcessorConfig,
-                                            ParallelConfig)
+                                            ExecutorConfig)
 from tensorrt_llm.bindings.internal.batch_manager import ContextChunkingConfig
 from tensorrt_llm.llmapi.llm_args import KvCacheConnectorConfig, TorchLlmArgs
 from tensorrt_llm.llmapi.tokenizer import TokenizerBase
@@ -217,14 +215,9 @@ def create_py_executor(
     tokenizer: Optional[TokenizerBase] = None,
     lora_config: Optional[LoraConfig] = None,
     kv_connector_config: Optional[KvCacheConnectorConfig] = None,
-    logits_post_processor_config: Optional[LogitsPostProcessorConfig] = None,
-    parallel_config: Optional[ParallelConfig] = None,
 ) -> PyExecutor:
 
     executor_config = llm_args.get_executor_config(checkpoint_dir, tokenizer)
-    executor_config.logits_post_processor_config = logits_post_processor_config
-    executor_config.parallel_config = parallel_config
-
     garbage_collection_gen0_threshold = llm_args.garbage_collection_gen0_threshold
 
     _mangle_executor_config(executor_config)
diff --git a/tensorrt_llm/executor/worker.py b/tensorrt_llm/executor/worker.py
index 51b9f4832e7..f4cd66d6f7d 100644
--- a/tensorrt_llm/executor/worker.py
+++ b/tensorrt_llm/executor/worker.py
@@ -113,6 +113,7 @@ def _create_py_executor():
             assert hasattr(
                 self.llm_args, "backend"
             ), "llm_args should be with backend in _create_py_executor"
+            _ = _get_comm_ranks_device_id()
             if self.llm_args.backend == "pytorch":
                 from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
                     create_py_executor
@@ -122,13 +123,6 @@ def _create_py_executor():
                 args["tokenizer"] = tokenizer
                 args["lora_config"] = lora_config
                 args["kv_connector_config"] = kv_connector_config
-                args[
-                    "logits_post_processor_config"] = tllm.LogitsPostProcessorConfig(
-                        processor_batched=batched_logits_processor,
-                        replicate=False)
-                comm_ranks, device_ids = _get_comm_ranks_device_id()
-                args["parallel_config"] = tllm.ParallelConfig(
-                    participant_ids=comm_ranks, device_ids=device_ids)
             elif self.llm_args.backend == "_autodeploy":
                 from tensorrt_llm._torch.auto_deploy.llm_args import \
                     LlmArgs as ADLlmArgs
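
Note on the retained bare call "_ = _get_comm_ranks_device_id()": its result is discarded, so the call appears to be kept purely for a side effect. Since the removed lines previously fed the helper's comm ranks and device ids into tllm.ParallelConfig across ranks, a plausible reading is that the helper performs an MPI collective that every rank must still enter, otherwise peer ranks would block. A minimal sketch of that pattern, using hypothetical mpi4py-based names rather than the actual worker.py implementation:

    from mpi4py import MPI

    def _get_comm_ranks_device_id():
        comm = MPI.COMM_WORLD
        # allgather is a collective: every rank must call it, even a caller
        # that discards the result; otherwise the remaining ranks hang.
        comm_ranks = comm.allgather(comm.Get_rank())
        device_ids = comm.allgather(comm.Get_rank() % 8)  # illustrative mapping
        return comm_ranks, device_ids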