4 changes: 1 addition & 3 deletions tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py
@@ -243,9 +243,7 @@ def forward(
         return {"logits": logits_flat}


-def create_autodeploy_executor(
-    executor_config: ExecutorConfig, checkpoint_dir: str = None, engine_dir: str = None
-):
+def create_autodeploy_executor(executor_config: ExecutorConfig, checkpoint_dir: str = None):
     """Create an AutoDeploy executor from the given configuration and checkpoint directory.

     This is the entrypoint API to the _autodeploy backend.
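With the `engine_dir` parameter gone, the AutoDeploy executor is built from an executor config and a checkpoint directory alone. A minimal sketch of the updated call site, with an illustrative path (the config object is assumed to be prepared by the caller):

from tensorrt_llm._torch.auto_deploy.shim.ad_executor import create_autodeploy_executor

# executor_config is an ExecutorConfig built elsewhere; the path is a placeholder.
executor = create_autodeploy_executor(
    executor_config,
    checkpoint_dir="/models/llama-3-8b",
)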
3 changes: 0 additions & 3 deletions tensorrt_llm/_torch/pyexecutor/config.py
@@ -95,7 +95,6 @@ class PyTorchConfig:
     'tokens_per_block',
     'mapping',
     'hf_model_dir',
-    'trt_engine_dir',
 ]


@@ -107,7 +106,6 @@ def update_executor_config(
         build_config: Optional[BuildConfig] = None,
         speculative_config: Optional[SpecConfig] = None,
         hf_model_dir: Optional[str] = None,
-        trt_engine_dir: Optional[str] = None,
         max_input_len: Optional[int] = None,
         max_seq_len: Optional[int] = None):
     if backend is None:
@@ -131,7 +129,6 @@
     executor_config.tokens_per_block = executor_config.tokens_per_block or build_config.plugin_config.tokens_per_block

     executor_config.hf_model_dir = hf_model_dir
-    executor_config.trt_engine_dir = trt_engine_dir

     if max_input_len is not None:
         executor_config.max_input_len = max_input_len
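`update_executor_config` now forwards only the HF model directory; there is no engine directory to mirror onto the config. A hedged sketch of a call site after this change (argument values are placeholders; the keyword set is taken from the hunk above):

update_executor_config(
    executor_config,
    backend="pytorch",
    hf_model_dir="/models/llama-3-8b",  # the only model-location knob left
    max_input_len=2048,
    max_seq_len=4096,
)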
2 changes: 0 additions & 2 deletions tensorrt_llm/_torch/pyexecutor/llm_request.py
@@ -282,8 +282,6 @@ def __init__(
         self.is_cuda_graph_dummy = False
         self.py_lora_task_layer_module_configs = None

-        self.py_tokens = super().get_tokens()
-
         self.py_return_log_probs = return_log_probs
         self.py_return_context_logits = return_context_logits
         self.py_return_generation_logits = return_generation_logits
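Dropping the cached `py_tokens` means the request no longer snapshots the base class's tokens at construction time; readers presumably go through the inherited accessor so they always see tokens appended after `__init__`. A short illustration of that access pattern (the `request` object is assumed):

# Before: request.py_tokens was a copy taken in __init__ and could go stale.
# After: read the live token state through the inherited accessor.
tokens = request.get_tokens()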
1 change: 0 additions & 1 deletion tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
@@ -180,7 +180,6 @@ def _get_mapping(executor_config: ExecutorConfig) -> Mapping:
 def create_py_executor(
         executor_config: ExecutorConfig,
         checkpoint_dir: str = None,
-        engine_dir: str = None,
         lora_config: Optional[LoraConfig] = None,
         garbage_collection_gen0_threshold: Optional[int] = None) -> PyExecutor:
     _mangle_executor_config(executor_config)
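The PyTorch executor factory now matches the AutoDeploy one: config, checkpoint, and the optional LoRA and garbage-collection knobs, with no engine directory. A sketch of the reduced call, with placeholder values:

py_executor = create_py_executor(
    executor_config,
    checkpoint_dir="/models/llama-3-8b",      # illustrative path
    lora_config=None,                         # optional LoraConfig
    garbage_collection_gen0_threshold=20000,  # optional; the value here is arbitrary
)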
2 changes: 0 additions & 2 deletions tensorrt_llm/executor/worker.py
@@ -119,7 +119,6 @@ def _create_engine():
             args = {
                 "executor_config": executor_config,
                 "checkpoint_dir": executor_config.hf_model_dir,
-                "engine_dir": executor_config.trt_engine_dir,
             }
             if executor_config.backend == "pytorch":
                 from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
@@ -135,7 +134,6 @@ def _create_engine():
             else:
                 raise ValueError(
                     f"Unsupported backend config: {executor_config.backend}")
-
             return create_executor(**args)

         self.engine = _create_engine()
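Both factories now accept the same keyword set, so the worker can assemble one `args` dict and pick the factory by backend. A condensed sketch of that dispatch; the names come from this hunk, while the `_autodeploy` branch and its import are inferred from the shim module's docstring:

args = {
    "executor_config": executor_config,
    "checkpoint_dir": executor_config.hf_model_dir,
}
if executor_config.backend == "pytorch":
    from tensorrt_llm._torch.pyexecutor.py_executor_creator import create_py_executor
    create_executor = create_py_executor
elif executor_config.backend == "_autodeploy":  # assumed backend string
    from tensorrt_llm._torch.auto_deploy.shim.ad_executor import create_autodeploy_executor
    create_executor = create_autodeploy_executor
else:
    raise ValueError(f"Unsupported backend config: {executor_config.backend}")
engine = create_executor(**args)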
1 change: 0 additions & 1 deletion tensorrt_llm/llmapi/llm.py
@@ -688,7 +688,6 @@ def _build_model(self):
             if self._on_trt_backend else None,
             speculative_config=self.args.speculative_config,
             hf_model_dir=self._hf_model_dir,
-            trt_engine_dir=self._engine_dir,
             max_input_len=self.args.max_input_len,
             max_seq_len=max_seq_len)
         self._executor_config.llm_parallel_config = self.args.parallel_config
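None of this changes the user-facing API: `LLM` still resolves the checkpoint itself, and on the PyTorch backend there is no engine directory to point at. A hedged usage sketch with an illustrative model name:

from tensorrt_llm import LLM

# PyTorch-backend flow: only the HF checkpoint matters; no TRT engine dir is involved.
llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct")
outputs = llm.generate(["Hello, world"])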