diff --git a/deepspeed/inference/engine.py b/deepspeed/inference/engine.py index 54f2d6bb4390..b22325d1d0e4 100755 --- a/deepspeed/inference/engine.py +++ b/deepspeed/inference/engine.py @@ -126,7 +126,7 @@ def __init__(self, model, config): self._apply_injection_policy(config, client_module) elif config.replace_method == 'auto': self._apply_injection_policy(config) - else: + elif not config.replace_with_kernel_inject: # Automatic Tensor Parallelism parser_dict = AutoTP.tp_parser(model) for client_module, injection_policy in parser_dict: