# Patch summary (commentary only; the diff text below is unchanged).
#   * examples/auto_deploy/nano_v3.yaml: adds `allreduce_strategy: 'AUTO'`
#     directly after the context lines `transforms:`, `detect_sharding:`,
#     `sharding_dims: ['ep', 'bmm']` and before `manual_config:`.
#   * tensorrt_llm/_torch/auto_deploy/config/default.yaml: replaces
#     `allreduce_strategy: 'AUTO'` with `allreduce_strategy: 'NCCL'`, between
#     `sharding_dims: ['tp', 'ep', 'bmm']` and `dist_backend: auto`
#     (hunk header context: `transforms:`).
# The value added to nano_v3.yaml is the same one default.yaml carried before
# this patch.
# NOTE(review): presumably the explicit 'AUTO' keeps the nano_v3 example on its
# earlier strategy while the shared default changes to 'NCCL' - confirm.
# NOTE(review): the diff appears here with its line breaks collapsed onto one
# line, so the original indentation of the YAML context lines cannot be
# confirmed from this view.
diff --git a/examples/auto_deploy/nano_v3.yaml b/examples/auto_deploy/nano_v3.yaml index a87e26242546..a847d9a8d4ed 100644 --- a/examples/auto_deploy/nano_v3.yaml +++ b/examples/auto_deploy/nano_v3.yaml @@ -14,6 +14,7 @@ kv_cache_config: transforms: detect_sharding: sharding_dims: ['ep', 'bmm'] + allreduce_strategy: 'AUTO' manual_config: head_dim: 128 tp_plan: diff --git a/tensorrt_llm/_torch/auto_deploy/config/default.yaml b/tensorrt_llm/_torch/auto_deploy/config/default.yaml index a7251de20a12..4edf3de15072 100644 --- a/tensorrt_llm/_torch/auto_deploy/config/default.yaml +++ b/tensorrt_llm/_torch/auto_deploy/config/default.yaml @@ -81,7 +81,7 @@ transforms: sharding_source: ['manual', 'factory', 'heuristic'] support_partial_config: true sharding_dims: ['tp', 'ep', 'bmm'] - allreduce_strategy: 'AUTO' + allreduce_strategy: 'NCCL' dist_backend: auto requires_shape_prop: true sharding_transform_executor: