
Commit 11c3ef4

Support for partial config from factory
Signed-off-by: greg-kwasniewski1 <[email protected]>
1 parent 3349e0f commit 11c3ef4

4 files changed: +65 −8 lines

setup.py
Lines changed: 1 addition & 1 deletion

@@ -215,7 +215,7 @@ def extract_from_precompiled(precompiled_location: str, package_data: List[str],
         precompiled_location = download_precompiled(tempdir, version)
         extract_from_precompiled(precompiled_location, package_data, tempdir)
 
-    # sanity_check()
+    sanity_check()
 
 # https://setuptools.pypa.io/en/latest/references/keywords.html
 setup(

tensorrt_llm/_torch/auto_deploy/config/default.yaml
Lines changed: 1 addition & 1 deletion

@@ -56,7 +56,7 @@ transforms:
     stage: sharding
     simple_shard_only: false
     use_sharding_from_factory: false
-    support_partial_config: true
+    support_partial_config: false
     sharding_dims: ['tp', 'ep', 'bmm']
     # TODO: (hg) need to ensure run_shape_prop after sharding.
   sharding_transform_executor:
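For context, the keys touched here sit under a sharding transform entry in default.yaml. Below is a minimal sketch (not part of the commit) of reading and overriding these keys with PyYAML; the parent key name `detect_sharding` is an assumption, since the hunk only shows the nested keys.

```python
# Sketch only: inspect / override the sharding keys shown in the hunk above.
# The "detect_sharding" parent key is a hypothetical name not visible in the diff.
import yaml  # requires PyYAML

with open("tensorrt_llm/_torch/auto_deploy/config/default.yaml") as f:
    cfg = yaml.safe_load(f)

sharding_cfg = cfg["transforms"]["detect_sharding"]  # assumed parent key
print(sharding_cfg["support_partial_config"])        # False after this commit
print(sharding_cfg["sharding_dims"])                 # ['tp', 'ep', 'bmm']

# Re-enable partial-config support locally for experimentation:
sharding_cfg["support_partial_config"] = True
```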

tensorrt_llm/_torch/auto_deploy/llm_args.py
Lines changed: 1 addition & 1 deletion

@@ -58,7 +58,7 @@ class AutoDeployConfig(DynamicYamlMixInForSettings, BaseSettings):
     )
 
     model_factory: Literal["AutoModelForCausalLM", "AutoModelForImageTextToText"] = Field(
-        default="AutoModelForCausalLM",
+        default="AutoModelForImageTextToText",
         description="The model factory to use for loading the model.",
     )
 

tensorrt_llm/_torch/auto_deploy/transform/library/sharding.py
Lines changed: 62 additions & 5 deletions

@@ -186,6 +186,8 @@ def _apply(
         )
         shared_config.sharding_config.simple_shard_only = self.config.simple_shard_only
         shared_config.sharding_config.support_partial_config = self.config.support_partial_config
+        shared_config.sharding_config.sharding_dims = self.config.sharding_dims
+
         shared_config.sharding_config.use_sharding_from_factory = (
             self.config.use_sharding_from_factory
         )
@@ -201,8 +203,6 @@ def _apply(
             factory_info = detect_sharding_from_factory_config(gm, sharding_config)
             return gm, factory_info
 
-        shared_config.sharding_config.sharding_dims = self.config.sharding_dims
-
         ad_logger.info(
             f"Running autodeploy sharding heuristics: {shared_config.sharding_config.sharding_dims}"
         )
@@ -339,8 +339,39 @@ def detect_sharding_from_factory_config(
            # TODO: Sequence parallelism is not supported yet.
            ad_logger.warning("Sequence parallelism is not supported yet. Skipping.")
        elif "local" in config:
-            # TODO: local refers to hybrid EP+TP parallelism. Not supported yet.
-            ad_logger.warning("Local EP+TP sharding is not supported yet. Skipping.")
+            # Check if this applies to shared experts in EP parallelism.
+            # If yes, apply the TP col-row shard.
+            if "shared" in module_name:
+                col_row_action = config.replace("local_", "")
+                if col_row_action == "colwise":
+                    sharding_config.tp_transforms.append(
+                        TPShardingInfo(
+                            target_node=lin_node.name,
+                            split_dim=SplitDimension.COLUMN,
+                            rank=rank,
+                            world_size=world_size,
+                            dist_op=None,
+                            min_local_shape=min_local_shape,
+                        )
+                    )
+                elif col_row_action == "rowwise":
+                    sharding_config.tp_transforms.append(
+                        TPShardingInfo(
+                            target_node=lin_node.name,
+                            split_dim=SplitDimension.ROW,
+                            rank=rank,
+                            world_size=world_size,
+                            dist_op="all_reduce",
+                            min_local_shape=min_local_shape,
+                        )
+                    )
+                    num_row_col_shards += 1
+                else:
+                    ad_logger.warning("Invalid sharding config. Skipping.")
+            else:
+                # TODO: local refers to hybrid EP+TP parallelism. Not supported yet.
+                ad_logger.warning("Local EP+TP sharding is not supported yet. Skipping.")
+
        elif "gather" in config:
            # Simple shard (row + all_gather)
            sharding_config.tp_transforms.append(
@@ -363,9 +394,35 @@ def detect_sharding_from_factory_config(
         f"Applied {num_shards} TP shards (simple: {num_simple_shards}, "
         f"row-col pattern: {num_row_col_shards})"
     )
+
+    num_matches = len(sharding_config.tp_transforms)
+
+    if sharding_config.support_partial_config:
+        ad_logger.info(
+            f"Partial factory config applied only for TP. "
+            f"Applying heuristics for {sharding_config.sharding_dims}."
+        )
+
+        # run EP sharding across ranks
+        if "ep" in sharding_config.sharding_dims:
+            ep_info = detect_ep_shard(gm, sharding_config)
+        else:
+            ep_info = TransformInfo(
+                skipped=True, num_matches=0, is_clean=True, has_valid_shapes=True
+            )
+
+        # run BMM sharding across ranks
+        if "bmm" in sharding_config.sharding_dims:
+            dp_bmm_info = detect_dp_bmm_shard(gm, sharding_config)
+        else:
+            dp_bmm_info = TransformInfo(
+                skipped=True, num_matches=0, is_clean=True, has_valid_shapes=True
+            )
+
+        num_matches += ep_info.num_matches + dp_bmm_info.num_matches
+
     return TransformInfo(
         skipped=False,
-        num_matches=len(sharding_config.tp_transforms),
+        num_matches=num_matches,
         is_clean=False,
         has_valid_shapes=False,
     )
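Taken together, the sharding.py changes mean: TP transforms are still derived from the factory config, and when `support_partial_config` is set, the EP and BMM heuristics additionally run for whatever appears in `sharding_dims`, with every match folded into a single `TransformInfo`. Below is a condensed, self-contained sketch of that control flow. It is an illustration only: the real code operates on an FX GraphModule and calls `detect_ep_shard` / `detect_dp_bmm_shard`, whereas the match counts here are stand-in values.

```python
# Condensed sketch of the partial-config fallback introduced by this commit.
# Not the real implementation: graph traversal and the EP/BMM detectors are
# replaced by stand-in values so the flow is runnable on its own.
from dataclasses import dataclass, field
from typing import List


@dataclass
class TransformInfo:
    skipped: bool
    num_matches: int
    is_clean: bool
    has_valid_shapes: bool


@dataclass
class ShardingConfig:
    support_partial_config: bool = False
    sharding_dims: List[str] = field(default_factory=lambda: ["tp", "ep", "bmm"])
    tp_transforms: List[str] = field(default_factory=list)


def detect_sharding_from_factory_config(cfg: ShardingConfig) -> TransformInfo:
    # 1) TP transforms come from the factory sharding config (details elided).
    num_matches = len(cfg.tp_transforms)

    # 2) New in this commit: a partial factory config only covers TP, so fall
    #    back to the EP / BMM heuristics for the remaining sharding dimensions.
    if cfg.support_partial_config:
        ep_matches = 3 if "ep" in cfg.sharding_dims else 0    # stand-in for detect_ep_shard
        bmm_matches = 1 if "bmm" in cfg.sharding_dims else 0  # stand-in for detect_dp_bmm_shard
        num_matches += ep_matches + bmm_matches

    # 3) All matches are reported through one TransformInfo.
    return TransformInfo(skipped=False, num_matches=num_matches,
                         is_clean=False, has_valid_shapes=False)


print(detect_sharding_from_factory_config(
    ShardingConfig(support_partial_config=True, tp_transforms=["q_proj", "k_proj"])
))
```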
