mlp_hidden_size
1 parent 88c9eda · commit e106c4e
tensorrt_llm/_torch/model_config.py
@@ -494,7 +494,8 @@ def get_bindings_model_config(self,
         architectures = self.pretrained_config.architectures
         if len(architectures
                ) == 1 and architectures[0] == "DeciLMForCausalLM":
-            mlp_hidden_size = self._infer_nemotron_ffn_mult()
+            mlp_hidden_size = self._infer_nemotron_ffn_mult(
+            ) // self.mapping.tp_size
         else:
             raise ValueError(
                 f"Inferring mlp hidden size for model architecture: {architectures} isn't supported yet"