Comment on issues

Signed-off-by: Jan Lasek <[email protected]>
NVIDIA · Oct 2, 2024 · b29975f
1 parent 8e1e1c8
commit b29975f
Showing 1 changed file with 4 additions and 1 deletion.
diff --git a/nemo/deploy/nlp/megatronllm_deployable.py b/nemo/deploy/nlp/megatronllm_deployable.py
@@ -154,10 +154,13 @@ def _load_from_nemo_checkpoint(self, nemo_checkpoint_filepath: str, num_devices:
             # had to override these to make Nemotron3-22B work, see sample_sequence_batch() in text_generation_utils.py
             custom_config.activations_checkpoint_granularity = None
             custom_config.activations_checkpoint_method = None
+            # Models trained with Transformer Engine (TE) < 1.10 and loaded with TE >= 1.10
+            # require special handling when loading the checkpoint due to structural updates
             custom_config.dist_ckpt_load_strictness = StrictHandling.LOG_ALL.value
             if custom_config.get("fp8", False):
                 # Need to disable FP8 for in-framework inference due to shape constraints imposed by TE,
-                # see https://github.com/NVIDIA/TransformerEngine/blob/v1.8/transformer_engine/pytorch/utils.py#L229
+                # see https://github.com/NVIDIA/TransformerEngine/blob/v1.10/transformer_engine/pytorch/utils.py#L229
+                LOGGER.warning("Disabling FP8 inference due to shape constraints imposed by Transformer Engine.")
                 custom_config.fp8 = False
 
             self.model = MegatronGPTModel.restore_from(