From a369858496a2afbca8691430e3f007120bc4018d Mon Sep 17 00:00:00 2001
From: Jan Lasek
Date: Wed, 2 Oct 2024 15:48:08 +0200
Subject: [PATCH] Comment on issues

Signed-off-by: Jan Lasek
---
 nemo/deploy/nlp/megatronllm_deployable.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/nemo/deploy/nlp/megatronllm_deployable.py b/nemo/deploy/nlp/megatronllm_deployable.py
index a9e3740c5ec22..64cf6114ceba4 100644
--- a/nemo/deploy/nlp/megatronllm_deployable.py
+++ b/nemo/deploy/nlp/megatronllm_deployable.py
@@ -154,10 +154,13 @@ def _load_from_nemo_checkpoint(self, nemo_checkpoint_filepath: str, num_devices:
         # had to override these to make Nemotron3-22B work, see sample_sequence_batch() in text_generation_utils.py
         custom_config.activations_checkpoint_granularity = None
         custom_config.activations_checkpoint_method = None
+        # Models trained with TE < 1.10 and loaded with TE >= 1.10 require
+        # special handling when loading the checkpoint due to structural updates.
         custom_config.dist_ckpt_load_strictness = StrictHandling.LOG_ALL.value
         if custom_config.get("fp8", False):
             # Need to disable FP8 for in-framework inference due to shape constraints imposed by TE,
-            # see https://github.com/NVIDIA/TransformerEngine/blob/v1.8/transformer_engine/pytorch/utils.py#L229
+            # see https://github.com/NVIDIA/TransformerEngine/blob/v1.10/transformer_engine/pytorch/utils.py#L229
+            LOGGER.warning("Disabling FP8 inference due to shape constraints imposed by Transformer Engine.")
             custom_config.fp8 = False
 
         self.model = MegatronGPTModel.restore_from(