From 5d6ccf78764a1f4f8b7b36f0d835ece1b5df6213 Mon Sep 17 00:00:00 2001
From: Jan Lasek
Date: Tue, 1 Oct 2024 14:51:00 +0200
Subject: [PATCH 1/6] Fix loading legacy checkpoints

Signed-off-by: Jan Lasek
---
 nemo/deploy/nlp/megatronllm_deployable.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/nemo/deploy/nlp/megatronllm_deployable.py b/nemo/deploy/nlp/megatronllm_deployable.py
index 1fe029f9fade..500246b692c1 100644
--- a/nemo/deploy/nlp/megatronllm_deployable.py
+++ b/nemo/deploy/nlp/megatronllm_deployable.py
@@ -33,6 +33,18 @@ from nemo.deploy import ITritonDeployable
 from nemo.deploy.utils import cast_output, str_ndarray2list
 
+try:
+    from megatron.core.dist_checkpointing.validation import StrictHandling
+
+    HAVE_MEGATRON_CORE = True
+
+except (ImportError, ModuleNotFoundError) as e:
+
+    HAVE_MEGATRON_CORE = False
+    IMPORT_ERROR = (
+        "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-core."
+        f" Exact error: {e}"
+    )
 
 @wrapt.decorator
 def noop_decorator(func):
     def wrapper(*args, **kwargs):
@@ -99,6 +111,8 @@ def __init__(
         num_nodes: int = 1,
         existing_model: MegatronGPTModel = None,
     ):
+        if not HAVE_MEGATRON_CORE:
+            raise ImportError(IMPORT_ERROR)
         if nemo_checkpoint_filepath is None and existing_model is None:
             raise ValueError(
                 "MegatronLLMDeployable requires either a .nemo checkpoint filepath or an existing MegatronGPTModel, but both provided were None"
@@ -142,6 +156,7 @@ def _load_from_nemo_checkpoint(self, nemo_checkpoint_filepath: str, num_devices:
             # had to override these to make Nemotron3-22B work, see sample_sequence_batch() in text_generation_utils.py
             custom_config.activations_checkpoint_granularity = None
             custom_config.activations_checkpoint_method = None
+            custom_config.dist_ckpt_load_strictness = StrictHandling.LOG_ALL.value
 
             self.model = MegatronGPTModel.restore_from(
                 nemo_checkpoint_filepath, trainer=trainer, override_config_path=custom_config

From 01e680ddc730d703dd7d255d4727857806076d76 Mon Sep 17 00:00:00 2001
From: Jan Lasek
Date: Tue, 1 Oct 2024 14:52:47 +0200
Subject: [PATCH 2/6] Fix inference issues for FP8-trained models

Signed-off-by: Jan Lasek
---
 nemo/deploy/nlp/megatronllm_deployable.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nemo/deploy/nlp/megatronllm_deployable.py b/nemo/deploy/nlp/megatronllm_deployable.py
index 500246b692c1..6c5624aeb0b1 100644
--- a/nemo/deploy/nlp/megatronllm_deployable.py
+++ b/nemo/deploy/nlp/megatronllm_deployable.py
@@ -157,6 +157,8 @@ def _load_from_nemo_checkpoint(self, nemo_checkpoint_filepath: str, num_devices:
             custom_config.activations_checkpoint_granularity = None
             custom_config.activations_checkpoint_method = None
             custom_config.dist_ckpt_load_strictness = StrictHandling.LOG_ALL.value
+            if custom_config.get("fp8", False):
+                custom_config.fp8 = False
 
             self.model = MegatronGPTModel.restore_from(
                 nemo_checkpoint_filepath, trainer=trainer, override_config_path=custom_config

From 1938b3875cb10c7bbb0b0081b5c3dbeba4c26f13 Mon Sep 17 00:00:00 2001
From: janekl
Date: Tue, 1 Oct 2024 16:46:29 +0000
Subject: [PATCH 3/6] Apply isort and black reformatting

Signed-off-by: janekl
---
 nemo/deploy/nlp/megatronllm_deployable.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nemo/deploy/nlp/megatronllm_deployable.py b/nemo/deploy/nlp/megatronllm_deployable.py
index 6c5624aeb0b1..eb5762496fb6 100644
--- a/nemo/deploy/nlp/megatronllm_deployable.py
+++ b/nemo/deploy/nlp/megatronllm_deployable.py
@@ -46,6 +46,7 @@
         f" Exact error: {e}"
     )
 
+
 @wrapt.decorator
 def noop_decorator(func):
     def wrapper(*args, **kwargs):
From 87bcae68f5be14a2e04983d0615456788340226b Mon Sep 17 00:00:00 2001
From: Jan Lasek
Date: Tue, 1 Oct 2024 18:53:57 +0200
Subject: [PATCH 4/6] Comment on TE shape constraints during inference

Signed-off-by: Jan Lasek
---
 nemo/deploy/nlp/megatronllm_deployable.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nemo/deploy/nlp/megatronllm_deployable.py b/nemo/deploy/nlp/megatronllm_deployable.py
index eb5762496fb6..6ceb436e8077 100644
--- a/nemo/deploy/nlp/megatronllm_deployable.py
+++ b/nemo/deploy/nlp/megatronllm_deployable.py
@@ -159,6 +159,8 @@ def _load_from_nemo_checkpoint(self, nemo_checkpoint_filepath: str, num_devices:
             custom_config.activations_checkpoint_method = None
             custom_config.dist_ckpt_load_strictness = StrictHandling.LOG_ALL.value
             if custom_config.get("fp8", False):
+                # Need to disable FP8 for in-framework inference due to shape constraints imposed by TE,
+                # see https://github.com/NVIDIA/TransformerEngine/blob/v1.8/transformer_engine/pytorch/utils.py#L229
                 custom_config.fp8 = False
 
             self.model = MegatronGPTModel.restore_from(

From 8e1e1c8b4386d62cfb3f8ddafe726f1a1b5f4e73 Mon Sep 17 00:00:00 2001
From: Jan Lasek
Date: Wed, 2 Oct 2024 01:19:59 -0700
Subject: [PATCH 5/6] Simplify import error handling

Signed-off-by: Jan Lasek
---
 nemo/deploy/nlp/megatronllm_deployable.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/nemo/deploy/nlp/megatronllm_deployable.py b/nemo/deploy/nlp/megatronllm_deployable.py
index 6ceb436e8077..a9e3740c5ec2 100644
--- a/nemo/deploy/nlp/megatronllm_deployable.py
+++ b/nemo/deploy/nlp/megatronllm_deployable.py
@@ -41,10 +41,7 @@
 except (ImportError, ModuleNotFoundError) as e:
 
     HAVE_MEGATRON_CORE = False
-    IMPORT_ERROR = (
-        "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-core."
-        f" Exact error: {e}"
-    )
+    IMPORT_ERROR = e
 
 
 @wrapt.decorator
@@ -113,7 +110,7 @@ def __init__(
         existing_model: MegatronGPTModel = None,
     ):
         if not HAVE_MEGATRON_CORE:
-            raise ImportError(IMPORT_ERROR)
+            raise IMPORT_ERROR
         if nemo_checkpoint_filepath is None and existing_model is None:
             raise ValueError(
                 "MegatronLLMDeployable requires either a .nemo checkpoint filepath or an existing MegatronGPTModel, but both provided were None"
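Taken together, patches 1 and 5 leave the module with a lazy guarded-import pattern: the megatron-core import is attempted once at module load time, and the captured exception is re-raised only when MegatronLLMDeployable is actually constructed. A condensed sketch of that end state, assembled from the hunks above rather than quoted verbatim:

    try:
        # Optional dependency: only needed once MegatronLLMDeployable is used.
        from megatron.core.dist_checkpointing.validation import StrictHandling

        HAVE_MEGATRON_CORE = True
    except (ImportError, ModuleNotFoundError) as e:
        HAVE_MEGATRON_CORE = False
        IMPORT_ERROR = e  # keep the original exception for a faithful traceback


    class MegatronLLMDeployable:
        def __init__(self, nemo_checkpoint_filepath=None, existing_model=None):
            if not HAVE_MEGATRON_CORE:
                # Surface the import failure at first real use, with the original error.
                raise IMPORT_ERROR

This keeps nemo.deploy.nlp importable on machines without megatron-core while still failing loudly, with the original error, as soon as the deployable is used.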
From b29975fd4fb58ab0caacbcebacf9b44c223b03c7 Mon Sep 17 00:00:00 2001
From: Jan Lasek
Date: Wed, 2 Oct 2024 15:48:08 +0200
Subject: [PATCH 6/6] Comment on issues

Signed-off-by: Jan Lasek
---
 nemo/deploy/nlp/megatronllm_deployable.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/nemo/deploy/nlp/megatronllm_deployable.py b/nemo/deploy/nlp/megatronllm_deployable.py
index a9e3740c5ec2..64cf6114ceba 100644
--- a/nemo/deploy/nlp/megatronllm_deployable.py
+++ b/nemo/deploy/nlp/megatronllm_deployable.py
@@ -154,10 +154,13 @@ def _load_from_nemo_checkpoint(self, nemo_checkpoint_filepath: str, num_devices:
             # had to override these to make Nemotron3-22B work, see sample_sequence_batch() in text_generation_utils.py
             custom_config.activations_checkpoint_granularity = None
             custom_config.activations_checkpoint_method = None
+            # Models trained with TE < 1.10 and loaded with TE >= 1.10 require
+            # special handling when loading the checkpoint due to structural updates
             custom_config.dist_ckpt_load_strictness = StrictHandling.LOG_ALL.value
             if custom_config.get("fp8", False):
                 # Need to disable FP8 for in-framework inference due to shape constraints imposed by TE,
-                # see https://github.com/NVIDIA/TransformerEngine/blob/v1.8/transformer_engine/pytorch/utils.py#L229
+                # see https://github.com/NVIDIA/TransformerEngine/blob/v1.10/transformer_engine/pytorch/utils.py#L229
+                LOGGER.warning("Disabling FP8 inference due to shape constraints imposed by Transformer Engine.")
                 custom_config.fp8 = False
 
             self.model = MegatronGPTModel.restore_from(
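With the full series applied, a legacy or FP8-trained .nemo checkpoint should load without manual config surgery: structural mismatches in the distributed checkpoint are logged instead of raised (StrictHandling.LOG_ALL), and FP8 is switched off for in-framework inference with a warning. A minimal usage sketch; the checkpoint path is hypothetical and the keyword defaults are assumed from the signatures visible in the hunks above:

    from nemo.deploy.nlp.megatronllm_deployable import MegatronLLMDeployable

    # Hypothetical path to an older or FP8-trained checkpoint.
    deployable = MegatronLLMDeployable(
        nemo_checkpoint_filepath="/checkpoints/legacy_model.nemo",
        num_devices=1,
        num_nodes=1,
    )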