From fb27b4e894ea352b9f010e85bb892b246206af62 Mon Sep 17 00:00:00 2001
From: Onur Yilmaz <oyilmaz@nvidia.com>
Date: Thu, 4 Apr 2024 14:06:04 -0400
Subject: [PATCH] Address the CodeQL issues

Signed-off-by: Onur Yilmaz <oyilmaz@nvidia.com>
---
 nemo/deploy/deploy_base.py                | 39 +++++++++++++----------
 nemo/deploy/deploy_pytriton.py            |  6 ++--
 nemo/deploy/nlp/__init__.py               |  5 +--
 nemo/deploy/nlp/query_llm.py              |  2 +-
 nemo/export/__init__.py                   | 10 ++++--
 nemo/export/trt_llm/__init__.py           |  8 -----
 nemo/export/trt_llm/decoder/llama.py      |  8 ++---
 nemo/export/trt_llm/nemo_utils.py         |  4 +--
 nemo/export/trt_llm/tensorrt_llm_build.py |  9 +++---
 nemo/export/trt_llm/tensorrt_llm_utils.py |  5 ++-
 nemo/export/trt_llm/utils.py              |  2 --
 scripts/export/export_to_trt_llm.py       | 10 ------
 tests/deploy/test_nemo_deploy.py          |  5 ---
 13 files changed, 51 insertions(+), 62 deletions(-)

diff --git a/nemo/deploy/deploy_base.py b/nemo/deploy/deploy_base.py
index 33c5ccfb3cf4..5ad418ae4a74 100644
--- a/nemo/deploy/deploy_base.py
+++ b/nemo/deploy/deploy_base.py
@@ -14,35 +14,40 @@
 
 import importlib
+import logging
 from abc import ABC, abstractmethod
 
+use_pytorch_lightning = True
 try:
     from pytorch_lightning import Trainer
 except Exception:
-    pass
+    use_pytorch_lightning = False
 
 from nemo.deploy.triton_deployable import ITritonDeployable
 
 use_nemo = True
 try:
     from nemo.core.classes.modelPT import ModelPT
-except:
+except Exception:
     use_nemo = False
 
 
+LOGGER = logging.getLogger("NeMo")
+
+
 class DeployBase(ABC):
     def __init__(
-        self,
-        triton_model_name: str,
-        triton_model_version: int = 1,
-        checkpoint_path: str = None,
-        model=None,
-        max_batch_size: int = 128,
-        port: int = 8000,
-        address="0.0.0.0",
-        allow_grpc=True,
-        allow_http=True,
-        streaming=False,
-        pytriton_log_verbose=0,
+            self,
+            triton_model_name: str,
+            triton_model_version: int = 1,
+            checkpoint_path: str = None,
+            model=None,
+            max_batch_size: int = 128,
+            port: int = 8000,
+            address="0.0.0.0",
+            allow_grpc=True,
+            allow_http=True,
+            streaming=False,
+            pytriton_log_verbose=0,
     ):
         self.checkpoint_path = checkpoint_path
         self.triton_model_name = triton_model_name
@@ -87,8 +92,8 @@ def _init_nemo_model(self):
             # has to turn off activations_checkpoint_method for inference
             try:
                 self.model.model.language_model.encoder.activations_checkpoint_method = None
-            except AttributeError:
-                pass
+            except AttributeError as e:
+                LOGGER.warning(e)
 
         if self.model is None:
             raise Exception("There is no model to deploy.")
@@ -106,4 +111,4 @@ def _is_model_deployable(self):
     @staticmethod
     def get_module_and_class(target: str):
         ln = target.rindex(".")
-        return target[0:ln], target[ln + 1 : len(target)]
+        return target[:ln], target[ln + 1 :]  # (module path, class name) split at the last dot
diff --git a/nemo/deploy/deploy_pytriton.py b/nemo/deploy/deploy_pytriton.py
index 09621ad2eba5..22dea8ac47cd 100644
--- a/nemo/deploy/deploy_pytriton.py
+++ b/nemo/deploy/deploy_pytriton.py
@@ -13,11 +13,13 @@
 # limitations under the License.
 
 
+use_pytriton = True
 try:
     from pytriton.model_config import ModelConfig
     from pytriton.triton import Triton, TritonConfig
-except:
-    pass
+except Exception:
+    use_pytriton = False
+
 from nemo.deploy.deploy_base import DeployBase
 
 
diff --git a/nemo/deploy/nlp/__init__.py b/nemo/deploy/nlp/__init__.py
index 24b8c36a9cc0..21e2ca2751f8 100644
--- a/nemo/deploy/nlp/__init__.py
+++ b/nemo/deploy/nlp/__init__.py
@@ -13,7 +13,8 @@
 # limitations under the License.
 
 
+use_query_llm = True
 try:
     from nemo.deploy.nlp.query_llm import NemoQueryLLM
-except:
-    pass
+except Exception:
+    use_query_llm = False
diff --git a/nemo/deploy/nlp/query_llm.py b/nemo/deploy/nlp/query_llm.py
index 54c5e37142ef..6a4337024eeb 100644
--- a/nemo/deploy/nlp/query_llm.py
+++ b/nemo/deploy/nlp/query_llm.py
@@ -21,7 +21,7 @@
 use_pytriton = True
 try:
     from pytriton.client import DecoupledModelClient, ModelClient
-except:
+except Exception:
     use_pytriton = False
 
 
diff --git a/nemo/export/__init__.py b/nemo/export/__init__.py
index eb8ebec10441..55712d98852c 100644
--- a/nemo/export/__init__.py
+++ b/nemo/export/__init__.py
@@ -13,7 +13,13 @@
 # limitations under the License.
 
 
+import logging
+
+LOGGER = logging.getLogger("NeMo")
+
+use_TensorRTLLM = True
 try:
     from nemo.export.tensorrt_llm import TensorRTLLM
-except Exception:
-    pass
+except Exception as e:
+    use_TensorRTLLM = False  # import failed: clear the flag so it reflects availability
+    LOGGER.warning("TensorRTLLM could not be imported: %s", e)
diff --git a/nemo/export/trt_llm/__init__.py b/nemo/export/trt_llm/__init__.py
index c7e532029979..9d1c9fb790ae 100644
--- a/nemo/export/trt_llm/__init__.py
+++ b/nemo/export/trt_llm/__init__.py
@@ -12,11 +12,3 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-import tensorrt_llm
-from mpi4py import MPI
-
-from nemo.export.trt_llm.model_config_trt import *  # noqa
-from nemo.export.trt_llm.nemo_utils import *  # noqa
-from nemo.export.trt_llm.quantization_utils import *  # noqa
-from nemo.export.trt_llm.tensorrt_llm_run import *  # noqa
diff --git a/nemo/export/trt_llm/decoder/llama.py b/nemo/export/trt_llm/decoder/llama.py
index e8b3b5a58ebf..0f6951c28ec3 100644
--- a/nemo/export/trt_llm/decoder/llama.py
+++ b/nemo/export/trt_llm/decoder/llama.py
@@ -132,8 +132,7 @@ def build_decoder(self, layer):
         config.set_if_not_exist('moe_num_experts', 0)
 
         if layer.moe_num_experts:
-            moe_config = MoeConfig()
-            if not layer.moe_num_experts is None:
+            if layer.moe_num_experts is not None:
                 if layer.moe_top_k is None:
                     layer.moe_top_k = 1
 
@@ -143,10 +142,11 @@ def build_decoder(self, layer):
                 )
                 moe_config = MoeConfig(
                     layer.moe_num_experts, layer.moe_top_k, layer.moe_tp_mode, layer.moe_renorm_mode
-                ).validate()
+                )
+                moe_config.validate()
                 config.moe_num_experts = layer.moe_num_experts
                 config.moe_top_k = layer.moe_top_k
                 config.moe_tp_mode = layer.moe_tp_mode
                 config.moe_normalization_mode = layer.moe_renorm_mode
 
-        return LLaMADecoderLayer(config=config, layer_idx=self.layer_id,)
+        return LLaMADecoderLayer(config=config, layer_idx=self.layer_id)
diff --git a/nemo/export/trt_llm/nemo_utils.py b/nemo/export/trt_llm/nemo_utils.py
index 0fffe0d784fd..a82a9aba2c6a 100644
--- a/nemo/export/trt_llm/nemo_utils.py
+++ b/nemo/export/trt_llm/nemo_utils.py
@@ -14,8 +14,6 @@
 
 
 import argparse
-import ast
-import configparser
 import copy
 import csv
 import datetime
@@ -31,7 +29,7 @@
 import numpy as np
 import tensorrt_llm
 from tensorrt_llm import str_dtype_to_trt
-from transformers import AutoTokenizer, GPT2Config, LlamaConfig, PretrainedConfig, PreTrainedTokenizer
+from transformers import AutoTokenizer, LlamaConfig, PretrainedConfig, PreTrainedTokenizer
 
 from nemo.export.trt_llm.model_config import (
     LAYERNORM_DEFAULT,
diff --git a/nemo/export/trt_llm/tensorrt_llm_build.py b/nemo/export/trt_llm/tensorrt_llm_build.py
index 0ac8025dba83..e01e6b991b87 100644
--- a/nemo/export/trt_llm/tensorrt_llm_build.py
+++ b/nemo/export/trt_llm/tensorrt_llm_build.py
@@ -18,6 +18,7 @@
 import time
 from pathlib import Path
 from typing import List
+import logging
 
 import tensorrt as trt
 import tensorrt_llm
@@ -32,6 +33,8 @@
 
 MODEL_NAME = "NeMo"
 
+LOGGER = logging.getLogger("NeMo")
+
 
 def get_engine_name(model, dtype, tp_size, pp_size, rank):
     """Returns the engine file name based on the provided info."""
@@ -129,7 +132,7 @@ def build_rank_engine(
             # Use the plugin for the embedding parallelism and sharing
             network.plugin_config.set_lookup_plugin(dtype=args.dtype)
     else:
-        print("Build engine in OOTB mode, disable all plugins except nccl.")
+        LOGGER.warning("Build engine in OOTB mode, disable all plugins except nccl.")
 
     if args.mapping.world_size > 1:
         network.plugin_config.set_nccl_plugin(args.dtype)
@@ -217,10 +220,6 @@ def _build_impl(tensorrt_llm_model, args):
     engine = build_rank_engine(tensorrt_llm_model, builder, builder_config, engine_name, args)
     assert engine is not None, f"Failed to build engine for rank {rank}"
 
-    if args.mapping.rank == 0:
-        # Use in-memory timing cache for multiple builder passes.
-        if not args.parallel_build:
-            timing_cache = builder_config.trt_builder_config.get_timing_cache()
     serialize_engine(engine, args.output_dir / engine_name)
 
     if args.mapping.rank == 0:
diff --git a/nemo/export/trt_llm/tensorrt_llm_utils.py b/nemo/export/trt_llm/tensorrt_llm_utils.py
index 2d02558eac4a..b732daca2525 100644
--- a/nemo/export/trt_llm/tensorrt_llm_utils.py
+++ b/nemo/export/trt_llm/tensorrt_llm_utils.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 
 import tensorrt as trt
 from tensorrt_llm.layers import Embedding, LayerNorm, PromptTuningEmbedding, RmsNorm
@@ -20,6 +21,8 @@
 from nemo.export.trt_llm.model_config import LAYERNORM_DEFAULT, LAYERNORM_RMS, EmbeddingConfig, LayernormConfig
 from nemo.export.trt_llm.tensor_utils import get_tensor_parallel_group
 
+LOGGER = logging.getLogger("NeMo")
+
 
 def build_embedding_from_config(
     config: EmbeddingConfig,
@@ -76,7 +79,7 @@ def print_tensorrt_llm(name: str, tensorrt_llm_module: Module):
         if hasattr(tensorrt_llm_module, tensor_name):
             tensor = getattr(tensorrt_llm_module, tensor_name)
             if tensor is not None:
-                print(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}")
+                LOGGER.info(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}")
 
     for k, v in tensorrt_llm_module.named_children():
         print_tensorrt_llm(f"{name}.{k}({v._get_name()})", v)
diff --git a/nemo/export/trt_llm/utils.py b/nemo/export/trt_llm/utils.py
index afa621cf7dcb..4095a809a456 100644
--- a/nemo/export/trt_llm/utils.py
+++ b/nemo/export/trt_llm/utils.py
@@ -19,8 +19,6 @@
 import tarfile
 import tempfile
 import typing
-from pathlib import Path
-
 import numpy as np
 import torch
 import yaml
diff --git a/scripts/export/export_to_trt_llm.py b/scripts/export/export_to_trt_llm.py
index 725a4caf223f..9798473dd880 100644
--- a/scripts/export/export_to_trt_llm.py
+++ b/scripts/export/export_to_trt_llm.py
@@ -14,20 +14,10 @@
 
 import argparse
 import logging
-import os
 import sys
-from pathlib import Path
 
-from nemo.deploy import DeployPyTriton
 from nemo.export import TensorRTLLM
 
-try:
-    from contextlib import nullcontext
-except ImportError:
-    # handle python < 3.7
-    from contextlib import suppress as nullcontext
-
-
 LOGGER = logging.getLogger("NeMo")
 
 
diff --git a/tests/deploy/test_nemo_deploy.py b/tests/deploy/test_nemo_deploy.py
index 4b2c98911560..39d064e8b489 100644
--- a/tests/deploy/test_nemo_deploy.py
+++ b/tests/deploy/test_nemo_deploy.py
@@ -13,13 +13,9 @@
 # limitations under the License.
 
 import json
-import os
 import shutil
-import time
-import urllib.request as req
 from pathlib import Path
 
-import numpy as np
 import pytest
 import torch
 from tqdm import tqdm
@@ -179,7 +175,6 @@ def run_trt_llm_export(model_name, n_gpu, skip_accuracy=False, use_pytriton=True
             print("Model Accuracy: {0}, Relaxed Model Accuracy: {1}".format(trtllm_accuracy, trtllm_accuracy_relaxed))
             assert trtllm_accuracy_relaxed > 0.5, "Model accuracy is below 0.5"
 
-        trt_llm_exporter = None
         nm.stop()
         shutil.rmtree(model_info["trt_llm_model_dir"])