Address the CodeQL issues

Signed-off-by: Onur Yilmaz <[email protected]>
NVIDIA · Apr 4, 2024 · commit fb27b4e
1 parent 648acb6


Showing 13 changed files with 51 additions and 62 deletions.
diff --git a/nemo/deploy/deploy_base.py b/nemo/deploy/deploy_base.py
@@ -14,35 +14,40 @@
 
 import importlib
 from abc import ABC, abstractmethod
+import logging
 
+use_pytorch_lightning = True
 try:
     from pytorch_lightning import Trainer
 except Exception:
-    pass
+    use_pytorch_lightning = False
 
 from nemo.deploy.triton_deployable import ITritonDeployable
 
 use_nemo = True
 try:
     from nemo.core.classes.modelPT import ModelPT
-except:
+except Exception:
     use_nemo = False
 
 
+LOGGER = logging.getLogger("NeMo")
+
+
 class DeployBase(ABC):
     def __init__(
-        self,
-        triton_model_name: str,
-        triton_model_version: int = 1,
-        checkpoint_path: str = None,
-        model=None,
-        max_batch_size: int = 128,
-        port: int = 8000,
-        address="0.0.0.0",
-        allow_grpc=True,
-        allow_http=True,
-        streaming=False,
-        pytriton_log_verbose=0,
+            self,
+            triton_model_name: str,
+            triton_model_version: int = 1,
+            checkpoint_path: str = None,
+            model=None,
+            max_batch_size: int = 128,
+            port: int = 8000,
+            address="0.0.0.0",
+            allow_grpc=True,
+            allow_http=True,
+            streaming=False,
+            pytriton_log_verbose=0,
     ):
         self.checkpoint_path = checkpoint_path
         self.triton_model_name = triton_model_name
@@ -87,8 +92,8 @@ def _init_nemo_model(self):
             # has to turn off activations_checkpoint_method for inference
             try:
                 self.model.model.language_model.encoder.activations_checkpoint_method = None
-            except AttributeError:
-                pass
+            except AttributeError as e:
+                LOGGER.warning(e)
 
         if self.model is None:
             raise Exception("There is no model to deploy.")
@@ -106,4 +111,4 @@ def _is_model_deployable(self):
     @staticmethod
     def get_module_and_class(target: str):
         ln = target.rindex(".")
-        return target[0:ln], target[ln + 1 : len(target)]
+        return target[0:ln], target[ln + 1: len(target)]
diff --git a/nemo/deploy/deploy_pytriton.py b/nemo/deploy/deploy_pytriton.py
@@ -13,11 +13,13 @@
 # limitations under the License.
 
 
+use_pytriton = True
 try:
     from pytriton.model_config import ModelConfig
     from pytriton.triton import Triton, TritonConfig
-except:
-    pass
+except Exception:
+    use_pytriton = False
+
 from nemo.deploy.deploy_base import DeployBase
 
 

diff --git a/nemo/deploy/nlp/__init__.py b/nemo/deploy/nlp/__init__.py
@@ -13,7 +13,8 @@
 # limitations under the License.
 
 
+use_query_llm = True
 try:
     from nemo.deploy.nlp.query_llm import NemoQueryLLM
-except:
-    pass
+except Exception:
+    use_query_llm = False
diff --git a/nemo/deploy/nlp/query_llm.py b/nemo/deploy/nlp/query_llm.py
@@ -21,7 +21,7 @@
 use_pytriton = True
 try:
     from pytriton.client import DecoupledModelClient, ModelClient
-except:
+except Exception:
     use_pytriton = False
 
 

diff --git a/nemo/export/__init__.py b/nemo/export/__init__.py
@@ -13,7 +13,13 @@
 # limitations under the License.
 
 
+import logging
+
+LOGGER = logging.getLogger("NeMo")
+
+
+use_TensorRTLLM = True
 try:
     from nemo.export.tensorrt_llm import TensorRTLLM
-except Exception:
-    pass
+except Exception as e:
+    LOGGER.warning("TensorRTLLM could not be imported.")
diff --git a/nemo/export/trt_llm/__init__.py b/nemo/export/trt_llm/__init__.py
@@ -12,11 +12,3 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-import tensorrt_llm
-from mpi4py import MPI
-
-from nemo.export.trt_llm.model_config_trt import *  # noqa
-from nemo.export.trt_llm.nemo_utils import *  # noqa
-from nemo.export.trt_llm.quantization_utils import *  # noqa
-from nemo.export.trt_llm.tensorrt_llm_run import *  # noqa
diff --git a/nemo/export/trt_llm/decoder/llama.py b/nemo/export/trt_llm/decoder/llama.py
@@ -132,8 +132,7 @@ def build_decoder(self, layer):
         config.set_if_not_exist('moe_num_experts', 0)
 
         if layer.moe_num_experts:
-            moe_config = MoeConfig()
-            if not layer.moe_num_experts is None:
+            if layer.moe_num_experts is not None:
                 if layer.moe_top_k is None:
                     layer.moe_top_k = 1
 
@@ -143,10 +142,11 @@ def build_decoder(self, layer):
                 )
                 moe_config = MoeConfig(
                     layer.moe_num_experts, layer.moe_top_k, layer.moe_tp_mode, layer.moe_renorm_mode
-                ).validate()
+                )
+                moe_config.validate()
                 config.moe_num_experts = layer.moe_num_experts
                 config.moe_top_k = layer.moe_top_k
                 config.moe_tp_mode = layer.moe_tp_mode
                 config.moe_normalization_mode = layer.moe_renorm_mode
 
-        return LLaMADecoderLayer(config=config, layer_idx=self.layer_id,)
+        return LLaMADecoderLayer(config=config, layer_idx=self.layer_id, )
diff --git a/nemo/export/trt_llm/nemo_utils.py b/nemo/export/trt_llm/nemo_utils.py
@@ -14,8 +14,6 @@
 
 
 import argparse
-import ast
-import configparser
 import copy
 import csv
 import datetime
@@ -31,7 +29,7 @@
 import numpy as np
 import tensorrt_llm
 from tensorrt_llm import str_dtype_to_trt
-from transformers import AutoTokenizer, GPT2Config, LlamaConfig, PretrainedConfig, PreTrainedTokenizer
+from transformers import AutoTokenizer, LlamaConfig, PretrainedConfig, PreTrainedTokenizer
 
 from nemo.export.trt_llm.model_config import (
     LAYERNORM_DEFAULT,

diff --git a/nemo/export/trt_llm/tensorrt_llm_build.py b/nemo/export/trt_llm/tensorrt_llm_build.py
@@ -18,6 +18,7 @@
 import time
 from pathlib import Path
 from typing import List
+import logging
 
 import tensorrt as trt
 import tensorrt_llm
@@ -32,6 +33,8 @@
 
 MODEL_NAME = "NeMo"
 
+LOGGER = logging.getLogger("NeMo")
+
 
 def get_engine_name(model, dtype, tp_size, pp_size, rank):
     """Returns the engine file name based on the provided info."""
@@ -129,7 +132,7 @@ def build_rank_engine(
             # Use the plugin for the embedding parallelism and sharing
             network.plugin_config.set_lookup_plugin(dtype=args.dtype)
     else:
-        print("Build engine in OOTB mode, disable all plugins except nccl.")
+        LOGGER.warning("Build engine in OOTB mode, disable all plugins except nccl.")
 
     if args.mapping.world_size > 1:
         network.plugin_config.set_nccl_plugin(args.dtype)
@@ -217,10 +220,6 @@ def _build_impl(tensorrt_llm_model, args):
     engine = build_rank_engine(tensorrt_llm_model, builder, builder_config, engine_name, args)
     assert engine is not None, f"Failed to build engine for rank {rank}"
 
-    if args.mapping.rank == 0:
-        # Use in-memory timing cache for multiple builder passes.
-        if not args.parallel_build:
-            timing_cache = builder_config.trt_builder_config.get_timing_cache()
     serialize_engine(engine, args.output_dir / engine_name)
 
     if args.mapping.rank == 0:

diff --git a/nemo/export/trt_llm/tensorrt_llm_utils.py b/nemo/export/trt_llm/tensorrt_llm_utils.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 
 import tensorrt as trt
 from tensorrt_llm.layers import Embedding, LayerNorm, PromptTuningEmbedding, RmsNorm
@@ -20,6 +21,8 @@
 from nemo.export.trt_llm.model_config import LAYERNORM_DEFAULT, LAYERNORM_RMS, EmbeddingConfig, LayernormConfig
 from nemo.export.trt_llm.tensor_utils import get_tensor_parallel_group
 
+LOGGER = logging.getLogger("NeMo")
+
 
 def build_embedding_from_config(
     config: EmbeddingConfig,
@@ -76,7 +79,7 @@ def print_tensorrt_llm(name: str, tensorrt_llm_module: Module):
         if hasattr(tensorrt_llm_module, tensor_name):
             tensor = getattr(tensorrt_llm_module, tensor_name)
             if tensor is not None:
-                print(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}")
+                LOGGER.info(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}")
 
     for k, v in tensorrt_llm_module.named_children():
         print_tensorrt_llm(f"{name}.{k}({v._get_name()})", v)
diff --git a/nemo/export/trt_llm/utils.py b/nemo/export/trt_llm/utils.py
@@ -19,8 +19,6 @@
 import tarfile
 import tempfile
 import typing
-from pathlib import Path
-
 import numpy as np
 import torch
 import yaml

diff --git a/scripts/export/export_to_trt_llm.py b/scripts/export/export_to_trt_llm.py
@@ -14,20 +14,10 @@
 
 import argparse
 import logging
-import os
 import sys
-from pathlib import Path
 
-from nemo.deploy import DeployPyTriton
 from nemo.export import TensorRTLLM
 
-try:
-    from contextlib import nullcontext
-except ImportError:
-    # handle python < 3.7
-    from contextlib import suppress as nullcontext
-
-
 LOGGER = logging.getLogger("NeMo")
 
 

diff --git a/tests/deploy/test_nemo_deploy.py b/tests/deploy/test_nemo_deploy.py
@@ -13,13 +13,9 @@
 # limitations under the License.
 
 import json
-import os
 import shutil
-import time
-import urllib.request as req
 from pathlib import Path
 
-import numpy as np
 import pytest
 import torch
 from tqdm import tqdm
@@ -179,7 +175,6 @@ def run_trt_llm_export(model_name, n_gpu, skip_accuracy=False, use_pytriton=True
             print("Model Accuracy: {0}, Relaxed Model Accuracy: {1}".format(trtllm_accuracy, trtllm_accuracy_relaxed))
             assert trtllm_accuracy_relaxed > 0.5, "Model accuracy is below 0.5"
 
-        trt_llm_exporter = None
         nm.stop()
         shutil.rmtree(model_info["trt_llm_model_dir"])