From fb27b4e894ea352b9f010e85bb892b246206af62 Mon Sep 17 00:00:00 2001 From: Onur Yilmaz Date: Thu, 4 Apr 2024 14:06:04 -0400 Subject: [PATCH] Address the CodeQL issues Signed-off-by: Onur Yilmaz --- nemo/deploy/deploy_base.py | 39 +++++++++++++---------- nemo/deploy/deploy_pytriton.py | 6 ++-- nemo/deploy/nlp/__init__.py | 5 +-- nemo/deploy/nlp/query_llm.py | 2 +- nemo/export/__init__.py | 10 ++++-- nemo/export/trt_llm/__init__.py | 8 ----- nemo/export/trt_llm/decoder/llama.py | 8 ++--- nemo/export/trt_llm/nemo_utils.py | 4 +-- nemo/export/trt_llm/tensorrt_llm_build.py | 9 +++--- nemo/export/trt_llm/tensorrt_llm_utils.py | 5 ++- nemo/export/trt_llm/utils.py | 2 -- scripts/export/export_to_trt_llm.py | 10 ------ tests/deploy/test_nemo_deploy.py | 5 --- 13 files changed, 51 insertions(+), 62 deletions(-) diff --git a/nemo/deploy/deploy_base.py b/nemo/deploy/deploy_base.py index 33c5ccfb3cf4..5ad418ae4a74 100644 --- a/nemo/deploy/deploy_base.py +++ b/nemo/deploy/deploy_base.py @@ -14,35 +14,40 @@ import importlib from abc import ABC, abstractmethod +import logging +use_pytorch_lightning = True try: from pytorch_lightning import Trainer except Exception: - pass + use_pytorch_lightning = False from nemo.deploy.triton_deployable import ITritonDeployable use_nemo = True try: from nemo.core.classes.modelPT import ModelPT -except: +except Exception: use_nemo = False +LOGGER = logging.getLogger("NeMo") + + class DeployBase(ABC): def __init__( - self, - triton_model_name: str, - triton_model_version: int = 1, - checkpoint_path: str = None, - model=None, - max_batch_size: int = 128, - port: int = 8000, - address="0.0.0.0", - allow_grpc=True, - allow_http=True, - streaming=False, - pytriton_log_verbose=0, + self, + triton_model_name: str, + triton_model_version: int = 1, + checkpoint_path: str = None, + model=None, + max_batch_size: int = 128, + port: int = 8000, + address="0.0.0.0", + allow_grpc=True, + allow_http=True, + streaming=False, + pytriton_log_verbose=0, ): self.checkpoint_path = checkpoint_path self.triton_model_name = triton_model_name @@ -87,8 +92,8 @@ def _init_nemo_model(self): # has to turn off activations_checkpoint_method for inference try: self.model.model.language_model.encoder.activations_checkpoint_method = None - except AttributeError: - pass + except AttributeError as e: + LOGGER.warning(e) if self.model is None: raise Exception("There is no model to deploy.") @@ -106,4 +111,4 @@ def _is_model_deployable(self): @staticmethod def get_module_and_class(target: str): ln = target.rindex(".") - return target[0:ln], target[ln + 1 : len(target)] + return target[0:ln], target[ln + 1: len(target)] diff --git a/nemo/deploy/deploy_pytriton.py b/nemo/deploy/deploy_pytriton.py index 09621ad2eba5..22dea8ac47cd 100644 --- a/nemo/deploy/deploy_pytriton.py +++ b/nemo/deploy/deploy_pytriton.py @@ -13,11 +13,13 @@ # limitations under the License. +use_pytriton = True try: from pytriton.model_config import ModelConfig from pytriton.triton import Triton, TritonConfig -except: - pass +except Exception: + use_pytriton = False + from nemo.deploy.deploy_base import DeployBase diff --git a/nemo/deploy/nlp/__init__.py b/nemo/deploy/nlp/__init__.py index 24b8c36a9cc0..21e2ca2751f8 100644 --- a/nemo/deploy/nlp/__init__.py +++ b/nemo/deploy/nlp/__init__.py @@ -13,7 +13,8 @@ # limitations under the License. +use_query_llm = True try: from nemo.deploy.nlp.query_llm import NemoQueryLLM -except: - pass +except Exception: + use_query_llm = False diff --git a/nemo/deploy/nlp/query_llm.py b/nemo/deploy/nlp/query_llm.py index 54c5e37142ef..6a4337024eeb 100644 --- a/nemo/deploy/nlp/query_llm.py +++ b/nemo/deploy/nlp/query_llm.py @@ -21,7 +21,7 @@ use_pytriton = True try: from pytriton.client import DecoupledModelClient, ModelClient -except: +except Exception: use_pytriton = False diff --git a/nemo/export/__init__.py b/nemo/export/__init__.py index eb8ebec10441..55712d98852c 100644 --- a/nemo/export/__init__.py +++ b/nemo/export/__init__.py @@ -13,7 +13,13 @@ # limitations under the License. +import logging + +LOGGER = logging.getLogger("NeMo") + + +use_TensorRTLLM = True try: from nemo.export.tensorrt_llm import TensorRTLLM -except Exception: - pass +except Exception as e: + LOGGER.warning("TensorRTLLM could not be imported.") diff --git a/nemo/export/trt_llm/__init__.py b/nemo/export/trt_llm/__init__.py index c7e532029979..9d1c9fb790ae 100644 --- a/nemo/export/trt_llm/__init__.py +++ b/nemo/export/trt_llm/__init__.py @@ -12,11 +12,3 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import tensorrt_llm -from mpi4py import MPI - -from nemo.export.trt_llm.model_config_trt import * # noqa -from nemo.export.trt_llm.nemo_utils import * # noqa -from nemo.export.trt_llm.quantization_utils import * # noqa -from nemo.export.trt_llm.tensorrt_llm_run import * # noqa diff --git a/nemo/export/trt_llm/decoder/llama.py b/nemo/export/trt_llm/decoder/llama.py index e8b3b5a58ebf..0f6951c28ec3 100644 --- a/nemo/export/trt_llm/decoder/llama.py +++ b/nemo/export/trt_llm/decoder/llama.py @@ -132,8 +132,7 @@ def build_decoder(self, layer): config.set_if_not_exist('moe_num_experts', 0) if layer.moe_num_experts: - moe_config = MoeConfig() - if not layer.moe_num_experts is None: + if layer.moe_num_experts is not None: if layer.moe_top_k is None: layer.moe_top_k = 1 @@ -143,10 +142,11 @@ def build_decoder(self, layer): ) moe_config = MoeConfig( layer.moe_num_experts, layer.moe_top_k, layer.moe_tp_mode, layer.moe_renorm_mode - ).validate() + ) + moe_config.validate() config.moe_num_experts = layer.moe_num_experts config.moe_top_k = layer.moe_top_k config.moe_tp_mode = layer.moe_tp_mode config.moe_normalization_mode = layer.moe_renorm_mode - return LLaMADecoderLayer(config=config, layer_idx=self.layer_id,) + return LLaMADecoderLayer(config=config, layer_idx=self.layer_id, ) diff --git a/nemo/export/trt_llm/nemo_utils.py b/nemo/export/trt_llm/nemo_utils.py index 0fffe0d784fd..a82a9aba2c6a 100644 --- a/nemo/export/trt_llm/nemo_utils.py +++ b/nemo/export/trt_llm/nemo_utils.py @@ -14,8 +14,6 @@ import argparse -import ast -import configparser import copy import csv import datetime @@ -31,7 +29,7 @@ import numpy as np import tensorrt_llm from tensorrt_llm import str_dtype_to_trt -from transformers import AutoTokenizer, GPT2Config, LlamaConfig, PretrainedConfig, PreTrainedTokenizer +from transformers import AutoTokenizer, LlamaConfig, PretrainedConfig, PreTrainedTokenizer from nemo.export.trt_llm.model_config import ( LAYERNORM_DEFAULT, diff --git a/nemo/export/trt_llm/tensorrt_llm_build.py b/nemo/export/trt_llm/tensorrt_llm_build.py index 0ac8025dba83..e01e6b991b87 100644 --- a/nemo/export/trt_llm/tensorrt_llm_build.py +++ b/nemo/export/trt_llm/tensorrt_llm_build.py @@ -18,6 +18,7 @@ import time from pathlib import Path from typing import List +import logging import tensorrt as trt import tensorrt_llm @@ -32,6 +33,8 @@ MODEL_NAME = "NeMo" +LOGGER = logging.getLogger("NeMo") + def get_engine_name(model, dtype, tp_size, pp_size, rank): """Returns the engine file name based on the provided info.""" @@ -129,7 +132,7 @@ def build_rank_engine( # Use the plugin for the embedding parallelism and sharing network.plugin_config.set_lookup_plugin(dtype=args.dtype) else: - print("Build engine in OOTB mode, disable all plugins except nccl.") + LOGGER.warning("Build engine in OOTB mode, disable all plugins except nccl.") if args.mapping.world_size > 1: network.plugin_config.set_nccl_plugin(args.dtype) @@ -217,10 +220,6 @@ def _build_impl(tensorrt_llm_model, args): engine = build_rank_engine(tensorrt_llm_model, builder, builder_config, engine_name, args) assert engine is not None, f"Failed to build engine for rank {rank}" - if args.mapping.rank == 0: - # Use in-memory timing cache for multiple builder passes. - if not args.parallel_build: - timing_cache = builder_config.trt_builder_config.get_timing_cache() serialize_engine(engine, args.output_dir / engine_name) if args.mapping.rank == 0: diff --git a/nemo/export/trt_llm/tensorrt_llm_utils.py b/nemo/export/trt_llm/tensorrt_llm_utils.py index 2d02558eac4a..b732daca2525 100644 --- a/nemo/export/trt_llm/tensorrt_llm_utils.py +++ b/nemo/export/trt_llm/tensorrt_llm_utils.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import tensorrt as trt from tensorrt_llm.layers import Embedding, LayerNorm, PromptTuningEmbedding, RmsNorm @@ -20,6 +21,8 @@ from nemo.export.trt_llm.model_config import LAYERNORM_DEFAULT, LAYERNORM_RMS, EmbeddingConfig, LayernormConfig from nemo.export.trt_llm.tensor_utils import get_tensor_parallel_group +LOGGER = logging.getLogger("NeMo") + def build_embedding_from_config( config: EmbeddingConfig, @@ -76,7 +79,7 @@ def print_tensorrt_llm(name: str, tensorrt_llm_module: Module): if hasattr(tensorrt_llm_module, tensor_name): tensor = getattr(tensorrt_llm_module, tensor_name) if tensor is not None: - print(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}") + LOGGER.info(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}") for k, v in tensorrt_llm_module.named_children(): print_tensorrt_llm(f"{name}.{k}({v._get_name()})", v) diff --git a/nemo/export/trt_llm/utils.py b/nemo/export/trt_llm/utils.py index afa621cf7dcb..4095a809a456 100644 --- a/nemo/export/trt_llm/utils.py +++ b/nemo/export/trt_llm/utils.py @@ -19,8 +19,6 @@ import tarfile import tempfile import typing -from pathlib import Path - import numpy as np import torch import yaml diff --git a/scripts/export/export_to_trt_llm.py b/scripts/export/export_to_trt_llm.py index 725a4caf223f..9798473dd880 100644 --- a/scripts/export/export_to_trt_llm.py +++ b/scripts/export/export_to_trt_llm.py @@ -14,20 +14,10 @@ import argparse import logging -import os import sys -from pathlib import Path -from nemo.deploy import DeployPyTriton from nemo.export import TensorRTLLM -try: - from contextlib import nullcontext -except ImportError: - # handle python < 3.7 - from contextlib import suppress as nullcontext - - LOGGER = logging.getLogger("NeMo") diff --git a/tests/deploy/test_nemo_deploy.py b/tests/deploy/test_nemo_deploy.py index 4b2c98911560..39d064e8b489 100644 --- a/tests/deploy/test_nemo_deploy.py +++ b/tests/deploy/test_nemo_deploy.py @@ -13,13 +13,9 @@ # limitations under the License. import json -import os import shutil -import time -import urllib.request as req from pathlib import Path -import numpy as np import pytest import torch from tqdm import tqdm @@ -179,7 +175,6 @@ def run_trt_llm_export(model_name, n_gpu, skip_accuracy=False, use_pytriton=True print("Model Accuracy: {0}, Relaxed Model Accuracy: {1}".format(trtllm_accuracy, trtllm_accuracy_relaxed)) assert trtllm_accuracy_relaxed > 0.5, "Model accuracy is below 0.5" - trt_llm_exporter = None nm.stop() shutil.rmtree(model_info["trt_llm_model_dir"])