Skip to content

Commit

Permalink
Address the CodeQL issues
Browse files (browse the repository at this point in the history)
Signed-off-by: Onur Yilmaz <[email protected]>
  • Loading branch information
oyilmaz-nvidia committed Apr 4, 2024
1 parent 648acb6 commit fb27b4e
Show file tree
Hide file tree
Showing 13 changed files with 51 additions and 62 deletions.
39 changes: 22 additions & 17 deletions nemo/deploy/deploy_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,40 @@

import importlib
from abc import ABC, abstractmethod
import logging

use_pytorch_lightning = True
try:
from pytorch_lightning import Trainer
except Exception:
pass
use_pytorch_lightning = False

from nemo.deploy.triton_deployable import ITritonDeployable

use_nemo = True
try:
from nemo.core.classes.modelPT import ModelPT
except:
except Exception:
use_nemo = False


LOGGER = logging.getLogger("NeMo")


class DeployBase(ABC):
def __init__(
self,
triton_model_name: str,
triton_model_version: int = 1,
checkpoint_path: str = None,
model=None,
max_batch_size: int = 128,
port: int = 8000,
address="0.0.0.0",
allow_grpc=True,
allow_http=True,
streaming=False,
pytriton_log_verbose=0,
self,
triton_model_name: str,
triton_model_version: int = 1,
checkpoint_path: str = None,
model=None,
max_batch_size: int = 128,
port: int = 8000,
address="0.0.0.0",
allow_grpc=True,
allow_http=True,
streaming=False,
pytriton_log_verbose=0,
):
self.checkpoint_path = checkpoint_path
self.triton_model_name = triton_model_name
Expand Down Expand Up @@ -87,8 +92,8 @@ def _init_nemo_model(self):
# has to turn off activations_checkpoint_method for inference
try:
self.model.model.language_model.encoder.activations_checkpoint_method = None
except AttributeError:
pass
except AttributeError as e:
LOGGER.warning(e)

if self.model is None:
raise Exception("There is no model to deploy.")
Expand All @@ -106,4 +111,4 @@ def _is_model_deployable(self):
@staticmethod
def get_module_and_class(target: str):
ln = target.rindex(".")
return target[0:ln], target[ln + 1 : len(target)]
return target[0:ln], target[ln + 1: len(target)]
6 changes: 4 additions & 2 deletions nemo/deploy/deploy_pytriton.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
# limitations under the License.


use_pytriton = True
try:
from pytriton.model_config import ModelConfig
from pytriton.triton import Triton, TritonConfig
except:
pass
except Exception:
use_pytriton = False

from nemo.deploy.deploy_base import DeployBase


Expand Down
5 changes: 3 additions & 2 deletions nemo/deploy/nlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
# limitations under the License.


use_query_llm = True
try:
from nemo.deploy.nlp.query_llm import NemoQueryLLM
except:
pass
except Exception:
use_query_llm = False
2 changes: 1 addition & 1 deletion nemo/deploy/nlp/query_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
use_pytriton = True
try:
from pytriton.client import DecoupledModelClient, ModelClient
except:
except Exception:
use_pytriton = False


Expand Down
10 changes: 8 additions & 2 deletions nemo/export/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@
# limitations under the License.


import logging

LOGGER = logging.getLogger("NeMo")


use_TensorRTLLM = True
try:
from nemo.export.tensorrt_llm import TensorRTLLM
except Exception:
pass
except Exception as e:
LOGGER.warning("TensorRTLLM could not be imported.")
8 changes: 0 additions & 8 deletions nemo/export/trt_llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import tensorrt_llm
from mpi4py import MPI

from nemo.export.trt_llm.model_config_trt import * # noqa
from nemo.export.trt_llm.nemo_utils import * # noqa
from nemo.export.trt_llm.quantization_utils import * # noqa
from nemo.export.trt_llm.tensorrt_llm_run import * # noqa
8 changes: 4 additions & 4 deletions nemo/export/trt_llm/decoder/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,7 @@ def build_decoder(self, layer):
config.set_if_not_exist('moe_num_experts', 0)

if layer.moe_num_experts:
moe_config = MoeConfig()
if not layer.moe_num_experts is None:
if layer.moe_num_experts is not None:
if layer.moe_top_k is None:
layer.moe_top_k = 1

Expand All @@ -143,10 +142,11 @@ def build_decoder(self, layer):
)
moe_config = MoeConfig(
layer.moe_num_experts, layer.moe_top_k, layer.moe_tp_mode, layer.moe_renorm_mode
).validate()
)
moe_config.validate()
config.moe_num_experts = layer.moe_num_experts
config.moe_top_k = layer.moe_top_k
config.moe_tp_mode = layer.moe_tp_mode
config.moe_normalization_mode = layer.moe_renorm_mode

return LLaMADecoderLayer(config=config, layer_idx=self.layer_id,)
return LLaMADecoderLayer(config=config, layer_idx=self.layer_id, )
4 changes: 1 addition & 3 deletions nemo/export/trt_llm/nemo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@


import argparse
import ast
import configparser
import copy
import csv
import datetime
Expand All @@ -31,7 +29,7 @@
import numpy as np
import tensorrt_llm
from tensorrt_llm import str_dtype_to_trt
from transformers import AutoTokenizer, GPT2Config, LlamaConfig, PretrainedConfig, PreTrainedTokenizer
from transformers import AutoTokenizer, LlamaConfig, PretrainedConfig, PreTrainedTokenizer

from nemo.export.trt_llm.model_config import (
LAYERNORM_DEFAULT,
Expand Down
9 changes: 4 additions & 5 deletions nemo/export/trt_llm/tensorrt_llm_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import time
from pathlib import Path
from typing import List
import logging

import tensorrt as trt
import tensorrt_llm
Expand All @@ -32,6 +33,8 @@

MODEL_NAME = "NeMo"

LOGGER = logging.getLogger("NeMo")


def get_engine_name(model, dtype, tp_size, pp_size, rank):
"""Returns the engine file name based on the provided info."""
Expand Down Expand Up @@ -129,7 +132,7 @@ def build_rank_engine(
# Use the plugin for the embedding parallelism and sharing
network.plugin_config.set_lookup_plugin(dtype=args.dtype)
else:
print("Build engine in OOTB mode, disable all plugins except nccl.")
LOGGER.warning("Build engine in OOTB mode, disable all plugins except nccl.")

if args.mapping.world_size > 1:
network.plugin_config.set_nccl_plugin(args.dtype)
Expand Down Expand Up @@ -217,10 +220,6 @@ def _build_impl(tensorrt_llm_model, args):
engine = build_rank_engine(tensorrt_llm_model, builder, builder_config, engine_name, args)
assert engine is not None, f"Failed to build engine for rank {rank}"

if args.mapping.rank == 0:
# Use in-memory timing cache for multiple builder passes.
if not args.parallel_build:
timing_cache = builder_config.trt_builder_config.get_timing_cache()
serialize_engine(engine, args.output_dir / engine_name)

if args.mapping.rank == 0:
Expand Down
5 changes: 4 additions & 1 deletion nemo/export/trt_llm/tensorrt_llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import tensorrt as trt
from tensorrt_llm.layers import Embedding, LayerNorm, PromptTuningEmbedding, RmsNorm
Expand All @@ -20,6 +21,8 @@
from nemo.export.trt_llm.model_config import LAYERNORM_DEFAULT, LAYERNORM_RMS, EmbeddingConfig, LayernormConfig
from nemo.export.trt_llm.tensor_utils import get_tensor_parallel_group

LOGGER = logging.getLogger("NeMo")


def build_embedding_from_config(
config: EmbeddingConfig,
Expand Down Expand Up @@ -76,7 +79,7 @@ def print_tensorrt_llm(name: str, tensorrt_llm_module: Module):
if hasattr(tensorrt_llm_module, tensor_name):
tensor = getattr(tensorrt_llm_module, tensor_name)
if tensor is not None:
print(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}")
LOGGER.info(f"{name}.{tensor_name}:{tensor._value.dtype}:{tensor._value.shape}:\n{tensor._value}")

for k, v in tensorrt_llm_module.named_children():
print_tensorrt_llm(f"{name}.{k}({v._get_name()})", v)
2 changes: 0 additions & 2 deletions nemo/export/trt_llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import tarfile
import tempfile
import typing
from pathlib import Path

import numpy as np
import torch
import yaml
Expand Down
10 changes: 0 additions & 10 deletions scripts/export/export_to_trt_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,10 @@

import argparse
import logging
import os
import sys
from pathlib import Path

from nemo.deploy import DeployPyTriton
from nemo.export import TensorRTLLM

try:
from contextlib import nullcontext
except ImportError:
# handle python < 3.7
from contextlib import suppress as nullcontext


LOGGER = logging.getLogger("NeMo")


Expand Down
5 changes: 0 additions & 5 deletions tests/deploy/test_nemo_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@
# limitations under the License.

import json
import os
import shutil
import time
import urllib.request as req
from pathlib import Path

import numpy as np
import pytest
import torch
from tqdm import tqdm
Expand Down Expand Up @@ -179,7 +175,6 @@ def run_trt_llm_export(model_name, n_gpu, skip_accuracy=False, use_pytriton=True
print("Model Accuracy: {0}, Relaxed Model Accuracy: {1}".format(trtllm_accuracy, trtllm_accuracy_relaxed))
assert trtllm_accuracy_relaxed > 0.5, "Model accuracy is below 0.5"

trt_llm_exporter = None
nm.stop()
shutil.rmtree(model_info["trt_llm_model_dir"])

Expand Down

0 comments on commit fb27b4e

Please sign in to comment.