From e952e4f38a3eaba61a0dc90d4c95cbcce1681204 Mon Sep 17 00:00:00 2001
From: jinminxi104
Date: Mon, 28 Oct 2024 07:19:42 +0000
Subject: [PATCH] fix supported model list of ascend graph mode

---
 docs/en/get_started/ascend/get_started.md    |  2 +-
 docs/zh_cn/get_started/ascend/get_started.md |  2 +-
 .../backends/dlinfer/ascend/graph_runner.py  | 30 +++++++++----------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/docs/en/get_started/ascend/get_started.md b/docs/en/get_started/ascend/get_started.md
index c737185420..402ac50fbf 100644
--- a/docs/en/get_started/ascend/get_started.md
+++ b/docs/en/get_started/ascend/get_started.md
@@ -49,7 +49,7 @@ For more information about running the Docker client on Ascend devices, please r
 
 ## Offline batch inference
 
 > \[!TIP\]
-> Graph mode has been supported on Atlas 800T A2. Currently, InternLM2-7B/LLaMa2-7B/Qwen2-7B are tested on graph mode.
+> Graph mode has been supported on Atlas 800T A2. Currently, LLaMa3-8B/LLaMa2-7B/Qwen2-7B are tested on graph mode.
 > Users can set `eager_mode=False` to enable graph mode, or, set `eager_mode=True` to disable graph mode.
 > (Please source `/usr/local/Ascend/nnal/atb/set_env.sh` before enabling graph mode)
diff --git a/docs/zh_cn/get_started/ascend/get_started.md b/docs/zh_cn/get_started/ascend/get_started.md
index cde7409a36..78bd8383d4 100644
--- a/docs/zh_cn/get_started/ascend/get_started.md
+++ b/docs/zh_cn/get_started/ascend/get_started.md
@@ -49,7 +49,7 @@ docker run -e ASCEND_VISIBLE_DEVICES=0 --rm --name lmdeploy -t lmdeploy-aarch64-
 
 ## 离线批处理
 
 > \[!TIP\]
-> 图模式已经支持了Atlas 800T A2。目前,单卡下的InternLM2-7B/LLaMa2-7B/Qwen2-7B已经通过测试。用户可以设定`eager_mode=False`来开启图模式,或者设定`eager_mode=True`来关闭图模式。(启动图模式需要事先source `/usr/local/Ascend/nnal/atb/set_env.sh`)
+> 图模式已经支持了Atlas 800T A2。目前,单卡下的LLaMa3-8B/LLaMa2-7B/Qwen2-7B已经通过测试。用户可以设定`eager_mode=False`来开启图模式,或者设定`eager_mode=True`来关闭图模式。(启动图模式需要事先source `/usr/local/Ascend/nnal/atb/set_env.sh`)
 
 ### LLM 推理
diff --git a/lmdeploy/pytorch/backends/dlinfer/ascend/graph_runner.py b/lmdeploy/pytorch/backends/dlinfer/ascend/graph_runner.py
index 3ecc4223bd..7dbb86d4b6 100644
--- a/lmdeploy/pytorch/backends/dlinfer/ascend/graph_runner.py
+++ b/lmdeploy/pytorch/backends/dlinfer/ascend/graph_runner.py
@@ -22,6 +22,7 @@ def __init__(self, model: torch.nn.Module, model_config: ModelConfig,
         super().__init__(model, model_config, cache_config, backend_config,
                          device)
 
+        self.supported_model = ['Llama3-8B', 'Llama2-7B', 'Qwen2-7B']
         self.enable_graph = self.check_enable_graph()
         if self.enable_graph:
             import dlinfer.graph
@@ -44,21 +45,20 @@ def check_enable_graph(self):
                 "Graph mode of device_type 'ascend' only supports tp=1 "
                 'for now, fallback to eager mode', RuntimeWarning)
             return False
-        # model support
-        self.supported_model = {
-            'Llama2': 'LlamaConfig',
-            'InternLM2': 'InternLM2Config',
-            'Qwen2': 'Qwen2Config',
-        }
-        is_model_support = True
-        model_config_name = str(type(self.model_config.hf_config).__name__)
-        if model_config_name not in self.supported_model.values():
-            is_model_support = False
-        if not is_model_support:
-            warnings.warn(
-                "Graph mode of device_type 'ascend' only supports models: "
-                f"{', '.join(self.supported_model.keys())} when tp=1 for now",
-                RuntimeWarning)
+
+        warnings.warn(
+            '\n\n'
+            '**********************************************************\n'
+            ' The following models were tested in graph mode of\n'
+            " device_type 'ascend' when tp=1:\n"
+            f" {', '.join(self.supported_model)}\n"
+            ' Other LLaMa-like models may work in graph mode, please\n'
+            ' check the result yourself!\n'
+            ' If graph mode does not work correctly with your model,\n'
+            ' please use eager mode instead.\n'
+            '**********************************************************\n\n',
+            RuntimeWarning)
+
         return True
 
     def patch_kernels_custom_op(self):