diff --git a/lmdeploy/model.py b/lmdeploy/model.py
index f7b80ed102..015020778a 100644
--- a/lmdeploy/model.py
+++ b/lmdeploy/model.py
@@ -443,11 +443,12 @@ def match(cls, model_path: str) -> Optional[str]:
             model_path (str): the model path used for matching.
         """
         path = model_path.lower()
-        if all([c not in path for c in ['internlm2', '8k']]) and \
+        if all([c not in path for c in ['internlm3', 'internlm2', '8k']]) and \
                 all([c in path for c in ['internlm', 'chat']]):
             return 'internlm'
 
 
+@MODELS.register_module(name='internlm3')
 @MODELS.register_module(name='internlm2')
 class InternLM2Chat7B(InternLMChat7B):
     """Chat template and generation parameters of InternLM2-Chat-7B."""
@@ -490,6 +491,8 @@ def match(cls, model_path: str) -> Optional[str]:
         path = model_path.lower()
         if 'internlm2' in path and ('chat' in path or 'math' in path):
             return 'internlm2'
+        if 'internlm3' in path and ('instruct' in path):
+            return 'internlm3'
 
     def messages2prompt(self,
                         messages,
diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py
index 701e4330e2..0f2ea66367 100644
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -574,7 +574,6 @@ def stream_infer(
                 **kwargs)
 
     async def _get_prompt_input(self,
-                                session_id: int,
                                 prompt: str,
                                 do_preprocess: bool,
                                 sequence_start: bool,
diff --git a/lmdeploy/turbomind/deploy/source_model/llama.py b/lmdeploy/turbomind/deploy/source_model/llama.py
index 0c702d6588..aa7f98b41f 100644
--- a/lmdeploy/turbomind/deploy/source_model/llama.py
+++ b/lmdeploy/turbomind/deploy/source_model/llama.py
@@ -191,7 +191,7 @@ def model_info(self):
 
         return dict(
             size_per_head=head_dim,
-            rotary_embedding=hidden_units // attn_head_num,
+            rotary_embedding=head_dim,
             num_layer=num_layer,
             norm_eps=norm_eps,
             head_num=attn_head_num,
diff --git a/lmdeploy/turbomind/supported_models.py b/lmdeploy/turbomind/supported_models.py
index 2b9c5156ed..59ddae2114 100644
--- a/lmdeploy/turbomind/supported_models.py
+++ b/lmdeploy/turbomind/supported_models.py
@@ -13,6 +13,8 @@
     InternLMForCausalLM='llama',
     # internlm2
     InternLM2ForCausalLM='internlm2',
+    # internlm3
+    InternLM3ForCausalLM='llama',
     # llama, llama2, alpaca, vicuna, codellama, ultracm, yi,
     # deepseek-coder, deepseek-llm
     LlamaForCausalLM='llama',