Skip to content

Commit 2382a5f

Browse files
authored
Merge pull request #6492 from hiyouga/hiyouga/add_deepseek3
[model] add deepseek3 model
2 parents 91467ed + e67b9dc commit 2382a5f

File tree

4 files changed

+32
-2
lines changed

4 files changed

+32
-2
lines changed

README.md

+1
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
191191
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
192192
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
193193
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
194+
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 |
194195
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
195196
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
196197
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |

README_zh.md

+1
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
192192
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
193193
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
194194
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
195+
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/685B | deepseek3 |
195196
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
196197
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
197198
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |

src/llamafactory/data/template.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -619,9 +619,8 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
619619

620620

621621
_register_template(
622-
name="deepseek_v2.5",
622+
name="deepseek3",
623623
format_user=StringFormatter(slots=["<|User|>{{content}}<|Assistant|>"]),
624-
format_system=StringFormatter(slots=["{{content}}"]),
625624
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
626625
)
627626

src/llamafactory/extras/constants.py

+29
Original file line number | Diff line number | Diff line change
@@ -448,6 +448,7 @@ def register_model_group(
448448
},
449449
"DeepSeek-Coder-7B-Base": {
450450
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-base-v1.5",
451+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-7b-base-v1.5",
451452
},
452453
"DeepSeek-Coder-33B-Base": {
453454
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
@@ -459,6 +460,7 @@ def register_model_group(
459460
},
460461
"DeepSeek-Coder-7B-Instruct": {
461462
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
463+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
462464
},
463465
"DeepSeek-Coder-33B-Instruct": {
464466
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
@@ -469,6 +471,33 @@ def register_model_group(
469471
)
470472

471473

474+
register_model_group(
475+
models={
476+
"DeepSeek-V2-236B-Chat-0628": {
477+
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2-Chat-0628",
478+
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2-Chat-0628",
479+
},
480+
"DeepSeek-V2.5-236B-Chat": {
481+
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2.5",
482+
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2.5",
483+
},
484+
"DeepSeek-V2.5-236B-Chat-1210": {
485+
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2.5-1210",
486+
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2.5-1210",
487+
},
488+
"DeepSeek-V3-685B-Base": {
489+
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3-Base",
490+
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3-Base",
491+
},
492+
"DeepSeek-V3-685B-Chat": {
493+
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V3",
494+
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V3",
495+
},
496+
},
497+
template="deepseek3",
498+
)
499+
500+
472501
register_model_group(
473502
models={
474503
"EXAONE-3.0-7.8B-Instruct": {

0 commit comments

Comments (0)