[ci] add internlm3 into testcase #3038

Merged: 9 commits merged on Jan 16, 2025.
Changes from 8 commits
2 changes: 1 addition & 1 deletion .github/scripts/eval_base_config.py
@@ -191,4 +191,4 @@
for model in [v for k, v in locals().items() if '_batch1' in k]:
model['abbr'] = model['abbr'] + '_batch1'
model['engine_config']['max_batch_size'] = 1
model['batch_size'] = 1
model['batch_size'] = 100
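
For readers unfamiliar with the pattern this hunk edits, here is a minimal, self-contained sketch (using a hypothetical base config, not the repository's exact one) of how the `_batch1` variants are derived by scanning module-level names with `locals()` and overriding their batch settings in place:

```python
from copy import deepcopy

# Hypothetical stand-in for a config imported from OpenCompass; the real base
# configs come from opencompass.configs.models.* as in eval_chat_config.py.
lmdeploy_internlm2_5_7b = dict(
    abbr='internlm2_5-7b-turbomind',
    engine_config=dict(max_batch_size=512),
    batch_size=1000,
)

turbomind_internlm2_5_7b_batch1 = deepcopy(lmdeploy_internlm2_5_7b)

# Same loop as the diff: every module-level variable whose name contains
# '_batch1' gets max_batch_size 1 in the engine and batch_size 100 on the runner.
for model in [v for k, v in locals().items() if '_batch1' in k]:
    model['abbr'] = model['abbr'] + '_batch1'
    model['engine_config']['max_batch_size'] = 1
    model['batch_size'] = 100

print(turbomind_internlm2_5_7b_batch1['abbr'])  # internlm2_5-7b-turbomind_batch1
```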
39 changes: 25 additions & 14 deletions .github/scripts/eval_chat_config.py
@@ -54,10 +54,6 @@
models as hf_internlm2_chat_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_chat_20b import \
models as hf_internlm2_chat_20b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm_chat_7b import \
models as hf_internlm_chat_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm_chat_20b import \
models as hf_internlm_chat_20b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
models as lmdeploy_internlm2_5_7b_chat # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import \
@@ -131,11 +127,6 @@
MAX_SESSION_LEN = 2048
MAX_NEW_TOKENS = 1024

# ===== Configs for internlm/internlm-chat-7b =====
turbomind_internlm_chat_7b = deepcopy(*lmdeploy_internlm_chat_7b)
turbomind_internlm_chat_7b_4bits = deepcopy(*lmdeploy_internlm_chat_7b)
pytorch_internlm_chat_7b = deepcopy(*lmdeploy_internlm_chat_7b)

# ===== Configs for internlm/internlm2-chat-7b =====
turbomind_internlm2_chat_7b = deepcopy(*lmdeploy_internlm2_chat_7b)
turbomind_internlm2_chat_7b_4bits = deepcopy(*lmdeploy_internlm2_chat_7b)
@@ -150,6 +141,21 @@
turbomind_internlm2_5_7b_chat_kvint8 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm2_5_7b_chat = deepcopy(*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm2_5_7b_chat_w8a8 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm2_5_7b_chat_batch1 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm2_5_7b_chat_batch1_4bits = deepcopy(
*lmdeploy_internlm2_5_7b_chat)

turbomind_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)
Collaborator:

> Shouldn't the internlm3-8b-instruct model be used here?

Collaborator (author):

> Shouldn't the internlm3-8b-instruct model be used here?

OpenCompass has no internlm3 chat template yet. To get the evaluation running first, the internlm2.5 config is reused here; it will be replaced once OpenCompass adds one.

turbomind_internlm3_8b_instruct_4bits = deepcopy(*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm3_8b_instruct_kvint4 = deepcopy(
*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm3_8b_instruct_kvint8 = deepcopy(
*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm3_8b_instruct_w8a8 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
for model in [v for k, v in locals().items() if 'internlm3_8b_instruct' in k]:
model['abbr'] = 'turbomind-internlm3-8b-instruct'
model['path'] = 'internlm/internlm3-8b-instruct'
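
The exchange above amounts to a small piece of config plumbing. A minimal sketch of it, using a hypothetical stand-in for the OpenCompass config (the real `lmdeploy_internlm2_5_7b_chat` is a one-element list imported at the top of this file): the internlm2.5 entry is deep-copied, then every internlm3 copy has its `abbr` and `path` pointed at internlm3-8b-instruct so evaluation can run before OpenCompass ships an internlm3 template.

```python
from copy import deepcopy

# Hypothetical stand-in: OpenCompass model configs are one-element lists of dicts.
lmdeploy_internlm2_5_7b_chat = [dict(
    abbr='internlm2_5-7b-chat-turbomind',
    path='internlm/internlm2_5-7b-chat',
    engine_config=dict(max_batch_size=512, tp=1),
    gen_config=dict(do_sample=False),
)]

# Reuse the internlm2.5 template for internlm3 until OpenCompass provides one...
turbomind_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)

# ...then point every internlm3 copy at the internlm3 weights.
for model in [v for k, v in locals().items() if 'internlm3_8b_instruct' in k]:
    model['abbr'] = 'turbomind-internlm3-8b-instruct'
    model['path'] = 'internlm/internlm3-8b-instruct'
```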

# ===== Configs for internlm/internlm2_5_20b_chat =====
turbomind_internlm2_5_20b_chat = deepcopy(*lmdeploy_internlm2_5_20b_chat)
@@ -223,9 +229,9 @@
turbomind_llama2_7b_chat_kvint8 = deepcopy(*lmdeploy_llama2_7b_chat)

for model in [v for k, v in locals().items() if k.startswith('turbomind_')]:
model['engine_config']['max_batch_size'] = 1
model['engine_config']['max_batch_size'] = 512
model['gen_config']['do_sample'] = False
model['batch_size'] = 100
model['batch_size'] = 1000

for model in [v for k, v in locals().items() if k.endswith('_4bits')]:
model['engine_config']['model_format'] = 'awq'
@@ -247,19 +253,24 @@
for model in [v for k, v in locals().items() if k.startswith('pytorch_')]:
model['abbr'] = model['abbr'].replace('turbomind', 'pytorch')
model['backend'] = 'pytorch'
model['engine_config']['max_batch_size'] = 1
model['engine_config']['max_batch_size'] = 512
model['gen_config']['do_sample'] = False
model['batch_size'] = 100

for model in [v for k, v in locals().items() if '_batch1' in k]:
model['abbr'] = model['abbr'] + '_batch1'
model['engine_config']['max_batch_size'] = 1
model['batch_size'] = 100

basic_pytorch_chat_tp1 = dict(type=TurboMindModelwithChatTemplate,
engine_config=dict(session_len=MAX_SESSION_LEN,
max_batch_size=1,
max_batch_size=512,
tp=1),
gen_config=dict(do_sample=False,
max_new_tokens=MAX_NEW_TOKENS),
max_out_len=MAX_NEW_TOKENS,
max_seq_len=MAX_SESSION_LEN,
batch_size=100,
batch_size=1000,
run_cfg=dict(num_gpus=1))

# ===== Configs for Qwen/Qwen1.5-MoE-A2.7B-Chat =====
14 changes: 5 additions & 9 deletions .github/workflows/daily_ete_test.yml
@@ -133,16 +133,14 @@ jobs:
timeout-minutes: 150
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /root/modelscope_hub
MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
container:
image: openmmlab/lmdeploy:latest-cu11
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/github-actions/modelscope_hub:/root/modelscope_hub
- /nvme/github-actions/modelscope_modules:/root/modelscope_modules
- /nvme/qa_test_models:/nvme/qa_test_models
- /mnt/shared:/mnt/shared
- /nvme/qa_test_models/lmdeploy/autotest:/local_case
@@ -225,16 +223,14 @@ jobs:
function: local_case
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /root/modelscope_hub
MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
container:
image: openmmlab/lmdeploy:latest-cu11
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/github-actions/modelscope_hub:/root/modelscope_hub
- /nvme/github-actions/modelscope_modules:/root/modelscope_modules
- /nvme/github-actions/resources/lora:/root/lora
- /nvme/qa_test_models:/nvme/qa_test_models
- /mnt/shared:/mnt/shared
@@ -602,7 +598,7 @@ jobs:
run: |
export LMDEPLOY_DIR=$(pwd)

python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat_w8a8, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama2_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct_w8a8, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct_w8a8, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it]" "[*race_datasets, *gsm8k_datasets, *ifeval_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true
python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_batch1, turbomind_internlm2_5_7b_chat_batch1_4bits, turbomind_internlm3_8b_instruct, pytorch_internlm3_8b_instruct, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_internlm2_chat_20b, pytorch_internlm2_chat_20b, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, turbomind_llama2_7b_chat, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it]" "[*race_datasets, *gsm8k_datasets, *ifeval_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true
- name: Evaluate base models
if: matrix.evaluate_type == 'base'
run: |
2 changes: 1 addition & 1 deletion .github/workflows/evaluate.yml
@@ -17,7 +17,7 @@ on:
required: true
description: 'Tested TurboMind models list. eg. [internlm_chat_7b,internlm_chat_7b_w8a16]'
type: string
default: '[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama2_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it, turbomind_internlm2_chat_7b_kvint4, turbomind_internlm2_chat_7b_kvint8, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, turbomind_internlm2_5_7b_chat_kvint8, pytorch_internlm2_5_7b_chat_w8a8, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, turbomind_internlm2_5_20b_chat_kvint8, turbomind_qwen1_5_7b_chat_4bits, turbomind_qwen1_5_7b_chat_kvint4, turbomind_qwen1_5_7b_chat_kvint8, turbomind_llama2_7b_chat_4bits, turbomind_llama2_7b_chat_kvint4, turbomind_llama2_7b_chat_kvint8, turbomind_llama3_8b_instruct_4bits, turbomind_llama3_8b_instruct_kvint4, turbomind_llama3_8b_instruct_kvint8, turbomind_llama3_1_8b_instruct_4bits, turbomind_llama3_1_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_kvint8, pytorch_llama3_1_8b_instruct_w8a8, turbomind_qwen2_7b_instruct_4bits, turbomind_qwen2_7b_instruct_kvint8, turbomind_qwen2_5_7b_instruct_4bits, turbomind_qwen2_5_7b_instruct_kvint8, pytorch_qwen2_5_7b_instruct_w8a8]'
default: '[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_batch1, turbomind_internlm2_5_7b_chat_batch1_4bits, turbomind_internlm3_8b_instruct, pytorch_internlm3_8b_instruct, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_internlm2_chat_20b, pytorch_internlm2_chat_20b, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, turbomind_llama2_7b_chat, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it, turbomind_internlm2_chat_7b_4bits, turbomind_internlm2_chat_7b_kvint4, turbomind_internlm2_chat_7b_kvint8, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, turbomind_internlm2_5_7b_chat_kvint8, pytorch_internlm2_5_7b_chat_w8a8, turbomind_internlm3_8b_instruct_4bits, turbomind_internlm3_8b_instruct_kvint4, turbomind_internlm3_8b_instruct_kvint8, pytorch_internlm3_8b_instruct_w8a8, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, turbomind_internlm2_5_20b_chat_kvint8, turbomind_llama3_8b_instruct_4bits, turbomind_llama3_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_4bits, turbomind_llama3_1_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_kvint8,turbomind_llama3_8b_instruct_kvint8, pytorch_llama3_1_8b_instruct_w8a8, turbomind_qwen2_7b_instruct_4bits, turbomind_qwen2_7b_instruct_kvint4, turbomind_qwen2_7b_instruct_kvint8, pytorch_qwen2_7b_instruct_w8a8, turbomind_qwen2_5_7b_instruct_4bits, turbomind_qwen2_5_7b_instruct_kvint4, turbomind_qwen2_5_7b_instruct_kvint8, pytorch_qwen2_5_7b_instruct_w8a8, turbomind_llama2_7b_chat_4bits, turbomind_llama2_7b_chat_kvint4, turbomind_llama2_7b_chat_kvint8]'
chat_datasets:
required: true
description: 'Tested datasets list. eg. [*bbh_datasets,*ceval_datasets,*cmmlu_datasets,*GaokaoBench_datasets,*gpqa_datasets,*gsm8k_datasets,*hellaswag_datasets,*humaneval_datasets,*ifeval_datasets,*math_datasets,*sanitized_mbpp_datasets,*mmlu_datasets,*nq_datasets,*race_datasets,*TheoremQA_datasets,*triviaqa_datasets,*winogrande_datasets,*crowspairs_datasets]'
26 changes: 26 additions & 0 deletions autotest/benchmark/test_throughput_performance.py
@@ -71,3 +71,29 @@ def test_throughput_func_tp2(config, run_id, run_config, worker_id):
is_smoke=True)

assert result, msg


@pytest.mark.function
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_1
@pytest.mark.pr_test
@pytest.mark.parametrize('run_config', [{
'model': 'meta-llama/Meta-Llama-3-1-8B-Instruct',
'backend': 'pytorch',
'tp_num': 1
}, {
'model': 'meta-llama/Meta-Llama-3-1-8B-Instruct',
'backend': 'turbomind',
'quant_policy': 0,
'tp_num': 1
}])
def test_throughput_prtest_tp1(config, run_id, run_config, worker_id):
result, msg = throughput_test(config,
run_id,
run_config,
cuda_prefix=get_cuda_prefix_by_workerid(
worker_id, tp_num=1),
worker_id=worker_id,
is_smoke=True)

assert result, msg
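
The new `test_throughput_prtest_tp1` case relies on `get_cuda_prefix_by_workerid` to pin each pytest-xdist worker to its own GPU. The real helper lives in the autotest utilities and is not part of this diff; the following is only an illustrative sketch of the idea, assuming worker ids of the form `gw0`, `gw1`, and so on:

```python
def get_cuda_prefix_by_workerid(worker_id: str, tp_num: int = 1) -> str:
    """Illustrative sketch only: map a pytest-xdist worker id to a CUDA prefix.

    Worker 'gw0' with tp_num=1 gets GPU 0, 'gw1' gets GPU 1, and tp_num=2
    would reserve two consecutive devices per worker.
    """
    if not worker_id.startswith('gw'):
        return ''  # single-process run: leave GPU visibility untouched
    index = int(worker_id[2:])
    first = index * tp_num
    devices = ','.join(str(first + i) for i in range(tp_num))
    return f'CUDA_VISIBLE_DEVICES={devices}'


print(get_cuda_prefix_by_workerid('gw1'))            # CUDA_VISIBLE_DEVICES=1
print(get_cuda_prefix_by_workerid('gw1', tp_num=2))  # CUDA_VISIBLE_DEVICES=2,3
```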
4 changes: 3 additions & 1 deletion autotest/config-v100.yaml
@@ -6,7 +6,6 @@ benchmark_path: /nvme/qa_test_models/benchmark-reports
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json

tp_config:
internlm-chat-20b: 2
internlm2-chat-20b: 2
Baichuan2-13B-Chat: 2
Mixtral-8x7B-Instruct-v0.1: 2
@@ -28,6 +27,7 @@ turbomind_chat_model:
- meta-llama/Meta-Llama-3-1-8B-Instruct-AWQ
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct-inner-4bits
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- internlm/internlm-xcomposer2d5-7b
@@ -48,6 +48,7 @@
pytorch_chat_model:
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-1-8B-Instruct
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- OpenGVLab/InternVL2-1B
@@ -122,6 +123,7 @@ turbomind_quatization:

pytorch_quatization:
awq:
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- Qwen/Qwen2-1.5B-Instruct
8 changes: 5 additions & 3 deletions autotest/config.yaml
@@ -6,7 +6,6 @@ benchmark_path: /nvme/qa_test_models/benchmark-reports
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json

tp_config:
internlm-chat-20b: 2
internlm2-chat-20b: 2
Baichuan2-13B-Chat: 2
Mixtral-8x7B-Instruct-v0.1: 2
@@ -34,11 +33,11 @@ turbomind_chat_model:
- meta-llama/Meta-Llama-3-1-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Llama-2-7b-chat-hf
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2-chat-20b
- internlm/internlm2-chat-20b-4bits
- internlm/internlm-chat-20b
- internlm/internlm-xcomposer2-4khd-7b
- internlm/internlm-xcomposer2d5-7b
- OpenGVLab/InternVL2_5-1B
@@ -91,10 +90,10 @@ pytorch_chat_model:
- meta-llama/Llama-3.2-3B-Instruct
- meta-llama/Llama-3.2-11B-Vision-Instruct
- meta-llama/Llama-2-7b-chat-hf
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2-chat-20b
- internlm/internlm-chat-20b
- OpenGVLab/InternVL2_5-1B
- OpenGVLab/InternVL2_5-8B
- OpenGVLab/InternVL2_5-26B
@@ -235,6 +234,7 @@ pytorch_quatization:
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-1-8B-Instruct
- meta-llama/Llama-2-7b-chat-hf
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2-chat-20b
@@ -251,6 +251,7 @@ pytorch_quatization:
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Llama-3.2-1B-Instruct
- meta-llama/Llama-2-7b-chat-hf
- internlm/internlm3-8b-instruct
- internlm/internlm2-chat-20b
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
@@ -298,6 +299,7 @@ benchmark_model:
- meta-llama/Meta-Llama-3-1-8B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-1-70B-Instruct
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- THUDM/glm-4-9b-chat
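
Both `config.yaml` and `config-v100.yaml` keep a `tp_config` mapping from model name to tensor-parallel degree, with unlisted models defaulting to a single GPU, which is why dropping the retired `internlm-chat-20b: 2` entry is safe. A small sketch of how such a lookup might be consumed; the helper name and default value are assumptions, not code from this PR:

```python
import yaml


def get_tp_num(config_path: str, model: str) -> int:
    """Assumed helper: resolve a model's tensor-parallel degree from tp_config."""
    with open(config_path) as f:
        config = yaml.safe_load(f)
    tp_config = config.get('tp_config') or {}
    # tp_config keys are bare model names (e.g. 'internlm2-chat-20b'), while
    # test parameters usually carry the org prefix (e.g. 'internlm/...').
    return tp_config.get(model.split('/')[-1], 1)


# get_tp_num('autotest/config.yaml', 'internlm/internlm2-chat-20b')  # -> 2
```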
6 changes: 4 additions & 2 deletions autotest/tools/chat/test_command_chat_hf_pytorch.py
@@ -125,7 +125,9 @@ def test_hf_pytorch_base_tp2(config, model, cli_case_config, worker_id):
@pytest.mark.hf_pytorch_chat
@pytest.mark.gpu_num_2
@pytest.mark.pr_test
@pytest.mark.parametrize('model', ['internlm/internlm2_5-20b-chat'])
@pytest.mark.parametrize(
'model',
['internlm/internlm2_5-20b-chat', 'mistralai/Mixtral-8x7B-Instruct-v0.1'])
def test_hf_pytorch_chat_pr(config, model, cli_case_config):
usercase = 'chat_testcase'
result, chat_log, msg = hf_command_line_test(
@@ -146,7 +148,7 @@ def test_hf_pytorch_chat_pr(config, model, cli_case_config):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_pytorch_chat
@pytest.mark.gpu_num_1
@pytest.mark.parametrize('model', ['Qwen/Qwen-7B-Chat'])
@pytest.mark.parametrize('model', ['Qwen/Qwen2.5-7B-Instruct'])
def test_modelscope_pytorch_chat_tp1(config, model, cli_case_config,
worker_id):
os.environ['LMDEPLOY_USE_MODELSCOPE'] = 'True'
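
The ModelScope cases above switch the download source by exporting `LMDEPLOY_USE_MODELSCOPE` before the model id is resolved. A minimal usage sketch with the model id the updated tests parametrize; treat the exact pipeline call as illustrative rather than a verbatim excerpt from the test suite:

```python
import os

# Must be set before lmdeploy resolves the model id, so weights are pulled
# from ModelScope instead of the Hugging Face Hub.
os.environ['LMDEPLOY_USE_MODELSCOPE'] = 'True'

from lmdeploy import pipeline  # noqa: E402

pipe = pipeline('Qwen/Qwen2.5-7B-Instruct')
print(pipe(['Hello, who are you?']))
```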
5 changes: 3 additions & 2 deletions autotest/tools/chat/test_command_chat_hf_turbomind.py
@@ -130,7 +130,8 @@ def test_hf_turbomind_base_tp2(config, model, cli_case_config, worker_id):
@pytest.mark.pr_test
@pytest.mark.parametrize('model', [
'internlm/internlm2_5-20b-chat',
'internlm/internlm2_5-20b-chat-inner-4bits'
'internlm/internlm2_5-20b-chat-inner-4bits',
'mistralai/Mixtral-8x7B-Instruct-v0.1'
])
def test_hf_turbomind_chat_pr(config, model, cli_case_config):
usercase = 'chat_testcase'
@@ -154,7 +155,7 @@ def test_hf_turbomind_chat_pr(config, model, cli_case_config):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_turbomind_chat
@pytest.mark.gpu_num_1
@pytest.mark.parametrize('model', ['Qwen/Qwen-7B-Chat'])
@pytest.mark.parametrize('model', ['Qwen/Qwen2.5-7B-Instruct'])
def test_modelscope_turbomind_chat_tp1(config, model, cli_case_config,
worker_id):
os.environ['LMDEPLOY_USE_MODELSCOPE'] = 'True'
15 changes: 10 additions & 5 deletions autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
@@ -250,25 +250,30 @@ def test_pipeline_chat_kvint8_tp4(config, common_case_config, model,
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_2
@pytest.mark.pr_test
@pytest.mark.parametrize('model', ['internlm/internlm2_5-20b-chat'])
@pytest.mark.parametrize(
'model',
['internlm/internlm2_5-20b-chat', 'mistralai/Mixtral-8x7B-Instruct-v0.1'])
def test_pipeline_chat_pytorch_pr(config, common_case_config, model):
spawn_context = get_context('spawn')
case_config = {
k: v
for k, v in common_case_config.items() if k == 'memory_test'
}
p = spawn_context.Process(target=run_pipeline_chat_test,
args=(config, common_case_config, model,
'pytorch'))
args=(config, case_config, model, 'pytorch'))
p.start()
p.join()

# assert script
assert_pipeline_chat_log(config, common_case_config, model, 'pytorch')
assert_pipeline_chat_log(config, case_config, model, 'pytorch')


@pytest.mark.order(6)
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat_pytorch
@pytest.mark.gpu_num_1
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', ['Qwen/Qwen-7B-Chat'])
@pytest.mark.parametrize('model', ['Qwen/Qwen2.5-7B-Instruct'])
def test_modelscope_pipeline_chat_pytorch_tp1(config, common_case_config,
model, worker_id):
if 'gw' in worker_id: