[ci] add internlm3 into testcase (#3038)
* update

* update

* update

* update

* update

* update

* update

* update
zhulinJulia24 authored Jan 16, 2025
1 parent 9fcb3b1 commit 0936305
Showing 12 changed files with 156 additions and 92 deletions.
41 changes: 26 additions & 15 deletions .github/scripts/eval_chat_config.py
@@ -54,10 +54,6 @@
models as hf_internlm2_chat_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_chat_20b import \
models as hf_internlm2_chat_20b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm_chat_7b import \
models as hf_internlm_chat_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm_chat_20b import \
models as hf_internlm_chat_20b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
models as lmdeploy_internlm2_5_7b_chat # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import \
@@ -131,11 +127,6 @@
MAX_SESSION_LEN = 2048
MAX_NEW_TOKENS = 1024

# ===== Configs for internlm/internlm-chat-7b =====
turbomind_internlm_chat_7b = deepcopy(*lmdeploy_internlm_chat_7b)
turbomind_internlm_chat_7b_4bits = deepcopy(*lmdeploy_internlm_chat_7b)
pytorch_internlm_chat_7b = deepcopy(*lmdeploy_internlm_chat_7b)

# ===== Configs for internlm/internlm2-chat-7b =====
turbomind_internlm2_chat_7b = deepcopy(*lmdeploy_internlm2_chat_7b)
turbomind_internlm2_chat_7b_4bits = deepcopy(*lmdeploy_internlm2_chat_7b)
@@ -150,6 +141,21 @@
turbomind_internlm2_5_7b_chat_kvint8 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm2_5_7b_chat = deepcopy(*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm2_5_7b_chat_w8a8 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm2_5_7b_chat_batch1 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm2_5_7b_chat_batch1_4bits = deepcopy(
*lmdeploy_internlm2_5_7b_chat)

turbomind_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm3_8b_instruct_4bits = deepcopy(*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm3_8b_instruct_kvint4 = deepcopy(
*lmdeploy_internlm2_5_7b_chat)
turbomind_internlm3_8b_instruct_kvint8 = deepcopy(
*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm3_8b_instruct_w8a8 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
for model in [v for k, v in locals().items() if 'internlm3_8b_instruct' in k]:
model['abbr'] = 'turbomind-internlm3-8b-instruct'
model['path'] = 'internlm/internlm3-8b-instruct'

# ===== Configs for internlm/internlm2_5_20b_chat =====
turbomind_internlm2_5_20b_chat = deepcopy(*lmdeploy_internlm2_5_20b_chat)
@@ -223,9 +229,9 @@
turbomind_llama2_7b_chat_kvint8 = deepcopy(*lmdeploy_llama2_7b_chat)

for model in [v for k, v in locals().items() if k.startswith('turbomind_')]:
model['engine_config']['max_batch_size'] = 1
model['engine_config']['max_batch_size'] = 512
model['gen_config']['do_sample'] = False
model['batch_size'] = 100
model['batch_size'] = 1000

for model in [v for k, v in locals().items() if k.endswith('_4bits')]:
model['engine_config']['model_format'] = 'awq'
@@ -247,19 +253,24 @@
for model in [v for k, v in locals().items() if k.startswith('pytorch_')]:
model['abbr'] = model['abbr'].replace('turbomind', 'pytorch')
model['backend'] = 'pytorch'
model['engine_config']['max_batch_size'] = 1
model['engine_config']['max_batch_size'] = 512
model['gen_config']['do_sample'] = False
model['batch_size'] = 100
model['batch_size'] = 1000

for model in [v for k, v in locals().items() if '_batch1' in k]:
model['abbr'] = model['abbr'] + '_batch1'
model['engine_config']['max_batch_size'] = 1
model['batch_size'] = 1

basic_pytorch_chat_tp1 = dict(type=TurboMindModelwithChatTemplate,
engine_config=dict(session_len=MAX_SESSION_LEN,
max_batch_size=1,
max_batch_size=512,
tp=1),
gen_config=dict(do_sample=False,
max_new_tokens=MAX_NEW_TOKENS),
max_out_len=MAX_NEW_TOKENS,
max_seq_len=MAX_SESSION_LEN,
batch_size=100,
batch_size=1000,
run_cfg=dict(num_gpus=1))

# ===== Configs for Qwen/Qwen1.5-MoE-A2.7B-Chat =====
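The new entries in eval_chat_config.py follow the file's existing derivation pattern: each internlm3 config is a deepcopy of the internlm2.5-7b-chat base entry with only the model identity overridden, and the bulk loops over locals() then apply backend-wide settings such as batch size and the pytorch abbr rewrite. A minimal, self-contained sketch of that pattern follows; the base dict here is an illustrative stand-in, the real one is imported from OpenCompass's lmdeploy_internlm2_5_7b_chat config.

```python
from copy import deepcopy

# Illustrative stand-in for the imported OpenCompass config
# lmdeploy_internlm2_5_7b_chat (a one-element list holding a model dict).
lmdeploy_internlm2_5_7b_chat = [dict(
    abbr='turbomind-internlm2_5-7b-chat',
    path='internlm/internlm2_5-7b-chat',
    engine_config=dict(max_batch_size=512, tp=1),
    gen_config=dict(do_sample=False),
    batch_size=1000,
)]

# Derive the internlm3 variants by copying the base entry ...
turbomind_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)
pytorch_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)

# ... and overriding only the model identity, exactly as the diff does.
for model in [v for k, v in locals().items() if 'internlm3_8b_instruct' in k]:
    model['abbr'] = 'turbomind-internlm3-8b-instruct'
    model['path'] = 'internlm/internlm3-8b-instruct'

# Backend-wide pass, mirroring the pytorch_* loop in the diff.
for name, cfg in list(locals().items()):
    if name.startswith('pytorch_') and isinstance(cfg, dict):
        cfg['abbr'] = cfg['abbr'].replace('turbomind', 'pytorch')
        cfg['backend'] = 'pytorch'

print(pytorch_internlm3_8b_instruct['abbr'])  # pytorch-internlm3-8b-instruct
```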
14 changes: 5 additions & 9 deletions .github/workflows/daily_ete_test.yml
@@ -133,16 +133,14 @@ jobs:
timeout-minutes: 150
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /root/modelscope_hub
MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
container:
image: openmmlab/lmdeploy:latest-cu11
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/github-actions/modelscope_hub:/root/modelscope_hub
- /nvme/github-actions/modelscope_modules:/root/modelscope_modules
- /nvme/qa_test_models:/nvme/qa_test_models
- /mnt/shared:/mnt/shared
- /nvme/qa_test_models/lmdeploy/autotest:/local_case
@@ -225,16 +223,14 @@ jobs:
function: local_case
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
MODELSCOPE_CACHE: /root/modelscope_hub
MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
container:
image: openmmlab/lmdeploy:latest-cu11
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/github-actions/modelscope_hub:/root/modelscope_hub
- /nvme/github-actions/modelscope_modules:/root/modelscope_modules
- /nvme/github-actions/resources/lora:/root/lora
- /nvme/qa_test_models:/nvme/qa_test_models
- /mnt/shared:/mnt/shared
@@ -602,7 +598,7 @@ jobs:
run: |
export LMDEPLOY_DIR=$(pwd)
python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat_w8a8, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama2_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct_w8a8, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct_w8a8, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it]" "[*race_datasets, *gsm8k_datasets, *ifeval_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true
python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_batch1, turbomind_internlm2_5_7b_chat_batch1_4bits, turbomind_internlm3_8b_instruct, pytorch_internlm3_8b_instruct, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_internlm2_chat_20b, pytorch_internlm2_chat_20b, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, turbomind_llama2_7b_chat, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it]" "[*race_datasets, *gsm8k_datasets, *ifeval_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true
- name: Evaluate base models
if: matrix.evaluate_type == 'base'
run: |
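The workflow now points MODELSCOPE_CACHE and MODELSCOPE_MODULES_CACHE at the shared /nvme/qa_test_models volume instead of container-local /root paths, so downloaded weights persist across jobs and containers. A hedged sketch of how that variable is consumed (assuming the modelscope SDK is installed and reads MODELSCOPE_CACHE as its download root; the model id is simply the one these tests use for ModelScope downloads):

```python
import os

# Assumption: the ModelScope SDK uses MODELSCOPE_CACHE as its download root,
# which is why the workflow exports it before the tests run.
os.environ.setdefault('MODELSCOPE_CACHE', '/nvme/qa_test_models/modelscope_hub')

from modelscope import snapshot_download  # requires the modelscope package

# Same model the modelscope test cases below switch to.
local_path = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
print(local_path)  # resolves under the shared NVMe cache rather than /root
```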
2 changes: 1 addition & 1 deletion .github/workflows/evaluate.yml
@@ -17,7 +17,7 @@ on:
required: true
description: 'Tested TurboMind models list. eg. [internlm_chat_7b,internlm_chat_7b_w8a16]'
type: string
default: '[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama2_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it, turbomind_internlm2_chat_7b_kvint4, turbomind_internlm2_chat_7b_kvint8, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, turbomind_internlm2_5_7b_chat_kvint8, pytorch_internlm2_5_7b_chat_w8a8, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, turbomind_internlm2_5_20b_chat_kvint8, turbomind_qwen1_5_7b_chat_4bits, turbomind_qwen1_5_7b_chat_kvint4, turbomind_qwen1_5_7b_chat_kvint8, turbomind_llama2_7b_chat_4bits, turbomind_llama2_7b_chat_kvint4, turbomind_llama2_7b_chat_kvint8, turbomind_llama3_8b_instruct_4bits, turbomind_llama3_8b_instruct_kvint4, turbomind_llama3_8b_instruct_kvint8, turbomind_llama3_1_8b_instruct_4bits, turbomind_llama3_1_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_kvint8, pytorch_llama3_1_8b_instruct_w8a8, turbomind_qwen2_7b_instruct_4bits, turbomind_qwen2_7b_instruct_kvint8, turbomind_qwen2_5_7b_instruct_4bits, turbomind_qwen2_5_7b_instruct_kvint8, pytorch_qwen2_5_7b_instruct_w8a8]'
default: '[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_batch1, turbomind_internlm2_5_7b_chat_batch1_4bits, turbomind_internlm3_8b_instruct, pytorch_internlm3_8b_instruct, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_internlm2_chat_20b, pytorch_internlm2_chat_20b, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, turbomind_llama2_7b_chat, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it, turbomind_internlm2_chat_7b_4bits, turbomind_internlm2_chat_7b_kvint4, turbomind_internlm2_chat_7b_kvint8, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, turbomind_internlm2_5_7b_chat_kvint8, pytorch_internlm2_5_7b_chat_w8a8, turbomind_internlm3_8b_instruct_4bits, turbomind_internlm3_8b_instruct_kvint4, turbomind_internlm3_8b_instruct_kvint8, pytorch_internlm3_8b_instruct_w8a8, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, turbomind_internlm2_5_20b_chat_kvint8, turbomind_llama3_8b_instruct_4bits, turbomind_llama3_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_4bits, turbomind_llama3_1_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_kvint8,turbomind_llama3_8b_instruct_kvint8, pytorch_llama3_1_8b_instruct_w8a8, turbomind_qwen2_7b_instruct_4bits, turbomind_qwen2_7b_instruct_kvint4, turbomind_qwen2_7b_instruct_kvint8, pytorch_qwen2_7b_instruct_w8a8, turbomind_qwen2_5_7b_instruct_4bits, turbomind_qwen2_5_7b_instruct_kvint4, turbomind_qwen2_5_7b_instruct_kvint8, pytorch_qwen2_5_7b_instruct_w8a8, turbomind_llama2_7b_chat_4bits, turbomind_llama2_7b_chat_kvint4, turbomind_llama2_7b_chat_kvint8]'
chat_datasets:
required: true
description: 'Tested datasets list. eg. [*bbh_datasets,*ceval_datasets,*cmmlu_datasets,*GaokaoBench_datasets,*gpqa_datasets,*gsm8k_datasets,*hellaswag_datasets,*humaneval_datasets,*ifeval_datasets,*math_datasets,*sanitized_mbpp_datasets,*mmlu_datasets,*nq_datasets,*race_datasets,*TheoremQA_datasets,*triviaqa_datasets,*winogrande_datasets,*crowspairs_datasets]'
26 changes: 26 additions & 0 deletions autotest/benchmark/test_throughput_performance.py
@@ -71,3 +71,29 @@ def test_throughput_func_tp2(config, run_id, run_config, worker_id):
is_smoke=True)

assert result, msg


@pytest.mark.function
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_1
@pytest.mark.pr_test
@pytest.mark.parametrize('run_config', [{
'model': 'meta-llama/Meta-Llama-3-1-8B-Instruct',
'backend': 'pytorch',
'tp_num': 1
}, {
'model': 'meta-llama/Meta-Llama-3-1-8B-Instruct',
'backend': 'turbomind',
'quant_policy': 0,
'tp_num': 1
}])
def test_throughput_prtest_tp1(config, run_id, run_config, worker_id):
result, msg = throughput_test(config,
run_id,
run_config,
cuda_prefix=get_cuda_prefix_by_workerid(
worker_id, tp_num=1),
worker_id=worker_id,
is_smoke=True)

assert result, msg
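
The added test_throughput_prtest_tp1 reuses the existing throughput_test helper and runs the same Llama-3.1-8B model once per backend on a single GPU, with get_cuda_prefix_by_workerid pinning each pytest-xdist worker to its own device. That helper's implementation is not shown in this diff; the snippet below is only an illustrative approximation of such a worker-to-GPU mapping, not the repository's actual code.

```python
# Illustrative approximation -- NOT the repository's get_cuda_prefix_by_workerid.
# pytest-xdist worker ids like 'gw0', 'gw1' are mapped to disjoint GPU sets so
# parallel single-GPU throughput runs do not collide.
def cuda_prefix_for_worker(worker_id: str, tp_num: int = 1) -> str:
    if not worker_id.startswith('gw'):        # no xdist: run on default devices
        return ''
    index = int(worker_id[2:])                # 'gw3' -> 3
    first = index * tp_num
    devices = ','.join(str(first + i) for i in range(tp_num))
    return f'CUDA_VISIBLE_DEVICES={devices}'


print(cuda_prefix_for_worker('gw1'))             # CUDA_VISIBLE_DEVICES=1
print(cuda_prefix_for_worker('gw1', tp_num=2))   # CUDA_VISIBLE_DEVICES=2,3
```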
4 changes: 3 additions & 1 deletion autotest/config-v100.yaml
@@ -6,7 +6,6 @@ benchmark_path: /nvme/qa_test_models/benchmark-reports
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json

tp_config:
internlm-chat-20b: 2
internlm2-chat-20b: 2
Baichuan2-13B-Chat: 2
Mixtral-8x7B-Instruct-v0.1: 2
@@ -28,6 +27,7 @@ turbomind_chat_model:
- meta-llama/Meta-Llama-3-1-8B-Instruct-AWQ
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct-inner-4bits
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- internlm/internlm-xcomposer2d5-7b
@@ -48,6 +48,7 @@ turbomind_chat_model:
pytorch_chat_model:
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-1-8B-Instruct
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- OpenGVLab/InternVL2-1B
@@ -122,6 +123,7 @@ turbomind_quatization:

pytorch_quatization:
awq:
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- Qwen/Qwen2-1.5B-Instruct
8 changes: 5 additions & 3 deletions autotest/config.yaml
@@ -6,7 +6,6 @@ benchmark_path: /nvme/qa_test_models/benchmark-reports
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json

tp_config:
internlm-chat-20b: 2
internlm2-chat-20b: 2
Baichuan2-13B-Chat: 2
Mixtral-8x7B-Instruct-v0.1: 2
@@ -34,11 +33,11 @@ turbomind_chat_model:
- meta-llama/Meta-Llama-3-1-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Llama-2-7b-chat-hf
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2-chat-20b
- internlm/internlm2-chat-20b-4bits
- internlm/internlm-chat-20b
- internlm/internlm-xcomposer2-4khd-7b
- internlm/internlm-xcomposer2d5-7b
- OpenGVLab/InternVL2_5-1B
@@ -91,10 +90,10 @@ pytorch_chat_model:
- meta-llama/Llama-3.2-3B-Instruct
- meta-llama/Llama-3.2-11B-Vision-Instruct
- meta-llama/Llama-2-7b-chat-hf
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2-chat-20b
- internlm/internlm-chat-20b
- OpenGVLab/InternVL2_5-1B
- OpenGVLab/InternVL2_5-8B
- OpenGVLab/InternVL2_5-26B
@@ -235,6 +234,7 @@ pytorch_quatization:
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-1-8B-Instruct
- meta-llama/Llama-2-7b-chat-hf
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2-chat-20b
@@ -251,6 +251,7 @@ pytorch_quatization:
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Llama-3.2-1B-Instruct
- meta-llama/Llama-2-7b-chat-hf
- internlm/internlm3-8b-instruct
- internlm/internlm2-chat-20b
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
@@ -298,6 +299,7 @@ benchmark_model:
- meta-llama/Meta-Llama-3-1-8B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3-1-70B-Instruct
- internlm/internlm3-8b-instruct
- internlm/internlm2_5-7b-chat
- internlm/internlm2_5-20b-chat
- THUDM/glm-4-9b-chat
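Both YAML configs gate which models each backend's test cases pick up, so adding internlm/internlm3-8b-instruct to the turbomind_chat_model, pytorch_chat_model, quantization, and benchmark lists is what actually brings the model into the daily runs. A small illustrative check of those lists (not the suite's actual config fixture):

```python
# Illustrative only: loads the autotest config and checks the lists this
# commit extends; the real test suite reads the same file through fixtures.
import yaml

with open('autotest/config.yaml') as f:
    config = yaml.safe_load(f)

model = 'internlm/internlm3-8b-instruct'
print(model in config['turbomind_chat_model'])        # True after this commit
print(model in config['pytorch_chat_model'])          # True after this commit
print(config['tp_config'].get('internlm2-chat-20b'))  # 2 -> that model needs tp=2
```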
6 changes: 4 additions & 2 deletions autotest/tools/chat/test_command_chat_hf_pytorch.py
@@ -125,7 +125,9 @@ def test_hf_pytorch_base_tp2(config, model, cli_case_config, worker_id):
@pytest.mark.hf_pytorch_chat
@pytest.mark.gpu_num_2
@pytest.mark.pr_test
@pytest.mark.parametrize('model', ['internlm/internlm2_5-20b-chat'])
@pytest.mark.parametrize(
'model',
['internlm/internlm2_5-20b-chat', 'mistralai/Mixtral-8x7B-Instruct-v0.1'])
def test_hf_pytorch_chat_pr(config, model, cli_case_config):
usercase = 'chat_testcase'
result, chat_log, msg = hf_command_line_test(
@@ -146,7 +148,7 @@ def test_hf_pytorch_chat_pr(config, model, cli_case_config):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_pytorch_chat
@pytest.mark.gpu_num_1
@pytest.mark.parametrize('model', ['Qwen/Qwen-7B-Chat'])
@pytest.mark.parametrize('model', ['Qwen/Qwen2.5-7B-Instruct'])
def test_modelscope_pytorch_chat_tp1(config, model, cli_case_config,
worker_id):
os.environ['LMDEPLOY_USE_MODELSCOPE'] = 'True'
5 changes: 3 additions & 2 deletions autotest/tools/chat/test_command_chat_hf_turbomind.py
@@ -130,7 +130,8 @@ def test_hf_turbomind_base_tp2(config, model, cli_case_config, worker_id):
@pytest.mark.pr_test
@pytest.mark.parametrize('model', [
'internlm/internlm2_5-20b-chat',
'internlm/internlm2_5-20b-chat-inner-4bits'
'internlm/internlm2_5-20b-chat-inner-4bits',
'mistralai/Mixtral-8x7B-Instruct-v0.1'
])
def test_hf_turbomind_chat_pr(config, model, cli_case_config):
usercase = 'chat_testcase'
@@ -154,7 +155,7 @@ def test_hf_turbomind_chat_pr(config, model, cli_case_config):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_turbomind_chat
@pytest.mark.gpu_num_1
@pytest.mark.parametrize('model', ['Qwen/Qwen-7B-Chat'])
@pytest.mark.parametrize('model', ['Qwen/Qwen2.5-7B-Instruct'])
def test_modelscope_turbomind_chat_tp1(config, model, cli_case_config,
worker_id):
os.environ['LMDEPLOY_USE_MODELSCOPE'] = 'True'
15 changes: 10 additions & 5 deletions autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
@@ -250,25 +250,30 @@ def test_pipeline_chat_kvint8_tp4(config, common_case_config, model,
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_2
@pytest.mark.pr_test
@pytest.mark.parametrize('model', ['internlm/internlm2_5-20b-chat'])
@pytest.mark.parametrize(
'model',
['internlm/internlm2_5-20b-chat', 'mistralai/Mixtral-8x7B-Instruct-v0.1'])
def test_pipeline_chat_pytorch_pr(config, common_case_config, model):
spawn_context = get_context('spawn')
case_config = {
k: v
for k, v in common_case_config.items() if k == 'memory_test'
}
p = spawn_context.Process(target=run_pipeline_chat_test,
args=(config, common_case_config, model,
'pytorch'))
args=(config, case_config, model, 'pytorch'))
p.start()
p.join()

# assert script
assert_pipeline_chat_log(config, common_case_config, model, 'pytorch')
assert_pipeline_chat_log(config, case_config, model, 'pytorch')


@pytest.mark.order(6)
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat_pytorch
@pytest.mark.gpu_num_1
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', ['Qwen/Qwen-7B-Chat'])
@pytest.mark.parametrize('model', ['Qwen/Qwen2.5-7B-Instruct'])
def test_modelscope_pipeline_chat_pytorch_tp1(config, common_case_config,
model, worker_id):
if 'gw' in worker_id:
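The reworked PR test filters common_case_config down to the single memory_test case and runs the pipeline check in a spawned subprocess, so each parametrized model gets a fresh CUDA context and its GPU memory is fully released when the process exits. A minimal sketch of that isolation pattern (the worker function is a stand-in for run_pipeline_chat_test, and the case definitions are placeholders):

```python
# Minimal sketch of the spawn-isolation pattern; run_case stands in for
# run_pipeline_chat_test, which builds an lmdeploy pipeline in the child.
from multiprocessing import get_context


def run_case(config: dict, cases: dict, model: str, backend: str) -> None:
    print(f'{backend}:{model} -> {sorted(cases)}')


if __name__ == '__main__':
    common_case_config = {
        'memory_test': {'rounds': 20},    # placeholder case definitions
        'chat_testcase': {'rounds': 3},
    }
    # Keep only the memory_test case, mirroring the dict comprehension above.
    case_config = {k: v for k, v in common_case_config.items() if k == 'memory_test'}

    ctx = get_context('spawn')
    p = ctx.Process(target=run_case,
                    args=({}, case_config, 'internlm/internlm2_5-20b-chat', 'pytorch'))
    p.start()
    p.join()
    assert p.exitcode == 0   # the real test then asserts on the chat log
```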
(3 more changed files not shown)
