Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ci] regular update #2431

Merged
merged 37 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .github/scripts/action_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ def _load_hf_results(test_results: dict, model_name: str):
return out


def evaluate(models: List[str], datasets: List[str], workspace: str):
def evaluate(models: List[str],
datasets: List[str],
workspace: str,
is_smoke: bool = False):
"""Evaluate models from lmdeploy using opencompass.

Args:
Expand Down Expand Up @@ -157,6 +160,10 @@ def evaluate(models: List[str], datasets: List[str], workspace: str):

with open(config_path_new, 'a') as f:
f.write(f'\ndatasets = {datasets}\n')
if is_smoke:
f.write('\nfor d in datasets:\n')
f.write(" if d['reader_cfg'] is not None:\n")
f.write(" d['reader_cfg']['test_range'] = '[0:50]'\n")
if engine_type == 'hf':
f.write(f'\nmodels = [ *{target_model} ]\n')
else:
Expand Down
83 changes: 50 additions & 33 deletions .github/scripts/eval_opencompass_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,68 +6,72 @@

with read_base():
# choose a list of datasets
from .datasets.bbh.bbh_gen_5b92b0 import bbh_datasets # noqa: F401, E501
from .datasets.ceval.ceval_gen_2daf24 import \
from opencompass.configs.datasets.bbh.bbh_gen_5b92b0 import \
bbh_datasets # noqa: F401, E501
from opencompass.configs.datasets.ceval.ceval_gen_2daf24 import \
ceval_datasets # noqa: F401, E501
from .datasets.cmmlu.cmmlu_gen_c13365 import \
from opencompass.configs.datasets.cmmlu.cmmlu_gen_c13365 import \
cmmlu_datasets # noqa: F401, E501
from .datasets.crowspairs.crowspairs_gen_381af0 import \
from opencompass.configs.datasets.crowspairs.crowspairs_gen_381af0 import \
crowspairs_datasets # noqa: F401, E501
from .datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import \
from opencompass.configs.datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import \
GaokaoBench_datasets # noqa: F401, E501
from .datasets.gpqa.gpqa_gen_4baadb import \
from opencompass.configs.datasets.gpqa.gpqa_gen_4baadb import \
gpqa_datasets # noqa: F401, E501
from .datasets.gsm8k.gsm8k_gen_1d7fe4 import \
from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import \
gsm8k_datasets # noqa: F401, E501
from .datasets.hellaswag.hellaswag_10shot_gen_e42710 import \
from opencompass.configs.datasets.hellaswag.hellaswag_10shot_gen_e42710 import \
hellaswag_datasets # noqa: F401, E501
from .datasets.humaneval.humaneval_gen_8e312c import \
from opencompass.configs.datasets.humaneval.humaneval_gen_8e312c import \
humaneval_datasets # noqa: F401, E501
from .datasets.IFEval.IFEval_gen_3321a3 import \
from opencompass.configs.datasets.IFEval.IFEval_gen_3321a3 import \
ifeval_datasets # noqa: F401, E501
from .datasets.math.math_0shot_gen_393424 import \
from opencompass.configs.datasets.math.math_0shot_gen_393424 import \
math_datasets # noqa: F401, E501
from .datasets.mbpp.sanitized_mbpp_gen_a0fc46 import \
from opencompass.configs.datasets.mbpp.sanitized_mbpp_gen_a0fc46 import \
sanitized_mbpp_datasets # noqa: F401, E501
from .datasets.mmlu.mmlu_gen_4d595a import \
from opencompass.configs.datasets.mmlu.mmlu_gen_4d595a import \
mmlu_datasets # noqa: F401, E501
from .datasets.nq.nq_open_1shot_gen_01cf41 import \
from opencompass.configs.datasets.nq.nq_open_1shot_gen_01cf41 import \
nq_datasets # noqa: F401, E501
from .datasets.race.race_gen_69ee4f import \
from opencompass.configs.datasets.race.race_gen_69ee4f import \
race_datasets # noqa: F401, E501
from .datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import \
from opencompass.configs.datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import \
TheoremQA_datasets # noqa: F401, E501
from .datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import \
from opencompass.configs.datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import \
triviaqa_datasets # noqa: F401, E501
from .datasets.winogrande.winogrande_5shot_gen_b36770 import \
from opencompass.configs.datasets.winogrande.winogrande_5shot_gen_b36770 import \
winogrande_datasets # noqa: F401, E501
# read hf models
from .models.baichuan.hf_baichuan2_7b_chat import \
from opencompass.configs.models.baichuan.hf_baichuan2_7b_chat import \
models as hf_baichuan2_chat_7b # noqa: F401, E501
from .models.gemma.hf_gemma_7b_it import \
from opencompass.configs.models.gemma.hf_gemma_7b_it import \
models as hf_gemma_chat_7b # noqa: F401, E501
from .models.hf_internlm.hf_internlm2_chat_7b import \
from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import \
models as hf_internlm2_chat_7b # noqa: F401, E501
from .models.hf_internlm.hf_internlm2_chat_20b import \
from opencompass.configs.models.hf_internlm.hf_internlm2_chat_20b import \
models as hf_internlm2_chat_20b # noqa: F401, E501
from .models.hf_internlm.hf_internlm_chat_7b import \
from opencompass.configs.models.hf_internlm.hf_internlm_chat_7b import \
models as hf_internlm_chat_7b # noqa: F401, E501
from .models.hf_internlm.hf_internlm_chat_20b import \
from opencompass.configs.models.hf_internlm.hf_internlm_chat_20b import \
models as hf_internlm_chat_20b # noqa: F401, E501
from .models.hf_llama.hf_llama2_7b_chat import \
from opencompass.configs.models.hf_llama.hf_llama2_7b_chat import \
models as hf_llama2_chat_7b # noqa: F401, E501
from .models.hf_llama.hf_llama3_8b_instruct import \
from opencompass.configs.models.hf_llama.hf_llama3_8b_instruct import \
models as hf_llama_3_8b_instruct # noqa: F401, E501
from .models.mistral.hf_mistral_7b_instruct_v0_1 import \
from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_1 import \
models as hf_mistral_chat_7b # noqa: F401, E501
from .models.mistral.hf_mixtral_8x7b_instruct_v0_1 import \
from opencompass.configs.models.mistral.hf_mixtral_8x7b_instruct_v0_1 import \
models as hf_mixtral_chat_8x7b # noqa: F401, E501
from .models.qwen.hf_qwen1_5_7b_chat import \
from opencompass.configs.models.qwen.hf_qwen1_5_7b_chat import \
models as hf_qwen1_5_chat_7b # noqa: F401, E501
from .models.qwen.hf_qwen_7b_chat import \
from opencompass.configs.models.qwen.hf_qwen2_7b_instruct import \
models as hf_qwen2_7b_instruct # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen_7b_chat import \
models as hf_qwen_chat_7b # noqa: F401, E501
# and output the results in a chosen format
from .summarizers.medium import summarizer # noqa: F401, E501
from opencompass.configs.summarizers.medium import \
summarizer # noqa: F401, E501

internlm_meta_template = dict(round=[
dict(role='HUMAN', begin='<|User|>:', end='\n'),
Expand Down Expand Up @@ -117,7 +121,7 @@
end='<|im_end|>\n',
generate=True),
],
eos_token_id=151645,
eos_token_id=[151645, 151643],
)

baichuan2_meta_template = dict(round=[
Expand Down Expand Up @@ -202,7 +206,7 @@
qwen_gen_config_template = dict(top_k=1,
top_p=0.8,
temperature=1.0,
stop_words=[151645],
stop_words=[151645, 151643],
max_new_tokens=MAX_NEW_TOKENS)

tokenizer_kwargs_template = dict(padding_side='left',
Expand Down Expand Up @@ -546,6 +550,19 @@
run_cfg=dict(num_gpus=1),
)

# Evaluation entry for the Qwen/Qwen2-7B-Instruct checkpoint using the
# LmdeployPytorchModel wrapper. Batch size and concurrency are both 128,
# paired with the `pt_engine_config_template_max_bs_128` engine template.
# The prompt layout reuses `qwen1_5_meta_template`, and generation output
# is cut at the '<|im_end|>' end string.
# NOTE(review): this entry uses the generic `gen_config_template` rather
# than `qwen_gen_config_template` (which carries the Qwen stop_words ids);
# confirm that relying on `end_str` alone is intentional.
pt_qwen2_7b_instruct = dict(type=LmdeployPytorchModel,
abbr='pt_qwen2_7b_instruct',
path='Qwen/Qwen2-7B-Instruct',
engine_config=pt_engine_config_template_max_bs_128,
gen_config=gen_config_template,
max_out_len=MAX_NEW_TOKENS,
max_seq_len=MAX_SESSION_LEN,
batch_size=128,
concurrency=128,
meta_template=qwen1_5_meta_template,
run_cfg=run_cfg_tp1_template,
end_str='<|im_end|>')

tb_qwen2_7b_instruct_4bits = deepcopy(tb_qwen2_7b_instruct)
tb_qwen2_7b_instruct_kvint4 = deepcopy(tb_qwen2_7b_instruct)
tb_qwen2_7b_instruct_kvint8 = deepcopy(tb_qwen2_7b_instruct)
Expand Down
18 changes: 11 additions & 7 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ env:
REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
dependency_pkgs: ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers datasets matplotlib jmespath'}}
FAIL_CONFIG: ${{ github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}

jobs:
linux-build:
Expand Down Expand Up @@ -111,29 +112,32 @@ jobs:
# manually install flash attn
# the wheel installed below comes from https://github.com/Dao-AILab/flash-attention/releases
python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
python3 -m pip install -e /root/packages/AutoAWQ_kernels
python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
python3 -m pip install ${{env.dependency_pkgs}}
- name: Install lmdeploy
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
python3 -m pip install lmdeploy-*.whl
python3 -m pip install triton==2.1.0
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Install lmdeploy - offline
if: ${{inputs.offline_mode}}
run: |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl
python3 -m pip install triton==2.1.0
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
pip uninstall -y nvidia-nccl-cu11
python3 -m pip list
lmdeploy check_env
mkdir ${{env.REPORT_DIR}}/allure-results/.pytest_cache -p
ln -s ${{env.REPORT_DIR}}/allure-results/.pytest_cache autotest
- name: Run other benchmark
run: |
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 8 --run_id ${{ github.run_id }} -m gpu_num_1 --lf --alluredir=${{env.REPORT_DIR}}/allure-results || true
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 4 --run_id ${{ github.run_id }} -m gpu_num_2 --lf --alluredir=${{env.REPORT_DIR}}/allure-results || true
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 2 --run_id ${{ github.run_id }} -m gpu_num_4 --lf --alluredir=${{env.REPORT_DIR}}/allure-results
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 8 --run_id ${{ github.run_id }} -m gpu_num_1 ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}}/allure-results || true
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 4 --run_id ${{ github.run_id }} -m gpu_num_2 ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}}/allure-results || true
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 2 --run_id ${{ github.run_id }} -m gpu_num_4 ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}}/allure-results
- name: Clear workfile
if: always()
run: |
Expand Down
Loading
Loading