[ci] regular update (#2431)
* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update
zhulinJulia24 authored Sep 18, 2024
1 parent 1678dc5 commit dd49877
Showing 17 changed files with 459 additions and 341 deletions.
9 changes: 8 additions & 1 deletion .github/scripts/action_tools.py
@@ -101,7 +101,10 @@ def _load_hf_results(test_results: dict, model_name: str):
return out


def evaluate(models: List[str], datasets: List[str], workspace: str):
def evaluate(models: List[str],
datasets: List[str],
workspace: str,
is_smoke: bool = False):
"""Evaluate models from lmdeploy using opencompass.
Args:
@@ -157,6 +160,10 @@ def evaluate(models: List[str], datasets: List[str], workspace: str):

    with open(config_path_new, 'a') as f:
        f.write(f'\ndatasets = {datasets}\n')
        if is_smoke:
            f.write('\nfor d in datasets:\n')
            f.write("    if d['reader_cfg'] is not None:\n")
            f.write("        d['reader_cfg']['test_range'] = '[0:50]'\n")
        if engine_type == 'hf':
            f.write(f'\nmodels = [ *{target_model} ]\n')
        else:
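
For illustration, a minimal sketch of what the smoke-test snippet appended by evaluate(..., is_smoke=True) does once the generated OpenCompass config is executed. The dataset dicts below are placeholders, not taken from this repository; the loop itself is exactly what the f.write calls above emit.

# Placeholder dataset entries standing in for the real OpenCompass configs.
datasets = [
    {'abbr': 'gsm8k', 'reader_cfg': {'input_columns': ['question']}},
    {'abbr': 'mmlu', 'reader_cfg': None},
]

# Every dataset that defines a reader_cfg is limited to its first 50 samples.
for d in datasets:
    if d['reader_cfg'] is not None:
        d['reader_cfg']['test_range'] = '[0:50]'

print(datasets[0]['reader_cfg'])
# {'input_columns': ['question'], 'test_range': '[0:50]'}
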
83 changes: 50 additions & 33 deletions .github/scripts/eval_opencompass_config.py
@@ -6,68 +6,72 @@

with read_base():
# choose a list of datasets
from .datasets.bbh.bbh_gen_5b92b0 import bbh_datasets # noqa: F401, E501
from .datasets.ceval.ceval_gen_2daf24 import \
from opencompass.configs.datasets.bbh.bbh_gen_5b92b0 import \
bbh_datasets # noqa: F401, E501
from opencompass.configs.datasets.ceval.ceval_gen_2daf24 import \
ceval_datasets # noqa: F401, E501
from .datasets.cmmlu.cmmlu_gen_c13365 import \
from opencompass.configs.datasets.cmmlu.cmmlu_gen_c13365 import \
cmmlu_datasets # noqa: F401, E501
from .datasets.crowspairs.crowspairs_gen_381af0 import \
from opencompass.configs.datasets.crowspairs.crowspairs_gen_381af0 import \
crowspairs_datasets # noqa: F401, E501
from .datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import \
from opencompass.configs.datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import \
GaokaoBench_datasets # noqa: F401, E501
from .datasets.gpqa.gpqa_gen_4baadb import \
from opencompass.configs.datasets.gpqa.gpqa_gen_4baadb import \
gpqa_datasets # noqa: F401, E501
from .datasets.gsm8k.gsm8k_gen_1d7fe4 import \
from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import \
gsm8k_datasets # noqa: F401, E501
from .datasets.hellaswag.hellaswag_10shot_gen_e42710 import \
from opencompass.configs.datasets.hellaswag.hellaswag_10shot_gen_e42710 import \
hellaswag_datasets # noqa: F401, E501
from .datasets.humaneval.humaneval_gen_8e312c import \
from opencompass.configs.datasets.humaneval.humaneval_gen_8e312c import \
humaneval_datasets # noqa: F401, E501
from .datasets.IFEval.IFEval_gen_3321a3 import \
from opencompass.configs.datasets.IFEval.IFEval_gen_3321a3 import \
ifeval_datasets # noqa: F401, E501
from .datasets.math.math_0shot_gen_393424 import \
from opencompass.configs.datasets.math.math_0shot_gen_393424 import \
math_datasets # noqa: F401, E501
from .datasets.mbpp.sanitized_mbpp_gen_a0fc46 import \
from opencompass.configs.datasets.mbpp.sanitized_mbpp_gen_a0fc46 import \
sanitized_mbpp_datasets # noqa: F401, E501
from .datasets.mmlu.mmlu_gen_4d595a import \
from opencompass.configs.datasets.mmlu.mmlu_gen_4d595a import \
mmlu_datasets # noqa: F401, E501
from .datasets.nq.nq_open_1shot_gen_01cf41 import \
from opencompass.configs.datasets.nq.nq_open_1shot_gen_01cf41 import \
nq_datasets # noqa: F401, E501
from .datasets.race.race_gen_69ee4f import \
from opencompass.configs.datasets.race.race_gen_69ee4f import \
race_datasets # noqa: F401, E501
from .datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import \
from opencompass.configs.datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import \
TheoremQA_datasets # noqa: F401, E501
from .datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import \
from opencompass.configs.datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import \
triviaqa_datasets # noqa: F401, E501
from .datasets.winogrande.winogrande_5shot_gen_b36770 import \
from opencompass.configs.datasets.winogrande.winogrande_5shot_gen_b36770 import \
winogrande_datasets # noqa: F401, E501
# read hf models
from .models.baichuan.hf_baichuan2_7b_chat import \
from opencompass.configs.models.baichuan.hf_baichuan2_7b_chat import \
models as hf_baichuan2_chat_7b # noqa: F401, E501
from .models.gemma.hf_gemma_7b_it import \
from opencompass.configs.models.gemma.hf_gemma_7b_it import \
models as hf_gemma_chat_7b # noqa: F401, E501
from .models.hf_internlm.hf_internlm2_chat_7b import \
from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import \
models as hf_internlm2_chat_7b # noqa: F401, E501
from .models.hf_internlm.hf_internlm2_chat_20b import \
from opencompass.configs.models.hf_internlm.hf_internlm2_chat_20b import \
models as hf_internlm2_chat_20b # noqa: F401, E501
from .models.hf_internlm.hf_internlm_chat_7b import \
from opencompass.configs.models.hf_internlm.hf_internlm_chat_7b import \
models as hf_internlm_chat_7b # noqa: F401, E501
from .models.hf_internlm.hf_internlm_chat_20b import \
from opencompass.configs.models.hf_internlm.hf_internlm_chat_20b import \
models as hf_internlm_chat_20b # noqa: F401, E501
from .models.hf_llama.hf_llama2_7b_chat import \
from opencompass.configs.models.hf_llama.hf_llama2_7b_chat import \
models as hf_llama2_chat_7b # noqa: F401, E501
from .models.hf_llama.hf_llama3_8b_instruct import \
from opencompass.configs.models.hf_llama.hf_llama3_8b_instruct import \
models as hf_llama_3_8b_instruct # noqa: F401, E501
from .models.mistral.hf_mistral_7b_instruct_v0_1 import \
from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_1 import \
models as hf_mistral_chat_7b # noqa: F401, E501
from .models.mistral.hf_mixtral_8x7b_instruct_v0_1 import \
from opencompass.configs.models.mistral.hf_mixtral_8x7b_instruct_v0_1 import \
models as hf_mixtral_chat_8x7b # noqa: F401, E501
from .models.qwen.hf_qwen1_5_7b_chat import \
from opencompass.configs.models.qwen.hf_qwen1_5_7b_chat import \
models as hf_qwen1_5_chat_7b # noqa: F401, E501
from .models.qwen.hf_qwen_7b_chat import \
from opencompass.configs.models.qwen.hf_qwen2_7b_instruct import \
models as hf_qwen2_7b_instruct # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen_7b_chat import \
models as hf_qwen_chat_7b # noqa: F401, E501
# and output the results in a chosen format
from .summarizers.medium import summarizer # noqa: F401, E501
from opencompass.configs.summarizers.medium import \
summarizer # noqa: F401, E501

internlm_meta_template = dict(round=[
dict(role='HUMAN', begin='<|User|>:', end='\n'),
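
The bulk of this hunk switches every config import from the relative .datasets / .models / .summarizers roots to the packaged opencompass.configs namespace, presumably so the file works with a pip-installed OpenCompass instead of requiring a local configs/ checkout. A hedged, standard-library-only way to confirm that assumption on a given installation:

# Assumption: an OpenCompass release that ships its config tree inside the
# installed package. If no spec is found, the packaged configs are not
# available and the old relative imports would still be needed.
import importlib.util

try:
    spec = importlib.util.find_spec(
        'opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4')
except ModuleNotFoundError:
    spec = None
print('packaged configs available:', spec is not None)
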
@@ -117,7 +121,7 @@
end='<|im_end|>\n',
generate=True),
],
eos_token_id=151645,
eos_token_id=[151645, 151643],
)

baichuan2_meta_template = dict(round=[
@@ -202,7 +206,7 @@
qwen_gen_config_template = dict(top_k=1,
top_p=0.8,
temperature=1.0,
stop_words=[151645],
stop_words=[151645, 151643],
max_new_tokens=MAX_NEW_TOKENS)

tokenizer_kwargs_template = dict(padding_side='left',
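
The two hunks above add 151643 alongside 151645 so that generation also stops on Qwen's plain end-of-text token, not only on <|im_end|>. A hedged sanity check of that mapping (assumes the Qwen2-7B-Instruct tokenizer can be downloaded or is already cached; the expected output is an assumption about Qwen2's vocabulary, not something stated in this diff):

# Look up which tokens the two stop ids correspond to in the Qwen2 tokenizer.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('Qwen/Qwen2-7B-Instruct')
print(tok.convert_ids_to_tokens([151645, 151643]))
# expected: ['<|im_end|>', '<|endoftext|>']
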
@@ -546,6 +550,19 @@
run_cfg=dict(num_gpus=1),
)

pt_qwen2_7b_instruct = dict(type=LmdeployPytorchModel,
abbr='pt_qwen2_7b_instruct',
path='Qwen/Qwen2-7B-Instruct',
engine_config=pt_engine_config_template_max_bs_128,
gen_config=gen_config_template,
max_out_len=MAX_NEW_TOKENS,
max_seq_len=MAX_SESSION_LEN,
batch_size=128,
concurrency=128,
meta_template=qwen1_5_meta_template,
run_cfg=run_cfg_tp1_template,
end_str='<|im_end|>')

tb_qwen2_7b_instruct_4bits = deepcopy(tb_qwen2_7b_instruct)
tb_qwen2_7b_instruct_kvint4 = deepcopy(tb_qwen2_7b_instruct)
tb_qwen2_7b_instruct_kvint8 = deepcopy(tb_qwen2_7b_instruct)
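
The deepcopy lines above derive quantized variants from tb_qwen2_7b_instruct; the fields those copies go on to override sit outside the visible hunk. A hedged sketch of the usual copy-then-override pattern, using a minimal stand-in dict and field names that are assumptions rather than part of this diff:

from copy import deepcopy

# Stand-in for the full model entry defined earlier in the config.
tb_qwen2_7b_instruct = dict(abbr='tb_qwen2_7b_instruct',
                            engine_config=dict(session_len=32768))

# 4-bit weight variant: same entry, different abbr and weight format.
tb_qwen2_7b_instruct_4bits = deepcopy(tb_qwen2_7b_instruct)
tb_qwen2_7b_instruct_4bits['abbr'] = 'tb_qwen2_7b_instruct_4bits'
tb_qwen2_7b_instruct_4bits['engine_config']['model_format'] = 'awq'

# kv-int4 variant: quantize the kv cache instead of the weights.
tb_qwen2_7b_instruct_kvint4 = deepcopy(tb_qwen2_7b_instruct)
tb_qwen2_7b_instruct_kvint4['abbr'] = 'tb_qwen2_7b_instruct_kvint4'
tb_qwen2_7b_instruct_kvint4['engine_config']['quant_policy'] = 4
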
18 changes: 11 additions & 7 deletions .github/workflows/benchmark.yml
@@ -36,6 +36,7 @@ env:
REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
dependency_pkgs: ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers datasets matplotlib jmespath'}}
FAIL_CONFIG: ${{ github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}

jobs:
linux-build:
@@ -111,29 +112,32 @@ jobs:
# manually install flash attn
# the install package is from https://github.com/Dao-AILab/flash-attention/releases
python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
python3 -m pip install -e /root/packages/AutoAWQ_kernels
python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
python3 -m pip install ${{env.dependency_pkgs}}
- name: Install lmdeploy
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
python3 -m pip install lmdeploy-*.whl
python3 -m pip install triton==2.1.0
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Install lmdeploy - offline
if: ${{inputs.offline_mode}}
run: |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl
python3 -m pip install triton==2.1.0
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
pip uninstall -y nvidia-nccl-cu11
python3 -m pip list
lmdeploy check_env
mkdir ${{env.REPORT_DIR}}/allure-results/.pytest_cache -p
ln -s ${{env.REPORT_DIR}}/allure-results/.pytest_cache autotest
- name: Run other benchmark
run: |
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 8 --run_id ${{ github.run_id }} -m gpu_num_1 --lf --alluredir=${{env.REPORT_DIR}}/allure-results || true
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 4 --run_id ${{ github.run_id }} -m gpu_num_2 --lf --alluredir=${{env.REPORT_DIR}}/allure-results || true
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 2 --run_id ${{ github.run_id }} -m gpu_num_4 --lf --alluredir=${{env.REPORT_DIR}}/allure-results
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 8 --run_id ${{ github.run_id }} -m gpu_num_1 ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}}/allure-results || true
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 4 --run_id ${{ github.run_id }} -m gpu_num_2 ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}}/allure-results || true
pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n 2 --run_id ${{ github.run_id }} -m gpu_num_4 ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}}/allure-results
- name: Clear workfile
if: always()
run: |
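
The FAIL_CONFIG variable added in the env hunk and consumed by the benchmark pytest commands above encodes the retry policy for this workflow: on the first attempt --lf reruns recorded failures (or the whole suite when no failure cache exists yet), while on a workflow re-run --lf --lfnf none restricts pytest to the tests that failed in the previous attempt and runs nothing if there were none. A hedged sketch of the equivalent selection logic in plain Python:

# Mirrors ${{ github.run_attempt != 1 && '--lf --lfnf none' || '--lf' }}.
def fail_config(run_attempt: int) -> str:
    return '--lf --lfnf none' if run_attempt != 1 else '--lf'

assert fail_config(1) == '--lf'              # first attempt
assert fail_config(2) == '--lf --lfnf none'  # workflow re-run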