-
Notifications
You must be signed in to change notification settings - Fork 0
107 lines (97 loc) · 3.48 KB
/
test-branch.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Workflow identity and triggers.
name: Test - Models

on:
  # Automatic trigger: any push to the CI branch below.
  # Push events carry no `inputs`, so steps that read
  # `github.event.inputs.*` see empty values on this path.
  push:
    branches:
      - CI-CD/bach

  # Manual trigger: the inputs below are filled in from the "Run workflow" form.
  workflow_dispatch:
    inputs:
      # Model passed to the test script and to the benchmark scripts.
      model_id:
        description: "Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708"
        type: string
        required: true
        default: 'homebrewltd/llama3-s-2024-07-08'

      # Dataset passed to the test script as its data directory.
      dataset_id:
        description: "Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test"
        type: string
        required: true
        default: 'jan-hq/instruction-speech-conversation-test'

      # Appended verbatim to the end of the test command line.
      extra_args:
        description: "Extra arguments for python command, for example:--mode audio --num_rows 5"
        type: string
        required: false
        default: '--mode audio --num_rows 5'

      # The three switches below gate the optional benchmark steps.
      # They are real booleans here, but arrive as the strings
      # 'true' / 'false' in `github.event.inputs`.
      run_benchmark:
        description: "Run benchmark test"
        type: boolean
        required: false
        default: true

      run_si_benchmark:
        description: "Run SI benchmark"
        type: boolean
        required: false
        default: true

      run_asr_benchmark:
        description: "Run ASR benchmark"
        type: boolean
        required: false
        default: true
# Single job: install test requirements, run the model test script, then
# optionally run the lm-evaluation-harness and AudioBench benchmarks and
# upload their JSON results as artifacts.
jobs:
  run-test:
    # NOTE(review): `research` is not a GitHub-hosted runner label, so this
    # presumably targets a self-hosted runner — confirm it is registered.
    runs-on: research
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Later steps `cd` into lm-evaluation-harness and AudioBench;
          # presumably those directories are git submodules — confirm.
          submodules: 'recursive'

      - name: Install dependencies
        working-directory: ./tests
        run: |
          python3 -m pip install --upgrade pip
          pip3 install -r requirements.txt

      - name: Run tests
        working-directory: ./tests
        env:
          # User-supplied inputs are handed to the shell through environment
          # variables instead of being pasted into the script text, so their
          # content cannot be interpreted as shell syntax. The `||` fallbacks
          # apply when the run has no inputs (for example a push event).
          MODEL_ID: ${{ github.event.inputs.model_id || 'jan-hq/Jan-Llama3-0708' }}
          DATASET_ID: ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }}
        # NOTE(security): `extra_args` is still expanded inline because it must
        # split into several command-line arguments. Anyone able to dispatch
        # this workflow can therefore inject shell syntax through it; restrict
        # dispatch permissions or validate the value if that is a concern.
        run: |
          python3 test_case.py \
            --model_dir "$MODEL_ID" \
            --data_dir "$DATASET_ID" \
            ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }}

      - name: Install benchmark dependencies
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        run: |
          cd lm-evaluation-harness
          pip3 install -e .
          # Quoted so the shell never treats the brackets as a glob pattern.
          pip3 install 'lm_eval[vllm]'
          # Make user-level pip entry points visible to the following steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Run benchmark
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        env:
          MODEL_ID: ${{ github.event.inputs.model_id }}
        run: |
          cd lm-evaluation-harness
          chmod +x ./run_benchmark.sh
          ./run_benchmark.sh "$MODEL_ID"

      - name: Upload benchmark results
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        # v2 of this action is deprecated and no longer accepted by GitHub.
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: ./lm-evaluation-harness/benchmark_results/**/*.json

      - name: Eval on Speech Instruction Benchmark
        if: ${{ github.event.inputs.run_si_benchmark == 'true' }}
        env:
          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
          MODEL_ID: ${{ github.event.inputs.model_id }}
        run: |
          cd AudioBench
          pip3 install -r requirements.txt
          chmod +x eval_si.sh
          ./eval_si.sh "$MODEL_ID"

      - name: Eval on ASR Benchmark
        if: ${{ github.event.inputs.run_asr_benchmark == 'true' }}
        env:
          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
          MODEL_ID: ${{ github.event.inputs.model_id }}
        run: |
          cd AudioBench
          pip3 install -r requirements.txt
          chmod +x eval_asr.sh
          ./eval_asr.sh "$MODEL_ID"

      - name: Upload audio results
        # Gate on the AudioBench steps above rather than on `run_benchmark`,
        # which only controls the lm-evaluation-harness steps.
        # NOTE(review): assumes both eval scripts write under
        # AudioBench/benchmark_results/log — confirm against the scripts.
        if: ${{ github.event.inputs.run_si_benchmark == 'true' || github.event.inputs.run_asr_benchmark == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          name: audio-benchmark-results
          path: ./AudioBench/benchmark_results/log/**/*.json