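# CI workflow for exo: a unit-test job pinned to an Apple Silicon (M1) runner,
# plus two integration jobs covering peer discovery and the ChatGPT-compatible API.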
name: Python Tests on M1 Mac

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  unit_test:
    runs-on: macos-14
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.12'
      - name: Cache huggingface hub models
        uses: actions/cache@v3
        with:
          path: ~/.cache/huggingface/hub
          key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }}
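      # Note: hashFiles() only matches files under GITHUB_WORKSPACE, so the
      # home-directory glob above likely hashes to an empty string and the key
      # degrades to a per-OS, per-job constant. Unlike the jobs below, this
      # step also has no restore-keys fallback.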
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
      - name: Run tests
        run: |
          # Check whether the cached model files are present (informational only)
          ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
          # Run unit tests; METAL_XCODE=1 tells tinygrad to compile Metal
          # kernels with the Xcode toolchain
          METAL_XCODE=1 python3 -m exo.inference.test_inference_engine
  discovery_integration_test:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
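      # The two instances below are cross-wired: each one's --broadcast-port
      # is the other's --listen-port, so each node should announce itself
      # directly to its peer and both logs should record a connection.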
      - name: Run discovery integration test
        run: |
          # Start first instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
          PID1=$!
          # Start second instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
          PID2=$!
          # Wait for discovery
          sleep 10
          # Stop both instances
          kill $PID1 $PID2
          # Check outputs
          if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
            echo "Test passed: Both instances discovered each other"
            exit 0
          else
            echo "Test failed: Devices did not discover each other"
            echo "Output of first instance:"
            cat output1.log
            echo "Output of second instance:"
            cat output2.log
            exit 1
          fi
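      # Note: the fixed 10-second window assumes discovery between two nodes
      # on the same host completes quickly; polling the logs with a timeout
      # would make this check less timing-sensitive.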
  chatgpt_api_integration_test:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Cache huggingface hub models
        uses: actions/cache@v3
        with:
          path: ~/.cache/huggingface/hub
          key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }}
          restore-keys: |
            ${{ runner.os }}-huggingface-hub-
      - name: Cache tinygrad downloaded models
        uses: actions/cache@v3
        with:
          path: ~/Library/Caches/tinygrad/downloads
          key: ${{ runner.os }}-tinygrad-downloads-${{ hashFiles('~/Library/Caches/tinygrad/downloads/**/*') }}-${{ github.job }}
          restore-keys: |
            ${{ runner.os }}-tinygrad-downloads-
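      # Assumption: ~/Library/Caches/tinygrad/downloads is tinygrad's default
      # download cache on macOS, where the tinygrad engine stores fetched weights.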
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
      - name: Run chatgpt api integration test
        run: |
          exit 0  # TODO: test is currently disabled; nothing below this line runs
          # Check whether the cached model files are present (informational only)
          ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
          # Start first instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
          PID1=$!
          # Start second instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
          PID2=$!
          # Wait for discovery
          sleep 10
          # Fail fast if either instance has died; kill -0 sends no signal,
          # it only checks that the process still exists
          check_processes() {
            if ! kill -0 $PID1 2>/dev/null; then
              echo "First instance (PID $PID1) died unexpectedly. Log output:"
              cat output1.log
              exit 1
            fi
            if ! kill -0 $PID2 2>/dev/null; then
              echo "Second instance (PID $PID2) died unexpectedly. Log output:"
              cat output2.log
              exit 1
            fi
          }
          # Check processes before proceeding
          check_processes
          # First request warms the node up and triggers the model load
          curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
              "temperature": 0.7
            }'
          # Check processes after model load
          check_processes
          response_1=$(curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
              "temperature": 0.7
            }')
          echo "Response 1: $response_1"
          # Check processes after first response
          check_processes
          response_2=$(curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
              "temperature": 0.7
            }')
          echo "Response 2: $response_2"
          # Check processes after second response
          check_processes
          # Stop both instances
          kill $PID1 $PID2
echo ""
if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then
echo "Test failed: Response does not contain 'Michael Jackson'"
echo "Response 1: $response_1"
echo ""
echo "Response 2: $response_2"
echo "Output of first instance:"
cat output1.log
echo "Output of second instance:"
cat output2.log
exit 1
else
echo "Test passed: Response from both nodes contains 'Michael Jackson'"
fi