name: Python Tests on M1 Mac

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  unit_test:
    runs-on: macos-14
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Cache huggingface hub models
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface/hub
          key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }}
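          # Note: hashFiles() only matches files under the workspace, so this
          # home-directory glob hashes to the empty string; the key is constant
          # per OS/job and the cache never invalidates when the models change.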
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
      - name: Run tests
        run: |
          # Check if cached files are present
          ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
          # Run unit tests
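          # METAL_XCODE=1 appears to be a tinygrad environment flag (compile Metal
          # kernels via the Xcode toolchain); treat this note as an assumption.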
          METAL_XCODE=1 python3 -m exo.inference.test_inference_engine

  discovery_integration_test:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
      - name: Run discovery integration test
        run: |
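          # The two instances swap --listen-port and --broadcast-port (5678/5679)
          # so that each node hears the other's discovery broadcasts.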
          # Start first instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
          PID1=$!
          # Start second instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
          PID2=$!
          # Wait for discovery
          sleep 10
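          # A fixed sleep can be flaky on slow runners; a polling sketch (assuming
          # the same "Connected to peer" log line checked below) would be:
          #   for _ in $(seq 1 30); do
          #     if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then break; fi
          #     sleep 1
          #   done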
          # Stop both instances
          kill $PID1 $PID2
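          # Optionally `wait $PID1 $PID2 2>/dev/null || true` here so both
          # processes exit and flush buffered log output before the greps below.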
          # Check outputs
          if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
            echo "Test passed: Both instances discovered each other"
            exit 0
          else
            echo "Test failed: Instances did not discover each other"
            echo "Output of first instance:"
            cat output1.log
            echo "Output of second instance:"
            cat output2.log
            exit 1
          fi
  chatgpt_api_integration_test:
    runs-on: macos-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Cache huggingface hub models
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface/hub
          key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }}
          restore-keys: |
            ${{ runner.os }}-huggingface-hub-
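          # On an exact-key miss, restore-keys restores the most recently created
          # cache whose key starts with this prefix.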
      - name: Cache tinygrad downloaded models
        uses: actions/cache@v4
        with:
          path: ~/Library/Caches/tinygrad/downloads
          key: ${{ runner.os }}-tinygrad-downloads-${{ hashFiles('~/Library/Caches/tinygrad/downloads/**/*') }}-${{ github.job }}
          restore-keys: |
            ${{ runner.os }}-tinygrad-downloads-
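          # The same hashFiles() caveat applies here: a home-directory glob hashes
          # to the empty string, so this key only varies by OS and job.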
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install .
      - name: Run chatgpt api integration test
        run: |
          # TODO: this test is currently a no-op; remove the next line to enable it
          exit 0
          # Check if cached files are present
          ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true
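          # The 900s --chatgpt-api-response-timeout-secs below is presumably sized
          # to give the first request time to download and load the model on a cold cache.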
          # Start first instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
          PID1=$!
          # Start second instance
          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
          PID2=$!
          # Wait for discovery
          sleep 10
          # Function to check if processes are still running
          check_processes() {
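            # kill -0 sends no signal; it only checks that the process still exists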
            if ! kill -0 $PID1 2>/dev/null; then
              echo "First instance (PID $PID1) died unexpectedly. Log output:"
              cat output1.log
              exit 1
            fi
            if ! kill -0 $PID2 2>/dev/null; then
              echo "Second instance (PID $PID2) died unexpectedly. Log output:"
              cat output2.log
              exit 1
            fi
          }
          # Check processes before proceeding
          check_processes
          # First request: warms the node by loading the model; the response is discarded
          curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
              "temperature": 0.7
            }'
          # Check processes after model load
          check_processes
          response_1=$(curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
              "temperature": 0.7
            }')
          echo "Response 1: $response_1"
          # Check processes after first response
          check_processes
          # Query the second node's API (port 8001) so both nodes are exercised,
          # matching the "both nodes" check below
          response_2=$(curl -s http://localhost:8001/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
              "temperature": 0.7
            }')
          echo "Response 2: $response_2"
          # Check processes after second response
          check_processes
          # Stop both instances
          kill $PID1 $PID2
          echo ""
          if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then
            echo "Test failed: Response does not contain 'Michael Jackson'"
            echo "Response 1: $response_1"
            echo ""
            echo "Response 2: $response_2"
            echo "Output of first instance:"
            cat output1.log
            echo "Output of second instance:"
            cat output2.log
            exit 1
          else
            echo "Test passed: Response from both nodes contains 'Michael Jackson'"
          fi
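          # Note: grep matches the substring anywhere in the raw JSON body. A stricter
          # check (sketch, assuming an OpenAI-style response shape) would parse it:
          #   echo "$response_1" | python3 -c 'import json,sys; print(json.load(sys.stdin)["choices"][0]["message"]["content"])' | grep -q "Michael Jackson"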