Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions .github/workflows/integration-test-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ on:
branches:
- main
paths-ignore:
- 'website/**'
- '**/*.md'
- "website/**"
- "**/*.md"
push:
branches:
- main
paths-ignore:
- 'website/**'
- '**/*.md'
- "website/**"
- "**/*.md"
workflow_dispatch: # Allow manual triggering

concurrency:
Expand All @@ -24,7 +24,7 @@ jobs:
test-ci-compose:
if: github.repository == 'vllm-project/semantic-router' && !github.event.pull_request.draft
runs-on: ubuntu-latest
timeout-minutes: 20 # Reduced from 30 - CI compose is faster
timeout-minutes: 20 # Reduced from 30 - CI compose is faster

steps:
- name: Check out the repo
Expand All @@ -36,11 +36,11 @@ jobs:
# This helps prevent "no space left on device" errors
echo "Disk space before setup:"
df -h / && df -h /mnt

# Create /mnt/models directory if it doesn't exist
sudo mkdir -p /mnt/models
sudo chown -R $USER:$USER /mnt/models

# If models directory already exists in workspace, move it to /mnt
if [ -d "models" ] && [ ! -L "models" ]; then
echo "Moving existing models directory to /mnt/models..."
Expand All @@ -53,7 +53,7 @@ jobs:
sudo mv models /mnt/models
fi
fi

# Create symlink from models/ to /mnt/models/ so existing code continues to work
if [ ! -e "models" ]; then
ln -s /mnt/models models
Expand All @@ -63,21 +63,21 @@ jobs:
else
echo "Warning: models exists but is not a symlink"
fi

echo "Disk space after setup:"
df -h / && df -h /mnt
echo "Models directory setup complete. Models will be stored in /mnt/models"

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: "3.11"

- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y make curl
Comment thread
JaredforReal marked this conversation as resolved.
pip install huggingface_hub[cli]
pip install -r src/model_manager/requirements.txt

- name: Download models
run: |
Expand All @@ -86,6 +86,7 @@ jobs:
env:
CI: true
CI_MINIMAL_MODELS: true
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_HUB_ENABLE_HF_TRANSFER: 1
HF_HUB_DISABLE_TELEMETRY: 1

Expand All @@ -101,7 +102,7 @@ jobs:
echo "Waiting for services to be healthy..."
max_attempts=60
attempt=1

while [ $attempt -le $max_attempts ]; do
echo "Attempt $attempt/$max_attempts: Checking service health..."

Expand All @@ -128,7 +129,7 @@ jobs:
sleep 5
((attempt++))
done

echo "❌ Timeout waiting for services to be healthy"
docker ps -a
exit 1
Expand Down Expand Up @@ -173,7 +174,7 @@ jobs:
}')

echo "Response: $response"

# Verify we got a response
if echo "$response" | grep -q "choices"; then
echo "✅ Chat completions test passed"
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/performance-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,14 @@ jobs:
- name: Build Rust library (CPU-only)
run: make rust-ci

- name: Install HuggingFace CLI
- name: Install Model Manager dependencies
Comment thread
JaredforReal marked this conversation as resolved.
run: |
pip install -U "huggingface_hub[cli]" hf_transfer
pip install -r src/model_manager/requirements.txt

- name: Download models (minimal set for nightly)
env:
CI_MINIMAL_MODELS: true
CI_MINIMAL_MODELS: false
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_HUB_ENABLE_HF_TRANSFER: 1
HF_HUB_DISABLE_TELEMETRY: 1
run: make download-models
Expand Down
21 changes: 12 additions & 9 deletions .github/workflows/performance-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ on:
branches:
- main
paths:
- 'src/semantic-router/**'
- 'candle-binding/**'
- 'perf/**'
- '.github/workflows/performance-test.yml'
- "src/semantic-router/**"
- "candle-binding/**"
- "perf/**"
- ".github/workflows/performance-test.yml"
workflow_dispatch:

permissions:
contents: read
pull-requests: write # Required to comment on PRs
issues: write # Required to comment on PRs (PRs are issues)
pull-requests: write # Required to comment on PRs
issues: write # Required to comment on PRs (PRs are issues)

jobs:
component-benchmarks:
Expand All @@ -24,6 +24,8 @@ jobs:
steps:
- name: Check out the repo
uses: actions/checkout@v4
with:
fetch-depth: 0 # Need full history for baseline comparison

- name: Set up Go
uses: actions/setup-go@v5
Expand Down Expand Up @@ -70,13 +72,14 @@ jobs:
- name: Build Rust library (CPU-only)
run: make rust-ci

- name: Install HuggingFace CLI
- name: Install Model Manager dependencies
Comment thread
JaredforReal marked this conversation as resolved.
run: |
pip install -U "huggingface_hub[cli]" hf_transfer
pip install -r src/model_manager/requirements.txt

- name: Download models (minimal)
env:
CI_MINIMAL_MODELS: true
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_HUB_ENABLE_HF_TRANSFER: 1
HF_HUB_DISABLE_TELEMETRY: 1
run: make download-models
Expand Down Expand Up @@ -117,7 +120,7 @@ jobs:

- name: Comment PR with results
if: github.event_name == 'pull_request'
continue-on-error: true # May fail for PRs from forks due to GitHub security restrictions
continue-on-error: true # May fail for PRs from forks due to GitHub security restrictions
uses: actions/github-script@v7
with:
script: |
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/test-and-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ jobs:
|| needs.changes.outputs.make == 'true'
|| needs.changes.outputs.ci == 'true') }}
runs-on: ubuntu-latest

steps:
- name: Check out the repo
uses: actions/checkout@v4
Expand Down Expand Up @@ -86,11 +85,11 @@ jobs:
# This helps prevent "no space left on device" errors
echo "Disk space before setup:"
df -h / && df -h /mnt

# Create /mnt/models directory if it doesn't exist
sudo mkdir -p /mnt/models
sudo chown -R $USER:$USER /mnt/models

# If models directory already exists in workspace, move it to /mnt
if [ -d "models" ] && [ ! -L "models" ]; then
echo "Moving existing models directory to /mnt/models..."
Expand All @@ -103,7 +102,7 @@ jobs:
sudo mv models /mnt/models
fi
fi

# Create symlink from models/ to /mnt/models/ so existing code continues to work
if [ ! -e "models" ]; then
ln -s /mnt/models models
Expand All @@ -113,7 +112,7 @@ jobs:
else
echo "Warning: models exists but is not a symlink"
fi

echo "Disk space after setup:"
df -h / && df -h /mnt
echo "Models directory setup complete. Models will be stored in /mnt/models"
Expand All @@ -134,13 +133,14 @@ jobs:
- name: Build Rust library (CPU-only, no CUDA)
run: make rust-ci

- name: Install HuggingFace CLI
- name: Install Model Manager dependencies
run: |
pip install -U "huggingface_hub[cli]" hf_transfer
pip install -r src/model_manager/requirements.txt

- name: Download models (minimal on PRs)
env:
CI_MINIMAL_MODELS: ${{ github.event_name == 'pull_request' }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_HUB_ENABLE_HF_TRANSFER: 1
HF_HUB_DISABLE_TELEMETRY: 1
run: make download-models
Expand Down Expand Up @@ -169,7 +169,7 @@ jobs:

echo "Milvus is ready at localhost:19530"
docker ps --filter "name=milvus-semantic-cache"

- name: Start Redis service
run: |
echo "Starting Redis Stack..."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# This file contains only LoRA adapter models for incremental CI downloads.
#
# Usage:
# python -m model_manager --config config/models.lora.yaml
# python -m model_manager --config config/model_manager/models.lora.yaml
#
# Equivalent to: make download-models-lora

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
# Usage:
# CI_MINIMAL_MODELS=true python -m model_manager
# # or explicitly:
# python -m model_manager --config config/models.minimal.yaml
# python -m model_manager --config config/model_manager/models.minimal.yaml
#
# Equivalent to: make download-models-minimal
# or CI_MINIMAL_MODELS=true make download-models
#
# Note: embeddinggemma-300m is gated and requires HF_TOKEN, so it's excluded.
# Note: This is the minimal set for fast CI runs. Larger models like
# embeddinggemma-300m are in models.yaml (full set) for local development.

cache_dir: "models"
verify: "size" # Use size for faster CI runs
Expand Down
2 changes: 1 addition & 1 deletion config/models.yaml → config/model_manager/models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# Usage:
# python -m model_manager
# python -m model_manager --config config/models.yaml
# python -m model_manager --config config/model_manager/models.yaml
#
# Includes additional LoRA variants (roberta, modernbert) and gated models not in minimal set.
#
Expand Down
Loading
Loading