diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index cae4785cd..0d23aaba9 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -5,12 +5,12 @@ on:
   workflow_call:
     inputs:
       tag_suffix:
-        description: 'Custom tag suffix for the Docker image'
+        description: "Custom tag suffix for the Docker image"
         required: false
         type: string
-        default: ''
+        default: ""
       is_nightly:
-        description: 'Whether this is a nightly build'
+        description: "Whether this is a nightly build"
         required: false
         type: boolean
         default: false
@@ -20,7 +20,7 @@ on:
         type: boolean
         default: true
   push:
-    branches: [ "main" ]
+    branches: ["main"]
   pull_request:
     paths:
       - ".github/workflows/docker-publish.yml"
@@ -42,16 +42,32 @@ jobs:
         # Multi-architecture build strategy:
         # - AMD64: Native build on ubuntu-latest (fast)
         # - ARM64: Cross-compilation on ubuntu-latest (faster than emulation)
-# arch: ${{ github.event_name == 'pull_request' && fromJSON('["amd64"]') || fromJSON('["amd64", "arm64"]') }}
+        # arch: ${{ github.event_name == 'pull_request' && fromJSON('["amd64"]') || fromJSON('["amd64", "arm64"]') }}
         arch: ["amd64", "arm64"]
         fail-fast: false
 
     steps:
+      - name: Free up disk space
+        run: |
+          echo "Before cleanup:"
+          df -h
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+          echo "After cleanup:"
+          df -h
+
       - name: Check out the repo
        uses: actions/checkout@v4
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: |
+            image=moby/buildkit:latest
+            network=host
 
       - name: Set up QEMU for cross-compilation
         if: matrix.arch == 'arm64'
@@ -172,6 +188,14 @@ jobs:
             fi
           fi
 
+      - name: Additional cleanup for llm-katan (large Python packages)
+        if: matrix.image == 'llm-katan'
+        run: |
+          echo "Freeing up more space for llm-katan build..."
+          sudo apt-get clean
+          sudo rm -rf /var/lib/apt/lists/*
+          df -h
+
       - name: Build and push ${{ matrix.image }} Docker image
         id: build
         uses: docker/build-push-action@v5
@@ -182,10 +206,8 @@
           push: ${{ github.event_name != 'pull_request' }}
           load: ${{ github.event_name == 'pull_request' }}
           tags: ${{ steps.tags.outputs.tags }}
-          cache-from: |
-            type=gha
-            type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache,mode=max
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
           build-args: |
             BUILDKIT_INLINE_CACHE=1
             CARGO_BUILD_JOBS=${{ github.event_name == 'pull_request' && '8' || '16' }}
diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
index 864c31593..d77545f5d 100644
--- a/.github/workflows/test-and-build.yml
+++ b/.github/workflows/test-and-build.yml
@@ -64,6 +64,7 @@ jobs:
           key: ${{ runner.os }}-models-v1-${{ hashFiles('tools/make/models.mk') }}
           restore-keys: |
             ${{ runner.os }}-models-v1-
+        continue-on-error: true # Don't fail the job if caching fails
 
       - name: Check go mod tidy
         run: make check-go-mod-tidy
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8367a1244..41c055b43 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,100 +1,100 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
-# Basic hooks for Go, Rust, Python And JavaScript files only
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v6.0.0
-  hooks:
-    - id: trailing-whitespace
-      files: \.(go|rs|py|js)$
-    - id: end-of-file-fixer
-      files: \.(go|rs|py|js)$
-    - id: check-added-large-files
-      args: ['--maxkb=500']
-      files: \.(go|rs|py|js)$
+  # Basic hooks for Go, Rust, Python And JavaScript files only
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.2.0
+    hooks:
+      - id: trailing-whitespace
+        files: \.(go|rs|py|js)$
+      - id: end-of-file-fixer
+        files: \.(go|rs|py|js)$
+      - id: check-added-large-files
+        args: ["--maxkb=500"]
+        files: \.(go|rs|py|js)$
 
-# Go specific hooks
-- repo: local
-  hooks:
-    - id: go-fmt
-      name: go fmt
-      entry: gofmt -w
-      language: system
-      files: \.go$
+  # Go specific hooks
+  - repo: local
+    hooks:
+      - id: go-fmt
+        name: go fmt
+        entry: gofmt -w
+        language: system
+        files: \.go$
 
-- repo: local
-  hooks:
-    - id: golang-lint
-      name: go lint
-      entry: make go-lint
-      language: system
-      files: \.go$
-      pass_filenames: false
+  - repo: local
+    hooks:
+      - id: golang-lint
+        name: go lint
+        entry: make go-lint
+        language: system
+        files: \.go$
+        pass_filenames: false
 
-# Markdown specific hooks
-- repo: local
-  hooks:
-    - id: md-fmt
-      name: md fmt
-      entry: bash -c "make markdown-lint"
-      language: system
-      files: \.md$
-      exclude: ^(\node_modules/|CLAUDE\.md)
+  # Markdown specific hooks
+  - repo: local
+    hooks:
+      - id: md-fmt
+        name: md fmt
+        entry: bash -c "make markdown-lint"
+        language: system
+        files: \.md$
+        exclude: ^(\node_modules/|CLAUDE\.md)
 
-# Yaml specific hooks
-- repo: local
-  hooks:
-    - id: yaml-and-yml-fmt
-      name: yaml/yml fmt
-      entry: bash -c "make markdown-lint"
-      language: system
-      files: \.(yaml|yml)$
-      exclude: ^(\node_modules/)
+  # Yaml specific hooks
+  - repo: local
+    hooks:
+      - id: yaml-and-yml-fmt
+        name: yaml/yml fmt
+        entry: bash -c "make markdown-lint"
+        language: system
+        files: \.(yaml|yml)$
+        exclude: ^(\node_modules/)
 
-# JavaScript and TypeScript specific hooks
-- repo: local
-  hooks:
-    - id: js-ts-lint
-      name: js/ts lint
-      entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
-      language: system
-      files: \.(js|ts|tsx)$
-      exclude: ^(\node_modules/)
-      pass_filenames: false
+  # JavaScript and TypeScript specific hooks
+  - repo: local
+    hooks:
+      - id: js-ts-lint
+        name: js/ts lint
+        entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
+        language: system
+        files: \.(js|ts|tsx)$
+        exclude: ^(\node_modules/)
+        pass_filenames: false
 
-# Rust specific hooks
-- repo: local
-  hooks:
-    - id: cargo-fmt
-      name: cargo fmt
-      entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
-      language: system
-      files: \.rs$
-      pass_filenames: false
-    - id: cargo-check
-      name: cargo check
-      entry: bash -c 'cd candle-binding && cargo check'
-      language: system
-      files: \.rs$
-      pass_filenames: false
+  # Rust specific hooks
+  - repo: local
+    hooks:
+      - id: cargo-fmt
+        name: cargo fmt
+        entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
+        language: system
+        files: \.rs$
+        pass_filenames: false
+      - id: cargo-check
+        name: cargo check
+        entry: bash -c 'cd candle-binding && cargo check'
+        language: system
+        files: \.rs$
+        pass_filenames: false
 
-# Python specific hooks
-- repo: https://github.com/psf/black
-  rev: 25.1.0
-  hooks:
-    - id: black
-      language_version: python3
-      files: \.py$
-      exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
-
-- repo: https://github.com/PyCQA/isort
-  rev: 6.0.1
-  hooks:
-    - id: isort
-      args: ["--profile", "black"]
-      files: \.py$
-      exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
+  # Python specific hooks
+  # isort must run before black
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+        args: ["--profile", "black", "--line-length", "88"]
+        files: \.py$
+        exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
+  - repo: https://github.com/psf/black
+    rev: 25.1.0
+    hooks:
+      - id: black
+        language_version: python3
+        files: \.py$
+        exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
 
 # Commented out flake8 - only reports issues, doesn't auto-fix
 # - repo: https://github.com/PyCQA/flake8
 #   rev: 7.3.0
diff --git a/config/config-mcp-classifier-example.yaml b/config/config-mcp-classifier-example.yaml
index 22468df69..4d7f6530a 100644
--- a/config/config-mcp-classifier-example.yaml
+++ b/config/config-mcp-classifier-example.yaml
@@ -14,7 +14,7 @@
 
 # BERT model for semantic caching and tool selection
 bert_model:
-  model_id: "sentence-transformers/all-MiniLM-L6-v2"
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.85
   use_cpu: true
 
diff --git a/config/config.development.yaml b/config/config.development.yaml
index fa7afdef8..31051e7c4 100644
--- a/config/config.development.yaml
+++ b/config/config.development.yaml
@@ -3,7 +3,7 @@
 # for local development and debugging.
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 
diff --git a/config/config.e2e.yaml b/config/config.e2e.yaml
index 42167503f..b588849f2 100644
--- a/config/config.e2e.yaml
+++ b/config/config.e2e.yaml
@@ -1,5 +1,5 @@
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 semantic_cache:
diff --git a/config/config.production.yaml b/config/config.production.yaml
index edd049a31..9c4dd4f80 100644
--- a/config/config.production.yaml
+++ b/config/config.production.yaml
@@ -3,7 +3,7 @@
 # for production deployment with Jaeger or other OTLP-compatible backends.
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 
diff --git a/config/config.recipe-accuracy.yaml b/config/config.recipe-accuracy.yaml
index 18f2751d8..584b02917 100644
--- a/config/config.recipe-accuracy.yaml
+++ b/config/config.recipe-accuracy.yaml
@@ -13,7 +13,7 @@
 # - Jailbreak protection enabled
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.7 # Higher threshold for better precision
   use_cpu: true
 
diff --git a/config/config.recipe-latency.yaml b/config/config.recipe-latency.yaml
index 00b3ae007..ce31a36fd 100644
--- a/config/config.recipe-latency.yaml
+++ b/config/config.recipe-latency.yaml
@@ -13,7 +13,7 @@
 # - Minimal observability overhead
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.4 # Very low threshold for fast matching
   use_cpu: true
 
diff --git a/config/config.recipe-token-efficiency.yaml b/config/config.recipe-token-efficiency.yaml
index b76aeec4d..49008db52 100644
--- a/config/config.recipe-token-efficiency.yaml
+++ b/config/config.recipe-token-efficiency.yaml
@@ -13,7 +13,7 @@
 # - Larger batch sizes for efficient processing
 
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.5 # Lower threshold for faster matching
   use_cpu: true
 
diff --git a/config/config.testing.yaml b/config/config.testing.yaml
index 9dc59e5cc..91722f564 100644
--- a/config/config.testing.yaml
+++ b/config/config.testing.yaml
@@ -1,5 +1,5 @@
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 
diff --git a/config/config.yaml b/config/config.yaml
index 5ad29d5ac..1e2c43d7f 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -1,15 +1,15 @@
 bert_model:
-  model_id: sentence-transformers/all-MiniLM-L12-v2
+  model_id: models/all-MiniLM-L12-v2
   threshold: 0.6
   use_cpu: true
 
 semantic_cache:
   enabled: true
-  backend_type: "memory"  # Options: "memory" or "milvus"
+  backend_type: "memory" # Options: "memory" or "milvus"
   similarity_threshold: 0.8
-  max_entries: 1000  # Only applies to memory backend
+  max_entries: 1000 # Only applies to memory backend
   ttl_seconds: 3600
-  eviction_policy: "fifo" 
+  eviction_policy: "fifo"
 
 tools:
   enabled: true
@@ -32,13 +32,13 @@ prompt_guard:
 # NOT supported: domain names (example.com), protocol prefixes (http://), paths (/api), ports in address (use 'port' field)
 vllm_endpoints:
   - name: "endpoint1"
-    address: "172.28.0.20"  # Static IPv4 of llm-katan within docker compose network
+    address: "172.28.0.20" # Static IPv4 of llm-katan within docker compose network
     port: 8002
     weight: 1
 
 model_config:
   "qwen3":
-    reasoning_family: "qwen3"  # This model uses Qwen-3 reasoning syntax
+    reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
     preferred_endpoints: ["endpoint1"]
     pii_policy:
       allow_by_default: true
@@ -65,7 +65,7 @@ categories:
       model_scores:
         - model: qwen3
           score: 0.7
-          use_reasoning: false  # Business performs better without reasoning
+          use_reasoning: false # Business performs better without reasoning
   - name: law
     system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters."
     model_scores:
@@ -89,7 +89,7 @@ categories:
       model_scores:
         - model: qwen3
           score: 0.6
-          use_reasoning: true  # Enable reasoning for complex chemistry
+          use_reasoning: true # Enable reasoning for complex chemistry
   - name: history
     system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis."
     model_scores:
@@ -119,13 +119,13 @@ categories:
       model_scores:
         - model: qwen3
          score: 1.0
-          use_reasoning: true  # Enable reasoning for complex math
+          use_reasoning: true # Enable reasoning for complex math
   - name: physics
     system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate."
     model_scores:
       - model: qwen3
         score: 0.7
-        use_reasoning: true  # Enable reasoning for physics
+        use_reasoning: true # Enable reasoning for physics
   - name: computer science
     system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. Provide clear, practical solutions with code examples when helpful."
     model_scores:
@@ -178,23 +178,23 @@ api:
     detailed_goroutine_tracking: true
     high_resolution_timing: false
     sample_rate: 1.0
-    duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+    duration_buckets:
+      [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
     size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
 
 # Observability Configuration
 observability:
   tracing:
-    enabled: false  # Enable distributed tracing (default: false)
-    provider: "opentelemetry"  # Provider: opentelemetry, openinference, openllmetry
+    enabled: false # Enable distributed tracing (default: false)
+    provider: "opentelemetry" # Provider: opentelemetry, openinference, openllmetry
     exporter:
-      type: "stdout"  # Exporter: otlp, jaeger, zipkin, stdout
-      endpoint: "localhost:4317"  # OTLP endpoint (when type: otlp)
-      insecure: true  # Use insecure connection (no TLS)
+      type: "stdout" # Exporter: otlp, jaeger, zipkin, stdout
+      endpoint: "localhost:4317" # OTLP endpoint (when type: otlp)
+      insecure: true # Use insecure connection (no TLS)
     sampling:
-      type: "always_on"  # Sampling: always_on, always_off, probabilistic
-      rate: 1.0  # Sampling rate for probabilistic (0.0-1.0)
+      type: "always_on" # Sampling: always_on, always_off, probabilistic
+      rate: 1.0 # Sampling rate for probabilistic (0.0-1.0)
     resource:
       service_name: "vllm-semantic-router"
       service_version: "v0.1.0"
       deployment_environment: "development"
-
diff --git a/e2e-tests/llm-katan/Dockerfile b/e2e-tests/llm-katan/Dockerfile
index 9e29080e9..303fc016c 100644
--- a/e2e-tests/llm-katan/Dockerfile
+++ b/e2e-tests/llm-katan/Dockerfile
@@ -17,7 +17,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 
 # Copy requirements first for better layer caching
 COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
+# Install PyTorch CPU-only version to save space (no CUDA for testing server)
+RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
+    pip install --no-cache-dir -r requirements.txt
 
 # Copy the llm_katan package
 COPY llm_katan/ ./llm_katan/
diff --git a/src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py b/src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py
index 783de39a5..ba7c0ab6a 100644
--- a/src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py
+++ b/src/training/training_lora/classifier_model_fine_tuning_lora/ft_linear_lora.py
@@ -69,13 +69,7 @@
 import torch
 import torch.nn as nn
 from datasets import Dataset, load_dataset
-from peft import (
-    LoraConfig,
-    PeftConfig,
-    PeftModel,
-    TaskType,
-    get_peft_model,
-)
+from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
 from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
 from sklearn.model_selection import train_test_split
 from transformers import (
diff --git a/src/training/training_lora/classifier_model_fine_tuning_lora/ft_qwen3_generative_lora.py b/src/training/training_lora/classifier_model_fine_tuning_lora/ft_qwen3_generative_lora.py
index 01378b038..147d564fa 100644
--- a/src/training/training_lora/classifier_model_fine_tuning_lora/ft_qwen3_generative_lora.py
+++ b/src/training/training_lora/classifier_model_fine_tuning_lora/ft_qwen3_generative_lora.py
@@ -53,13 +53,7 @@
 import torch
 
 from datasets import Dataset, load_dataset
-from peft import (
-    LoraConfig,
-    PeftConfig,
-    PeftModel,
-    TaskType,
-    get_peft_model,
-)
+from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
 from sklearn.metrics import accuracy_score, f1_score
 from sklearn.model_selection import train_test_split
 from transformers import (
diff --git a/src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py b/src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py
index e9147caf0..a48c4d1d3 100644
--- a/src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py
+++ b/src/training/training_lora/pii_model_fine_tuning_lora/pii_bert_finetuning_lora.py
@@ -70,13 +70,7 @@
 import torch
 import torch.nn as nn
 from datasets import Dataset, load_dataset
-from peft import (
-    LoraConfig,
-    PeftConfig,
-    PeftModel,
-    TaskType,
-    get_peft_model,
-)
+from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
 from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
 from sklearn.model_selection import train_test_split
 from transformers import (
diff --git a/src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py b/src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py
index 408792dc0..da5007cd6 100644
--- a/src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py
+++ b/src/training/training_lora/prompt_guard_fine_tuning_lora/jailbreak_bert_finetuning_lora.py
@@ -77,13 +77,7 @@
 import torch
 import torch.nn as nn
 from datasets import Dataset, load_dataset
-from peft import (
-    LoraConfig,
-    PeftConfig,
-    PeftModel,
-    TaskType,
-    get_peft_model,
-)
+from peft import LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
 from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support
 from sklearn.model_selection import train_test_split
 from transformers import (
diff --git a/tools/make/docker.mk b/tools/make/docker.mk
index 975d91f40..9437354a4 100644
--- a/tools/make/docker.mk
+++ b/tools/make/docker.mk
@@ -130,9 +130,24 @@ docker-compose-rebuild-llm-katan: docker-compose-up-llm-katan
 
 docker-compose-down:
 	@$(LOG_TARGET)
-	@echo "Stopping docker-compose services..."
+	@echo "Stopping docker-compose services (default includes llm-katan)..."
+	@docker compose --profile llm-katan down
+
+docker-compose-down-core:
+	@$(LOG_TARGET)
+	@echo "Stopping core services only (no llm-katan)..."
 	@docker compose down
 
+docker-compose-down-testing:
+	@$(LOG_TARGET)
+	@echo "Stopping services with testing profile..."
+	@docker compose --profile testing down
+
+docker-compose-down-llm-katan:
+	@$(LOG_TARGET)
+	@echo "Stopping services with llm-katan profile..."
+	@docker compose --profile llm-katan down
+
 # Help target for Docker commands
 docker-help:
 	@echo "Docker Make Targets:"
@@ -152,7 +167,10 @@ docker-help:
 	@echo "  docker-compose-rebuild  - Force rebuild then start"
 	@echo "  docker-compose-rebuild-testing - Force rebuild (testing profile)"
 	@echo "  docker-compose-rebuild-llm-katan - Force rebuild (llm-katan profile)"
-	@echo "  docker-compose-down     - Stop docker-compose services"
+	@echo "  docker-compose-down     - Stop services (default includes llm-katan)"
+	@echo "  docker-compose-down-core - Stop core services only (no llm-katan)"
+	@echo "  docker-compose-down-testing - Stop services with testing profile"
+	@echo "  docker-compose-down-llm-katan - Stop services with llm-katan profile"
 	@echo ""
 	@echo "Environment Variables:"
 	@echo "  DOCKER_REGISTRY     - Docker registry (default: ghcr.io/vllm-project/semantic-router)"
diff --git a/tools/make/models.mk b/tools/make/models.mk
index 500b8031c..a22828e05 100644
--- a/tools/make/models.mk
+++ b/tools/make/models.mk
@@ -28,6 +28,9 @@ download-models-minimal:
 	@if [ ! -f "models/Qwen/Qwen3-0.6B/.downloaded" ] || [ ! -d "models/Qwen/Qwen3-0.6B" ]; then \
 		hf download Qwen/Qwen3-0.6B --local-dir models/Qwen/Qwen3-0.6B && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/Qwen/Qwen3-0.6B/.downloaded; \
 	fi
+	@if [ ! -f "models/all-MiniLM-L12-v2/.downloaded" ] || [ ! -d "models/all-MiniLM-L12-v2" ]; then \
+		hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir models/all-MiniLM-L12-v2 && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/all-MiniLM-L12-v2/.downloaded; \
+	fi
 	@if [ ! -f "models/category_classifier_modernbert-base_model/.downloaded" ] || [ ! -d "models/category_classifier_modernbert-base_model" ]; then \
 		hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir models/category_classifier_modernbert-base_model && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/category_classifier_modernbert-base_model/.downloaded; \
 	fi
@@ -49,6 +52,9 @@ download-models-full:
 	@if [ ! -f "models/Qwen/Qwen3-0.6B/.downloaded" ] || [ ! -d "models/Qwen/Qwen3-0.6B" ]; then \
 		hf download Qwen/Qwen3-0.6B --local-dir models/Qwen/Qwen3-0.6B && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/Qwen/Qwen3-0.6B/.downloaded; \
 	fi
+	@if [ ! -f "models/all-MiniLM-L12-v2/.downloaded" ] || [ ! -d "models/all-MiniLM-L12-v2" ]; then \
+		hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir models/all-MiniLM-L12-v2 && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/all-MiniLM-L12-v2/.downloaded; \
+	fi
 	@if [ ! -f "models/category_classifier_modernbert-base_model/.downloaded" ] || [ ! -d "models/category_classifier_modernbert-base_model" ]; then \
 		hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir models/category_classifier_modernbert-base_model && printf '%s\n' "$$(date -u +%Y-%m-%dT%H:%M:%SZ)" > models/category_classifier_modernbert-base_model/.downloaded; \
 	fi