Quentin-M · Quentin-M · Feb 16, 2026 · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026
diff --git a/.circleci/config.yml b/.circleci/config.yml
diff --git a/.dockerignore b/.dockerignore
@@ -48,7 +48,7 @@ dist/
 build/
 *.egg-info/
 .DS_Store
-node_modules/
+**/node_modules
 *.log
 .env
 .env.local
diff --git a/.github/actions/helm-oci-chart-releaser/action.yml b/.github/actions/helm-oci-chart-releaser/action.yml
@@ -40,38 +40,41 @@ outputs:
 runs:
   using: composite
   steps:
+    # Install a pinned Helm CLI. OCI registry support is enabled by default in
+    # Helm >= 3.8, so the steps below no longer set HELM_EXPERIMENTAL_OCI.
+    - name: Helm | Setup
+      uses: azure/setup-helm@v4
+      with:
+        version: v3.20.0
+
     - name: Helm | Login
       shell: bash
-      run: echo ${{ inputs.registry_password }} | helm registry login -u ${{ inputs.registry_username }} --password-stdin ${{ inputs.registry }}
+      # Credentials are passed through env rather than interpolated into the
+      # script text, so shell metacharacters in them cannot alter the command.
+      run: printf '%s' "$REGISTRY_PASSWORD" | helm registry login -u "$REGISTRY_USERNAME" --password-stdin "$REGISTRY"
       env:
-        HELM_EXPERIMENTAL_OCI: '1'
-
+        REGISTRY_PASSWORD: ${{ inputs.registry_password }}
+        REGISTRY_USERNAME: ${{ inputs.registry_username }}
+        REGISTRY: ${{ inputs.registry }}
+
     - name: Helm | Dependency
       if: inputs.update_dependencies == 'true'
       shell: bash
       run: helm dependency update ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
-      env:
-        HELM_EXPERIMENTAL_OCI: '1'
 
     - name: Helm | Package
       shell: bash
       run: helm package ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }} --version ${{ inputs.tag }} --app-version ${{ inputs.app_version }}
-      env:
-        HELM_EXPERIMENTAL_OCI: '1'
 
     - name: Helm | Push
       shell: bash
       run: helm push ${{ inputs.name }}-${{ inputs.tag }}.tgz oci://${{ inputs.registry }}/${{ inputs.repository }}
-      env:
-        HELM_EXPERIMENTAL_OCI: '1'
 
     - name: Helm | Logout
       shell: bash
       run: helm registry logout ${{ inputs.registry }}
-      env:
-        HELM_EXPERIMENTAL_OCI: '1'
 
     - name: Helm | Output
       id: output
       shell: bash
-      run: echo "image=${{ inputs.registry }}/${{ inputs.repository }}/${{ inputs.name }}:${{ inputs.tag }}" >> $GITHUB_OUTPUT
+      run: echo "image=${{ inputs.registry }}/${{ inputs.repository }}/${{ inputs.name }}:${{ inputs.tag }}" >> $GITHUB_OUTPUT
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -9,6 +9,7 @@
 - [ ] I have Added testing in the [`tests/litellm/`](https://github.com/BerriAI/litellm/tree/main/tests/litellm) directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code)
 - [ ] My PR passes all unit tests on [`make test-unit`](https://docs.litellm.ai/docs/extras/contributing_code)
 - [ ] My PR's scope is as isolated as possible, it only solves 1 specific problem
+- [ ] I have requested a Greptile review by commenting `@greptileai` and received a **Confidence Score of at least 4/5** before requesting a maintainer review
 
 ## CI (LiteLLM team)
 

diff --git a/.github/workflows/test-litellm-matrix.yml b/.github/workflows/test-litellm-matrix.yml
@@ -0,0 +1,121 @@
+name: LiteLLM Unit Tests (Matrix)
+# Least-privilege token: the steps in this workflow only check out and test the code.
+permissions:
+  contents: read
+
+on:
+  pull_request:
+    branches: [main]
+
+# Cancel in-progress runs for the same PR
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20  # Per-job limit (raised from 15)
+    strategy:
+      fail-fast: false  # let the remaining groups finish when one group fails
+      matrix:
+        test-group:
+          # tests/test_litellm split by subdirectory (~560 files total)
+          # Vertex AI tests separated for better isolation (prevent auth/env pollution)
+          - name: "llms-vertex"
+            path: "tests/test_litellm/llms/vertex_ai"  # substituted unquoted into the pytest command line
+            workers: 1  # pytest -n value
+            reruns: 2  # pytest --reruns value
+          - name: "llms-other"
+            path: "tests/test_litellm/llms --ignore=tests/test_litellm/llms/vertex_ai"
+            workers: 2
+            reruns: 2
+          # tests/test_litellm/proxy split by subdirectory (~180 files total)
+          - name: "proxy-guardrails"
+            path: "tests/test_litellm/proxy/guardrails tests/test_litellm/proxy/management_endpoints tests/test_litellm/proxy/management_helpers"
+            workers: 2
+            reruns: 2
+          - name: "proxy-core"
+            path: "tests/test_litellm/proxy/auth tests/test_litellm/proxy/client tests/test_litellm/proxy/db tests/test_litellm/proxy/hooks tests/test_litellm/proxy/policy_engine"
+            workers: 2
+            reruns: 2
+          - name: "proxy-misc"
+            path: "tests/test_litellm/proxy/_experimental tests/test_litellm/proxy/agent_endpoints tests/test_litellm/proxy/anthropic_endpoints tests/test_litellm/proxy/common_utils tests/test_litellm/proxy/discovery_endpoints tests/test_litellm/proxy/experimental tests/test_litellm/proxy/google_endpoints tests/test_litellm/proxy/health_endpoints tests/test_litellm/proxy/image_endpoints tests/test_litellm/proxy/middleware tests/test_litellm/proxy/openai_files_endpoint tests/test_litellm/proxy/pass_through_endpoints tests/test_litellm/proxy/prompts tests/test_litellm/proxy/public_endpoints tests/test_litellm/proxy/response_api_endpoints tests/test_litellm/proxy/spend_tracking tests/test_litellm/proxy/ui_crud_endpoints tests/test_litellm/proxy/vector_store_endpoints tests/test_litellm/proxy/test_*.py"
+            workers: 2
+            reruns: 2
+          - name: "integrations"
+            path: "tests/test_litellm/integrations"
+            workers: 2
+            reruns: 3  # Integration tests tend to be flakier
+          - name: "core-utils"
+            path: "tests/test_litellm/litellm_core_utils"
+            workers: 2
+            reruns: 1
+          - name: "other"
+            path: "tests/test_litellm/caching tests/test_litellm/responses tests/test_litellm/secret_managers tests/test_litellm/vector_stores tests/test_litellm/a2a_protocol tests/test_litellm/anthropic_interface tests/test_litellm/completion_extras tests/test_litellm/containers tests/test_litellm/enterprise tests/test_litellm/experimental_mcp_client tests/test_litellm/google_genai tests/test_litellm/images tests/test_litellm/interactions tests/test_litellm/passthrough tests/test_litellm/router_strategy tests/test_litellm/router_utils tests/test_litellm/types"
+            workers: 2
+            reruns: 2
+          - name: "root"
+            path: "tests/test_litellm/test_*.py"
+            workers: 2
+            reruns: 2
+          # tests/proxy_unit_tests split alphabetically (~48 files total)
+          - name: "proxy-unit-a"
+            path: "tests/proxy_unit_tests/test_[a-o]*.py"
+            workers: 2
+            reruns: 1
+          - name: "proxy-unit-b"
+            path: "tests/proxy_unit_tests/test_[p-z]*.py"
+            workers: 2
+            reruns: 1
+
+    name: test (${{ matrix.test-group.name }})
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"  # quoted so YAML keeps it a string
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.cache/pip
+            .venv
+          key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}  # new key whenever poetry.lock changes
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+      - name: Install dependencies
+        run: |
+          poetry config virtualenvs.in-project true
+          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
+          # pytest-rerunfailures and pytest-xdist are in pyproject.toml dev dependencies
+          # The ">=" spec is quoted: unquoted, bash parses ">=1.38" as a redirect to a file named "=1.38".
+          poetry run pip install google-genai==1.22.0 "google-cloud-aiplatform>=1.38" fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core
+
+      - name: Setup litellm-enterprise  # editable install of the enterprise/ package into the Poetry env
+        run: |
+          cd enterprise && poetry run pip install -e . && cd ..
+
+      - name: Generate Prisma client  # generated from litellm/proxy/schema.prisma
+        run: |
+          poetry run prisma generate --schema litellm/proxy/schema.prisma
+
+      - name: Run tests - ${{ matrix.test-group.name }}  # path, workers and reruns come from the matrix entry
+        run: |
+          poetry run pytest ${{ matrix.test-group.path }} \
+            --tb=short -vv \
+            --maxfail=10 \
+            -n ${{ matrix.test-group.workers }} \
+            --reruns ${{ matrix.test-group.reruns }} \
+            --reruns-delay 1 \
+            --dist=loadscope \
+            --durations=20
diff --git a/.github/workflows/test-litellm-ui-build.yml b/.github/workflows/test-litellm-ui-build.yml
@@ -0,0 +1,32 @@
+name: UI Build Check
+permissions:
+  contents: read  # read-only token; the job only checks out and builds
+
+on:
+  pull_request:
+    branches: [main]
+
+jobs:
+  build-ui:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    defaults:
+      run:
+        working-directory: ui/litellm-dashboard  # applies to every run step below
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          cache: "npm"
+          cache-dependency-path: ui/litellm-dashboard/package-lock.json
+
+      - name: Install dependencies
+        run: npm ci  # install exactly what package-lock.json pins; fails if it is out of sync with package.json
+
+      - name: Build
+        run: npm run build
diff --git a/.github/workflows/test-litellm.yml b/.github/workflows/test-litellm.yml
@@ -1,8 +1,12 @@
 name: LiteLLM Mock Tests (folder - tests/test_litellm)
 
+# DEPRECATED: This workflow is replaced by test-litellm-matrix.yml, which runs
+# the same tests in parallel across 11 matrix jobs for faster CI times.
+# Kept for manual debugging only.
 on:
-  pull_request:
-    branches: [ main ]
+  workflow_dispatch:  # Manual trigger only
+  # pull_request:
+  #   branches: [ main ]
 
 jobs:
   test:

diff --git a/.github/workflows/test_server_root_path.yml b/.github/workflows/test_server_root_path.yml
@@ -0,0 +1,96 @@
+name: Test Proxy SERVER_ROOT_PATH Routing
+permissions:
+  contents: read
+
+on:
+  pull_request:
+    branches: [main]
+
+jobs:
+  test-server-root-path:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    strategy:
+      matrix:
+        root_path: ["/api/v1", "/llmproxy"]  # one job per value; passed to the container as SERVER_ROOT_PATH
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./docker/Dockerfile.database
+          tags: litellm-test:${{ github.sha }}  # same tag is used by the docker run step below
+          load: true  # load the image into the local Docker daemon so docker run can use it
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Start LiteLLM container with SERVER_ROOT_PATH
+        run: |
+          docker run -d \
+            --name litellm-test \
+            -p 4000:4000 \
+            -e SERVER_ROOT_PATH="${{ matrix.root_path }}" \
+            -e LITELLM_MASTER_KEY="sk-1234" \
+            litellm-test:${{ github.sha }} \
+            --detailed_debug
+
+      - name: Wait for container to be healthy  # polls docker logs for "Uvicorn running": up to 30 attempts, 2s apart
+        run: |
+          echo "Waiting for LiteLLM to start..."
+          max_attempts=30
+          attempt=0
+
+          while [ $attempt -lt $max_attempts ]; do
+            if docker logs litellm-test 2>&1 | grep -q "Uvicorn running"; then
+              echo "LiteLLM started successfully"
+              break
+            fi
+            attempt=$((attempt + 1))
+            echo "Attempt $attempt/$max_attempts - waiting for server to start..."
+            sleep 2
+          done
+
+          if [ $attempt -eq $max_attempts ]; then
+            echo "Server failed to start within timeout"
+            docker logs litellm-test
+            exit 1
+          fi
+
+          sleep 5
+
+      - name: Show container logs
+        if: always()  # print logs even when an earlier step failed
+        run: docker logs litellm-test
+
+      - name: Test UI endpoint with root path  # up to 3 tries; passes when the response looks like HTML
+        run: |
+          ROOT_PATH="${{ matrix.root_path }}"
+          echo "Testing UI at: http://localhost:4000${ROOT_PATH}/ui/"
+
+          for i in 1 2 3; do
+            content=$(curl -sL --max-time 5 -H "Authorization: Bearer sk-1234" "http://localhost:4000${ROOT_PATH}/ui/")
+            if echo "$content" | grep -q -E "(html|<!DOCTYPE|<head|<body)"; then
+              echo "UI page contains valid HTML content"
+              exit 0
+            fi
+            echo "Attempt $i/3 - no valid HTML, retrying in 5s..."
+            sleep 5
+          done
+          echo "UI page does not contain expected HTML content"
+          echo "Response: $content"
+          docker logs litellm-test
+          exit 1
+
+      - name: Cleanup
+        if: always()  # remove the container regardless of the test outcome
+        run: |
+          docker stop litellm-test || true
+          docker rm litellm-test || true
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,7 @@
 .venv
 .venv_policy_test
 .env
+.claude
 .newenv
 newenv/*
 litellm/proxy/myenv/*

diff --git a/.semgrep/rules/README.md b/.semgrep/rules/README.md
@@ -0,0 +1,22 @@
+# Custom Semgrep rules for LiteLLM
+
+Add custom rule YAML files here. Semgrep loads all `.yml`/`.yaml` files under this directory.
+
+**Run only custom rules (CI / fail on findings):**
+
+```bash
+semgrep scan --config .semgrep/rules . --error
+```
+
+**Run with registry + custom rules:**
+
+```bash
+semgrep scan --config auto --config .semgrep/rules .
+```
+
+**Layout:**
+
+- `python/` – Python-specific rules (security, reliability, patterns)
+- Add more subdirs as needed (e.g. `generic/` for language-agnostic rules)
+
+See [Semgrep rule syntax](https://semgrep.dev/docs/writing-rules/rule-syntax/).
diff --git a/.semgrep/rules/python/reliability/unbounded-memory.yml b/.semgrep/rules/python/reliability/unbounded-memory.yml
@@ -0,0 +1,19 @@
+# Unbounded memory growth – data structures without a clear max limit
+# Can lead to OOM under load.
+
+rules:
+  - id: unbounded-asyncio-queue
+    message: asyncio.Queue() with no maxsize can grow unbounded. Use asyncio.Queue(maxsize=N) for integrations (e.g. log queues).
+    severity: ERROR
+    languages: [python]
+    # asyncio.Queue treats maxsize <= 0 as infinite, whether passed by keyword or positionally.
+    pattern-either:
+      - pattern: asyncio.Queue()
+      - pattern: asyncio.Queue(maxsize=0)
+      - pattern: asyncio.Queue(0)
+    metadata:
+      category: reliability
+      cwe: "CWE-400: Uncontrolled Resource Consumption"
+      tags: [python, reliability]
+      confidence: HIGH
+      source: https://docs.python.org/3/library/asyncio-queue.html
diff --git a/.semgrep/rules/python/unbounded-memory.yml b/.semgrep/rules/python/unbounded-memory.yml
@@ -0,0 +1,14 @@
+# Unbounded memory growth – data structures without a clear max limit (can lead to OOM under load).
+# NOTE(review): reliability/unbounded-memory.yml defines a rule with the same id and patterns — confirm both files are intended.
+
+rules:
+  - id: unbounded-asyncio-queue
+    message: asyncio.Queue() with no maxsize can grow unbounded. Use asyncio.Queue(maxsize=N) for integrations (e.g. log queues).
+    severity: ERROR
+    languages: [python]
+    pattern-either:
+      - pattern: asyncio.Queue()
+      - pattern: asyncio.Queue(maxsize=0)
+    metadata:
+      category: correctness
+      cwe: "CWE-400: Uncontrolled Resource Consumption"
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,7 @@ @@
     .venv
     .venv_policy_test
     .env
+    .claude
     .newenv
     newenv/*
     litellm/proxy/myenv/*
@@ Expand Down @@