napmany · napmany · Feb 14, 2026 · Dec 24, 2025 · Dec 24, 2025 · Dec 28, 2025
diff --git a/.coderabbit.yaml b/.coderabbit.yaml
@@ -8,8 +8,15 @@ reviews:
   poem: false
   review_status: true
   collapse_walkthrough: false
+  sequence_diagrams: false
+  finishing_touches:
+    docstrings:
+      enabled: false
   auto_review:
     enabled: true
     drafts: false
 chat:
   auto_reply: true
+issue_enrichment:
+  planning:
+    enabled: false
diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml
@@ -10,17 +10,37 @@ on:
   # Allows manual triggering of the workflow
   workflow_dispatch:
 
+  # Build (without pushing images) when the workflow or container build files change
+  push:
+    paths:
+      - '.github/workflows/containers.yml'
+      - 'docker/build-container.sh'
+      - 'docker/*.Containerfile'
+
 jobs:
   build-and-push:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        platform: [intel, cuda, vulkan, cpu, musa]
+        platform: [intel, cuda, vulkan, cpu, musa, rocm]
       fail-fast: false
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Free up disk space
+        if: matrix.platform == 'rocm'
+        run: |
+          echo "Before cleanup:"
+          df -h
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker system prune -af
+          echo "After cleanup:"
+          df -h
+
       - name: Log in to GitHub Container Registry
         uses: docker/login-action@v2
         with:
@@ -31,7 +51,7 @@ jobs:
       - name: Run build-container
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: ./docker/build-container.sh ${{ matrix.platform }} true
+        run: ./docker/build-container.sh ${{ matrix.platform }} ${{ github.event_name != 'push' }}
 
   # note make sure napmany/llmsnap has admin rights to the llmsnap package
   # see: https://github.com/actions/delete-package-versions/issues/74

diff --git a/.github/workflows/go-ci-windows.yml b/.github/workflows/go-ci-windows.yml
@@ -3,9 +3,25 @@ name: Windows CI
 on:
   push:
     branches: [ "main" ]
+    # only run when backend source changes
+    # cmd/ is excluded because it contains utilities without tests
+    paths:
+      - '**/*.go'
+      - '!cmd/**'
+      - 'go.mod'
+      - 'go.sum'
+      - 'Makefile'
+      - '.github/workflows/go-ci-windows.yml'
 
   pull_request:
     branches: [ "main" ]
+    paths:
+      - '**/*.go'
+      - '!cmd/**'
+      - 'go.mod'
+      - 'go.sum'
+      - 'Makefile'
+      - '.github/workflows/go-ci-windows.yml'
 
   # Allows manual triggering of the workflow
   workflow_dispatch:
@@ -28,7 +44,7 @@ jobs:
       uses: actions/cache/restore@v4
       with:
         path: ./build
-        key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
+        key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
 
     # necessary for testing proxy/Process swapping
     - name: Create simple-responder
@@ -42,7 +58,7 @@ jobs:
       uses: actions/cache/save@v4
       with:
         path: ./build
-        key: ${{ runner.os }}-simple-responder-${{ hashFiles('misc/simple-responder/simple-responder.go') }}
+        key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }}
 
     - name: Test all
       shell: bash

diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml
@@ -3,9 +3,25 @@ name: Linux CI
 on:
   push:
     branches: [ "main" ]
+    # only run when backend source changes
+    # cmd/ is excluded because it contains utilities without tests
+    paths:
+      - '**/*.go'
+      - '!cmd/**'
+      - 'go.mod'
+      - 'go.sum'
+      - 'Makefile'
+      - '.github/workflows/go-ci.yml'
 
   pull_request:
     branches: [ "main" ]
+    paths:
+      - '**/*.go'
+      - '!cmd/**'
+      - 'go.mod'
+      - 'go.sum'
+      - 'Makefile'
+      - '.github/workflows/go-ci.yml'
 
   # Allows manual triggering of the workflow
   workflow_dispatch:

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -3,13 +3,13 @@ name: goreleaser
 on:
   push:
     tags:
-      - '*'
+      - "*"
 
   # Allows manual triggering of the workflow
   workflow_dispatch:
     inputs:
       tag:
-        description: 'Tag version to release (e.g. v144)'
+        description: "Tag version to release (e.g. v144)"
         required: true
 
 permissions:
@@ -19,35 +19,30 @@ jobs:
   goreleaser:
     runs-on: ubuntu-latest
     steps:
-      -
-        name: Checkout
+      - name: Checkout
         uses: actions/checkout@v4
         with:
           fetch-depth: 0
           ref: ${{ github.event.inputs.tag || github.ref }}
-      -
-        name: Set up Go
+      - name: Set up Go
         uses: actions/setup-go@v5
-      -
-        name: Set up Node.js
+      - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: '23'
-      -
-        name: Install dependencies and build UI
+          node-version: "24"
+      - name: Install dependencies and build UI
         run: |
-          cd ui
+          cd ui-svelte
           npm ci
           npm run build
 
-      -
-        name: Run GoReleaser
+      - name: Run GoReleaser
         uses: goreleaser/goreleaser-action@v6
         with:
           # either 'goreleaser' (default) or 'goreleaser-pro'
           distribution: goreleaser
           # 'latest', 'nightly', or a semver
-          version: '~> v2'
+          version: "~> v2"
           args: release --clean
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -76,4 +71,4 @@ jobs:
               "release": {
                 "tag_name": "${{ steps.tag.outputs.tag }}"
               }
-            }
+            }
diff --git a/.github/workflows/ui-tests.yml b/.github/workflows/ui-tests.yml
@@ -0,0 +1,42 @@
+name: UI Tests
+
+on:
+  push:
+    branches: [ "main" ]
+    paths:
+      - 'ui-svelte/**'
+      - '.github/workflows/ui-tests.yml'
+
+  pull_request:
+    branches: [ "main" ]
+    paths:
+      - 'ui-svelte/**'
+      - '.github/workflows/ui-tests.yml'
+
+  workflow_dispatch:
+
+jobs:
+
+  run-tests:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ui-svelte
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Node.js
+      uses: actions/setup-node@v4
+      with:
+        node-version: '24'
+        cache: 'npm'
+        cache-dependency-path: ui-svelte/package-lock.json
+
+    - name: Install dependencies
+      run: npm ci
+
+    - name: Type check
+      run: npm run check
+
+    - name: Run tests
+      run: npm test
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -7,37 +7,45 @@ llmsnap is a light weight, transparent proxy server that provides automatic mode
 ## Tech stack
 
 - golang
-- typescript, vite and react for UI (ui/)
-
-## Testing
-
-- `make test-dev` - Use this when making iterative changes. Runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory
-- `make test-all` - runs at the end before completing work. Includes long running concurrency tests.
+- typescript, vite and react for UI (located in ui/)
 
 ## Workflow Tasks
 
-### Plan Improvements
+- when summarizing changes only include details that require further action
+- just say "Done." when there is no further action
+- use `gh` to create PRs and load issues
+- do include Co-Authored-By or created by when committing changes or creating PRs
+- keep PR descriptions short and focused on changes.
+  - never include a test plan
+
+## Testing
 
-Work plans are located in ai-plans/. Plans written by the user may be incomplete, contain inconsistencies or errors.
+- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
+- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
+- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
+- Use `make test-all` before completing work. This includes long running concurrency tests.
 
-When the user asks to improve a plan follow these guidelines for expanding and improving it.
+### Commit message example format:
 
-- Identify any inconsistencies.
-- Expand plans out to be detailed specification of requirements and changes to be made.
-- Plans should have at least these sections:
-  - Title - very short, describes changes
-  - Overview: A more detailed summary of goal and outcomes desired
-  - Design Requirements: Detailed descriptions of what needs to be done
-  - Testing Plan: Tests to be implemented
-  - Checklist: A detailed list of changes to be made
+```
+proxy: add new feature
 
-Look for "plan expansion" as explicit instructions to improve a plan.
+Add new feature that implements functionality X and Y.
 
-### Implementation of plans
+- key change 1
+- key change 2
+- key change 3
 
-When the user says "paint it", respond with "commencing automated assembly". Then implement the changes as described by the plan. Update the checklist as you complete items.
+fixes #123
+```
 
-## General Rules
+## Code Reviews
 
-- when summarizing changes only include details that require further action (action items)
-- when there are no action items, just say "Done."
+- use three severity levels: High, Medium and Low
+- label each discovered issue with a label like H1, M2, L3 respectively
+- High severity issues are must-fix (security, race conditions, critical bugs)
+- Medium severity issues are recommended improvements (coding style, missing functionality, inconsistencies)
+- Low severity issues are nice-to-have changes and nits
+- Include a suggestion with each discovered item
+- Limit your code review to three items with the highest priority first
+- Double check your discovered items and recommended remediations
diff --git a/Makefile b/Makefile
@@ -36,11 +36,11 @@ test-all: proxy/ui_dist/placeholder.txt
 	go test -race -count=1 ./proxy/...
 
 ui/node_modules:
-	cd ui && npm install
+	cd ui-svelte && npm install
 
 # build react UI
 ui: ui/node_modules
-	cd ui && npm run build
+	cd ui-svelte && npm run build
 
 # Build OSX binary
 mac: ui

diff --git a/README.md b/README.md
@@ -13,16 +13,21 @@ Built in Go for performance and simplicity, llmsnap has zero dependencies and is
 
 - ✅ Easy to deploy and configure: one binary, one configuration file. no external dependencies
 - ✅ On-demand model switching
-- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc.)
+- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, stable-diffusion.cpp, etc.)
   - future proof, upgrade your inference servers at any time.
 - ✅ OpenAI API supported endpoints:
   - `v1/completions`
   - `v1/chat/completions`
+  - `v1/responses`
   - `v1/embeddings`
   - `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
   - `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
+  - `v1/audio/voices`
+  - `v1/images/generations`
+  - `v1/images/edits`
 - ✅ Anthropic API supported endpoints:
   - `v1/messages`
+  - `v1/messages/count_tokens`
 - ✅ llama-server (llama.cpp) supported endpoints
   - `v1/rerank`, `v1/reranking`, `/rerank`
   - `/infill` - for code infilling
@@ -35,6 +40,7 @@ Built in Go for performance and simplicity, llmsnap has zero dependencies and is
   - `/running` - list currently running models ([#61](https://github.com/mostlygeek/llama-swap/issues/61))
   - `/log` - remote log monitoring
   - `/health` - just returns "OK"
+- ✅ API Key support - define keys to restrict access to API endpoints
 - ✅ Customizable
   - Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
   - Automatic unloading of models after timeout by setting a `ttl`
@@ -65,6 +71,7 @@ llmsnap can be installed in multiple ways
 ### Docker Install ([download images](https://github.com/napmany/llmsnap/pkgs/container/llmsnap))
 
 Nightly container images with llmsnap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md).
+The stable-diffusion.cpp server is also included for the musa and vulkan platforms.
 
 ```shell
 $ docker pull ghcr.io/napmany/llmsnap:cuda

diff --git a/cmd/simple-responder/simple-responder.go b/cmd/simple-responder/simple-responder.go
@@ -211,6 +211,11 @@ func main() {
 		})
 	})
 
+	r.GET("/v1/audio/voices", func(c *gin.Context) {
+		model := c.Query("model")
+		c.JSON(http.StatusOK, gin.H{"voices": []string{"voice1"}, "model": model})
+	})
+
 	r.GET("/slow-respond", func(c *gin.Context) {
 		echo := c.Query("echo")
 		delay := c.Query("delay")