From 3989c17d1b63f1f9ec90f5820eb27fd674b38530 Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Fri, 30 Jan 2026 01:29:53 +0100
Subject: [PATCH 01/13] add timeout feature

---
 config-schema.json           |  6 ++++++
 config.example.yaml          | 10 ++++++++++
 docs/configuration.md        | 10 ++++++++++
 proxy/config/model_config.go |  5 +++++
 proxy/process.go             | 26 ++++++++++++++++++++++++++
 5 files changed, 57 insertions(+)

diff --git a/config-schema.json b/config-schema.json
index 8baa0cc4..9b77344a 100644
--- a/config-schema.json
+++ b/config-schema.json
@@ -216,6 +216,12 @@
                         "type": "boolean",
                         "description": "Overrides the global sendLoadingState for this model. Ommitting this property will use the global setting."
                     },
+                    "requestTimeout": {
+                        "type": "integer",
+                        "minimum": 0,
+                        "default": 0,
+                        "description": "Maximum time in seconds for a single request to complete before forcefully killing the model process. This prevents runaway inference processes from blocking the GPU indefinitely. 0 disables timeout (default). When exceeded, the process is terminated and must be restarted for the next request."
+                    },
                     "unlisted": {
                         "type": "boolean",
                         "default": false,
diff --git a/config.example.yaml b/config.example.yaml
index d8282fc1..0ef80c02 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -249,6 +249,16 @@ models:
     # - recommended to be omitted and the default used
     concurrencyLimit: 0
 
+    # requestTimeout: maximum time in seconds for a single request to complete
+    # - optional, default: 0 (no timeout)
+    # - useful for preventing runaway inference processes that never complete
+    # - when exceeded, the model process is forcefully stopped
+    # - protects against GPU overheating and blocking from stuck processes
+    # - the process must be restarted for the next request
+    # - set to 0 to disable timeout
+    # - recommended for models that may have infinite loops or excessive generation
+    requestTimeout: 0  # disabled by default, set to e.g., 300 for 5 minutes
+
     # sendLoadingState: overrides the global sendLoadingState setting for this model
     # - optional, default: undefined (use global setting)
     sendLoadingState: false
diff --git a/docs/configuration.md b/docs/configuration.md
index 5aac2706..32713d57 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -319,6 +319,16 @@ models:
     # - recommended to be omitted and the default used
     concurrencyLimit: 0
 
+    # requestTimeout: maximum time in seconds for a single request to complete
+    # - optional, default: 0 (no timeout)
+    # - useful for preventing runaway inference processes that never complete
+    # - when exceeded, the model process is forcefully stopped
+    # - protects against GPU overheating and blocking from stuck processes
+    # - the process must be restarted for the next request
+    # - set to 0 to disable timeout
+    # - recommended for models that may have infinite loops or excessive generation
+    requestTimeout: 300  # 5 minutes
+
     # sendLoadingState: overrides the global sendLoadingState setting for this model
     # - optional, default: undefined (use global setting)
     sendLoadingState: false
diff --git a/proxy/config/model_config.go b/proxy/config/model_config.go
index 9dc37aea..6b2ba742 100644
--- a/proxy/config/model_config.go
+++ b/proxy/config/model_config.go
@@ -36,6 +36,10 @@ type ModelConfig struct {
 
 	// override global setting
 	SendLoadingState *bool `yaml:"sendLoadingState"`
+
+	// Maximum time in seconds for a request to complete before killing the process
+	// 0 means no timeout (default)
+	RequestTimeout int `yaml:"requestTimeout"`
 }
 
 func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
@@ -53,6 +57,7 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
 		ConcurrencyLimit: 0,
 		Name:             "",
 		Description:      "",
+		RequestTimeout:   0,
 	}
 
 	// the default cmdStop to taskkill /f /t /pid ${PID}
diff --git a/proxy/process.go b/proxy/process.go
index 41427059..5ada9723 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -500,6 +500,32 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
 		p.inFlightRequests.Done()
 	}()
 
+	// Start timeout monitoring if requestTimeout is configured
+	var timeoutCancel context.CancelFunc
+	if p.config.RequestTimeout > 0 {
+		timeoutCtx, cancel := context.WithCancel(context.Background())
+		timeoutCancel = cancel
+
+		go func() {
+			timeoutDuration := time.Duration(p.config.RequestTimeout) * time.Second
+			timer := time.NewTimer(timeoutDuration)
+			defer timer.Stop()
+
+			select {
+			case <-timer.C:
+				p.proxyLogger.Warnf("<%s> Request timeout exceeded (%v), force stopping process to prevent GPU blocking", p.ID, timeoutDuration)
+				// Force stop the process - this will kill the underlying inference process
+				p.StopImmediately()
+			case <-timeoutCtx.Done():
+				// Request completed normally, cancel timeout
+				return
+			}
+		}()
+
+		// Ensure timeout goroutine is cancelled when request completes
+		defer timeoutCancel()
+	}
+
 	// for #366
 	// - extract streaming param from request context, should have been set by proxymanager
 	var srw *statusResponseWriter

From c34372c60f47b5f3fae12aa34f03c9724726a5aa Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Fri, 30 Jan 2026 15:59:32 +0100
Subject: [PATCH 02/13] implement first draft of RPC health checking

---
 README.md                    |   2 +
 config-schema.json           |   5 ++
 config.example.yaml          |  18 ++++++
 docs/configuration.md        |  21 +++----
 proxy/config/config.go       |  43 ++++++++++++++
 proxy/config/config_test.go  | 105 +++++++++++++++++++++++++++++++++
 proxy/config/model_config.go |   4 ++
 proxy/process.go             | 111 ++++++++++++++++++++++++++++++++---
 proxy/process_test.go        |  33 ++++++-----
 proxy/processgroup.go        |   8 ++-
 proxy/processgroup_test.go   |   9 +--
 proxy/proxymanager.go        |  21 ++++++-
 12 files changed, 340 insertions(+), 40 deletions(-)

diff --git a/README.md b/README.md
index c2696235..b73ece6f 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
   - Automatic unloading of models after timeout by setting a `ttl`
   - Reliable Docker and Podman support using `cmd` and `cmdStop` together
   - Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235))
+  - RPC health checking for distributed inference - conditionally expose models based on RPC server availability
 
 ### Web UI
 
@@ -174,6 +175,7 @@ Almost all configuration settings are optional and can be added one step at a ti
   - `useModelName` to override model names sent to upstream servers
   - `${PORT}` automatic port variables for dynamic port assignment
   - `filters` rewrite parts of requests before sending to the upstream server
+  - `rpcHealthCheck` monitor RPC server health for distributed inference models
 
 See the [configuration documentation](docs/configuration.md) for all options.
 
diff --git a/config-schema.json b/config-schema.json
index 8baa0cc4..63c04ae1 100644
--- a/config-schema.json
+++ b/config-schema.json
@@ -220,6 +220,11 @@
                         "type": "boolean",
                         "default": false,
                         "description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests."
+                    },
+                    "rpcHealthCheck": {
+                        "type": "boolean",
+                        "default": false,
+                        "description": "Enable TCP health checks for RPC endpoints specified in cmd. When enabled, parses --rpc host:port[,host:port,...] from cmd and performs health checks every 30 seconds. Models with unhealthy RPC endpoints are filtered from /v1/models and return 503 on inference requests."
                     }
                 }
             }
diff --git a/config.example.yaml b/config.example.yaml
index d8282fc1..ea827099 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -262,6 +262,24 @@ models:
     unlisted: true
     cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
 
+  # RPC health check example for distributed inference:
+  "qwen-distributed":
+    # rpcHealthCheck: enable TCP health checks for RPC endpoints
+    # - optional, default: false
+    # - when enabled, parses --rpc host:port[,host:port,...] from cmd
+    # - performs TCP connectivity checks every 30 seconds while the model process is running
+    # - model is only listed in /v1/models when ALL RPC endpoints are healthy
+    # - inference requests to unhealthy models return HTTP 503
+    # - useful for distributed inference with llama.cpp's rpc-server
+    rpcHealthCheck: true
+    cmd: |
+      llama-server --port ${PORT}
+      --rpc 192.168.1.10:50051,192.168.1.11:50051
+      -m Qwen2.5-32B-Instruct-Q4_K_M.gguf
+      -ngl 99
+    name: "Qwen 32B (Distributed)"
+    description: "Large model using distributed RPC inference"
+
   # Docker example:
   # container runtimes like Docker and Podman can be used reliably with
   # a combination of cmd, cmdStop, and ${MODEL_ID}
diff --git a/docs/configuration.md b/docs/configuration.md
index 5aac2706..3c7e9363 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -72,16 +72,17 @@ models:
 
 llama-swap supports many more features to customize how you want to manage your environment.
 
-| Feature   | Description                                    |
-| --------- | ---------------------------------------------- |
-| `ttl`     | automatic unloading of models after a timeout  |
-| `macros`  | reusable snippets to use in configurations     |
-| `groups`  | run multiple models at a time                  |
-| `hooks`   | event driven functionality                     |
-| `env`     | define environment variables per model         |
-| `aliases` | serve a model with different names             |
-| `filters` | modify requests before sending to the upstream |
-| `...`     | And many more tweaks                           |
+| Feature           | Description                                             |
+| ----------------- | ------------------------------------------------------- |
+| `ttl`             | automatic unloading of models after a timeout           |
+| `macros`          | reusable snippets to use in configurations              |
+| `groups`          | run multiple models at a time                           |
+| `hooks`           | event driven functionality                              |
+| `env`             | define environment variables per model                  |
+| `aliases`         | serve a model with different names                      |
+| `filters`         | modify requests before sending to the upstream          |
+| `rpcHealthCheck`  | monitor RPC server health for distributed inference     |
+| `...`             | And many more tweaks                                    |
 
 ## Full Configuration Example
 
diff --git a/proxy/config/config.go b/proxy/config/config.go
index c4387f40..4b7dbb2d 100644
--- a/proxy/config/config.go
+++ b/proxy/config/config.go
@@ -3,6 +3,7 @@ package config
 import (
 	"fmt"
 	"io"
+	"net"
 	"net/url"
 	"os"
 	"regexp"
@@ -533,6 +534,48 @@ func SanitizeCommand(cmdStr string) ([]string, error) {
 	return args, nil
 }
 
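+// ParseRPCEndpoints extracts the RPC endpoints given via --rpc/-rpc in the command string and validates each as host:port.
+// Handles: --rpc host:port,host2:port2, --rpc=host:port, -rpc host:port and -rpc=host:port; if the flag repeats, the last occurrence wins.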
+func ParseRPCEndpoints(cmdStr string) ([]string, error) {
+	args, err := SanitizeCommand(cmdStr)
+	if err != nil {
+		return nil, err
+	}
+
+	var endpoints []string
+	for i, arg := range args {
+		if arg == "--rpc" || arg == "-rpc" {
+			if i+1 < len(args) {
+				endpoints = parseEndpointList(args[i+1])
+			}
+		} else if strings.HasPrefix(arg, "--rpc=") {
+			endpoints = parseEndpointList(strings.TrimPrefix(arg, "--rpc="))
+		} else if strings.HasPrefix(arg, "-rpc=") {
+			endpoints = parseEndpointList(strings.TrimPrefix(arg, "-rpc="))
+		}
+	}
+
+	// Validate each endpoint
+	for _, ep := range endpoints {
+		if _, _, err := net.SplitHostPort(ep); err != nil {
+			return nil, fmt.Errorf("invalid RPC endpoint %q: %w", ep, err)
+		}
+	}
+
+	return endpoints, nil
+}
+
+func parseEndpointList(s string) []string {
+	parts := strings.Split(s, ",")
+	var result []string
+	for _, p := range parts {
+		if p = strings.TrimSpace(p); p != "" {
+			result = append(result, p)
+		}
+	}
+	return result
+}
+
 func StripComments(cmdStr string) string {
 	var cleanedLines []string
 	for _, line := range strings.Split(cmdStr, "\n") {
diff --git a/proxy/config/config_test.go b/proxy/config/config_test.go
index a19cbb56..11552f9d 100644
--- a/proxy/config/config_test.go
+++ b/proxy/config/config_test.go
@@ -1309,3 +1309,108 @@ peers:
 		assert.Contains(t, err.Error(), "unknown macro")
 	})
 }
+
+func TestParseRPCEndpoints_ValidFormats(t *testing.T) {
+	tests := []struct {
+		name     string
+		cmd      string
+		expected []string
+	}{
+		{
+			name:     "single endpoint with --rpc",
+			cmd:      "llama-server --rpc localhost:50051 -ngl 99",
+			expected: []string{"localhost:50051"},
+		},
+		{
+			name:     "single endpoint with --rpc=",
+			cmd:      "llama-server --rpc=192.168.1.100:50051 -ngl 99",
+			expected: []string{"192.168.1.100:50051"},
+		},
+		{
+			name:     "single endpoint with -rpc",
+			cmd:      "llama-server -rpc localhost:50051 -ngl 99",
+			expected: []string{"localhost:50051"},
+		},
+		{
+			name:     "single endpoint with -rpc=",
+			cmd:      "llama-server -rpc=localhost:50051 -ngl 99",
+			expected: []string{"localhost:50051"},
+		},
+		{
+			name:     "multiple endpoints comma-separated",
+			cmd:      "llama-server --rpc 192.168.1.10:50051,192.168.1.11:50051 -ngl 99",
+			expected: []string{"192.168.1.10:50051", "192.168.1.11:50051"},
+		},
+		{
+			name:     "multiple endpoints with spaces trimmed",
+			cmd:      "llama-server --rpc '192.168.1.10:50051, 192.168.1.11:50051' -ngl 99",
+			expected: []string{"192.168.1.10:50051", "192.168.1.11:50051"},
+		},
+		{
+			name:     "IPv6 endpoint",
+			cmd:      "llama-server --rpc [::1]:50051 -ngl 99",
+			expected: []string{"[::1]:50051"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			endpoints, err := ParseRPCEndpoints(tt.cmd)
+			assert.NoError(t, err)
+			assert.Equal(t, tt.expected, endpoints)
+		})
+	}
+}
+
+func TestParseRPCEndpoints_NoRPCFlag(t *testing.T) {
+	cmd := "llama-server -ngl 99 -m model.gguf"
+	endpoints, err := ParseRPCEndpoints(cmd)
+	assert.NoError(t, err)
+	assert.Empty(t, endpoints)
+}
+
+func TestParseRPCEndpoints_InvalidFormats(t *testing.T) {
+	tests := []struct {
+		name    string
+		cmd     string
+		wantErr string
+	}{
+		{
+			name:    "missing port",
+			cmd:     "llama-server --rpc localhost -ngl 99",
+			wantErr: "invalid RPC endpoint",
+		},
+		{
+			name:    "invalid host:port format",
+			cmd:     "llama-server --rpc not-a-valid-endpoint -ngl 99",
+			wantErr: "invalid RPC endpoint",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			_, err := ParseRPCEndpoints(tt.cmd)
+			assert.Error(t, err)
+			assert.Contains(t, err.Error(), tt.wantErr)
+		})
+	}
+}
+
+func TestParseRPCEndpoints_EmptyEndpointsFiltered(t *testing.T) {
+	// Empty strings after commas are filtered out
+	cmd := "llama-server --rpc 'localhost:50051,,' -ngl 99"
+	endpoints, err := ParseRPCEndpoints(cmd)
+	assert.NoError(t, err)
+	assert.Equal(t, []string{"localhost:50051"}, endpoints)
+}
+
+func TestParseRPCEndpoints_MultilineCommand(t *testing.T) {
+	cmd := `llama-server \
+		--rpc localhost:50051 \
+		-ngl 99 \
+		-m model.gguf`
+
+	endpoints, err := ParseRPCEndpoints(cmd)
+	assert.NoError(t, err)
+	assert.Equal(t, []string{"localhost:50051"}, endpoints)
+}
diff --git a/proxy/config/model_config.go b/proxy/config/model_config.go
index 9dc37aea..e5635b24 100644
--- a/proxy/config/model_config.go
+++ b/proxy/config/model_config.go
@@ -36,6 +36,9 @@ type ModelConfig struct {
 
 	// override global setting
 	SendLoadingState *bool `yaml:"sendLoadingState"`
+
+	// RPC health checking
+	RPCHealthCheck bool `yaml:"rpcHealthCheck"`
 }
 
 func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
@@ -53,6 +56,7 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
 		ConcurrencyLimit: 0,
 		Name:             "",
 		Description:      "",
+		RPCHealthCheck:   false,
 	}
 
 	// the default cmdStop to taskkill /f /t /pid ${PID}
diff --git a/proxy/process.go b/proxy/process.go
index 41427059..dee5f962 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -79,18 +79,25 @@ type Process struct {
 
 	// track the number of failed starts
 	failedStartCount int
+
+	// RPC health checking
+	rpcEndpoints    []string
+	rpcHealthy      atomic.Bool
+	rpcHealthTicker *time.Ticker
+	rpcHealthCancel context.CancelFunc
+	shutdownCtx     context.Context // from ProxyManager for graceful shutdown
 }
 
-func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor) *Process {
+func NewProcess(ID string, healthCheckTimeout int, modelConfig config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor, shutdownCtx context.Context) *Process {
 	concurrentLimit := 10
-	if config.ConcurrencyLimit > 0 {
-		concurrentLimit = config.ConcurrencyLimit
+	if modelConfig.ConcurrencyLimit > 0 {
+		concurrentLimit = modelConfig.ConcurrencyLimit
 	}
 
 	// Setup the reverse proxy.
-	proxyURL, err := url.Parse(config.Proxy)
+	proxyURL, err := url.Parse(modelConfig.Proxy)
 	if err != nil {
-		proxyLogger.Errorf("<%s> invalid proxy URL %q: %v", ID, config.Proxy, err)
+		proxyLogger.Errorf("<%s> invalid proxy URL %q: %v", ID, modelConfig.Proxy, err)
 	}
 
 	var reverseProxy *httputil.ReverseProxy
@@ -105,9 +112,9 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
 		}
 	}
 
-	return &Process{
+	p := &Process{
 		ID:                      ID,
-		config:                  config,
+		config:                  modelConfig,
 		cmd:                     nil,
 		reverseProxy:            reverseProxy,
 		cancelUpstream:          nil,
@@ -124,7 +131,23 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
 		// stop timeout
 		gracefulStopTimeout: 10 * time.Second,
 		cmdWaitChan:         make(chan struct{}),
+		shutdownCtx:         shutdownCtx,
+	}
+
+	// Parse RPC endpoints if health checking enabled
+	if modelConfig.RPCHealthCheck {
+		endpoints, err := config.ParseRPCEndpoints(modelConfig.Cmd)
+		if err != nil {
+			proxyLogger.Errorf("<%s> failed to parse RPC endpoints: %v", ID, err)
+		} else if len(endpoints) == 0 {
+			proxyLogger.Warnf("<%s> rpcHealthCheck enabled but no --rpc flag found in cmd", ID)
+		} else {
+			p.rpcEndpoints = endpoints
+			p.rpcHealthy.Store(true) // assume healthy initially
+		}
 	}
+
+	return p
 }
 
 // LogMonitor returns the log monitor associated with the process.
@@ -362,6 +385,7 @@ func (p *Process) start() error {
 		return fmt.Errorf("failed to set Process state to ready: current state: %v, error: %v", curState, err)
 	} else {
 		p.failedStartCount = 0
+		p.startRPCHealthChecker()
 		return nil
 	}
 }
@@ -385,6 +409,8 @@ func (p *Process) StopImmediately() {
 		return
 	}
 
+	p.stopRPCHealthChecker()
+
 	p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState())
 	if curState, err := p.swapState(StateReady, StateStopping); err != nil {
 		p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState)
@@ -877,3 +903,74 @@ func (s *statusResponseWriter) Flush() {
 		flusher.Flush()
 	}
 }
+
+// startRPCHealthChecker launches a background goroutine that checks RPC health immediately and then every 30 seconds while the process is ready
+func (p *Process) startRPCHealthChecker() {
+	if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 {
+		return
+	}
+
+	ctx, cancel := context.WithCancel(p.shutdownCtx)
+	p.rpcHealthCancel = cancel
+	p.rpcHealthTicker = time.NewTicker(30 * time.Second)
+
+	go func() {
+		defer p.rpcHealthTicker.Stop()
+
+		// Run initial check immediately
+		p.checkRPCHealth()
+
+		for {
+			select {
+			case <-ctx.Done():
+				p.proxyLogger.Debugf("<%s> RPC health checker shutting down", p.ID)
+				return
+			case <-p.rpcHealthTicker.C:
+				if p.CurrentState() != StateReady {
+					return // Process no longer ready, exit
+				}
+				p.checkRPCHealth()
+			}
+		}
+	}()
+}
+
+func (p *Process) checkRPCHealth() {
+	allHealthy := true
+
+	for _, endpoint := range p.rpcEndpoints {
+		dialer := net.Dialer{Timeout: 500 * time.Millisecond}
+		conn, err := dialer.Dial("tcp", endpoint)
+		if err != nil {
+			p.proxyLogger.Warnf("<%s> RPC endpoint %s unhealthy: %v", p.ID, endpoint, err)
+			allHealthy = false
+			break
+		}
+		conn.Close()
+	}
+
+	wasHealthy := p.rpcHealthy.Load()
+	p.rpcHealthy.Store(allHealthy)
+
+	// Log state changes
+	if wasHealthy && !allHealthy {
+		p.proxyLogger.Infof("<%s> RPC endpoints now UNHEALTHY", p.ID)
+	} else if !wasHealthy && allHealthy {
+		p.proxyLogger.Infof("<%s> RPC endpoints now HEALTHY", p.ID)
+	}
+}
+
+func (p *Process) stopRPCHealthChecker() {
+	if p.rpcHealthCancel != nil {
+		p.rpcHealthCancel()
+		p.rpcHealthCancel = nil
+	}
+}
+
+// IsRPCHealthy returns true if RPC health checking is disabled, no endpoints were parsed, or the most recent check found all endpoints healthy
+func (p *Process) IsRPCHealthy() bool {
+	if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 {
+		return true // not using RPC health checks
+	}
+	return p.rpcHealthy.Load()
+}
diff --git a/proxy/process_test.go b/proxy/process_test.go
index 3881c3dd..87e31d6d 100644
--- a/proxy/process_test.go
+++ b/proxy/process_test.go
@@ -1,6 +1,7 @@
 package proxy
 
 import (
+	"context"
 	"fmt"
 	"net/http"
 	"net/http/httptest"
@@ -33,7 +34,7 @@ func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
 	config := getTestSimpleResponderConfig(expectedMessage)
 
 	// Create a process
-	process := NewProcess("test-process", 5, config, debugLogger, debugLogger)
+	process := NewProcess("test-process", 5, config, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	req := httptest.NewRequest("GET", "/test", nil)
@@ -69,7 +70,7 @@ func TestProcess_WaitOnMultipleStarts(t *testing.T) {
 	expectedMessage := "testing91931"
 	config := getTestSimpleResponderConfig(expectedMessage)
 
-	process := NewProcess("test-process", 5, config, debugLogger, debugLogger)
+	process := NewProcess("test-process", 5, config, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	var wg sync.WaitGroup
@@ -97,7 +98,7 @@ func TestProcess_BrokenModelConfig(t *testing.T) {
 		CheckEndpoint: "/health",
 	}
 
-	process := NewProcess("broken", 1, config, debugLogger, debugLogger)
+	process := NewProcess("broken", 1, config, debugLogger, debugLogger, context.Background())
 
 	req := httptest.NewRequest("GET", "/", nil)
 	w := httptest.NewRecorder()
@@ -122,7 +123,7 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
 	config.UnloadAfter = 3 // seconds
 	assert.Equal(t, 3, config.UnloadAfter)
 
-	process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger)
+	process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	// this should take 4 seconds
@@ -164,7 +165,7 @@ func TestProcess_LowTTLValue(t *testing.T) {
 	config.UnloadAfter = 1 // second
 	assert.Equal(t, 1, config.UnloadAfter)
 
-	process := NewProcess("ttl", 2, config, debugLogger, debugLogger)
+	process := NewProcess("ttl", 2, config, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	for i := 0; i < 100; i++ {
@@ -191,7 +192,7 @@ func TestProcess_HTTPRequestsHaveTimeToFinish(t *testing.T) {
 
 	expectedMessage := "12345"
 	config := getTestSimpleResponderConfig(expectedMessage)
-	process := NewProcess("t", 10, config, debugLogger, debugLogger)
+	process := NewProcess("t", 10, config, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	results := map[string]string{
@@ -264,7 +265,7 @@ func TestProcess_SwapState(t *testing.T) {
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			p := NewProcess("test", 10, getTestSimpleResponderConfig("test"), debugLogger, debugLogger)
+			p := NewProcess("test", 10, getTestSimpleResponderConfig("test"), debugLogger, debugLogger, context.Background())
 			p.state = test.currentState
 
 			resultState, err := p.swapState(test.expectedState, test.newState)
@@ -297,7 +298,7 @@ func TestProcess_ShutdownInterruptsHealthCheck(t *testing.T) {
 	config.Proxy = "http://localhost:9998/test"
 
 	healthCheckTTLSeconds := 30
-	process := NewProcess("test-process", healthCheckTTLSeconds, config, debugLogger, debugLogger)
+	process := NewProcess("test-process", healthCheckTTLSeconds, config, debugLogger, debugLogger, context.Background())
 
 	// make it a lot faster
 	process.healthCheckLoopInterval = time.Second
@@ -332,7 +333,7 @@ func TestProcess_ExitInterruptsHealthCheck(t *testing.T) {
 		CheckEndpoint: "/health",
 	}
 
-	process := NewProcess("sleepy", checkHealthTimeout, config, debugLogger, debugLogger)
+	process := NewProcess("sleepy", checkHealthTimeout, config, debugLogger, debugLogger, context.Background())
 	process.healthCheckLoopInterval = time.Second // make it faster
 	err := process.start()
 	assert.Equal(t, "upstream command exited prematurely but successfully", err.Error())
@@ -350,7 +351,7 @@ func TestProcess_ConcurrencyLimit(t *testing.T) {
 	// only allow 1 concurrent request at a time
 	config.ConcurrencyLimit = 1
 
-	process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger)
+	process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger, context.Background())
 	assert.Equal(t, 1, cap(process.concurrencyLimitSemaphore))
 	defer process.Stop()
 
@@ -375,7 +376,7 @@ func TestProcess_StopImmediately(t *testing.T) {
 	expectedMessage := "test_stop_immediate"
 	config := getTestSimpleResponderConfig(expectedMessage)
 
-	process := NewProcess("stop_immediate", 2, config, debugLogger, debugLogger)
+	process := NewProcess("stop_immediate", 2, config, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	err := process.start()
@@ -415,7 +416,7 @@ func TestProcess_ForceStopWithKill(t *testing.T) {
 		CheckEndpoint: "/health",
 	}
 
-	process := NewProcess("stop_immediate", 2, conf, debugLogger, debugLogger)
+	process := NewProcess("stop_immediate", 2, conf, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	// reduce to make testing go faster
@@ -465,7 +466,7 @@ func TestProcess_StopCmd(t *testing.T) {
 		conf.CmdStop = "kill -TERM ${PID}"
 	}
 
-	process := NewProcess("testStopCmd", 2, conf, debugLogger, debugLogger)
+	process := NewProcess("testStopCmd", 2, conf, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	err := process.start()
@@ -485,8 +486,8 @@ func TestProcess_EnvironmentSetCorrectly(t *testing.T) {
 	// ensure the additiona variables are appended to the process' environment
 	configWEnv.Env = append(configWEnv.Env, "TEST_ENV1=1", "TEST_ENV2=2")
 
-	process1 := NewProcess("env_test", 2, conf, debugLogger, debugLogger)
-	process2 := NewProcess("env_test", 2, configWEnv, debugLogger, debugLogger)
+	process1 := NewProcess("env_test", 2, conf, debugLogger, debugLogger, context.Background())
+	process2 := NewProcess("env_test", 2, configWEnv, debugLogger, debugLogger, context.Background())
 
 	process1.start()
 	defer process1.Stop()
@@ -521,7 +522,7 @@ func TestProcess_ReverseProxyPanicIsHandled(t *testing.T) {
 	expectedMessage := "panic_test"
 	config := getTestSimpleResponderConfig(expectedMessage)
 
-	process := NewProcess("panic-test", 5, config, debugLogger, debugLogger)
+	process := NewProcess("panic-test", 5, config, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	// Start the process
diff --git a/proxy/processgroup.go b/proxy/processgroup.go
index b401d8a6..c920f302 100644
--- a/proxy/processgroup.go
+++ b/proxy/processgroup.go
@@ -1,6 +1,7 @@
 package proxy
 
 import (
+	"context"
 	"fmt"
 	"net/http"
 	"slices"
@@ -24,9 +25,11 @@ type ProcessGroup struct {
 	// map of current processes
 	processes       map[string]*Process
 	lastUsedProcess string
+
+	shutdownCtx context.Context
 }
 
-func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup {
+func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor, shutdownCtx context.Context) *ProcessGroup {
 	groupConfig, ok := config.Groups[id]
 	if !ok {
 		panic("Unable to find configuration for group id: " + id)
@@ -41,13 +44,14 @@ func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, u
 		proxyLogger:    proxyLogger,
 		upstreamLogger: upstreamLogger,
 		processes:      make(map[string]*Process),
+		shutdownCtx:    shutdownCtx,
 	}
 
 	// Create a Process for each member in the group
 	for _, modelID := range groupConfig.Members {
 		modelConfig, modelID, _ := pg.config.FindConfig(modelID)
 		processLogger := NewLogMonitorWriter(upstreamLogger)
-		process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger)
+		process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger, shutdownCtx)
 		pg.processes[modelID] = process
 	}
 
diff --git a/proxy/processgroup_test.go b/proxy/processgroup_test.go
index 6b90f443..55e5276a 100644
--- a/proxy/processgroup_test.go
+++ b/proxy/processgroup_test.go
@@ -2,6 +2,7 @@ package proxy
 
 import (
 	"bytes"
+	"context"
 	"net/http"
 	"net/http/httptest"
 	"sync"
@@ -35,12 +36,12 @@ var processGroupTestConfig = config.AddDefaultGroupToConfig(config.Config{
 })
 
 func TestProcessGroup_DefaultHasCorrectModel(t *testing.T) {
-	pg := NewProcessGroup(config.DEFAULT_GROUP_ID, processGroupTestConfig, testLogger, testLogger)
+	pg := NewProcessGroup(config.DEFAULT_GROUP_ID, processGroupTestConfig, testLogger, testLogger, context.Background())
 	assert.True(t, pg.HasMember("model5"))
 }
 
 func TestProcessGroup_HasMember(t *testing.T) {
-	pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger)
+	pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger, context.Background())
 	assert.True(t, pg.HasMember("model1"))
 	assert.True(t, pg.HasMember("model2"))
 	assert.False(t, pg.HasMember("model3"))
@@ -74,7 +75,7 @@ func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) {
 		},
 	})
 
-	pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger)
+	pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger, context.Background())
 	defer pg.StopProcesses(StopWaitForInflightRequest)
 
 	tests := []string{"model1", "model2", "model3", "model4", "model5"}
@@ -96,7 +97,7 @@ func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) {
 }
 
 func TestProcessGroup_ProxyRequestSwapIsFalse(t *testing.T) {
-	pg := NewProcessGroup("G2", processGroupTestConfig, testLogger, testLogger)
+	pg := NewProcessGroup("G2", processGroupTestConfig, testLogger, testLogger, context.Background())
 	defer pg.StopProcesses(StopWaitForInflightRequest)
 
 	tests := []string{"model3", "model4"}
diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go
index 5a016bc5..446362ce 100644
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -167,7 +167,7 @@ func New(proxyConfig config.Config) *ProxyManager {
 
 	// create the process groups
 	for groupID := range proxyConfig.Groups {
-		processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger)
+		processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger, shutdownCtx)
 		pm.processGroups[groupID] = processGroup
 	}
 
@@ -475,6 +475,16 @@ func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
 			continue
 		}
 
+		// Filter models with unhealthy RPC endpoints
+		if processGroup := pm.findGroupByModelName(id); processGroup != nil {
+			if process, ok := processGroup.GetMember(id); ok {
+				if !process.IsRPCHealthy() {
+					pm.proxyLogger.Debugf("<%s> filtered from /v1/models (unhealthy RPC)", id)
+					continue
+				}
+			}
+		}
+
 		data = append(data, newRecord(id, modelConfig))
 
 		// Include aliases
@@ -627,6 +637,15 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) {
 			return
 		}
 
+		// Check RPC health before processing request
+		if process, ok := processGroup.GetMember(modelID); ok {
+			if !process.IsRPCHealthy() {
+				pm.sendErrorResponse(c, http.StatusServiceUnavailable,
+					fmt.Sprintf("model %s unavailable (RPC endpoints unhealthy)", modelID))
+				return
+			}
+		}
+
 		// issue #69 allow custom model names to be sent to upstream
 		useModelName := pm.config.Models[modelID].UseModelName
 		if useModelName != "" {

From 29ef36405921a5239459f7968b9f22f006a2e4d0 Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Fri, 30 Jan 2026 16:27:20 +0100
Subject: [PATCH 03/13] proxy/config: fix RPC endpoint parsing on Windows

Fix parseEndpointList to handle single and double quotes that are
treated as literal characters on Windows.

- Strip surrounding quotes before parsing comma-separated endpoints
- Fixes test failures on Windows CI

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 proxy/config/config.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/proxy/config/config.go b/proxy/config/config.go
index 4b7dbb2d..ac8a9b54 100644
--- a/proxy/config/config.go
+++ b/proxy/config/config.go
@@ -566,6 +566,15 @@ func ParseRPCEndpoints(cmdStr string) ([]string, error) {
 }
 
 func parseEndpointList(s string) []string {
+	// Strip surrounding quotes (both single and double) that may be present
+	// on Windows where single quotes are not handled by the shell parser
+	s = strings.TrimSpace(s)
+	if len(s) >= 2 {
+		if (s[0] == '\'' && s[len(s)-1] == '\'') || (s[0] == '"' && s[len(s)-1] == '"') {
+			s = s[1 : len(s)-1]
+		}
+	}
+
 	parts := strings.Split(s, ",")
 	var result []string
 	for _, p := range parts {

From ac074d15ea3962f0019b1647d60f0ccf3e05743a Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Fri, 30 Jan 2026 18:12:38 +0100
Subject: [PATCH 04/13] fix unit test

---
 proxy/config/config.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/proxy/config/config.go b/proxy/config/config.go
index ac8a9b54..1866d03f 100644
--- a/proxy/config/config.go
+++ b/proxy/config/config.go
@@ -566,9 +566,10 @@ func ParseRPCEndpoints(cmdStr string) ([]string, error) {
 }
 
 func parseEndpointList(s string) []string {
-	// Strip surrounding quotes (both single and double) that may be present
-	// on Windows where single quotes are not handled by the shell parser
 	s = strings.TrimSpace(s)
+
+	// Strip surrounding quotes (both single and double) from the whole string
+	// if they match. This handles cases like: "host:port,host2:port2"
 	if len(s) >= 2 {
 		if (s[0] == '\'' && s[len(s)-1] == '\'') || (s[0] == '"' && s[len(s)-1] == '"') {
 			s = s[1 : len(s)-1]
@@ -578,7 +579,12 @@ func parseEndpointList(s string) []string {
 	parts := strings.Split(s, ",")
 	var result []string
 	for _, p := range parts {
-		if p = strings.TrimSpace(p); p != "" {
+		p = strings.TrimSpace(p)
+		// Strip any remaining leading/trailing quotes from individual parts
+		// This handles Windows where shlex doesn't handle single quotes and
+		// may split 'host:port, host2:port' into "'host:port," and "host2:port'"
+		p = strings.Trim(p, "'\"")
+		if p != "" {
 			result = append(result, p)
 		}
 	}

From c8f27617df918bbb7eb53e35ed12a2534d36c4eb Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Fri, 30 Jan 2026 23:03:58 +0100
Subject: [PATCH 05/13] rework web interface

---
 llama-swap.go                          |  16 +-
 proxy/proxymanager.go                  |   9 +
 proxy/proxymanager_api.go              |  64 ++++++
 test-config.yaml                       | 264 ++++++++++++++++++++++
 ui-svelte/package-lock.json            | 200 ++++++++++++++++-
 ui-svelte/package.json                 |   6 +
 ui-svelte/src/App.svelte               |   2 +
 ui-svelte/src/components/Header.svelte |   8 +
 ui-svelte/src/routes/Config.svelte     | 300 +++++++++++++++++++++++++
 9 files changed, 857 insertions(+), 12 deletions(-)
 create mode 100644 test-config.yaml
 create mode 100644 ui-svelte/src/routes/Config.svelte

diff --git a/llama-swap.go b/llama-swap.go
index 9706e07d..60ccbc73 100644
--- a/llama-swap.go
+++ b/llama-swap.go
@@ -97,6 +97,7 @@ func main() {
 			currentPM.Shutdown()
 			newPM := proxy.New(conf)
 			newPM.SetVersion(date, commit, version)
+			newPM.SetConfigPath(*configPath)
 			srv.Handler = newPM
 			fmt.Println("Configuration Reloaded")
 
@@ -114,6 +115,7 @@ func main() {
 			}
 			newPM := proxy.New(conf)
 			newPM.SetVersion(date, commit, version)
+			newPM.SetConfigPath(*configPath)
 			srv.Handler = newPM
 		}
 	}
@@ -121,13 +123,15 @@ func main() {
 	// load the initial proxy manager
 	reloadProxyManager()
 	debouncedReload := debounce(time.Second, reloadProxyManager)
-	if *watchConfig {
-		defer event.On(func(e proxy.ConfigFileChangedEvent) {
-			if e.ReloadingState == proxy.ReloadingStateStart {
-				debouncedReload()
-			}
-		})()
 
+	// Always listen for API-triggered config changes
+	defer event.On(func(e proxy.ConfigFileChangedEvent) {
+		if e.ReloadingState == proxy.ReloadingStateStart {
+			debouncedReload()
+		}
+	})()
+
+	if *watchConfig {
 		fmt.Println("Watching Configuration for changes")
 		go func() {
 			absConfigPath, err := filepath.Abs(*configPath)
diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go
index 5a016bc5..bf227120 100644
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -52,6 +52,9 @@ type ProxyManager struct {
 	commit    string
 	version   string
 
+	// config file path for editing
+	configPath string
+
 	// peer proxy see: #296, #433
 	peerProxy *PeerProxy
 }
@@ -966,3 +969,11 @@ func (pm *ProxyManager) SetVersion(buildDate string, commit string, version stri
 	pm.commit = commit
 	pm.version = version
 }
+
+// SetConfigPath stores configPath in pm.configPath. The assignment is made
+// while holding the ProxyManager's lock.
+func (pm *ProxyManager) SetConfigPath(configPath string) {
+	pm.Lock()
+	defer pm.Unlock()
+	pm.configPath = configPath
+}
diff --git a/proxy/proxymanager_api.go b/proxy/proxymanager_api.go
index fe4326d0..f590cd6e 100644
--- a/proxy/proxymanager_api.go
+++ b/proxy/proxymanager_api.go
@@ -4,7 +4,9 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
 	"net/http"
+	"os"
 	"sort"
 	"strings"
 
@@ -31,6 +33,9 @@ func addApiHandlers(pm *ProxyManager) {
 		apiGroup.GET("/events", pm.apiSendEvents)
 		apiGroup.GET("/metrics", pm.apiGetMetrics)
 		apiGroup.GET("/version", pm.apiGetVersion)
+		apiGroup.GET("/config/current", pm.apiGetCurrentConfig)
+		apiGroup.GET("/config/example", pm.apiGetExampleConfig)
+		apiGroup.POST("/config", pm.apiUpdateConfig)
 	}
 }
 
@@ -250,3 +255,88 @@ func (pm *ProxyManager) apiGetVersion(c *gin.Context) {
 		"build_date": pm.buildDate,
 	})
 }
+
+// apiGetCurrentConfig responds with the raw bytes of the file at pm.configPath
+// as text/yaml. It answers 404 when no path is set and 500 when the read fails.
+func (pm *ProxyManager) apiGetCurrentConfig(c *gin.Context) {
+	// Copy the path under the lock so the file read happens without holding it.
+	pm.Lock()
+	configPath := pm.configPath
+	pm.Unlock()
+
+	if configPath == "" {
+		pm.sendErrorResponse(c, http.StatusNotFound, "Config file path not set")
+		return
+	}
+
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to read config file: %v", err))
+		return
+	}
+
+	c.Data(http.StatusOK, "text/yaml; charset=utf-8", data)
+}
+
+// apiGetExampleConfig responds with the contents of config.example.yaml as
+// text/yaml. The relative file name is resolved against the process working
+// directory; a 500 is returned when the file cannot be read.
+func (pm *ProxyManager) apiGetExampleConfig(c *gin.Context) {
+	data, err := os.ReadFile("config.example.yaml")
+	if err != nil {
+		pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to read example config: %v", err))
+		return
+	}
+
+	c.Data(http.StatusOK, "text/yaml; charset=utf-8", data)
+}
+
+// apiUpdateConfig replaces the file at pm.configPath with the request body and
+// then emits a ConfigFileChangedEvent with ReloadingStateStart.
+//
+// Responses: 400 when no config path is set, the body cannot be read, or the
+// body is empty; 413 when the body exceeds maxConfigBytes; 500 when the write
+// fails; 200 with a JSON message otherwise. The body is not parsed or
+// validated as YAML before it is written.
+func (pm *ProxyManager) apiUpdateConfig(c *gin.Context) {
+	// Upper bound on an uploaded config so a client cannot force unbounded buffering.
+	const maxConfigBytes = 1 << 20 // 1 MiB
+
+	pm.Lock()
+	configPath := pm.configPath
+	pm.Unlock()
+
+	if configPath == "" {
+		pm.sendErrorResponse(c, http.StatusBadRequest, "Config file path not set")
+		return
+	}
+
+	body, err := io.ReadAll(http.MaxBytesReader(c.Writer, c.Request.Body, maxConfigBytes))
+	if err != nil {
+		status := http.StatusBadRequest
+		if _, tooLarge := err.(*http.MaxBytesError); tooLarge {
+			status = http.StatusRequestEntityTooLarge
+		}
+		pm.sendErrorResponse(c, status, fmt.Sprintf("Failed to read request body: %v", err))
+		return
+	}
+
+	// An empty or whitespace-only body would wipe the config file; refuse it.
+	if strings.TrimSpace(string(body)) == "" {
+		pm.sendErrorResponse(c, http.StatusBadRequest, "Config body is empty")
+		return
+	}
+
+	// Write to config file
+	if err := os.WriteFile(configPath, body, 0644); err != nil {
+		pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to write config file: %v", err))
+		return
+	}
+
+	// Trigger config reload event
+	event.Emit(ConfigFileChangedEvent{
+		ReloadingState: ReloadingStateStart,
+	})
+
+	c.JSON(http.StatusOK, gin.H{"message": "Config updated successfully. Reloading..."})
+}
diff --git a/test-config.yaml b/test-config.yaml
new file mode 100644
index 00000000..15fd5784
--- /dev/null
+++ b/test-config.yaml
@@ -0,0 +1,264 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json
+#
+# llama-swap configuration for 16GB VRAM AMD Radeon RX 6800 XT (gfx1030)
+# Optimized for headless system with no display overhead
+# -------------------------------------
+
+healthCheckTimeout: 300
+logLevel: info
+logTimeFormat: "rfc3339"
+logToStdout: "proxy"
+metricsMaxInMemory: 1000
+startPort: 10001
+sendLoadingState: false
+includeAliasesInList: false
+
+macros:
+  "latest-llama": >
+    /home/svc-gpgpu/.local/bin/llama-server
+    --port ${PORT} --host 0.0.0.0 -b 512 -ub 32 -np 1
+  "default_ctx": 4096
+  "rocm_device": "0"
+
+models:
+  # ========================================
+  # GENERAL PURPOSE MODELS
+  # ========================================
+
+  "qwen3:14b-q5_k_m-32768":
+    cmd: |
+      ${latest-llama}
+      -hf Qwen/Qwen3-14B-GGUF:q5_k_m
+      --ctx-size 32768
+      -fa auto
+      -ctv q8_0
+      -ctk q8_0
+      -ngl 99
+      --jinja
+      --mmap
+      -b 512
+    name: "qwen3:14b-q5_k_m-32768"
+    description: "VRAM: 12505 MiB"
+    ttl: 600
+
+  "qwen3:8b-q5_k_m-40960":
+    cmd: |
+      ${latest-llama}
+      -hf Qwen/Qwen3-8B-GGUF:q5_k_m
+      --ctx-size 40960
+      -fa auto
+      -ctv q8_0
+      -ctk q8_0
+      -ngl 99
+      --jinja
+      --mmap
+      -b 512
+    name: "qwen3:8b-q5_k_m-40960"
+    description: "VRAM: 8491 MiB"
+    ttl: 600
+
+  "qwen3:8b-q8_0-32768":
+    cmd: |
+      ${latest-llama}
+      -hf Qwen/Qwen3-8B-GGUF:q8_0
+      --ctx-size 32768
+      -fa auto
+      -ctv q8_0
+      -ctk q8_0
+      -ngl 99
+      --jinja
+      --mmap
+      -b 512
+    name: "qwen3:8b-q8_0-32768"
+    description: "VRAM: 10381 MiB"
+    ttl: 600
+
+  "ministral-3:14b-instruct-q5_k_m-20480-vision":
+    cmd: |
+      ${latest-llama}
+      -hf mistralai/Ministral-3-14B-Instruct-2512-GGUF:q5_k_m
+      --ctx-size 20480
+      -fa off
+      -ngl 99
+      --mmap
+      --jinja
+      --mmproj-auto
+    name: "ministral-3:14b-instruct-q5_k_m-20480-vision"
+    description: "VRAM: 13184 MiB"
+    ttl: 600
+
+  "ministral-3:14b-reasoning-q5_k_m-20480-vision":
+    cmd: |
+      ${latest-llama}
+      -hf mistralai/Ministral-3-14B-Reasoning-2512-GGUF:q5_k_m
+      --ctx-size 20480
+      -fa off
+      -ngl 99
+      --mmap
+      --jinja
+      --mmproj-auto
+    name: "ministral-3:14b-reasoning-q5_k_m-20480-vision"
+    description: "VRAM: 13184 MiB"
+    ttl: 600
+
+  "ministral-3:14b-instruct-q5_k_m-32768":
+    cmd: |
+      ${latest-llama}
+      -hf mistralai/Ministral-3-14B-Instruct-2512-GGUF:q5_k_m
+      --ctx-size 32768
+      -fa off
+      -ngl 99
+      --mmap
+      --jinja
+      --no-mmproj
+    name: "ministral-3:14b-instruct-q5_k_m-32768"
+    description: "VRAM: 14224 MiB"
+    ttl: 600
+
+  "ministral-3:14b-reasoning-q5_k_m-32768":
+    cmd: |
+      ${latest-llama}
+      -hf mistralai/Ministral-3-14B-Reasoning-2512-GGUF:q5_k_m
+      --ctx-size 32768
+      -fa off
+      -ngl 99
+      --mmap
+      --jinja
+      --no-mmproj
+    name: "ministral-3:14b-reasoning-q5_k_m-32768"
+    description: "VRAM: 14224 MiB"
+    ttl: 600
+
+  # ========================================
+  # UTILITY MODELS (General Purpose)
+  # ========================================
+
+  "embeddinggemma:300m":
+    cmd: |
+      ${latest-llama}
+      -hf gaianet/embeddinggemma-300m-GGUF
+      --ctx-size 2048
+      -fa off
+      -ngl 99
+      --embeddings
+      --pooling mean
+      -b 1024
+      -ub 1024
+    name: "embeddinggemma:300m"
+    description: "VRAM: 512 MiB"
+    ttl: 3600
+
+  "bge-reranker-v2-m3":
+    cmd: |
+      ${latest-llama}
+      -hf Felladrin/bge-reranker-v2-m3-Q8_0-GGUF
+      --ctx-size 8192
+      -ngl 99
+      --mmap
+      --rerank
+      --embedding
+      --pooling rank
+      -b 8192
+      -ub 8192
+    name: "bge-reranker-v2-m3"
+    description: "VRAM: 1077 MiB"
+    ttl: 3600
+
+  # ========================================
+  # CODING MODELS
+  # ========================================
+
+  "qwen2.5-coder:14b-q5_k_m-32768":
+    cmd: |
+      ${latest-llama}
+      -hf Qwen/Qwen2.5-Coder-14B-Instruct-GGUF:q5_k_m
+      --ctx-size 32768
+      -fa auto
+      -ctv q8_0
+      -ctk q8_0
+      -ngl 99
+      --jinja
+      --mmap
+      -b 512
+    name: "qwen2.5-coder:14b-q5_k_m-32768"
+    description: "VRAM: ~12500 MiB"
+    ttl: 600
+
+  "qwen2.5-coder:1.5b-q4_k_m-autocomplete":
+    cmd: |
+      ${latest-llama}
+      -hf Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF:q4_k_m
+      --ctx-size 2048
+      -fa off
+      -ngl 99
+      -b 128
+      -ub 32
+      --mmap
+      --no-warmup
+    name: "qwen2.5-coder:1.5b-q4_k_m-autocomplete"
+    description: "VRAM: ~1000 MiB"
+    ttl: 3600
+
+  # ========================================
+  # PERSISTENT CPU MODEL
+  # ========================================
+
+  "qwen3:1.7b-cpu-json":
+    cmd: |
+      ${latest-llama}
+      -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_M
+      --ctx-size 8192
+      -fa off
+      -ngl 0
+      -b 512
+      --jinja
+      --mmap
+    name: "qwen3:1.7b-cpu-json"
+    description: "CPU-only - permanent RAM resident for tags/titles/queries"
+    ttl: 0
+
+# ========================================
+# GROUPS CONFIGURATION
+# ========================================
+
+groups:
+  # General purpose models can coexist with utility models
+  # When loaded, they prevent coding group from running
+  "general-purpose":
+    swap: false       # All models in group can run simultaneously
+    exclusive: true   # Unloads other exclusive groups when active
+    members:
+      - "qwen3:14b-q5_k_m-32768"
+      - "qwen3:8b-q5_k_m-40960"
+      - "qwen3:8b-q8_0-32768"
+      - "ministral-3:14b-instruct-q5_k_m-20480-vision"
+      - "ministral-3:14b-reasoning-q5_k_m-20480-vision"
+      - "ministral-3:14b-instruct-q5_k_m-32768"
+      - "ministral-3:14b-reasoning-q5_k_m-32768"
+      - "bge-reranker-v2-m3"
+
+  # Coding models can coexist with each other
+  # When loaded, they prevent general-purpose group from running
+  "coding":
+    swap: false       # Both coder models can run simultaneously
+    exclusive: true   # Unloads other exclusive groups when active
+    members:
+      - "qwen2.5-coder:14b-q5_k_m-32768"
+      - "qwen2.5-coder:1.5b-q4_k_m-autocomplete"
+
+  # CPU-based persistent model - never unloaded, doesn't interfere
+  "persistent-cpu":
+    swap: false       # No swapping (only one model anyway)
+    exclusive: false  # Doesn't unload other groups
+    persistent: true  # Other groups cannot unload this
+    members:
+      - "qwen3:1.7b-cpu-json"
+
+# ========================================
+# STARTUP HOOKS
+# ========================================
+
+hooks:
+  on_startup:
+    preload:
+      - "qwen3:1.7b-cpu-json"
diff --git a/ui-svelte/package-lock.json b/ui-svelte/package-lock.json
index 93150075..57723b8e 100644
--- a/ui-svelte/package-lock.json
+++ b/ui-svelte/package-lock.json
@@ -8,6 +8,12 @@
       "name": "ui-svelte",
       "version": "0.0.0",
       "dependencies": {
+        "@codemirror/lang-yaml": "^6.1.2",
+        "@codemirror/language": "^6.12.1",
+        "@codemirror/state": "^6.5.4",
+        "@codemirror/view": "^6.39.12",
+        "codemirror": "^6.0.2",
+        "js-yaml": "^4.1.1",
         "svelte-spa-router": "^4.0.1"
       },
       "devDependencies": {
@@ -21,6 +27,102 @@
         "vite": "^6.3.5"
       }
     },
+    "node_modules/@codemirror/autocomplete": {
+      "version": "6.20.0",
+      "resolved": "https://registry.npmjs.org/@codemirror/autocomplete/-/autocomplete-6.20.0.tgz",
+      "integrity": "sha512-bOwvTOIJcG5FVo5gUUupiwYh8MioPLQ4UcqbcRf7UQ98X90tCa9E1kZ3Z7tqwpZxYyOvh1YTYbmZE9RTfTp5hg==",
+      "license": "MIT",
+      "dependencies": {
+        "@codemirror/language": "^6.0.0",
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.17.0",
+        "@lezer/common": "^1.0.0"
+      }
+    },
+    "node_modules/@codemirror/commands": {
+      "version": "6.10.1",
+      "resolved": "https://registry.npmjs.org/@codemirror/commands/-/commands-6.10.1.tgz",
+      "integrity": "sha512-uWDWFypNdQmz2y1LaNJzK7fL7TYKLeUAU0npEC685OKTF3KcQ2Vu3klIM78D7I6wGhktme0lh3CuQLv0ZCrD9Q==",
+      "license": "MIT",
+      "dependencies": {
+        "@codemirror/language": "^6.0.0",
+        "@codemirror/state": "^6.4.0",
+        "@codemirror/view": "^6.27.0",
+        "@lezer/common": "^1.1.0"
+      }
+    },
+    "node_modules/@codemirror/lang-yaml": {
+      "version": "6.1.2",
+      "resolved": "https://registry.npmjs.org/@codemirror/lang-yaml/-/lang-yaml-6.1.2.tgz",
+      "integrity": "sha512-dxrfG8w5Ce/QbT7YID7mWZFKhdhsaTNOYjOkSIMt1qmC4VQnXSDSYVHHHn8k6kJUfIhtLo8t1JJgltlxWdsITw==",
+      "license": "MIT",
+      "dependencies": {
+        "@codemirror/autocomplete": "^6.0.0",
+        "@codemirror/language": "^6.0.0",
+        "@codemirror/state": "^6.0.0",
+        "@lezer/common": "^1.2.0",
+        "@lezer/highlight": "^1.2.0",
+        "@lezer/lr": "^1.0.0",
+        "@lezer/yaml": "^1.0.0"
+      }
+    },
+    "node_modules/@codemirror/language": {
+      "version": "6.12.1",
+      "resolved": "https://registry.npmjs.org/@codemirror/language/-/language-6.12.1.tgz",
+      "integrity": "sha512-Fa6xkSiuGKc8XC8Cn96T+TQHYj4ZZ7RdFmXA3i9xe/3hLHfwPZdM+dqfX0Cp0zQklBKhVD8Yzc8LS45rkqcwpQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.23.0",
+        "@lezer/common": "^1.5.0",
+        "@lezer/highlight": "^1.0.0",
+        "@lezer/lr": "^1.0.0",
+        "style-mod": "^4.0.0"
+      }
+    },
+    "node_modules/@codemirror/lint": {
+      "version": "6.9.3",
+      "resolved": "https://registry.npmjs.org/@codemirror/lint/-/lint-6.9.3.tgz",
+      "integrity": "sha512-y3YkYhdnhjDBAe0VIA0c4wVoFOvnp8CnAvfLqi0TqotIv92wIlAAP7HELOpLBsKwjAX6W92rSflA6an/2zBvXw==",
+      "license": "MIT",
+      "dependencies": {
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.35.0",
+        "crelt": "^1.0.5"
+      }
+    },
+    "node_modules/@codemirror/search": {
+      "version": "6.6.0",
+      "resolved": "https://registry.npmjs.org/@codemirror/search/-/search-6.6.0.tgz",
+      "integrity": "sha512-koFuNXcDvyyotWcgOnZGmY7LZqEOXZaaxD/j6n18TCLx2/9HieZJ5H6hs1g8FiRxBD0DNfs0nXn17g872RmYdw==",
+      "license": "MIT",
+      "dependencies": {
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.37.0",
+        "crelt": "^1.0.5"
+      }
+    },
+    "node_modules/@codemirror/state": {
+      "version": "6.5.4",
+      "resolved": "https://registry.npmjs.org/@codemirror/state/-/state-6.5.4.tgz",
+      "integrity": "sha512-8y7xqG/hpB53l25CIoit9/ngxdfoG+fx+V3SHBrinnhOtLvKHRyAJJuHzkWrR4YXXLX8eXBsejgAAxHUOdW1yw==",
+      "license": "MIT",
+      "dependencies": {
+        "@marijn/find-cluster-break": "^1.0.0"
+      }
+    },
+    "node_modules/@codemirror/view": {
+      "version": "6.39.12",
+      "resolved": "https://registry.npmjs.org/@codemirror/view/-/view-6.39.12.tgz",
+      "integrity": "sha512-f+/VsHVn/kOA9lltk/GFzuYwVVAKmOnNjxbrhkk3tPHntFqjWeI2TbIXx006YkBkqC10wZ4NsnWXCQiFPeAISQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@codemirror/state": "^6.5.0",
+        "crelt": "^1.0.6",
+        "style-mod": "^4.1.0",
+        "w3c-keyname": "^2.2.4"
+      }
+    },
     "node_modules/@esbuild/aix-ppc64": {
       "version": "0.25.12",
       "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz",
@@ -513,6 +615,47 @@
         "@jridgewell/sourcemap-codec": "^1.4.14"
       }
     },
+    "node_modules/@lezer/common": {
+      "version": "1.5.0",
+      "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.5.0.tgz",
+      "integrity": "sha512-PNGcolp9hr4PJdXR4ix7XtixDrClScvtSCYW3rQG106oVMOOI+jFb+0+J3mbeL/53g1Zd6s0kJzaw6Ri68GmAA==",
+      "license": "MIT"
+    },
+    "node_modules/@lezer/highlight": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.3.tgz",
+      "integrity": "sha512-qXdH7UqTvGfdVBINrgKhDsVTJTxactNNxLk7+UMwZhU13lMHaOBlJe9Vqp907ya56Y3+ed2tlqzys7jDkTmW0g==",
+      "license": "MIT",
+      "dependencies": {
+        "@lezer/common": "^1.3.0"
+      }
+    },
+    "node_modules/@lezer/lr": {
+      "version": "1.4.8",
+      "resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.8.tgz",
+      "integrity": "sha512-bPWa0Pgx69ylNlMlPvBPryqeLYQjyJjqPx+Aupm5zydLIF3NE+6MMLT8Yi23Bd9cif9VS00aUebn+6fDIGBcDA==",
+      "license": "MIT",
+      "dependencies": {
+        "@lezer/common": "^1.0.0"
+      }
+    },
+    "node_modules/@lezer/yaml": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@lezer/yaml/-/yaml-1.0.4.tgz",
+      "integrity": "sha512-2lrrHqxalACEbxIbsjhqGpSW8kWpUKuY6RHgnSAFZa6qK62wvnPxA8hGOwOoDbwHcOFs5M4o27mjGu+P7TvBmw==",
+      "license": "MIT",
+      "dependencies": {
+        "@lezer/common": "^1.2.0",
+        "@lezer/highlight": "^1.0.0",
+        "@lezer/lr": "^1.4.0"
+      }
+    },
+    "node_modules/@marijn/find-cluster-break": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@marijn/find-cluster-break/-/find-cluster-break-1.0.2.tgz",
+      "integrity": "sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==",
+      "license": "MIT"
+    },
     "node_modules/@rollup/rollup-android-arm-eabi": {
       "version": "4.57.0",
       "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.57.0.tgz",
@@ -879,7 +1022,6 @@
       "integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
         "debug": "^4.4.1",
@@ -1206,7 +1348,6 @@
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -1214,6 +1355,12 @@
         "node": ">=0.4.0"
       }
     },
+    "node_modules/argparse": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
+      "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
+      "license": "Python-2.0"
+    },
     "node_modules/aria-query": {
       "version": "5.3.2",
       "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz",
@@ -1260,6 +1407,27 @@
         "node": ">=6"
       }
     },
+    "node_modules/codemirror": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/codemirror/-/codemirror-6.0.2.tgz",
+      "integrity": "sha512-VhydHotNW5w1UGK0Qj96BwSk/Zqbp9WbnyK2W/eVMv4QyF41INRGpjUhFJY7/uDNuudSc33a/PKr4iDqRduvHw==",
+      "license": "MIT",
+      "dependencies": {
+        "@codemirror/autocomplete": "^6.0.0",
+        "@codemirror/commands": "^6.0.0",
+        "@codemirror/language": "^6.0.0",
+        "@codemirror/lint": "^6.0.0",
+        "@codemirror/search": "^6.0.0",
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.0.0"
+      }
+    },
+    "node_modules/crelt": {
+      "version": "1.0.6",
+      "resolved": "https://registry.npmjs.org/crelt/-/crelt-1.0.6.tgz",
+      "integrity": "sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==",
+      "license": "MIT"
+    },
     "node_modules/debug": {
       "version": "4.4.3",
       "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -1438,6 +1606,18 @@
         "jiti": "lib/jiti-cli.mjs"
       }
     },
+    "node_modules/js-yaml": {
+      "version": "4.1.1",
+      "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
+      "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
+      "license": "MIT",
+      "dependencies": {
+        "argparse": "^2.0.1"
+      },
+      "bin": {
+        "js-yaml": "bin/js-yaml.js"
+      }
+    },
     "node_modules/kleur": {
       "version": "4.1.5",
       "resolved": "https://registry.npmjs.org/kleur/-/kleur-4.1.5.tgz",
@@ -1775,7 +1955,6 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -1903,13 +2082,18 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/style-mod": {
+      "version": "4.1.3",
+      "resolved": "https://registry.npmjs.org/style-mod/-/style-mod-4.1.3.tgz",
+      "integrity": "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ==",
+      "license": "MIT"
+    },
     "node_modules/svelte": {
       "version": "5.48.5",
       "resolved": "https://registry.npmjs.org/svelte/-/svelte-5.48.5.tgz",
       "integrity": "sha512-NB3o70OxfmnE5UPyLr8uH3IV02Q43qJVAuWigYmsSOYsS0s/rHxP0TF81blG0onF/xkhNvZw4G8NfzIX+By5ZQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@jridgewell/remapping": "^2.3.4",
         "@jridgewell/sourcemap-codec": "^1.5.0",
@@ -2011,7 +2195,6 @@
       "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
       "dev": true,
       "license": "Apache-2.0",
-      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -2026,7 +2209,6 @@
       "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "^0.25.0",
         "fdir": "^6.4.4",
@@ -2116,6 +2298,12 @@
         }
       }
     },
+    "node_modules/w3c-keyname": {
+      "version": "2.2.8",
+      "resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz",
+      "integrity": "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==",
+      "license": "MIT"
+    },
     "node_modules/zimmerframe": {
       "version": "1.1.4",
       "resolved": "https://registry.npmjs.org/zimmerframe/-/zimmerframe-1.1.4.tgz",
diff --git a/ui-svelte/package.json b/ui-svelte/package.json
index 53296797..fbe35b9d 100644
--- a/ui-svelte/package.json
+++ b/ui-svelte/package.json
@@ -20,6 +20,12 @@
     "vite": "^6.3.5"
   },
   "dependencies": {
+    "@codemirror/lang-yaml": "^6.1.2",
+    "@codemirror/language": "^6.12.1",
+    "@codemirror/state": "^6.5.4",
+    "@codemirror/view": "^6.39.12",
+    "codemirror": "^6.0.2",
+    "js-yaml": "^4.1.1",
     "svelte-spa-router": "^4.0.1"
   }
 }
diff --git a/ui-svelte/src/App.svelte b/ui-svelte/src/App.svelte
index 69216703..2e4ab9d1 100644
--- a/ui-svelte/src/App.svelte
+++ b/ui-svelte/src/App.svelte
@@ -5,6 +5,7 @@
   import LogViewer from "./routes/LogViewer.svelte";
   import Models from "./routes/Models.svelte";
   import Activity from "./routes/Activity.svelte";
+  import Config from "./routes/Config.svelte";
   import { enableAPIEvents } from "./stores/api";
   import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme";
 
@@ -12,6 +13,7 @@
     "/": Models,
     "/logs": LogViewer,
     "/activity": Activity,
+    "/config": Config,
     "*": Models,
   };
 
diff --git a/ui-svelte/src/components/Header.svelte b/ui-svelte/src/components/Header.svelte
index 73c66874..4c7553ee 100644
--- a/ui-svelte/src/components/Header.svelte
+++ b/ui-svelte/src/components/Header.svelte
@@ -68,6 +68,14 @@
     >
       Logs
     </a>
+    <a
+      href="/config"
+      use:link
+      class="text-gray-600 hover:text-black dark:text-gray-300 dark:hover:text-gray-100 p-1"
+      class:font-semibold={isActive("/config", $location)}
+    >
+      Config
+    </a>
     <button onclick={toggleTheme} title="Toggle theme">
       {#if $isDarkMode}
         <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="w-5 h-5">
diff --git a/ui-svelte/src/routes/Config.svelte b/ui-svelte/src/routes/Config.svelte
new file mode 100644
index 00000000..cc830401
--- /dev/null
+++ b/ui-svelte/src/routes/Config.svelte
@@ -0,0 +1,300 @@
+<script lang="ts">
+  import { onMount } from "svelte";
+  import { isNarrow, isDarkMode } from "../stores/theme";
+  import { EditorView, basicSetup } from "codemirror";
+  import { yaml } from "@codemirror/lang-yaml";
+  import { EditorState, Compartment } from "@codemirror/state";
+  import * as jsyaml from "js-yaml";
+
+  let currentConfig = $state("");
+  let exampleConfig = $state("");
+  let loading = $state(true);
+  let error = $state("");
+  let validationError = $state("");
+  let saving = $state(false);
+  let direction = $derived<"horizontal" | "vertical">($isNarrow ? "vertical" : "horizontal");
+  
+  let editorContainer: HTMLDivElement;
+  let exampleContainer: HTMLDivElement;
+  let editorView: EditorView | null = null;
+  let exampleView: EditorView | null = null;
+  let themeCompartment = new Compartment();
+
+  function validateYAML(text: string): string | null {
+    try {
+      jsyaml.load(text);
+      return null;
+    } catch (e) {
+      return e instanceof Error ? e.message : "Invalid YAML";
+    }
+  }
+
+  function getTheme(dark: boolean, readOnly: boolean) { // builds the CodeMirror theme extension for one editor
+    return EditorView.theme({
+      "&": { 
+        height: "100%",
+        backgroundColor: dark ? (readOnly ? "#1a1a1a" : "#1f1f1f") : (readOnly ? "#f9fafb" : "#ffffff"),
+      },
+      ".cm-scroller": { 
+        overflow: "auto",
+      },
+      ".cm-content": { 
+        fontFamily: "monospace",
+        color: dark ? "#e0e0e0" : "#1f2937",
+      },
+      ".cm-gutters": {
+        backgroundColor: dark ? "#2a2a2a" : "#f3f4f6",
+        color: dark ? "#6b7280" : "#9ca3af",
+        border: "none",
+      },
+      ".cm-activeLineGutter": {
+        backgroundColor: dark ? "#374151" : "#e5e7eb",
+      },
+      ".cm-activeLine": {
+        backgroundColor: dark ? "#374151" : "#f3f4f6",
+      },
+      ".cm-selectionBackground, ::selection": {
+        backgroundColor: dark ? "#3b82f6" : "#bfdbfe",
+      },
+      ".cm-cursor": {
+        borderLeftColor: dark ? "#60a5fa" : "#2563eb",
+      },
+      // YAML syntax colors. NOTE(review): these look like CodeMirror 5 token classes; CodeMirror 6 highlighting emits generated class names, so confirm these selectors match anything (a HighlightStyle may be needed).
+      ".cm-atom": { color: dark ? "#fbbf24" : "#d97706" }, // true/false/null
+      ".cm-number": { color: dark ? "#a78bfa" : "#7c3aed" }, // numbers
+      ".cm-string": { color: dark ? "#34d399" : "#059669" }, // strings
+      ".cm-property": { color: dark ? "#60a5fa" : "#2563eb" }, // keys
+      ".cm-comment": { color: dark ? "#6b7280" : "#9ca3af" }, // comments
+    }, { dark });
+  }
+
+  function createEditor(parent: HTMLElement, content: string, readOnly: boolean) {
+    const state = EditorState.create({
+      doc: content,
+      extensions: [
+        basicSetup,
+        yaml(),
+        EditorView.lineWrapping,
+        EditorView.editable.of(!readOnly),
+        themeCompartment.of(getTheme($isDarkMode, readOnly)),
+        EditorView.updateListener.of((update) => {
+          if (!readOnly && update.docChanged) {
+            currentConfig = update.state.doc.toString();
+            const err = validateYAML(currentConfig);
+            validationError = err || "";
+          }
+        }),
+      ],
+    });
+
+    return new EditorView({
+      state,
+      parent,
+    });
+  }
+
+  // Re-theme both editors whenever dark mode changes.
+  $effect(() => {
+    // Read the store before any branching: the editors do not exist yet on the
+    // first run, and a store read that is skipped is not tracked by the effect,
+    // so the effect would never re-run on a theme toggle.
+    const dark = $isDarkMode;
+    editorView?.dispatch({
+      effects: themeCompartment.reconfigure(getTheme(dark, false)),
+    });
+    exampleView?.dispatch({
+      effects: themeCompartment.reconfigure(getTheme(dark, true)),
+    });
+  });
+
+  async function loadConfigs() {
+    loading = true;
+    error = "";
+    validationError = "";
+    try {
+      const [currentRes, exampleRes] = await Promise.all([
+        fetch("/api/config/current"),
+        fetch("/api/config/example"),
+      ]);
+
+      if (!currentRes.ok) {
+        throw new Error(`Failed to load current config: ${currentRes.statusText}`);
+      }
+      if (!exampleRes.ok) {
+        throw new Error(`Failed to load example config: ${exampleRes.statusText}`);
+      }
+
+      currentConfig = await currentRes.text();
+      exampleConfig = await exampleRes.text();
+      
+      // Validate on load
+      const err = validateYAML(currentConfig);
+      validationError = err || "";
+    } catch (e) {
+      error = e instanceof Error ? e.message : "Failed to load configs";
+    } finally {
+      loading = false;
+    }
+  }
+
+  async function saveConfig() {
+    // Validate, POST the editor text to /api/config, then reload the page on success.
+    const validationErr = validateYAML(currentConfig);
+    if (validationErr) {
+      alert(`Cannot save: ${validationErr}`);
+      return;
+    }
+
+    saving = true;
+    error = "";
+    try {
+      const res = await fetch("/api/config", {
+        method: "POST",
+        headers: { "Content-Type": "text/yaml" },
+        body: currentConfig,
+      });
+      if (!res.ok) {
+        // The error body is not guaranteed to be JSON; fall back to the generic message.
+        const errData = await res.json().catch(() => null);
+        throw new Error(errData?.error || "Failed to save config");
+      }
+
+      alert("Config saved successfully! Application is reloading...");
+      // Reload after a delay to see the changes
+      setTimeout(() => window.location.reload(), 2000);
+    } catch (e) {
+      error = e instanceof Error ? e.message : "Failed to save config";
+      alert(`Error: ${error}`);
+    } finally {
+      saving = false;
+    }
+  }
+
+  function exportConfig() {
+    const blob = new Blob([currentConfig], { type: "text/yaml" });
+    const url = URL.createObjectURL(blob);
+    const a = document.createElement("a");
+    a.href = url;
+    a.download = "config.yaml";
+    document.body.appendChild(a);
+    a.click();
+    document.body.removeChild(a);
+    URL.revokeObjectURL(url);
+  }
+
+  function importConfig() {
+    const input = document.createElement("input");
+    input.type = "file";
+    input.accept = ".yaml,.yml";
+    input.onchange = async (e) => {
+      const file = (e.target as HTMLInputElement).files?.[0];
+      if (file) {
+        try {
+          const text = await file.text();
+          const err = validateYAML(text);
+          if (err) {
+            alert(`Invalid YAML file: ${err}`);
+            return;
+          }
+          currentConfig = text;
+          if (editorView) {
+            editorView.dispatch({
+              changes: { from: 0, to: editorView.state.doc.length, insert: text }
+            });
+          }
+        } catch (e) {
+          error = e instanceof Error ? e.message : "Failed to read file";
+          alert(`Error: ${error}`);
+        }
+      }
+    };
+    input.click();
+  }
+
+  onMount(() => {
+    loadConfigs();
+    // Destroy the CodeMirror views on unmount so leaving the route does not leak them.
+    return () => {
+      editorView?.destroy();
+      exampleView?.destroy();
+    };
+  });
+
+  // Create each editor once loading is done, its container is bound, and it has content.
+  $effect(() => {
+    if (loading) return;
+    if (editorContainer && !editorView && currentConfig) editorView = createEditor(editorContainer, currentConfig, false);
+    if (exampleContainer && !exampleView && exampleConfig) exampleView = createEditor(exampleContainer, exampleConfig, true);
+  });
+</script>
+
+<div class="flex flex-col h-full">
+  <div class="mb-4 flex items-center justify-between">
+    <h2 class="text-xl font-semibold">Configuration Editor</h2>
+    <div class="flex gap-2">
+      <button
+        onclick={importConfig}
+        class="px-4 py-2 bg-blue-500 hover:bg-blue-600 text-white rounded disabled:opacity-50"
+        disabled={loading || saving}
+      >
+        Import
+      </button>
+      <button
+        onclick={exportConfig}
+        class="px-4 py-2 bg-green-500 hover:bg-green-600 text-white rounded disabled:opacity-50"
+        disabled={loading || saving || !currentConfig}
+      >
+        Export
+      </button>
+      <button
+        onclick={saveConfig}
+        class="px-4 py-2 bg-orange-500 hover:bg-orange-600 text-white rounded disabled:opacity-50"
+        disabled={loading || saving || !currentConfig || !!validationError}
+      >
+        {saving ? "Saving..." : "Save & Reload"}
+      </button>
+    </div>
+  </div>
+
+  {#if validationError}
+    <div class="mb-4 p-3 bg-yellow-100 dark:bg-yellow-900 text-yellow-800 dark:text-yellow-200 rounded">
+      <strong>Validation Error:</strong> {validationError}
+    </div>
+  {/if}
+
+  {#if error}
+    <div class="mb-4 p-3 bg-red-100 dark:bg-red-900 text-red-800 dark:text-red-200 rounded">
+      {error}
+    </div>
+  {/if}
+
+  {#if loading}
+    <div class="flex items-center justify-center h-full">
+      <div class="text-gray-500">Loading configuration...</div>
+    </div>
+  {:else}
+    <div
+      class="flex-1 flex gap-4 min-h-0"
+      class:flex-col={direction === "vertical"}
+      class:flex-row={direction === "horizontal"}
+    >
+      <!-- Left panel: Editable config -->
+      <div class="flex-1 flex flex-col min-h-0 min-w-0">
+        <h3 class="text-lg font-semibold mb-2">Current Config (Editable)</h3>
+        <div 
+          bind:this={editorContainer}
+          class="flex-1 w-full border border-gray-300 dark:border-gray-600 rounded overflow-hidden bg-white dark:bg-gray-800"
+        ></div>
+      </div>
+
+      <!-- Right panel: Example config (read-only) -->
+      <div class="flex-1 flex flex-col min-h-0 min-w-0">
+        <h3 class="text-lg font-semibold mb-2">Example Config (Reference)</h3>
+        <div 
+          bind:this={exampleContainer}
+          class="flex-1 w-full border border-gray-300 dark:border-gray-600 rounded overflow-hidden bg-gray-50 dark:bg-gray-900"
+        ></div>
+      </div>
+    </div>
+  {/if}
+</div>

From 6f023c7993403a5d543769b7113a012c675e7b98 Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Fri, 30 Jan 2026 22:32:56 +0000
Subject: [PATCH 06/13] fix incorrect assumption that RPC is healthy at startup

---
 proxy/process.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/proxy/process.go b/proxy/process.go
index dee5f962..253ab189 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -143,7 +143,7 @@ func NewProcess(ID string, healthCheckTimeout int, modelConfig config.ModelConfi
 			proxyLogger.Warnf("<%s> rpcHealthCheck enabled but no --rpc flag found in cmd", ID)
 		} else {
 			p.rpcEndpoints = endpoints
-			p.rpcHealthy.Store(true) // assume healthy initially
+			p.rpcHealthy.Store(false) // start unhealthy until first check passes
 		}
 	}
 

From c17df42f43f4dd5070ef575566634ba90b888abf Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Fri, 30 Jan 2026 23:26:32 +0000
Subject: [PATCH 07/13] proxy: make RPC health checks independent of process
 state

RPC health checking now runs continuously from process creation until
proxy shutdown, completely independent of whether the model is loaded,
starting, stopped, or in any other state.

- Start health checker in NewProcess when rpcHealthCheck is enabled
- Remove stopRPCHealthChecker - only stops on proxy shutdown
- Remove state checks from health checker goroutine
- Health status always reflects current RPC endpoint availability

Previously, the health checker only ran while a process was in StateReady,
causing stale health data when processes stopped. Now /v1/models always
shows accurate RPC health regardless of model state.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 proxy/process.go                 | 20 +++-----
 proxy/process_rpc_health_test.go | 84 ++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 14 deletions(-)
 create mode 100644 proxy/process_rpc_health_test.go

diff --git a/proxy/process.go b/proxy/process.go
index 253ab189..b9658e44 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -144,6 +144,8 @@ func NewProcess(ID string, healthCheckTimeout int, modelConfig config.ModelConfi
 		} else {
 			p.rpcEndpoints = endpoints
 			p.rpcHealthy.Store(false) // start unhealthy until first check passes
+			// Start health checker immediately - runs independent of process state
+			p.startRPCHealthChecker()
 		}
 	}
 
@@ -385,7 +387,6 @@ func (p *Process) start() error {
 		return fmt.Errorf("failed to set Process state to ready: current state: %v, error: %v", curState, err)
 	} else {
 		p.failedStartCount = 0
-		p.startRPCHealthChecker()
 		return nil
 	}
 }
@@ -409,8 +410,6 @@ func (p *Process) StopImmediately() {
 		return
 	}
 
-	p.stopRPCHealthChecker()
-
 	p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState())
 	if curState, err := p.swapState(StateReady, StateStopping); err != nil {
 		p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState)
@@ -904,7 +903,9 @@ func (s *statusResponseWriter) Flush() {
 	}
 }
 
-// startRPCHealthChecker launches background goroutine for RPC health monitoring
+// startRPCHealthChecker launches background goroutine for RPC health monitoring.
+// Runs independently of process state - checks RPC endpoints regardless of whether
+// the model is loaded, starting, stopped, etc.
 func (p *Process) startRPCHealthChecker() {
 	if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 {
 		return
@@ -926,9 +927,7 @@ func (p *Process) startRPCHealthChecker() {
 				p.proxyLogger.Debugf("<%s> RPC health checker shutting down", p.ID)
 				return
 			case <-p.rpcHealthTicker.C:
-				if p.CurrentState() != StateReady {
-					return // Process no longer ready, exit
-				}
+				// Check regardless of process state
 				p.checkRPCHealth()
 			}
 		}
@@ -960,13 +959,6 @@ func (p *Process) checkRPCHealth() {
 	}
 }
 
-func (p *Process) stopRPCHealthChecker() {
-	if p.rpcHealthCancel != nil {
-		p.rpcHealthCancel()
-		p.rpcHealthCancel = nil
-	}
-}
-
 // IsRPCHealthy returns true if RPC health checking is disabled or all endpoints healthy
 func (p *Process) IsRPCHealthy() bool {
 	if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 {
diff --git a/proxy/process_rpc_health_test.go b/proxy/process_rpc_health_test.go
new file mode 100644
index 00000000..cb9d1d25
--- /dev/null
+++ b/proxy/process_rpc_health_test.go
@@ -0,0 +1,84 @@
+package proxy
+
+import (
+	"context"
+	"io"
+	"testing"
+
+	"github.com/mostlygeek/llama-swap/proxy/config"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestProcess_RPCHealthIndependentOfState(t *testing.T) {
+	testLogger := NewLogMonitorWriter(io.Discard)
+	proxyLogger := NewLogMonitorWriter(io.Discard)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	modelConfig := config.ModelConfig{
+		Cmd:            "llama-server --rpc 127.0.0.1:50051",
+		Proxy:          "http://localhost:8080",
+		RPCHealthCheck: true,
+	}
+
+	process := NewProcess("test-model", 5, modelConfig, testLogger, proxyLogger, ctx)
+
+	// Verify endpoints were parsed
+	assert.NotEmpty(t, process.rpcEndpoints, "RPC endpoints should be parsed from cmd")
+	assert.Equal(t, []string{"127.0.0.1:50051"}, process.rpcEndpoints)
+
+	// Initially should be unhealthy (false) until first check
+	assert.False(t, process.rpcHealthy.Load(), "RPC health should start as false")
+
+	// Health checker should be running regardless of process state
+	assert.NotNil(t, process.rpcHealthTicker, "Health checker ticker should be running")
+	assert.NotNil(t, process.rpcHealthCancel, "Health checker should have cancel func")
+
+	// Process state should not affect health checking
+	assert.Equal(t, StateStopped, process.CurrentState(), "Process should be in stopped state")
+
+	// Health check runs independently - simulate RPC becoming healthy
+	process.rpcHealthy.Store(true)
+	assert.True(t, process.IsRPCHealthy(), "Process should report healthy regardless of state")
+}
+
+func TestProcess_RPCHealthCheckDisabled(t *testing.T) {
+	testLogger := NewLogMonitorWriter(io.Discard)
+	proxyLogger := NewLogMonitorWriter(io.Discard)
+	ctx := context.Background()
+
+	modelConfig := config.ModelConfig{
+		Cmd:            "llama-server --rpc 127.0.0.1:50051",
+		Proxy:          "http://localhost:8080",
+		RPCHealthCheck: false, // Disabled
+	}
+
+	process := NewProcess("test-model", 5, modelConfig, testLogger, proxyLogger, ctx)
+
+	// Should always return healthy when disabled
+	assert.True(t, process.IsRPCHealthy(), "Should return true when RPC health check is disabled")
+}
+
+func TestProcess_RPCHealthCheckNoEndpoints(t *testing.T) {
+	testLogger := NewLogMonitorWriter(io.Discard)
+	proxyLogger := NewLogMonitorWriter(io.Discard)
+	ctx := context.Background()
+
+	modelConfig := config.ModelConfig{
+		Cmd:            "llama-server --port 8080", // No --rpc flag
+		Proxy:          "http://localhost:8080",
+		RPCHealthCheck: true, // Enabled but no endpoints
+	}
+
+	process := NewProcess("test-model", 5, modelConfig, testLogger, proxyLogger, ctx)
+
+	// Should have no endpoints
+	assert.Empty(t, process.rpcEndpoints, "Should have no RPC endpoints when --rpc flag is missing")
+
+	// Should return healthy when no endpoints configured (treat as not using RPC)
+	assert.True(t, process.IsRPCHealthy(), "Should return true when no RPC endpoints found")
+
+	// Health checker should NOT start when no endpoints
+	assert.Nil(t, process.rpcHealthTicker, "Health checker should not run without endpoints")
+	assert.Nil(t, process.rpcHealthCancel, "Health checker cancel should be nil")
+}

From 4987dafa545bc35b8cce8afc481ca245c367b41b Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Fri, 30 Jan 2026 23:45:19 +0000
Subject: [PATCH 08/13] WIP: web config changes

Work in progress on web configuration feature.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 config_embed.go           |  13 +++
 llama-swap.go             |   2 +
 proxy/proxymanager.go     |   9 ++
 proxy/proxymanager_api.go |   9 +-
 ui/package-lock.json      | 178 +++++++++++++++++++++++++++++++++++---
 ui/package.json           |   5 ++
 6 files changed, 199 insertions(+), 17 deletions(-)
 create mode 100644 config_embed.go

diff --git a/config_embed.go b/config_embed.go
new file mode 100644
index 00000000..b158e944
--- /dev/null
+++ b/config_embed.go
@@ -0,0 +1,13 @@
+package main
+
+import (
+	_ "embed"
+)
+
+//go:embed config.example.yaml
+var configExampleYAML []byte
+
+// GetConfigExampleYAML returns the config.example.yaml contents embedded into the binary at build time.
+func GetConfigExampleYAML() []byte {
+	return configExampleYAML
+}
diff --git a/llama-swap.go b/llama-swap.go
index 60ccbc73..1c68a25c 100644
--- a/llama-swap.go
+++ b/llama-swap.go
@@ -98,6 +98,7 @@ func main() {
 			newPM := proxy.New(conf)
 			newPM.SetVersion(date, commit, version)
 			newPM.SetConfigPath(*configPath)
+			newPM.SetConfigExample(GetConfigExampleYAML())
 			srv.Handler = newPM
 			fmt.Println("Configuration Reloaded")
 
@@ -116,6 +117,7 @@ func main() {
 			newPM := proxy.New(conf)
 			newPM.SetVersion(date, commit, version)
 			newPM.SetConfigPath(*configPath)
+			newPM.SetConfigExample(GetConfigExampleYAML())
 			srv.Handler = newPM
 		}
 	}
diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go
index bf227120..c33c9f96 100644
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -55,6 +55,9 @@ type ProxyManager struct {
 	// config file path for editing
 	configPath string
 
+	// embedded example config
+	configExample []byte
+
 	// peer proxy see: #296, #433
 	peerProxy *PeerProxy
 }
@@ -975,3 +978,9 @@ func (pm *ProxyManager) SetConfigPath(configPath string) {
 	defer pm.Unlock()
 	pm.configPath = configPath
 }
+
+func (pm *ProxyManager) SetConfigExample(configExample []byte) {
+	pm.Lock()
+	defer pm.Unlock()
+	pm.configExample = configExample
+}
diff --git a/proxy/proxymanager_api.go b/proxy/proxymanager_api.go
index f590cd6e..05058193 100644
--- a/proxy/proxymanager_api.go
+++ b/proxy/proxymanager_api.go
@@ -276,9 +276,12 @@ func (pm *ProxyManager) apiGetCurrentConfig(c *gin.Context) {
 }
 
 func (pm *ProxyManager) apiGetExampleConfig(c *gin.Context) {
-	data, err := os.ReadFile("config.example.yaml")
-	if err != nil {
-		pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to read example config: %v", err))
+	pm.Lock()
+	data := pm.configExample
+	pm.Unlock()
+
+	if data == nil {
+		pm.sendErrorResponse(c, http.StatusInternalServerError, "Example config not available")
 		return
 	}
 
diff --git a/ui/package-lock.json b/ui/package-lock.json
index c88133e7..097d3bcc 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -8,6 +8,10 @@
       "name": "ui",
       "version": "0.0.0",
       "dependencies": {
+        "@codemirror/lang-yaml": "^6.1.1",
+        "@codemirror/state": "^6.4.1",
+        "codemirror": "^6.0.1",
+        "js-yaml": "^4.1.0",
         "react": "^19.1.0",
         "react-dom": "^19.1.0",
         "react-icons": "^5.5.0",
@@ -17,6 +21,7 @@
       "devDependencies": {
         "@eslint/js": "^9.25.0",
         "@tailwindcss/vite": "^4.1.8",
+        "@types/js-yaml": "^4.0.9",
         "@types/react": "^19.1.2",
         "@types/react-dom": "^19.1.2",
         "@vitejs/plugin-react": "^4.4.1",
@@ -75,7 +80,6 @@
       "integrity": "sha512-bXYxrXFubeYdvB0NhD/NBB3Qi6aZeV20GOWVI47t2dkecCEoneR4NPVcb7abpXDEvejgrUfFtG6vG/zxAKmg+g==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@ampproject/remapping": "^2.2.0",
         "@babel/code-frame": "^7.27.1",
@@ -327,6 +331,94 @@
         "node": ">=6.9.0"
       }
     },
+    "node_modules/@codemirror/autocomplete": {
+      "version": "6.20.0",
+      "resolved": "https://registry.npmjs.org/@codemirror/autocomplete/-/autocomplete-6.20.0.tgz",
+      "integrity": "sha512-bOwvTOIJcG5FVo5gUUupiwYh8MioPLQ4UcqbcRf7UQ98X90tCa9E1kZ3Z7tqwpZxYyOvh1YTYbmZE9RTfTp5hg==",
+      "dependencies": {
+        "@codemirror/language": "^6.0.0",
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.17.0",
+        "@lezer/common": "^1.0.0"
+      }
+    },
+    "node_modules/@codemirror/commands": {
+      "version": "6.10.1",
+      "resolved": "https://registry.npmjs.org/@codemirror/commands/-/commands-6.10.1.tgz",
+      "integrity": "sha512-uWDWFypNdQmz2y1LaNJzK7fL7TYKLeUAU0npEC685OKTF3KcQ2Vu3klIM78D7I6wGhktme0lh3CuQLv0ZCrD9Q==",
+      "dependencies": {
+        "@codemirror/language": "^6.0.0",
+        "@codemirror/state": "^6.4.0",
+        "@codemirror/view": "^6.27.0",
+        "@lezer/common": "^1.1.0"
+      }
+    },
+    "node_modules/@codemirror/lang-yaml": {
+      "version": "6.1.2",
+      "resolved": "https://registry.npmjs.org/@codemirror/lang-yaml/-/lang-yaml-6.1.2.tgz",
+      "integrity": "sha512-dxrfG8w5Ce/QbT7YID7mWZFKhdhsaTNOYjOkSIMt1qmC4VQnXSDSYVHHHn8k6kJUfIhtLo8t1JJgltlxWdsITw==",
+      "dependencies": {
+        "@codemirror/autocomplete": "^6.0.0",
+        "@codemirror/language": "^6.0.0",
+        "@codemirror/state": "^6.0.0",
+        "@lezer/common": "^1.2.0",
+        "@lezer/highlight": "^1.2.0",
+        "@lezer/lr": "^1.0.0",
+        "@lezer/yaml": "^1.0.0"
+      }
+    },
+    "node_modules/@codemirror/language": {
+      "version": "6.12.1",
+      "resolved": "https://registry.npmjs.org/@codemirror/language/-/language-6.12.1.tgz",
+      "integrity": "sha512-Fa6xkSiuGKc8XC8Cn96T+TQHYj4ZZ7RdFmXA3i9xe/3hLHfwPZdM+dqfX0Cp0zQklBKhVD8Yzc8LS45rkqcwpQ==",
+      "dependencies": {
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.23.0",
+        "@lezer/common": "^1.5.0",
+        "@lezer/highlight": "^1.0.0",
+        "@lezer/lr": "^1.0.0",
+        "style-mod": "^4.0.0"
+      }
+    },
+    "node_modules/@codemirror/lint": {
+      "version": "6.9.3",
+      "resolved": "https://registry.npmjs.org/@codemirror/lint/-/lint-6.9.3.tgz",
+      "integrity": "sha512-y3YkYhdnhjDBAe0VIA0c4wVoFOvnp8CnAvfLqi0TqotIv92wIlAAP7HELOpLBsKwjAX6W92rSflA6an/2zBvXw==",
+      "dependencies": {
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.35.0",
+        "crelt": "^1.0.5"
+      }
+    },
+    "node_modules/@codemirror/search": {
+      "version": "6.6.0",
+      "resolved": "https://registry.npmjs.org/@codemirror/search/-/search-6.6.0.tgz",
+      "integrity": "sha512-koFuNXcDvyyotWcgOnZGmY7LZqEOXZaaxD/j6n18TCLx2/9HieZJ5H6hs1g8FiRxBD0DNfs0nXn17g872RmYdw==",
+      "dependencies": {
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.37.0",
+        "crelt": "^1.0.5"
+      }
+    },
+    "node_modules/@codemirror/state": {
+      "version": "6.5.4",
+      "resolved": "https://registry.npmjs.org/@codemirror/state/-/state-6.5.4.tgz",
+      "integrity": "sha512-8y7xqG/hpB53l25CIoit9/ngxdfoG+fx+V3SHBrinnhOtLvKHRyAJJuHzkWrR4YXXLX8eXBsejgAAxHUOdW1yw==",
+      "dependencies": {
+        "@marijn/find-cluster-break": "^1.0.0"
+      }
+    },
+    "node_modules/@codemirror/view": {
+      "version": "6.39.12",
+      "resolved": "https://registry.npmjs.org/@codemirror/view/-/view-6.39.12.tgz",
+      "integrity": "sha512-f+/VsHVn/kOA9lltk/GFzuYwVVAKmOnNjxbrhkk3tPHntFqjWeI2TbIXx006YkBkqC10wZ4NsnWXCQiFPeAISQ==",
+      "dependencies": {
+        "@codemirror/state": "^6.5.0",
+        "crelt": "^1.0.6",
+        "style-mod": "^4.1.0",
+        "w3c-keyname": "^2.2.4"
+      }
+    },
     "node_modules/@esbuild/aix-ppc64": {
       "version": "0.25.5",
       "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz",
@@ -1041,6 +1133,42 @@
         "@jridgewell/sourcemap-codec": "^1.4.14"
       }
     },
+    "node_modules/@lezer/common": {
+      "version": "1.5.0",
+      "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.5.0.tgz",
+      "integrity": "sha512-PNGcolp9hr4PJdXR4ix7XtixDrClScvtSCYW3rQG106oVMOOI+jFb+0+J3mbeL/53g1Zd6s0kJzaw6Ri68GmAA=="
+    },
+    "node_modules/@lezer/highlight": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.3.tgz",
+      "integrity": "sha512-qXdH7UqTvGfdVBINrgKhDsVTJTxactNNxLk7+UMwZhU13lMHaOBlJe9Vqp907ya56Y3+ed2tlqzys7jDkTmW0g==",
+      "dependencies": {
+        "@lezer/common": "^1.3.0"
+      }
+    },
+    "node_modules/@lezer/lr": {
+      "version": "1.4.8",
+      "resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.8.tgz",
+      "integrity": "sha512-bPWa0Pgx69ylNlMlPvBPryqeLYQjyJjqPx+Aupm5zydLIF3NE+6MMLT8Yi23Bd9cif9VS00aUebn+6fDIGBcDA==",
+      "dependencies": {
+        "@lezer/common": "^1.0.0"
+      }
+    },
+    "node_modules/@lezer/yaml": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@lezer/yaml/-/yaml-1.0.4.tgz",
+      "integrity": "sha512-2lrrHqxalACEbxIbsjhqGpSW8kWpUKuY6RHgnSAFZa6qK62wvnPxA8hGOwOoDbwHcOFs5M4o27mjGu+P7TvBmw==",
+      "dependencies": {
+        "@lezer/common": "^1.2.0",
+        "@lezer/highlight": "^1.0.0",
+        "@lezer/lr": "^1.4.0"
+      }
+    },
+    "node_modules/@marijn/find-cluster-break": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@marijn/find-cluster-break/-/find-cluster-break-1.0.2.tgz",
+      "integrity": "sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g=="
+    },
     "node_modules/@nodelib/fs.scandir": {
       "version": "2.1.5",
       "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -1755,6 +1883,12 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/@types/js-yaml": {
+      "version": "4.0.9",
+      "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
+      "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==",
+      "dev": true
+    },
     "node_modules/@types/json-schema": {
       "version": "7.0.15",
       "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
@@ -1768,7 +1902,6 @@
       "integrity": "sha512-JeG0rEWak0N6Itr6QUx+X60uQmN+5t3j9r/OVDtWzFXKaj6kD1BwJzOksD0FF6iWxZlbE1kB0q9vtnU2ekqa1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "csstype": "^3.0.2"
       }
@@ -1829,7 +1962,6 @@
       "integrity": "sha512-qwxv6dq682yVvgKKp2qWwLgRbscDAYktPptK4JPojCwwi3R9cwrvIxS4lvBpzmcqzR4bdn54Z0IG1uHFskW4dA==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.33.1",
         "@typescript-eslint/types": "8.33.1",
@@ -2081,7 +2213,6 @@
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -2136,7 +2267,6 @@
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
       "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
-      "dev": true,
       "license": "Python-2.0"
     },
     "node_modules/balanced-match": {
@@ -2190,7 +2320,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "caniuse-lite": "^1.0.30001718",
         "electron-to-chromium": "^1.5.160",
@@ -2262,6 +2391,20 @@
         "node": ">=18"
       }
     },
+    "node_modules/codemirror": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/codemirror/-/codemirror-6.0.2.tgz",
+      "integrity": "sha512-VhydHotNW5w1UGK0Qj96BwSk/Zqbp9WbnyK2W/eVMv4QyF41INRGpjUhFJY7/uDNuudSc33a/PKr4iDqRduvHw==",
+      "dependencies": {
+        "@codemirror/autocomplete": "^6.0.0",
+        "@codemirror/commands": "^6.0.0",
+        "@codemirror/language": "^6.0.0",
+        "@codemirror/lint": "^6.0.0",
+        "@codemirror/search": "^6.0.0",
+        "@codemirror/state": "^6.0.0",
+        "@codemirror/view": "^6.0.0"
+      }
+    },
     "node_modules/color-convert": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@@ -2309,6 +2452,11 @@
         "url": "https://opencollective.com/express"
       }
     },
+    "node_modules/crelt": {
+      "version": "1.0.6",
+      "resolved": "https://registry.npmjs.org/crelt/-/crelt-1.0.6.tgz",
+      "integrity": "sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g=="
+    },
     "node_modules/cross-spawn": {
       "version": "7.0.6",
       "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
@@ -2457,7 +2605,6 @@
       "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -2927,7 +3074,6 @@
       "version": "4.1.1",
       "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
       "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
-      "dev": true,
       "license": "MIT",
       "dependencies": {
         "argparse": "^2.0.1"
@@ -3567,7 +3713,6 @@
       "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz",
       "integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=0.10.0"
       }
@@ -3577,7 +3722,6 @@
       "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz",
       "integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "scheduler": "^0.26.0"
       },
@@ -3812,6 +3956,11 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/style-mod": {
+      "version": "4.1.3",
+      "resolved": "https://registry.npmjs.org/style-mod/-/style-mod-4.1.3.tgz",
+      "integrity": "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ=="
+    },
     "node_modules/supports-color": {
       "version": "7.2.0",
       "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
@@ -3907,7 +4056,6 @@
       "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -3960,7 +4108,6 @@
       "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
       "dev": true,
       "license": "Apache-2.0",
-      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -4039,7 +4186,6 @@
       "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "^0.25.0",
         "fdir": "^6.4.4",
@@ -4130,7 +4276,6 @@
       "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -4138,6 +4283,11 @@
         "url": "https://github.com/sponsors/jonschlinkert"
       }
     },
+    "node_modules/w3c-keyname": {
+      "version": "2.2.8",
+      "resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz",
+      "integrity": "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ=="
+    },
     "node_modules/which": {
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
diff --git a/ui/package.json b/ui/package.json
index d8a5e090..07cc38ea 100644
--- a/ui/package.json
+++ b/ui/package.json
@@ -10,6 +10,10 @@
     "preview": "vite preview"
   },
   "dependencies": {
+    "@codemirror/lang-yaml": "^6.1.1",
+    "@codemirror/state": "^6.4.1",
+    "codemirror": "^6.0.1",
+    "js-yaml": "^4.1.0",
     "react": "^19.1.0",
     "react-dom": "^19.1.0",
     "react-icons": "^5.5.0",
@@ -19,6 +23,7 @@
   "devDependencies": {
     "@eslint/js": "^9.25.0",
     "@tailwindcss/vite": "^4.1.8",
+    "@types/js-yaml": "^4.0.9",
     "@types/react": "^19.1.2",
     "@types/react-dom": "^19.1.2",
     "@vitejs/plugin-react": "^4.4.1",

From e6f9f9a9990cea88228dc67a62e072ef0854da5e Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Sat, 31 Jan 2026 00:27:33 +0000
Subject: [PATCH 09/13] proxy: fix requestTimeout feature to actually terminate
 requests

The requestTimeout feature was not working because the timeout
context was not connected to the HTTP request. When the timeout
fired, it attempted to kill the process but the reverse proxy
continued waiting for a response indefinitely.

- Use context.WithTimeout() to create a timeout context for the HTTP request
- Clone the request with the timeout context before proxying
- When timeout fires, the HTTP request is immediately cancelled
- Fix StopImmediately() to handle timeouts during model loading (StateStarting)
- Add unit test to verify timeout behavior

Before: requests would run for 60+ seconds despite requestTimeout: 20
After: requests are cancelled once the configured 20-second timeout elapses

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 proxy/process.go              |  36 ++++++-----
 proxy/process_timeout_test.go | 109 ++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+), 15 deletions(-)
 create mode 100644 proxy/process_timeout_test.go

diff --git a/proxy/process.go b/proxy/process.go
index 5ada9723..7e311d11 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -381,13 +381,17 @@ func (p *Process) Stop() {
 // StopImmediately will transition the process to the stopping state and stop the process with a SIGTERM.
 // If the process does not stop within the specified timeout, it will be forcefully stopped with a SIGKILL.
 func (p *Process) StopImmediately() {
-	if !isValidTransition(p.CurrentState(), StateStopping) {
+	currentState := p.CurrentState()
+	if !isValidTransition(currentState, StateStopping) {
 		return
 	}
 
-	p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState())
-	if curState, err := p.swapState(StateReady, StateStopping); err != nil {
-		p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState)
+	p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, currentState)
+
+	// Try to transition from current state to StateStopping
+	// Process might be in StateReady or StateStarting when timeout fires
+	if _, err := p.swapState(currentState, StateStopping); err != nil {
+		p.proxyLogger.Infof("<%s> Stop() %s -> StateStopping err: %v", p.ID, currentState, err)
 		return
 	}
 
@@ -502,30 +506,32 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
 
 	// Start timeout monitoring if requestTimeout is configured
 	var timeoutCancel context.CancelFunc
+	var requestCtx context.Context = r.Context()
+
 	if p.config.RequestTimeout > 0 {
-		timeoutCtx, cancel := context.WithCancel(context.Background())
+		timeoutDuration := time.Duration(p.config.RequestTimeout) * time.Second
+		var cancel context.CancelFunc
+		requestCtx, cancel = context.WithTimeout(r.Context(), timeoutDuration)
 		timeoutCancel = cancel
 
 		go func() {
-			timeoutDuration := time.Duration(p.config.RequestTimeout) * time.Second
-			timer := time.NewTimer(timeoutDuration)
-			defer timer.Stop()
-
-			select {
-			case <-timer.C:
+			<-requestCtx.Done()
+			if requestCtx.Err() == context.DeadlineExceeded {
 				p.proxyLogger.Warnf("<%s> Request timeout exceeded (%v), force stopping process to prevent GPU blocking", p.ID, timeoutDuration)
 				// Force stop the process - this will kill the underlying inference process
 				p.StopImmediately()
-			case <-timeoutCtx.Done():
-				// Request completed normally, cancel timeout
-				return
 			}
 		}()
 
-		// Ensure timeout goroutine is cancelled when request completes
+		// Ensure timeout is cancelled when request completes
 		defer timeoutCancel()
 	}
 
+	// Create a new request with the timeout context
+	if requestCtx != r.Context() {
+		r = r.Clone(requestCtx)
+	}
+
 	// for #366
 	// - extract streaming param from request context, should have been set by proxymanager
 	var srw *statusResponseWriter
diff --git a/proxy/process_timeout_test.go b/proxy/process_timeout_test.go
new file mode 100644
index 00000000..9f048d9e
--- /dev/null
+++ b/proxy/process_timeout_test.go
@@ -0,0 +1,109 @@
+package proxy
+
+import (
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/mostlygeek/llama-swap/proxy/config"
+)
+
+// TestProcess_RequestTimeout verifies that a proxied request running longer than requestTimeout is terminated
+func TestProcess_RequestTimeout(t *testing.T) {
+	// Create a mock server that simulates a long-running inference
+	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		t.Logf("Mock server received request")
+
+		// Simulate streaming response that takes 60 seconds
+		w.Header().Set("Content-Type", "text/event-stream")
+		w.WriteHeader(http.StatusOK)
+
+		flusher, ok := w.(http.Flusher)
+		if !ok {
+			t.Fatal("Expected http.ResponseWriter to be an http.Flusher")
+		}
+
+		// Stream data for 60 seconds
+		for i := 0; i < 60; i++ {
+			select {
+			case <-r.Context().Done():
+				t.Logf("Mock server: client disconnected after %d seconds", i)
+				return
+			default:
+				fmt.Fprintf(w, "data: token %d\n\n", i)
+				flusher.Flush()
+				time.Sleep(1 * time.Second)
+			}
+		}
+		t.Logf("Mock server completed full 60 second response")
+	}))
+	defer mockServer.Close()
+
+	// Setup process logger - use NewLogMonitor() to avoid race in test
+	processLogger := NewLogMonitor()
+	proxyLogger := NewLogMonitor()
+
+	// Create process with 5 second request timeout
+	cfg := config.ModelConfig{
+		Proxy:          mockServer.URL,
+		CheckEndpoint:  "none", // skip health check
+		RequestTimeout: 5,      // 5 second timeout
+	}
+
+	p := NewProcess("test-timeout", 30, cfg, processLogger, proxyLogger)
+	p.gracefulStopTimeout = 2 * time.Second // shorter for testing
+
+	// Manually set state to ready (skip actual process start)
+	p.forceState(StateReady)
+
+	// Make a request that should timeout
+	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
+	w := httptest.NewRecorder()
+
+	start := time.Now()
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	go func() {
+		defer wg.Done()
+		p.ProxyRequest(w, req)
+	}()
+
+	// Wait for either completion or timeout
+	done := make(chan struct{})
+	go func() {
+		wg.Wait()
+		close(done)
+	}()
+
+	select {
+	case <-done:
+		elapsed := time.Since(start)
+		t.Logf("Request completed after %v", elapsed)
+
+		// Request should complete within timeout + gracefulStopTimeout + some buffer
+		maxExpected := time.Duration(cfg.RequestTimeout+2)*time.Second + 3*time.Second
+		if elapsed > maxExpected {
+			t.Errorf("Request took %v, expected less than %v with 5s timeout", elapsed, maxExpected)
+		} else {
+			t.Logf("✓ Request was properly terminated by timeout")
+		}
+
+	case <-time.After(15 * time.Second):
+		t.Fatalf("Test timed out after 15 seconds - request should have been killed by requestTimeout")
+	}
+}
+
+// TestProcess_RequestTimeoutWithRealProcess tests with an actual process
+func TestProcess_RequestTimeoutWithRealProcess(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping test with real process in short mode")
+	}
+
+	// This test would require a real llama.cpp server or similar
+	// For now, we can skip it or mock it
+	t.Skip("Requires real inference server")
+}

From 0e86bbcb688011bd625f23bf4b1a19f4d3aafae4 Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Sat, 31 Jan 2026 00:33:41 +0000
Subject: [PATCH 10/13] docs: add requestTimeout to README features list

Add brief mention of requestTimeout feature in the customizable
features section of README.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c2696235..8d372c10 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,8 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
 - ✅ API Key support - define keys to restrict access to API endpoints
 - ✅ Customizable
   - Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107))
-  - Automatic unloading of models after timeout by setting a `ttl`
+  - Automatic unloading of models after idle timeout by setting a `ttl`
+  - Request timeout protection with `requestTimeout` to prevent runaway inference
   - Reliable Docker and Podman support using `cmd` and `cmdStop` together
   - Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235))
 

From 79332e309eefcae4434b92458ac9081e82b0f861 Mon Sep 17 00:00:00 2001
From: Overcuriousity <user01@voodoo.mikoshi.dd>
Date: Sat, 31 Jan 2026 18:57:32 +0100
Subject: [PATCH 11/13] ui-svelte: improve Config editor dark mode styling

Fix editor cleanup and improve dark mode appearance with better colors,
contrast, and styling.

- Add proper editor disposal in $effect cleanup
- Update theme colors for better dark mode visibility
- Improve button styling with teal export button
- Better text contrast and subtle borders
- Refine error message styling

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 ui-svelte/src/routes/Config.svelte | 66 ++++++++++++++++++------------
 1 file changed, 40 insertions(+), 26 deletions(-)

diff --git a/ui-svelte/src/routes/Config.svelte b/ui-svelte/src/routes/Config.svelte
index cc830401..4d62bd38 100644
--- a/ui-svelte/src/routes/Config.svelte
+++ b/ui-svelte/src/routes/Config.svelte
@@ -31,39 +31,39 @@
 
   function getTheme(dark: boolean, readOnly: boolean) {
     return EditorView.theme({
-      "&": { 
+      "&": {
         height: "100%",
-        backgroundColor: dark ? (readOnly ? "#1a1a1a" : "#1f1f1f") : (readOnly ? "#f9fafb" : "#ffffff"),
+        backgroundColor: dark ? (readOnly ? "#1a1a1a" : "#252525") : (readOnly ? "#f9fafb" : "#ffffff"),
       },
-      ".cm-scroller": { 
+      ".cm-scroller": {
         overflow: "auto",
       },
-      ".cm-content": { 
+      ".cm-content": {
         fontFamily: "monospace",
-        color: dark ? "#e0e0e0" : "#1f2937",
+        color: dark ? "#d1d5db" : "#1f2937",
       },
       ".cm-gutters": {
-        backgroundColor: dark ? "#2a2a2a" : "#f3f4f6",
+        backgroundColor: dark ? (readOnly ? "#151515" : "#1f1f1f") : "#f3f4f6",
         color: dark ? "#6b7280" : "#9ca3af",
         border: "none",
       },
       ".cm-activeLineGutter": {
-        backgroundColor: dark ? "#374151" : "#e5e7eb",
+        backgroundColor: dark ? "#2d3748" : "#e5e7eb",
       },
       ".cm-activeLine": {
-        backgroundColor: dark ? "#374151" : "#f3f4f6",
+        backgroundColor: dark ? "#2d3748" : "#f3f4f6",
       },
       ".cm-selectionBackground, ::selection": {
-        backgroundColor: dark ? "#3b82f6" : "#bfdbfe",
+        backgroundColor: dark ? "#2d5a7b" : "#bfdbfe",
       },
       ".cm-cursor": {
-        borderLeftColor: dark ? "#60a5fa" : "#2563eb",
+        borderLeftColor: dark ? "#14b8a6" : "#2563eb",
       },
       // YAML syntax colors
       ".cm-atom": { color: dark ? "#fbbf24" : "#d97706" }, // true/false/null
-      ".cm-number": { color: dark ? "#a78bfa" : "#7c3aed" }, // numbers
-      ".cm-string": { color: dark ? "#34d399" : "#059669" }, // strings
-      ".cm-property": { color: dark ? "#60a5fa" : "#2563eb" }, // keys
+      ".cm-number": { color: dark ? "#c4b5fd" : "#7c3aed" }, // numbers
+      ".cm-string": { color: dark ? "#6ee7b7" : "#059669" }, // strings
+      ".cm-property": { color: dark ? "#7dd3fc" : "#2563eb" }, // keys
       ".cm-comment": { color: dark ? "#6b7280" : "#9ca3af" }, // comments
     }, { dark });
   }
@@ -219,36 +219,50 @@
     if (!loading && editorContainer && !editorView && currentConfig) {
       editorView = createEditor(editorContainer, currentConfig, false);
     }
+
+    return () => {
+      if (editorView) {
+        editorView.destroy();
+        editorView = null;
+      }
+    };
   });
 
   $effect(() => {
     if (!loading && exampleContainer && !exampleView && exampleConfig) {
       exampleView = createEditor(exampleContainer, exampleConfig, true);
     }
+
+    return () => {
+      if (exampleView) {
+        exampleView.destroy();
+        exampleView = null;
+      }
+    };
   });
 </script>
 
 <div class="flex flex-col h-full">
   <div class="mb-4 flex items-center justify-between">
-    <h2 class="text-xl font-semibold">Configuration Editor</h2>
+    <h2 class="text-xl font-semibold text-gray-900 dark:text-gray-100">Configuration Editor</h2>
     <div class="flex gap-2">
       <button
         onclick={importConfig}
-        class="px-4 py-2 bg-blue-500 hover:bg-blue-600 text-white rounded disabled:opacity-50"
+        class="px-4 py-2 bg-blue-600 hover:bg-blue-700 dark:bg-blue-700 dark:hover:bg-blue-800 text-white rounded disabled:opacity-50"
         disabled={loading || saving}
       >
         Import
       </button>
       <button
         onclick={exportConfig}
-        class="px-4 py-2 bg-green-500 hover:bg-green-600 text-white rounded disabled:opacity-50"
+        class="px-4 py-2 bg-teal-600 hover:bg-teal-700 dark:bg-teal-700 dark:hover:bg-teal-800 text-white rounded disabled:opacity-50"
         disabled={loading || saving || !currentConfig}
       >
         Export
       </button>
       <button
         onclick={saveConfig}
-        class="px-4 py-2 bg-orange-500 hover:bg-orange-600 text-white rounded disabled:opacity-50"
+        class="px-4 py-2 bg-gray-500 hover:bg-gray-600 dark:bg-gray-600 dark:hover:bg-gray-700 text-white rounded disabled:opacity-50"
         disabled={loading || saving || !currentConfig || !!validationError}
       >
         {saving ? "Saving..." : "Save & Reload"}
@@ -257,20 +271,20 @@
   </div>
 
   {#if validationError}
-    <div class="mb-4 p-3 bg-yellow-100 dark:bg-yellow-900 text-yellow-800 dark:text-yellow-200 rounded">
+    <div class="mb-4 p-3 bg-yellow-50 dark:bg-yellow-900/30 border border-yellow-200 dark:border-yellow-700 text-yellow-900 dark:text-yellow-200 rounded">
       <strong>Validation Error:</strong> {validationError}
     </div>
   {/if}
 
   {#if error}
-    <div class="mb-4 p-3 bg-red-100 dark:bg-red-900 text-red-800 dark:text-red-200 rounded">
+    <div class="mb-4 p-3 bg-red-50 dark:bg-red-900/30 border border-red-200 dark:border-red-700 text-red-900 dark:text-red-200 rounded">
       {error}
     </div>
   {/if}
 
   {#if loading}
     <div class="flex items-center justify-center h-full">
-      <div class="text-gray-500">Loading configuration...</div>
+      <div class="text-gray-500 dark:text-gray-400">Loading configuration...</div>
     </div>
   {:else}
     <div
@@ -280,19 +294,19 @@
     >
       <!-- Left panel: Editable config -->
       <div class="flex-1 flex flex-col min-h-0 min-w-0">
-        <h3 class="text-lg font-semibold mb-2">Current Config (Editable)</h3>
-        <div 
+        <h3 class="text-lg font-semibold mb-2 text-gray-900 dark:text-gray-100">Current Config (Editable)</h3>
+        <div
           bind:this={editorContainer}
-          class="flex-1 w-full border border-gray-300 dark:border-gray-600 rounded overflow-hidden bg-white dark:bg-gray-800"
+          class="flex-1 w-full border border-gray-300 dark:border-gray-700 rounded overflow-hidden bg-white dark:bg-[#252525]"
         ></div>
       </div>
 
       <!-- Right panel: Example config (read-only) -->
       <div class="flex-1 flex flex-col min-h-0 min-w-0">
-        <h3 class="text-lg font-semibold mb-2">Example Config (Reference)</h3>
-        <div 
+        <h3 class="text-lg font-semibold mb-2 text-gray-900 dark:text-gray-100">Example Config (Reference)</h3>
+        <div
           bind:this={exampleContainer}
-          class="flex-1 w-full border border-gray-300 dark:border-gray-600 rounded overflow-hidden bg-gray-50 dark:bg-gray-900"
+          class="flex-1 w-full border border-gray-300 dark:border-gray-700 rounded overflow-hidden bg-gray-50 dark:bg-[#1a1a1a]"
         ></div>
       </div>
     </div>

From 59db9f0754b65e80bd57e419134748be81500ea2 Mon Sep 17 00:00:00 2001
From: Overcuriousity <user01@voodoo.mikoshi.dd>
Date: Sat, 31 Jan 2026 19:50:27 +0100
Subject: [PATCH 12/13] ui-svelte: fix Config editor compartment collision and
 error handling

Fix theme compartment sharing bug and improve error response handling.

- create separate Compartment instances for each CodeMirror editor
- update createEditor to accept compartment parameter
- improve saveConfig error handling to parse both JSON and non-JSON responses
- include status code and statusText in error messages
---
 ui-svelte/src/routes/Config.svelte | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/ui-svelte/src/routes/Config.svelte b/ui-svelte/src/routes/Config.svelte
index 4d62bd38..e13e9e79 100644
--- a/ui-svelte/src/routes/Config.svelte
+++ b/ui-svelte/src/routes/Config.svelte
@@ -19,6 +19,7 @@
   let editorView: EditorView | null = null;
   let exampleView: EditorView | null = null;
   let themeCompartment = new Compartment();
+  let exampleThemeCompartment = new Compartment();
 
   function validateYAML(text: string): string | null {
     try {
@@ -68,7 +69,7 @@
     }, { dark });
   }
 
-  function createEditor(parent: HTMLElement, content: string, readOnly: boolean) {
+  function createEditor(parent: HTMLElement, content: string, readOnly: boolean, compartment: Compartment) {
     const state = EditorState.create({
       doc: content,
       extensions: [
@@ -76,7 +77,7 @@
         yaml(),
         EditorView.lineWrapping,
         EditorView.editable.of(!readOnly),
-        themeCompartment.of(getTheme($isDarkMode, readOnly)),
+        compartment.of(getTheme($isDarkMode, readOnly)),
         EditorView.updateListener.of((update) => {
           if (!readOnly && update.docChanged) {
             currentConfig = update.state.doc.toString();
@@ -102,7 +103,7 @@
     }
     if (exampleView) {
       exampleView.dispatch({
-        effects: themeCompartment.reconfigure(getTheme($isDarkMode, true))
+        effects: exampleThemeCompartment.reconfigure(getTheme($isDarkMode, true))
       });
     }
   });
@@ -155,8 +156,14 @@
       });
 
       if (!res.ok) {
-        const errData = await res.json();
-        throw new Error(errData.error || "Failed to save config");
+        let errMsg: string;
+        try {
+          const errData = await res.json();
+          errMsg = errData.error || JSON.stringify(errData);
+        } catch {
+          errMsg = await res.text();
+        }
+        throw new Error(`${res.status} ${res.statusText}: ${errMsg || "Failed to save config"}`);
       }
 
       alert("Config saved successfully! Application is reloading...");
@@ -217,7 +224,7 @@
 
   $effect(() => {
     if (!loading && editorContainer && !editorView && currentConfig) {
-      editorView = createEditor(editorContainer, currentConfig, false);
+      editorView = createEditor(editorContainer, currentConfig, false, themeCompartment);
     }
 
     return () => {
@@ -230,7 +237,7 @@
 
   $effect(() => {
     if (!loading && exampleContainer && !exampleView && exampleConfig) {
-      exampleView = createEditor(exampleContainer, exampleConfig, true);
+      exampleView = createEditor(exampleContainer, exampleConfig, true, exampleThemeCompartment);
     }
 
     return () => {

From 8e62ce1cd84f99acd98da5a8c473b8bb0310929b Mon Sep 17 00:00:00 2001
From: Overcuriousity <user01@voodoo.mikoshi.dd>
Date: Sat, 31 Jan 2026 21:43:51 +0100
Subject: [PATCH 13/13] ui-svelte: fix Config editor cursor jumping on input

Fix cursor jumping to top after typing by preventing reactive effect
from re-running on content changes. Use untrack() to read config state
without creating reactive dependency, ensuring editor is only created
once and not destroyed/recreated on each keystroke.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 ui-svelte/src/routes/Config.svelte | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ui-svelte/src/routes/Config.svelte b/ui-svelte/src/routes/Config.svelte
index e13e9e79..d3e1c036 100644
--- a/ui-svelte/src/routes/Config.svelte
+++ b/ui-svelte/src/routes/Config.svelte
@@ -1,5 +1,5 @@
 <script lang="ts">
-  import { onMount } from "svelte";
+  import { onMount, untrack } from "svelte";
   import { isNarrow, isDarkMode } from "../stores/theme";
   import { EditorView, basicSetup } from "codemirror";
   import { yaml } from "@codemirror/lang-yaml";
@@ -223,8 +223,8 @@
   });
 
   $effect(() => {
-    if (!loading && editorContainer && !editorView && currentConfig) {
-      editorView = createEditor(editorContainer, currentConfig, false, themeCompartment);
+    if (!loading && editorContainer && !editorView) {
+      editorView = createEditor(editorContainer, untrack(() => currentConfig), false, themeCompartment);
     }
 
     return () => {
@@ -236,8 +236,8 @@
   });
 
   $effect(() => {
-    if (!loading && exampleContainer && !exampleView && exampleConfig) {
-      exampleView = createEditor(exampleContainer, exampleConfig, true, exampleThemeCompartment);
+    if (!loading && exampleContainer && !exampleView) {
+      exampleView = createEditor(exampleContainer, untrack(() => exampleConfig), true, exampleThemeCompartment);
     }
 
     return () => {