From 3989c17d1b63f1f9ec90f5820eb27fd674b38530 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 30 Jan 2026 01:29:53 +0100 Subject: [PATCH 01/13] add timeout feature --- config-schema.json | 6 ++++++ config.example.yaml | 10 ++++++++++ docs/configuration.md | 10 ++++++++++ proxy/config/model_config.go | 5 +++++ proxy/process.go | 26 ++++++++++++++++++++++++++ 5 files changed, 57 insertions(+) diff --git a/config-schema.json b/config-schema.json index 8baa0cc4..9b77344a 100644 --- a/config-schema.json +++ b/config-schema.json @@ -216,6 +216,12 @@ "type": "boolean", "description": "Overrides the global sendLoadingState for this model. Ommitting this property will use the global setting." }, + "requestTimeout": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "Maximum time in seconds for a single request to complete before forcefully killing the model process. This prevents runaway inference processes from blocking the GPU indefinitely. 0 disables timeout (default). When exceeded, the process is terminated and must be restarted for the next request." + }, "unlisted": { "type": "boolean", "default": false, diff --git a/config.example.yaml b/config.example.yaml index d8282fc1..0ef80c02 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -249,6 +249,16 @@ models: # - recommended to be omitted and the default used concurrencyLimit: 0 + # requestTimeout: maximum time in seconds for a single request to complete + # - optional, default: 0 (no timeout) + # - useful for preventing runaway inference processes that never complete + # - when exceeded, the model process is forcefully stopped + # - protects against GPU overheating and blocking from stuck processes + # - the process must be restarted for the next request + # - set to 0 to disable timeout + # - recommended for models that may have infinite loops or excessive generation + requestTimeout: 0 # disabled by default, set to e.g., 300 for 5 minutes + # sendLoadingState: overrides the global sendLoadingState setting for this model # - optional, default: undefined (use global setting) sendLoadingState: false diff --git a/docs/configuration.md b/docs/configuration.md index 5aac2706..32713d57 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -319,6 +319,16 @@ models: # - recommended to be omitted and the default used concurrencyLimit: 0 + # requestTimeout: maximum time in seconds for a single request to complete + # - optional, default: 0 (no timeout) + # - useful for preventing runaway inference processes that never complete + # - when exceeded, the model process is forcefully stopped + # - protects against GPU overheating and blocking from stuck processes + # - the process must be restarted for the next request + # - set to 0 to disable timeout + # - recommended for models that may have infinite loops or excessive generation + requestTimeout: 300 # 5 minutes + # sendLoadingState: overrides the global sendLoadingState setting for this model # - optional, default: undefined (use global setting) sendLoadingState: false diff --git a/proxy/config/model_config.go b/proxy/config/model_config.go index 9dc37aea..6b2ba742 100644 --- a/proxy/config/model_config.go +++ b/proxy/config/model_config.go @@ -36,6 +36,10 @@ type ModelConfig struct { // override global setting SendLoadingState *bool `yaml:"sendLoadingState"` + + // Maximum time in seconds for a request to complete before killing the process + // 0 means no timeout (default) + RequestTimeout int `yaml:"requestTimeout"` } func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { @@ -53,6 +57,7 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { ConcurrencyLimit: 0, Name: "", Description: "", + RequestTimeout: 0, } // the default cmdStop to taskkill /f /t /pid ${PID} diff --git a/proxy/process.go b/proxy/process.go index 41427059..5ada9723 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -500,6 +500,32 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) { p.inFlightRequests.Done() }() + // Start timeout monitoring if requestTimeout is configured + var timeoutCancel context.CancelFunc + if p.config.RequestTimeout > 0 { + timeoutCtx, cancel := context.WithCancel(context.Background()) + timeoutCancel = cancel + + go func() { + timeoutDuration := time.Duration(p.config.RequestTimeout) * time.Second + timer := time.NewTimer(timeoutDuration) + defer timer.Stop() + + select { + case <-timer.C: + p.proxyLogger.Warnf("<%s> Request timeout exceeded (%v), force stopping process to prevent GPU blocking", p.ID, timeoutDuration) + // Force stop the process - this will kill the underlying inference process + p.StopImmediately() + case <-timeoutCtx.Done(): + // Request completed normally, cancel timeout + return + } + }() + + // Ensure timeout goroutine is cancelled when request completes + defer timeoutCancel() + } + // for #366 // - extract streaming param from request context, should have been set by proxymanager var srw *statusResponseWriter From c34372c60f47b5f3fae12aa34f03c9724726a5aa Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 30 Jan 2026 15:59:32 +0100 Subject: [PATCH 02/13] implement first draft of new feature --- README.md | 2 + config-schema.json | 5 ++ config.example.yaml | 18 ++++++ docs/configuration.md | 21 +++---- proxy/config/config.go | 43 ++++++++++++++ proxy/config/config_test.go | 105 +++++++++++++++++++++++++++++++++ proxy/config/model_config.go | 4 ++ proxy/process.go | 111 ++++++++++++++++++++++++++++++++--- proxy/process_test.go | 33 ++++++----- proxy/processgroup.go | 8 ++- proxy/processgroup_test.go | 9 +-- proxy/proxymanager.go | 21 ++++++- 12 files changed, 340 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index c2696235..b73ece6f 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and - Automatic unloading of models after timeout by setting a `ttl` - Reliable Docker and Podman support using `cmd` and `cmdStop` together - Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235)) + - RPC health checking for distributed inference - conditionally expose models based on RPC server availability ### Web UI @@ -174,6 +175,7 @@ Almost all configuration settings are optional and can be added one step at a ti - `useModelName` to override model names sent to upstream servers - `${PORT}` automatic port variables for dynamic port assignment - `filters` rewrite parts of requests before sending to the upstream server + - `rpcHealthCheck` monitor RPC server health for distributed inference models See the [configuration documentation](docs/configuration.md) for all options. diff --git a/config-schema.json b/config-schema.json index 8baa0cc4..63c04ae1 100644 --- a/config-schema.json +++ b/config-schema.json @@ -220,6 +220,11 @@ "type": "boolean", "default": false, "description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests." + }, + "rpcHealthCheck": { + "type": "boolean", + "default": false, + "description": "Enable TCP health checks for RPC endpoints specified in cmd. When enabled, parses --rpc host:port[,host:port,...] from cmd and performs health checks every 30 seconds. Models with unhealthy RPC endpoints are filtered from /v1/models and return 503 on inference requests." } } } diff --git a/config.example.yaml b/config.example.yaml index d8282fc1..ea827099 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -262,6 +262,24 @@ models: unlisted: true cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0 + # RPC health check example for distributed inference: + "qwen-distributed": + # rpcHealthCheck: enable TCP health checks for RPC endpoints + # - optional, default: false + # - when enabled, parses --rpc host:port[,host:port,...] from cmd + # - performs TCP connectivity checks every 30 seconds + # - model is only listed in /v1/models when ALL RPC endpoints are healthy + # - inference requests to unhealthy models return HTTP 503 + # - useful for distributed inference with llama.cpp's rpc-server + rpcHealthCheck: true + cmd: | + llama-server --port ${PORT} + --rpc 192.168.1.10:50051,192.168.1.11:50051 + -m Qwen2.5-32B-Instruct-Q4_K_M.gguf + -ngl 99 + name: "Qwen 32B (Distributed)" + description: "Large model using distributed RPC inference" + # Docker example: # container runtimes like Docker and Podman can be used reliably with # a combination of cmd, cmdStop, and ${MODEL_ID} diff --git a/docs/configuration.md b/docs/configuration.md index 5aac2706..3c7e9363 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -72,16 +72,17 @@ models: llama-swap supports many more features to customize how you want to manage your environment. -| Feature | Description | -| --------- | ---------------------------------------------- | -| `ttl` | automatic unloading of models after a timeout | -| `macros` | reusable snippets to use in configurations | -| `groups` | run multiple models at a time | -| `hooks` | event driven functionality | -| `env` | define environment variables per model | -| `aliases` | serve a model with different names | -| `filters` | modify requests before sending to the upstream | -| `...` | And many more tweaks | +| Feature | Description | +| ----------------- | ------------------------------------------------------- | +| `ttl` | automatic unloading of models after a timeout | +| `macros` | reusable snippets to use in configurations | +| `groups` | run multiple models at a time | +| `hooks` | event driven functionality | +| `env` | define environment variables per model | +| `aliases` | serve a model with different names | +| `filters` | modify requests before sending to the upstream | +| `rpcHealthCheck` | monitor RPC server health for distributed inference | +| `...` | And many more tweaks | ## Full Configuration Example diff --git a/proxy/config/config.go b/proxy/config/config.go index c4387f40..4b7dbb2d 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -3,6 +3,7 @@ package config import ( "fmt" "io" + "net" "net/url" "os" "regexp" @@ -533,6 +534,48 @@ func SanitizeCommand(cmdStr string) ([]string, error) { return args, nil } +// ParseRPCEndpoints extracts RPC endpoints from command string +// Handles: --rpc host:port,host2:port2 or --rpc=host:port or -rpc host:port +func ParseRPCEndpoints(cmdStr string) ([]string, error) { + args, err := SanitizeCommand(cmdStr) + if err != nil { + return nil, err + } + + var endpoints []string + for i, arg := range args { + if arg == "--rpc" || arg == "-rpc" { + if i+1 < len(args) { + endpoints = parseEndpointList(args[i+1]) + } + } else if strings.HasPrefix(arg, "--rpc=") { + endpoints = parseEndpointList(strings.TrimPrefix(arg, "--rpc=")) + } else if strings.HasPrefix(arg, "-rpc=") { + endpoints = parseEndpointList(strings.TrimPrefix(arg, "-rpc=")) + } + } + + // Validate each endpoint + for _, ep := range endpoints { + if _, _, err := net.SplitHostPort(ep); err != nil { + return nil, fmt.Errorf("invalid RPC endpoint %q: %w", ep, err) + } + } + + return endpoints, nil +} + +func parseEndpointList(s string) []string { + parts := strings.Split(s, ",") + var result []string + for _, p := range parts { + if p = strings.TrimSpace(p); p != "" { + result = append(result, p) + } + } + return result +} + func StripComments(cmdStr string) string { var cleanedLines []string for _, line := range strings.Split(cmdStr, "\n") { diff --git a/proxy/config/config_test.go b/proxy/config/config_test.go index a19cbb56..11552f9d 100644 --- a/proxy/config/config_test.go +++ b/proxy/config/config_test.go @@ -1309,3 +1309,108 @@ peers: assert.Contains(t, err.Error(), "unknown macro") }) } + +func TestParseRPCEndpoints_ValidFormats(t *testing.T) { + tests := []struct { + name string + cmd string + expected []string + }{ + { + name: "single endpoint with --rpc", + cmd: "llama-server --rpc localhost:50051 -ngl 99", + expected: []string{"localhost:50051"}, + }, + { + name: "single endpoint with --rpc=", + cmd: "llama-server --rpc=192.168.1.100:50051 -ngl 99", + expected: []string{"192.168.1.100:50051"}, + }, + { + name: "single endpoint with -rpc", + cmd: "llama-server -rpc localhost:50051 -ngl 99", + expected: []string{"localhost:50051"}, + }, + { + name: "single endpoint with -rpc=", + cmd: "llama-server -rpc=localhost:50051 -ngl 99", + expected: []string{"localhost:50051"}, + }, + { + name: "multiple endpoints comma-separated", + cmd: "llama-server --rpc 192.168.1.10:50051,192.168.1.11:50051 -ngl 99", + expected: []string{"192.168.1.10:50051", "192.168.1.11:50051"}, + }, + { + name: "multiple endpoints with spaces trimmed", + cmd: "llama-server --rpc '192.168.1.10:50051, 192.168.1.11:50051' -ngl 99", + expected: []string{"192.168.1.10:50051", "192.168.1.11:50051"}, + }, + { + name: "IPv6 endpoint", + cmd: "llama-server --rpc [::1]:50051 -ngl 99", + expected: []string{"[::1]:50051"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + endpoints, err := ParseRPCEndpoints(tt.cmd) + assert.NoError(t, err) + assert.Equal(t, tt.expected, endpoints) + }) + } +} + +func TestParseRPCEndpoints_NoRPCFlag(t *testing.T) { + cmd := "llama-server -ngl 99 -m model.gguf" + endpoints, err := ParseRPCEndpoints(cmd) + assert.NoError(t, err) + assert.Empty(t, endpoints) +} + +func TestParseRPCEndpoints_InvalidFormats(t *testing.T) { + tests := []struct { + name string + cmd string + wantErr string + }{ + { + name: "missing port", + cmd: "llama-server --rpc localhost -ngl 99", + wantErr: "invalid RPC endpoint", + }, + { + name: "invalid host:port format", + cmd: "llama-server --rpc not-a-valid-endpoint -ngl 99", + wantErr: "invalid RPC endpoint", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := ParseRPCEndpoints(tt.cmd) + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr) + }) + } +} + +func TestParseRPCEndpoints_EmptyEndpointsFiltered(t *testing.T) { + // Empty strings after commas are filtered out + cmd := "llama-server --rpc 'localhost:50051,,' -ngl 99" + endpoints, err := ParseRPCEndpoints(cmd) + assert.NoError(t, err) + assert.Equal(t, []string{"localhost:50051"}, endpoints) +} + +func TestParseRPCEndpoints_MultilineCommand(t *testing.T) { + cmd := `llama-server \ + --rpc localhost:50051 \ + -ngl 99 \ + -m model.gguf` + + endpoints, err := ParseRPCEndpoints(cmd) + assert.NoError(t, err) + assert.Equal(t, []string{"localhost:50051"}, endpoints) +} diff --git a/proxy/config/model_config.go b/proxy/config/model_config.go index 9dc37aea..e5635b24 100644 --- a/proxy/config/model_config.go +++ b/proxy/config/model_config.go @@ -36,6 +36,9 @@ type ModelConfig struct { // override global setting SendLoadingState *bool `yaml:"sendLoadingState"` + + // RPC health checking + RPCHealthCheck bool `yaml:"rpcHealthCheck"` } func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { @@ -53,6 +56,7 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { ConcurrencyLimit: 0, Name: "", Description: "", + RPCHealthCheck: false, } // the default cmdStop to taskkill /f /t /pid ${PID} diff --git a/proxy/process.go b/proxy/process.go index 41427059..dee5f962 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -79,18 +79,25 @@ type Process struct { // track the number of failed starts failedStartCount int + + // RPC health checking + rpcEndpoints []string + rpcHealthy atomic.Bool + rpcHealthTicker *time.Ticker + rpcHealthCancel context.CancelFunc + shutdownCtx context.Context // from ProxyManager for graceful shutdown } -func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor) *Process { +func NewProcess(ID string, healthCheckTimeout int, modelConfig config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor, shutdownCtx context.Context) *Process { concurrentLimit := 10 - if config.ConcurrencyLimit > 0 { - concurrentLimit = config.ConcurrencyLimit + if modelConfig.ConcurrencyLimit > 0 { + concurrentLimit = modelConfig.ConcurrencyLimit } // Setup the reverse proxy. - proxyURL, err := url.Parse(config.Proxy) + proxyURL, err := url.Parse(modelConfig.Proxy) if err != nil { - proxyLogger.Errorf("<%s> invalid proxy URL %q: %v", ID, config.Proxy, err) + proxyLogger.Errorf("<%s> invalid proxy URL %q: %v", ID, modelConfig.Proxy, err) } var reverseProxy *httputil.ReverseProxy @@ -105,9 +112,9 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr } } - return &Process{ + p := &Process{ ID: ID, - config: config, + config: modelConfig, cmd: nil, reverseProxy: reverseProxy, cancelUpstream: nil, @@ -124,7 +131,23 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr // stop timeout gracefulStopTimeout: 10 * time.Second, cmdWaitChan: make(chan struct{}), + shutdownCtx: shutdownCtx, + } + + // Parse RPC endpoints if health checking enabled + if modelConfig.RPCHealthCheck { + endpoints, err := config.ParseRPCEndpoints(modelConfig.Cmd) + if err != nil { + proxyLogger.Errorf("<%s> failed to parse RPC endpoints: %v", ID, err) + } else if len(endpoints) == 0 { + proxyLogger.Warnf("<%s> rpcHealthCheck enabled but no --rpc flag found in cmd", ID) + } else { + p.rpcEndpoints = endpoints + p.rpcHealthy.Store(true) // assume healthy initially + } } + + return p } // LogMonitor returns the log monitor associated with the process. @@ -362,6 +385,7 @@ func (p *Process) start() error { return fmt.Errorf("failed to set Process state to ready: current state: %v, error: %v", curState, err) } else { p.failedStartCount = 0 + p.startRPCHealthChecker() return nil } } @@ -385,6 +409,8 @@ func (p *Process) StopImmediately() { return } + p.stopRPCHealthChecker() + p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState()) if curState, err := p.swapState(StateReady, StateStopping); err != nil { p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState) @@ -877,3 +903,74 @@ func (s *statusResponseWriter) Flush() { flusher.Flush() } } + +// startRPCHealthChecker launches background goroutine for RPC health monitoring +func (p *Process) startRPCHealthChecker() { + if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 { + return + } + + ctx, cancel := context.WithCancel(p.shutdownCtx) + p.rpcHealthCancel = cancel + p.rpcHealthTicker = time.NewTicker(30 * time.Second) + + go func() { + defer p.rpcHealthTicker.Stop() + + // Run initial check immediately + p.checkRPCHealth() + + for { + select { + case <-ctx.Done(): + p.proxyLogger.Debugf("<%s> RPC health checker shutting down", p.ID) + return + case <-p.rpcHealthTicker.C: + if p.CurrentState() != StateReady { + return // Process no longer ready, exit + } + p.checkRPCHealth() + } + } + }() +} + +func (p *Process) checkRPCHealth() { + allHealthy := true + + for _, endpoint := range p.rpcEndpoints { + dialer := net.Dialer{Timeout: 500 * time.Millisecond} + conn, err := dialer.Dial("tcp", endpoint) + if err != nil { + p.proxyLogger.Warnf("<%s> RPC endpoint %s unhealthy: %v", p.ID, endpoint, err) + allHealthy = false + break + } + conn.Close() + } + + wasHealthy := p.rpcHealthy.Load() + p.rpcHealthy.Store(allHealthy) + + // Log state changes + if wasHealthy && !allHealthy { + p.proxyLogger.Infof("<%s> RPC endpoints now UNHEALTHY", p.ID) + } else if !wasHealthy && allHealthy { + p.proxyLogger.Infof("<%s> RPC endpoints now HEALTHY", p.ID) + } +} + +func (p *Process) stopRPCHealthChecker() { + if p.rpcHealthCancel != nil { + p.rpcHealthCancel() + p.rpcHealthCancel = nil + } +} + +// IsRPCHealthy returns true if RPC health checking is disabled or all endpoints healthy +func (p *Process) IsRPCHealthy() bool { + if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 { + return true // not using RPC health checks + } + return p.rpcHealthy.Load() +} diff --git a/proxy/process_test.go b/proxy/process_test.go index 3881c3dd..87e31d6d 100644 --- a/proxy/process_test.go +++ b/proxy/process_test.go @@ -1,6 +1,7 @@ package proxy import ( + "context" "fmt" "net/http" "net/http/httptest" @@ -33,7 +34,7 @@ func TestProcess_AutomaticallyStartsUpstream(t *testing.T) { config := getTestSimpleResponderConfig(expectedMessage) // Create a process - process := NewProcess("test-process", 5, config, debugLogger, debugLogger) + process := NewProcess("test-process", 5, config, debugLogger, debugLogger, context.Background()) defer process.Stop() req := httptest.NewRequest("GET", "/test", nil) @@ -69,7 +70,7 @@ func TestProcess_WaitOnMultipleStarts(t *testing.T) { expectedMessage := "testing91931" config := getTestSimpleResponderConfig(expectedMessage) - process := NewProcess("test-process", 5, config, debugLogger, debugLogger) + process := NewProcess("test-process", 5, config, debugLogger, debugLogger, context.Background()) defer process.Stop() var wg sync.WaitGroup @@ -97,7 +98,7 @@ func TestProcess_BrokenModelConfig(t *testing.T) { CheckEndpoint: "/health", } - process := NewProcess("broken", 1, config, debugLogger, debugLogger) + process := NewProcess("broken", 1, config, debugLogger, debugLogger, context.Background()) req := httptest.NewRequest("GET", "/", nil) w := httptest.NewRecorder() @@ -122,7 +123,7 @@ func TestProcess_UnloadAfterTTL(t *testing.T) { config.UnloadAfter = 3 // seconds assert.Equal(t, 3, config.UnloadAfter) - process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger) + process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger, context.Background()) defer process.Stop() // this should take 4 seconds @@ -164,7 +165,7 @@ func TestProcess_LowTTLValue(t *testing.T) { config.UnloadAfter = 1 // second assert.Equal(t, 1, config.UnloadAfter) - process := NewProcess("ttl", 2, config, debugLogger, debugLogger) + process := NewProcess("ttl", 2, config, debugLogger, debugLogger, context.Background()) defer process.Stop() for i := 0; i < 100; i++ { @@ -191,7 +192,7 @@ func TestProcess_HTTPRequestsHaveTimeToFinish(t *testing.T) { expectedMessage := "12345" config := getTestSimpleResponderConfig(expectedMessage) - process := NewProcess("t", 10, config, debugLogger, debugLogger) + process := NewProcess("t", 10, config, debugLogger, debugLogger, context.Background()) defer process.Stop() results := map[string]string{ @@ -264,7 +265,7 @@ func TestProcess_SwapState(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - p := NewProcess("test", 10, getTestSimpleResponderConfig("test"), debugLogger, debugLogger) + p := NewProcess("test", 10, getTestSimpleResponderConfig("test"), debugLogger, debugLogger, context.Background()) p.state = test.currentState resultState, err := p.swapState(test.expectedState, test.newState) @@ -297,7 +298,7 @@ func TestProcess_ShutdownInterruptsHealthCheck(t *testing.T) { config.Proxy = "http://localhost:9998/test" healthCheckTTLSeconds := 30 - process := NewProcess("test-process", healthCheckTTLSeconds, config, debugLogger, debugLogger) + process := NewProcess("test-process", healthCheckTTLSeconds, config, debugLogger, debugLogger, context.Background()) // make it a lot faster process.healthCheckLoopInterval = time.Second @@ -332,7 +333,7 @@ func TestProcess_ExitInterruptsHealthCheck(t *testing.T) { CheckEndpoint: "/health", } - process := NewProcess("sleepy", checkHealthTimeout, config, debugLogger, debugLogger) + process := NewProcess("sleepy", checkHealthTimeout, config, debugLogger, debugLogger, context.Background()) process.healthCheckLoopInterval = time.Second // make it faster err := process.start() assert.Equal(t, "upstream command exited prematurely but successfully", err.Error()) @@ -350,7 +351,7 @@ func TestProcess_ConcurrencyLimit(t *testing.T) { // only allow 1 concurrent request at a time config.ConcurrencyLimit = 1 - process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger) + process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger, context.Background()) assert.Equal(t, 1, cap(process.concurrencyLimitSemaphore)) defer process.Stop() @@ -375,7 +376,7 @@ func TestProcess_StopImmediately(t *testing.T) { expectedMessage := "test_stop_immediate" config := getTestSimpleResponderConfig(expectedMessage) - process := NewProcess("stop_immediate", 2, config, debugLogger, debugLogger) + process := NewProcess("stop_immediate", 2, config, debugLogger, debugLogger, context.Background()) defer process.Stop() err := process.start() @@ -415,7 +416,7 @@ func TestProcess_ForceStopWithKill(t *testing.T) { CheckEndpoint: "/health", } - process := NewProcess("stop_immediate", 2, conf, debugLogger, debugLogger) + process := NewProcess("stop_immediate", 2, conf, debugLogger, debugLogger, context.Background()) defer process.Stop() // reduce to make testing go faster @@ -465,7 +466,7 @@ func TestProcess_StopCmd(t *testing.T) { conf.CmdStop = "kill -TERM ${PID}" } - process := NewProcess("testStopCmd", 2, conf, debugLogger, debugLogger) + process := NewProcess("testStopCmd", 2, conf, debugLogger, debugLogger, context.Background()) defer process.Stop() err := process.start() @@ -485,8 +486,8 @@ func TestProcess_EnvironmentSetCorrectly(t *testing.T) { // ensure the additiona variables are appended to the process' environment configWEnv.Env = append(configWEnv.Env, "TEST_ENV1=1", "TEST_ENV2=2") - process1 := NewProcess("env_test", 2, conf, debugLogger, debugLogger) - process2 := NewProcess("env_test", 2, configWEnv, debugLogger, debugLogger) + process1 := NewProcess("env_test", 2, conf, debugLogger, debugLogger, context.Background()) + process2 := NewProcess("env_test", 2, configWEnv, debugLogger, debugLogger, context.Background()) process1.start() defer process1.Stop() @@ -521,7 +522,7 @@ func TestProcess_ReverseProxyPanicIsHandled(t *testing.T) { expectedMessage := "panic_test" config := getTestSimpleResponderConfig(expectedMessage) - process := NewProcess("panic-test", 5, config, debugLogger, debugLogger) + process := NewProcess("panic-test", 5, config, debugLogger, debugLogger, context.Background()) defer process.Stop() // Start the process diff --git a/proxy/processgroup.go b/proxy/processgroup.go index b401d8a6..c920f302 100644 --- a/proxy/processgroup.go +++ b/proxy/processgroup.go @@ -1,6 +1,7 @@ package proxy import ( + "context" "fmt" "net/http" "slices" @@ -24,9 +25,11 @@ type ProcessGroup struct { // map of current processes processes map[string]*Process lastUsedProcess string + + shutdownCtx context.Context } -func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup { +func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor, shutdownCtx context.Context) *ProcessGroup { groupConfig, ok := config.Groups[id] if !ok { panic("Unable to find configuration for group id: " + id) @@ -41,13 +44,14 @@ func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, u proxyLogger: proxyLogger, upstreamLogger: upstreamLogger, processes: make(map[string]*Process), + shutdownCtx: shutdownCtx, } // Create a Process for each member in the group for _, modelID := range groupConfig.Members { modelConfig, modelID, _ := pg.config.FindConfig(modelID) processLogger := NewLogMonitorWriter(upstreamLogger) - process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger) + process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger, shutdownCtx) pg.processes[modelID] = process } diff --git a/proxy/processgroup_test.go b/proxy/processgroup_test.go index 6b90f443..55e5276a 100644 --- a/proxy/processgroup_test.go +++ b/proxy/processgroup_test.go @@ -2,6 +2,7 @@ package proxy import ( "bytes" + "context" "net/http" "net/http/httptest" "sync" @@ -35,12 +36,12 @@ var processGroupTestConfig = config.AddDefaultGroupToConfig(config.Config{ }) func TestProcessGroup_DefaultHasCorrectModel(t *testing.T) { - pg := NewProcessGroup(config.DEFAULT_GROUP_ID, processGroupTestConfig, testLogger, testLogger) + pg := NewProcessGroup(config.DEFAULT_GROUP_ID, processGroupTestConfig, testLogger, testLogger, context.Background()) assert.True(t, pg.HasMember("model5")) } func TestProcessGroup_HasMember(t *testing.T) { - pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger) + pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger, context.Background()) assert.True(t, pg.HasMember("model1")) assert.True(t, pg.HasMember("model2")) assert.False(t, pg.HasMember("model3")) @@ -74,7 +75,7 @@ func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) { }, }) - pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger) + pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger, context.Background()) defer pg.StopProcesses(StopWaitForInflightRequest) tests := []string{"model1", "model2", "model3", "model4", "model5"} @@ -96,7 +97,7 @@ func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) { } func TestProcessGroup_ProxyRequestSwapIsFalse(t *testing.T) { - pg := NewProcessGroup("G2", processGroupTestConfig, testLogger, testLogger) + pg := NewProcessGroup("G2", processGroupTestConfig, testLogger, testLogger, context.Background()) defer pg.StopProcesses(StopWaitForInflightRequest) tests := []string{"model3", "model4"} diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 5a016bc5..446362ce 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -167,7 +167,7 @@ func New(proxyConfig config.Config) *ProxyManager { // create the process groups for groupID := range proxyConfig.Groups { - processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger) + processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger, shutdownCtx) pm.processGroups[groupID] = processGroup } @@ -475,6 +475,16 @@ func (pm *ProxyManager) listModelsHandler(c *gin.Context) { continue } + // Filter models with unhealthy RPC endpoints + if processGroup := pm.findGroupByModelName(id); processGroup != nil { + if process, ok := processGroup.GetMember(id); ok { + if !process.IsRPCHealthy() { + pm.proxyLogger.Debugf("<%s> filtered from /v1/models (unhealthy RPC)", id) + continue + } + } + } + data = append(data, newRecord(id, modelConfig)) // Include aliases @@ -627,6 +637,15 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) { return } + // Check RPC health before processing request + if process, ok := processGroup.GetMember(modelID); ok { + if !process.IsRPCHealthy() { + pm.sendErrorResponse(c, http.StatusServiceUnavailable, + fmt.Sprintf("model %s unavailable (RPC endpoints unhealthy)", modelID)) + return + } + } + // issue #69 allow custom model names to be sent to upstream useModelName := pm.config.Models[modelID].UseModelName if useModelName != "" { From 29ef36405921a5239459f7968b9f22f006a2e4d0 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 30 Jan 2026 16:27:20 +0100 Subject: [PATCH 03/13] proxy/config: fix RPC endpoint parsing on Windows Fix parseEndpointList to handle single and double quotes that are treated as literal characters on Windows. - Strip surrounding quotes before parsing comma-separated endpoints - Fixes test failures on Windows CI Co-Authored-By: Claude Sonnet 4.5 --- proxy/config/config.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/proxy/config/config.go b/proxy/config/config.go index 4b7dbb2d..ac8a9b54 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -566,6 +566,15 @@ func ParseRPCEndpoints(cmdStr string) ([]string, error) { } func parseEndpointList(s string) []string { + // Strip surrounding quotes (both single and double) that may be present + // on Windows where single quotes are not handled by the shell parser + s = strings.TrimSpace(s) + if len(s) >= 2 { + if (s[0] == '\'' && s[len(s)-1] == '\'') || (s[0] == '"' && s[len(s)-1] == '"') { + s = s[1 : len(s)-1] + } + } + parts := strings.Split(s, ",") var result []string for _, p := range parts { From ac074d15ea3962f0019b1647d60f0ccf3e05743a Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 30 Jan 2026 18:12:38 +0100 Subject: [PATCH 04/13] fix unit test --- proxy/config/config.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/proxy/config/config.go b/proxy/config/config.go index ac8a9b54..1866d03f 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -566,9 +566,10 @@ func ParseRPCEndpoints(cmdStr string) ([]string, error) { } func parseEndpointList(s string) []string { - // Strip surrounding quotes (both single and double) that may be present - // on Windows where single quotes are not handled by the shell parser s = strings.TrimSpace(s) + + // Strip surrounding quotes (both single and double) from the whole string + // if they match. This handles cases like: "host:port,host2:port2" if len(s) >= 2 { if (s[0] == '\'' && s[len(s)-1] == '\'') || (s[0] == '"' && s[len(s)-1] == '"') { s = s[1 : len(s)-1] @@ -578,7 +579,12 @@ func parseEndpointList(s string) []string { parts := strings.Split(s, ",") var result []string for _, p := range parts { - if p = strings.TrimSpace(p); p != "" { + p = strings.TrimSpace(p) + // Strip any remaining leading/trailing quotes from individual parts + // This handles Windows where shlex doesn't handle single quotes and + // may split 'host:port, host2:port' into "'host:port," and "host2:port'" + p = strings.Trim(p, "'\"") + if p != "" { result = append(result, p) } } From c8f27617df918bbb7eb53e35ed12a2534d36c4eb Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 30 Jan 2026 23:03:58 +0100 Subject: [PATCH 05/13] rework web interface --- llama-swap.go | 16 +- proxy/proxymanager.go | 9 + proxy/proxymanager_api.go | 64 ++++++ test-config.yaml | 264 ++++++++++++++++++++++ ui-svelte/package-lock.json | 200 ++++++++++++++++- ui-svelte/package.json | 6 + ui-svelte/src/App.svelte | 2 + ui-svelte/src/components/Header.svelte | 8 + ui-svelte/src/routes/Config.svelte | 300 +++++++++++++++++++++++++ 9 files changed, 857 insertions(+), 12 deletions(-) create mode 100644 test-config.yaml create mode 100644 ui-svelte/src/routes/Config.svelte diff --git a/llama-swap.go b/llama-swap.go index 9706e07d..60ccbc73 100644 --- a/llama-swap.go +++ b/llama-swap.go @@ -97,6 +97,7 @@ func main() { currentPM.Shutdown() newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) + newPM.SetConfigPath(*configPath) srv.Handler = newPM fmt.Println("Configuration Reloaded") @@ -114,6 +115,7 @@ func main() { } newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) + newPM.SetConfigPath(*configPath) srv.Handler = newPM } } @@ -121,13 +123,15 @@ func main() { // load the initial proxy manager reloadProxyManager() debouncedReload := debounce(time.Second, reloadProxyManager) - if *watchConfig { - defer event.On(func(e proxy.ConfigFileChangedEvent) { - if e.ReloadingState == proxy.ReloadingStateStart { - debouncedReload() - } - })() + // Always listen for API-triggered config changes + defer event.On(func(e proxy.ConfigFileChangedEvent) { + if e.ReloadingState == proxy.ReloadingStateStart { + debouncedReload() + } + })() + + if *watchConfig { fmt.Println("Watching Configuration for changes") go func() { absConfigPath, err := filepath.Abs(*configPath) diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 5a016bc5..bf227120 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -52,6 +52,9 @@ type ProxyManager struct { commit string version string + // config file path for editing + configPath string + // peer proxy see: #296, #433 peerProxy *PeerProxy } @@ -966,3 +969,9 @@ func (pm *ProxyManager) SetVersion(buildDate string, commit string, version stri pm.commit = commit pm.version = version } + +func (pm *ProxyManager) SetConfigPath(configPath string) { + pm.Lock() + defer pm.Unlock() + pm.configPath = configPath +} diff --git a/proxy/proxymanager_api.go b/proxy/proxymanager_api.go index fe4326d0..f590cd6e 100644 --- a/proxy/proxymanager_api.go +++ b/proxy/proxymanager_api.go @@ -4,7 +4,9 @@ import ( "context" "encoding/json" "fmt" + "io" "net/http" + "os" "sort" "strings" @@ -31,6 +33,9 @@ func addApiHandlers(pm *ProxyManager) { apiGroup.GET("/events", pm.apiSendEvents) apiGroup.GET("/metrics", pm.apiGetMetrics) apiGroup.GET("/version", pm.apiGetVersion) + apiGroup.GET("/config/current", pm.apiGetCurrentConfig) + apiGroup.GET("/config/example", pm.apiGetExampleConfig) + apiGroup.POST("/config", pm.apiUpdateConfig) } } @@ -250,3 +255,62 @@ func (pm *ProxyManager) apiGetVersion(c *gin.Context) { "build_date": pm.buildDate, }) } + +func (pm *ProxyManager) apiGetCurrentConfig(c *gin.Context) { + pm.Lock() + configPath := pm.configPath + pm.Unlock() + + if configPath == "" { + pm.sendErrorResponse(c, http.StatusNotFound, "Config file path not set") + return + } + + data, err := os.ReadFile(configPath) + if err != nil { + pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to read config file: %v", err)) + return + } + + c.Data(http.StatusOK, "text/yaml; charset=utf-8", data) +} + +func (pm *ProxyManager) apiGetExampleConfig(c *gin.Context) { + data, err := os.ReadFile("config.example.yaml") + if err != nil { + pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to read example config: %v", err)) + return + } + + c.Data(http.StatusOK, "text/yaml; charset=utf-8", data) +} + +func (pm *ProxyManager) apiUpdateConfig(c *gin.Context) { + pm.Lock() + configPath := pm.configPath + pm.Unlock() + + if configPath == "" { + pm.sendErrorResponse(c, http.StatusBadRequest, "Config file path not set") + return + } + + body, err := io.ReadAll(c.Request.Body) + if err != nil { + pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("Failed to read request body: %v", err)) + return + } + + // Write to config file + if err := os.WriteFile(configPath, body, 0644); err != nil { + pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to write config file: %v", err)) + return + } + + // Trigger config reload event + event.Emit(ConfigFileChangedEvent{ + ReloadingState: ReloadingStateStart, + }) + + c.JSON(http.StatusOK, gin.H{"message": "Config updated successfully. Reloading..."}) +} diff --git a/test-config.yaml b/test-config.yaml new file mode 100644 index 00000000..15fd5784 --- /dev/null +++ b/test-config.yaml @@ -0,0 +1,264 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json +# +# llama-swap configuration for 16GB VRAM AMD Radeon RX 6800 XT (gfx1030) +# Optimized for headless system with no display overhead +# ------------------------------------- + +healthCheckTimeout: 300 +logLevel: info +logTimeFormat: "rfc3339" +logToStdout: "proxy" +metricsMaxInMemory: 1000 +startPort: 10001 +sendLoadingState: false +includeAliasesInList: false + +macros: + "latest-llama": > + /home/svc-gpgpu/.local/bin/llama-server + --port ${PORT} --host 0.0.0.0 -b 512 -ub 32 -np 1 + "default_ctx": 4096 + "rocm_device": "0" + +models: + # ======================================== + # GENERAL PURPOSE MODELS + # ======================================== + + "qwen3:14b-q5_k_m-32768": + cmd: | + ${latest-llama} + -hf Qwen/Qwen3-14B-GGUF:q5_k_m + --ctx-size 32768 + -fa auto + -ctv q8_0 + -ctk q8_0 + -ngl 99 + --jinja + --mmap + -b 512 + name: "qwen3:14b-q5_k_m-32768" + description: "VRAM: 12505 MiB" + ttl: 600 + + "qwen3:8b-q5_k_m-40960": + cmd: | + ${latest-llama} + -hf Qwen/Qwen3-8B-GGUF:q5_k_m + --ctx-size 40960 + -fa auto + -ctv q8_0 + -ctk q8_0 + -ngl 99 + --jinja + --mmap + -b 512 + name: "qwen3:8b-q5_k_m-40960" + description: "VRAM: 8491 MiB" + ttl: 600 + + "qwen3:8b-q8_0-32768": + cmd: | + ${latest-llama} + -hf Qwen/Qwen3-8B-GGUF:q8_0 + --ctx-size 32768 + -fa auto + -ctv q8_0 + -ctk q8_0 + -ngl 99 + --jinja + --mmap + -b 512 + name: "qwen3:8b-q8_0-32768" + description: "VRAM: 10381 MiB" + ttl: 600 + + "ministral-3:14b-instruct-q5_k_m-20480-vision": + cmd: | + ${latest-llama} + -hf mistralai/Ministral-3-14B-Instruct-2512-GGUF:q5_k_m + --ctx-size 20480 + -fa off + -ngl 99 + --mmap + --jinja + --mmproj-auto + name: "ministral-3:14b-instruct-q5_k_m-20480-vision" + description: "VRAM: 13184 MiB" + ttl: 600 + + "ministral-3:14b-reasoning-q5_k_m-20480-vision": + cmd: | + ${latest-llama} + -hf mistralai/Ministral-3-14B-Reasoning-2512-GGUF:q5_k_m + --ctx-size 20480 + -fa off + -ngl 99 + --mmap + --jinja + --mmproj-auto + name: "ministral-3:14b-reasoning-q5_k_m-20480-vision" + description: "VRAM: 13184 MiB" + ttl: 600 + + "ministral-3:14b-instruct-q5_k_m-32768": + cmd: | + ${latest-llama} + -hf mistralai/Ministral-3-14B-Instruct-2512-GGUF:q5_k_m + --ctx-size 32768 + -fa off + -ngl 99 + --mmap + --jinja + --no-mmproj + name: "ministral-3:14b-instruct-q5_k_m-32768" + description: "VRAM: 14224 MiB" + ttl: 600 + + "ministral-3:14b-reasoning-q5_k_m-32768": + cmd: | + ${latest-llama} + -hf mistralai/Ministral-3-14B-Reasoning-2512-GGUF:q5_k_m + --ctx-size 32768 + -fa off + -ngl 99 + --mmap + --jinja + --no-mmproj + name: "ministral-3:14b-reasoning-q5_k_m-32768" + description: "VRAM: 14224 MiB" + ttl: 600 + + # ======================================== + # UTILITY MODELS (General Purpose) + # ======================================== + + "embeddinggemma:300m": + cmd: | + ${latest-llama} + -hf gaianet/embeddinggemma-300m-GGUF + --ctx-size 2048 + -fa off + -ngl 99 + --embeddings + --pooling mean + -b 1024 + -ub 1024 + name: "embeddinggemma:300m" + description: "VRAM: 512 MiB" + ttl: 3600 + + "bge-reranker-v2-m3": + cmd: | + ${latest-llama} + -hf Felladrin/bge-reranker-v2-m3-Q8_0-GGUF + --ctx-size 8192 + -ngl 99 + --mmap + --rerank + --embedding + --pooling rank + -b 8192 + -ub 8192 + name: "bge-reranker-v2-m3" + description: "VRAM: 1077 MiB" + ttl: 3600 + + # ======================================== + # CODING MODELS + # ======================================== + + "qwen2.5-coder:14b-q5_k_m-32768": + cmd: | + ${latest-llama} + -hf Qwen/Qwen2.5-Coder-14B-Instruct-GGUF:q5_k_m + --ctx-size 32768 + -fa auto + -ctv q8_0 + -ctk q8_0 + -ngl 99 + --jinja + --mmap + -b 512 + name: "qwen2.5-coder:14b-q5_k_m-32768" + description: "VRAM: ~12500 MiB" + ttl: 600 + + "qwen2.5-coder:1.5b-q4_k_m-autocomplete": + cmd: | + ${latest-llama} + -hf Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF:q4_k_m + --ctx-size 2048 + -fa off + -ngl 99 + -b 128 + -ub 32 + --mmap + --no-warmup + name: "qwen2.5-coder:1.5b-q4_k_m-autocomplete" + description: "VRAM: ~1000 MiB" + ttl: 3600 + + # ======================================== + # PERSISTENT CPU MODEL + # ======================================== + + "qwen3:1.7b-cpu-json": + cmd: | + ${latest-llama} + -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_M + --ctx-size 8192 + -fa off + -ngl 0 + -b 512 + --jinja + --mmap + name: "qwen3:1.7b-cpu-json" + description: "CPU-only - permanent RAM resident for tags/titles/queries" + ttl: 0 + +# ======================================== +# GROUPS CONFIGURATION +# ======================================== + +groups: + # General purpose models can coexist with utility models + # When loaded, they prevent coding group from running + "general-purpose": + swap: false # All models in group can run simultaneously + exclusive: true # Unloads other exclusive groups when active + members: + - "qwen3:14b-q5_k_m-32768" + - "qwen3:8b-q5_k_m-40960" + - "qwen3:8b-q8_0-32768" + - "ministral-3:14b-instruct-q5_k_m-20480-vision" + - "ministral-3:14b-reasoning-q5_k_m-20480-vision" + - "ministral-3:14b-instruct-q5_k_m-32768" + - "ministral-3:14b-reasoning-q5_k_m-32768" + - "bge-reranker-v2-m3" + + # Coding models can coexist with each other + # When loaded, they prevent general-purpose group from running + "coding": + swap: false # Both coder models can run simultaneously + exclusive: true # Unloads other exclusive groups when active + members: + - "qwen2.5-coder:14b-q5_k_m-32768" + - "qwen2.5-coder:1.5b-q4_k_m-autocomplete" + + # CPU-based persistent model - never unloaded, doesn't interfere + "persistent-cpu": + swap: false # No swapping (only one model anyway) + exclusive: false # Doesn't unload other groups + persistent: true # Other groups cannot unload this + members: + - "qwen3:1.7b-cpu-json" + +# ======================================== +# STARTUP HOOKS +# ======================================== + +hooks: + on_startup: + preload: + - "qwen3:1.7b-cpu-json" diff --git a/ui-svelte/package-lock.json b/ui-svelte/package-lock.json index 93150075..57723b8e 100644 --- a/ui-svelte/package-lock.json +++ b/ui-svelte/package-lock.json @@ -8,6 +8,12 @@ "name": "ui-svelte", "version": "0.0.0", "dependencies": { + "@codemirror/lang-yaml": "^6.1.2", + "@codemirror/language": "^6.12.1", + "@codemirror/state": "^6.5.4", + "@codemirror/view": "^6.39.12", + "codemirror": "^6.0.2", + "js-yaml": "^4.1.1", "svelte-spa-router": "^4.0.1" }, "devDependencies": { @@ -21,6 +27,102 @@ "vite": "^6.3.5" } }, + "node_modules/@codemirror/autocomplete": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/@codemirror/autocomplete/-/autocomplete-6.20.0.tgz", + "integrity": "sha512-bOwvTOIJcG5FVo5gUUupiwYh8MioPLQ4UcqbcRf7UQ98X90tCa9E1kZ3Z7tqwpZxYyOvh1YTYbmZE9RTfTp5hg==", + "license": "MIT", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.17.0", + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@codemirror/commands": { + "version": "6.10.1", + "resolved": "https://registry.npmjs.org/@codemirror/commands/-/commands-6.10.1.tgz", + "integrity": "sha512-uWDWFypNdQmz2y1LaNJzK7fL7TYKLeUAU0npEC685OKTF3KcQ2Vu3klIM78D7I6wGhktme0lh3CuQLv0ZCrD9Q==", + "license": "MIT", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.4.0", + "@codemirror/view": "^6.27.0", + "@lezer/common": "^1.1.0" + } + }, + "node_modules/@codemirror/lang-yaml": { + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-yaml/-/lang-yaml-6.1.2.tgz", + "integrity": "sha512-dxrfG8w5Ce/QbT7YID7mWZFKhdhsaTNOYjOkSIMt1qmC4VQnXSDSYVHHHn8k6kJUfIhtLo8t1JJgltlxWdsITw==", + "license": "MIT", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.2.0", + "@lezer/lr": "^1.0.0", + "@lezer/yaml": "^1.0.0" + } + }, + "node_modules/@codemirror/language": { + "version": "6.12.1", + "resolved": "https://registry.npmjs.org/@codemirror/language/-/language-6.12.1.tgz", + "integrity": "sha512-Fa6xkSiuGKc8XC8Cn96T+TQHYj4ZZ7RdFmXA3i9xe/3hLHfwPZdM+dqfX0Cp0zQklBKhVD8Yzc8LS45rkqcwpQ==", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.23.0", + "@lezer/common": "^1.5.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0", + "style-mod": "^4.0.0" + } + }, + "node_modules/@codemirror/lint": { + "version": "6.9.3", + "resolved": "https://registry.npmjs.org/@codemirror/lint/-/lint-6.9.3.tgz", + "integrity": "sha512-y3YkYhdnhjDBAe0VIA0c4wVoFOvnp8CnAvfLqi0TqotIv92wIlAAP7HELOpLBsKwjAX6W92rSflA6an/2zBvXw==", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.35.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/search": { + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/@codemirror/search/-/search-6.6.0.tgz", + "integrity": "sha512-koFuNXcDvyyotWcgOnZGmY7LZqEOXZaaxD/j6n18TCLx2/9HieZJ5H6hs1g8FiRxBD0DNfs0nXn17g872RmYdw==", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.37.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/state": { + "version": "6.5.4", + "resolved": "https://registry.npmjs.org/@codemirror/state/-/state-6.5.4.tgz", + "integrity": "sha512-8y7xqG/hpB53l25CIoit9/ngxdfoG+fx+V3SHBrinnhOtLvKHRyAJJuHzkWrR4YXXLX8eXBsejgAAxHUOdW1yw==", + "license": "MIT", + "dependencies": { + "@marijn/find-cluster-break": "^1.0.0" + } + }, + "node_modules/@codemirror/view": { + "version": "6.39.12", + "resolved": "https://registry.npmjs.org/@codemirror/view/-/view-6.39.12.tgz", + "integrity": "sha512-f+/VsHVn/kOA9lltk/GFzuYwVVAKmOnNjxbrhkk3tPHntFqjWeI2TbIXx006YkBkqC10wZ4NsnWXCQiFPeAISQ==", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.5.0", + "crelt": "^1.0.6", + "style-mod": "^4.1.0", + "w3c-keyname": "^2.2.4" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.25.12", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz", @@ -513,6 +615,47 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@lezer/common": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.5.0.tgz", + "integrity": "sha512-PNGcolp9hr4PJdXR4ix7XtixDrClScvtSCYW3rQG106oVMOOI+jFb+0+J3mbeL/53g1Zd6s0kJzaw6Ri68GmAA==", + "license": "MIT" + }, + "node_modules/@lezer/highlight": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.3.tgz", + "integrity": "sha512-qXdH7UqTvGfdVBINrgKhDsVTJTxactNNxLk7+UMwZhU13lMHaOBlJe9Vqp907ya56Y3+ed2tlqzys7jDkTmW0g==", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.3.0" + } + }, + "node_modules/@lezer/lr": { + "version": "1.4.8", + "resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.8.tgz", + "integrity": "sha512-bPWa0Pgx69ylNlMlPvBPryqeLYQjyJjqPx+Aupm5zydLIF3NE+6MMLT8Yi23Bd9cif9VS00aUebn+6fDIGBcDA==", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@lezer/yaml": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@lezer/yaml/-/yaml-1.0.4.tgz", + "integrity": "sha512-2lrrHqxalACEbxIbsjhqGpSW8kWpUKuY6RHgnSAFZa6qK62wvnPxA8hGOwOoDbwHcOFs5M4o27mjGu+P7TvBmw==", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.4.0" + } + }, + "node_modules/@marijn/find-cluster-break": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@marijn/find-cluster-break/-/find-cluster-break-1.0.2.tgz", + "integrity": "sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==", + "license": "MIT" + }, "node_modules/@rollup/rollup-android-arm-eabi": { "version": "4.57.0", "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.57.0.tgz", @@ -879,7 +1022,6 @@ "integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@sveltejs/vite-plugin-svelte-inspector": "^4.0.1", "debug": "^4.4.1", @@ -1206,7 +1348,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1214,6 +1355,12 @@ "node": ">=0.4.0" } }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "license": "Python-2.0" + }, "node_modules/aria-query": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz", @@ -1260,6 +1407,27 @@ "node": ">=6" } }, + "node_modules/codemirror": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/codemirror/-/codemirror-6.0.2.tgz", + "integrity": "sha512-VhydHotNW5w1UGK0Qj96BwSk/Zqbp9WbnyK2W/eVMv4QyF41INRGpjUhFJY7/uDNuudSc33a/PKr4iDqRduvHw==", + "license": "MIT", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/commands": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/lint": "^6.0.0", + "@codemirror/search": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.0.0" + } + }, + "node_modules/crelt": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/crelt/-/crelt-1.0.6.tgz", + "integrity": "sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==", + "license": "MIT" + }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", @@ -1438,6 +1606,18 @@ "jiti": "lib/jiti-cli.mjs" } }, + "node_modules/js-yaml": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, "node_modules/kleur": { "version": "4.1.5", "resolved": "https://registry.npmjs.org/kleur/-/kleur-4.1.5.tgz", @@ -1775,7 +1955,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -1903,13 +2082,18 @@ "node": ">=0.10.0" } }, + "node_modules/style-mod": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/style-mod/-/style-mod-4.1.3.tgz", + "integrity": "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ==", + "license": "MIT" + }, "node_modules/svelte": { "version": "5.48.5", "resolved": "https://registry.npmjs.org/svelte/-/svelte-5.48.5.tgz", "integrity": "sha512-NB3o70OxfmnE5UPyLr8uH3IV02Q43qJVAuWigYmsSOYsS0s/rHxP0TF81blG0onF/xkhNvZw4G8NfzIX+By5ZQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@jridgewell/remapping": "^2.3.4", "@jridgewell/sourcemap-codec": "^1.5.0", @@ -2011,7 +2195,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -2026,7 +2209,6 @@ "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", @@ -2116,6 +2298,12 @@ } } }, + "node_modules/w3c-keyname": { + "version": "2.2.8", + "resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz", + "integrity": "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==", + "license": "MIT" + }, "node_modules/zimmerframe": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/zimmerframe/-/zimmerframe-1.1.4.tgz", diff --git a/ui-svelte/package.json b/ui-svelte/package.json index 53296797..fbe35b9d 100644 --- a/ui-svelte/package.json +++ b/ui-svelte/package.json @@ -20,6 +20,12 @@ "vite": "^6.3.5" }, "dependencies": { + "@codemirror/lang-yaml": "^6.1.2", + "@codemirror/language": "^6.12.1", + "@codemirror/state": "^6.5.4", + "@codemirror/view": "^6.39.12", + "codemirror": "^6.0.2", + "js-yaml": "^4.1.1", "svelte-spa-router": "^4.0.1" } } diff --git a/ui-svelte/src/App.svelte b/ui-svelte/src/App.svelte index 69216703..2e4ab9d1 100644 --- a/ui-svelte/src/App.svelte +++ b/ui-svelte/src/App.svelte @@ -5,6 +5,7 @@ import LogViewer from "./routes/LogViewer.svelte"; import Models from "./routes/Models.svelte"; import Activity from "./routes/Activity.svelte"; + import Config from "./routes/Config.svelte"; import { enableAPIEvents } from "./stores/api"; import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme"; @@ -12,6 +13,7 @@ "/": Models, "/logs": LogViewer, "/activity": Activity, + "/config": Config, "*": Models, }; diff --git a/ui-svelte/src/components/Header.svelte b/ui-svelte/src/components/Header.svelte index 73c66874..4c7553ee 100644 --- a/ui-svelte/src/components/Header.svelte +++ b/ui-svelte/src/components/Header.svelte @@ -68,6 +68,14 @@ > Logs + + Config + + + + + + + {#if validationError} +
+ Validation Error: {validationError} +
+ {/if} + + {#if error} +
+ {error} +
+ {/if} + + {#if loading} +
+
Loading configuration...
+
+ {:else} +
+ +
+

Current Config (Editable)

+
+
+ + +
+

Example Config (Reference)

+
+
+
+ {/if} + From 6f023c7993403a5d543769b7113a012c675e7b98 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 30 Jan 2026 22:32:56 +0000 Subject: [PATCH 06/13] fix error assumption healthy --- proxy/process.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proxy/process.go b/proxy/process.go index dee5f962..253ab189 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -143,7 +143,7 @@ func NewProcess(ID string, healthCheckTimeout int, modelConfig config.ModelConfi proxyLogger.Warnf("<%s> rpcHealthCheck enabled but no --rpc flag found in cmd", ID) } else { p.rpcEndpoints = endpoints - p.rpcHealthy.Store(true) // assume healthy initially + p.rpcHealthy.Store(false) // start unhealthy until first check passes } } From c17df42f43f4dd5070ef575566634ba90b888abf Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 30 Jan 2026 23:26:32 +0000 Subject: [PATCH 07/13] proxy: make RPC health checks independent of process state RPC health checking now runs continuously from process creation until proxy shutdown, completely independent of whether the model is loaded, starting, stopped, or in any other state. - Start health checker in NewProcess when rpcHealthCheck is enabled - Remove stopRPCHealthChecker - only stops on proxy shutdown - Remove state checks from health checker goroutine - Health status always reflects current RPC endpoint availability Previously, the health checker only ran while a process was in StateReady, causing stale health data when processes stopped. Now /v1/models always shows accurate RPC health regardless of model state. Co-Authored-By: Claude Sonnet 4.5 --- proxy/process.go | 20 +++----- proxy/process_rpc_health_test.go | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 14 deletions(-) create mode 100644 proxy/process_rpc_health_test.go diff --git a/proxy/process.go b/proxy/process.go index 253ab189..b9658e44 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -144,6 +144,8 @@ func NewProcess(ID string, healthCheckTimeout int, modelConfig config.ModelConfi } else { p.rpcEndpoints = endpoints p.rpcHealthy.Store(false) // start unhealthy until first check passes + // Start health checker immediately - runs independent of process state + p.startRPCHealthChecker() } } @@ -385,7 +387,6 @@ func (p *Process) start() error { return fmt.Errorf("failed to set Process state to ready: current state: %v, error: %v", curState, err) } else { p.failedStartCount = 0 - p.startRPCHealthChecker() return nil } } @@ -409,8 +410,6 @@ func (p *Process) StopImmediately() { return } - p.stopRPCHealthChecker() - p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState()) if curState, err := p.swapState(StateReady, StateStopping); err != nil { p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState) @@ -904,7 +903,9 @@ func (s *statusResponseWriter) Flush() { } } -// startRPCHealthChecker launches background goroutine for RPC health monitoring +// startRPCHealthChecker launches background goroutine for RPC health monitoring. +// Runs independently of process state - checks RPC endpoints regardless of whether +// the model is loaded, starting, stopped, etc. func (p *Process) startRPCHealthChecker() { if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 { return @@ -926,9 +927,7 @@ func (p *Process) startRPCHealthChecker() { p.proxyLogger.Debugf("<%s> RPC health checker shutting down", p.ID) return case <-p.rpcHealthTicker.C: - if p.CurrentState() != StateReady { - return // Process no longer ready, exit - } + // Check regardless of process state p.checkRPCHealth() } } @@ -960,13 +959,6 @@ func (p *Process) checkRPCHealth() { } } -func (p *Process) stopRPCHealthChecker() { - if p.rpcHealthCancel != nil { - p.rpcHealthCancel() - p.rpcHealthCancel = nil - } -} - // IsRPCHealthy returns true if RPC health checking is disabled or all endpoints healthy func (p *Process) IsRPCHealthy() bool { if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 { diff --git a/proxy/process_rpc_health_test.go b/proxy/process_rpc_health_test.go new file mode 100644 index 00000000..cb9d1d25 --- /dev/null +++ b/proxy/process_rpc_health_test.go @@ -0,0 +1,84 @@ +package proxy + +import ( + "context" + "io" + "testing" + + "github.com/mostlygeek/llama-swap/proxy/config" + "github.com/stretchr/testify/assert" +) + +func TestProcess_RPCHealthIndependentOfState(t *testing.T) { + testLogger := NewLogMonitorWriter(io.Discard) + proxyLogger := NewLogMonitorWriter(io.Discard) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + modelConfig := config.ModelConfig{ + Cmd: "llama-server --rpc 127.0.0.1:50051", + Proxy: "http://localhost:8080", + RPCHealthCheck: true, + } + + process := NewProcess("test-model", 5, modelConfig, testLogger, proxyLogger, ctx) + + // Verify endpoints were parsed + assert.NotEmpty(t, process.rpcEndpoints, "RPC endpoints should be parsed from cmd") + assert.Equal(t, []string{"127.0.0.1:50051"}, process.rpcEndpoints) + + // Initially should be unhealthy (false) until first check + assert.False(t, process.rpcHealthy.Load(), "RPC health should start as false") + + // Health checker should be running regardless of process state + assert.NotNil(t, process.rpcHealthTicker, "Health checker ticker should be running") + assert.NotNil(t, process.rpcHealthCancel, "Health checker should have cancel func") + + // Process state should not affect health checking + assert.Equal(t, StateStopped, process.CurrentState(), "Process should be in stopped state") + + // Health check runs independently - simulate RPC becoming healthy + process.rpcHealthy.Store(true) + assert.True(t, process.IsRPCHealthy(), "Process should report healthy regardless of state") +} + +func TestProcess_RPCHealthCheckDisabled(t *testing.T) { + testLogger := NewLogMonitorWriter(io.Discard) + proxyLogger := NewLogMonitorWriter(io.Discard) + ctx := context.Background() + + modelConfig := config.ModelConfig{ + Cmd: "llama-server --rpc 127.0.0.1:50051", + Proxy: "http://localhost:8080", + RPCHealthCheck: false, // Disabled + } + + process := NewProcess("test-model", 5, modelConfig, testLogger, proxyLogger, ctx) + + // Should always return healthy when disabled + assert.True(t, process.IsRPCHealthy(), "Should return true when RPC health check is disabled") +} + +func TestProcess_RPCHealthCheckNoEndpoints(t *testing.T) { + testLogger := NewLogMonitorWriter(io.Discard) + proxyLogger := NewLogMonitorWriter(io.Discard) + ctx := context.Background() + + modelConfig := config.ModelConfig{ + Cmd: "llama-server --port 8080", // No --rpc flag + Proxy: "http://localhost:8080", + RPCHealthCheck: true, // Enabled but no endpoints + } + + process := NewProcess("test-model", 5, modelConfig, testLogger, proxyLogger, ctx) + + // Should have no endpoints + assert.Empty(t, process.rpcEndpoints, "Should have no RPC endpoints when --rpc flag is missing") + + // Should return healthy when no endpoints configured (treat as not using RPC) + assert.True(t, process.IsRPCHealthy(), "Should return true when no RPC endpoints found") + + // Health checker should NOT start when no endpoints + assert.Nil(t, process.rpcHealthTicker, "Health checker should not run without endpoints") + assert.Nil(t, process.rpcHealthCancel, "Health checker cancel should be nil") +} From 4987dafa545bc35b8cce8afc481ca245c367b41b Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 30 Jan 2026 23:45:19 +0000 Subject: [PATCH 08/13] WIP: web config changes Work in progress on web configuration feature. Co-Authored-By: Claude Sonnet 4.5 --- config_embed.go | 13 +++ llama-swap.go | 2 + proxy/proxymanager.go | 9 ++ proxy/proxymanager_api.go | 9 +- ui/package-lock.json | 178 +++++++++++++++++++++++++++++++++++--- ui/package.json | 5 ++ 6 files changed, 199 insertions(+), 17 deletions(-) create mode 100644 config_embed.go diff --git a/config_embed.go b/config_embed.go new file mode 100644 index 00000000..b158e944 --- /dev/null +++ b/config_embed.go @@ -0,0 +1,13 @@ +package main + +import ( + _ "embed" +) + +//go:embed config.example.yaml +var configExampleYAML []byte + +// GetConfigExampleYAML returns the embedded example config file +func GetConfigExampleYAML() []byte { + return configExampleYAML +} diff --git a/llama-swap.go b/llama-swap.go index 60ccbc73..1c68a25c 100644 --- a/llama-swap.go +++ b/llama-swap.go @@ -98,6 +98,7 @@ func main() { newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) newPM.SetConfigPath(*configPath) + newPM.SetConfigExample(GetConfigExampleYAML()) srv.Handler = newPM fmt.Println("Configuration Reloaded") @@ -116,6 +117,7 @@ func main() { newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) newPM.SetConfigPath(*configPath) + newPM.SetConfigExample(GetConfigExampleYAML()) srv.Handler = newPM } } diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index bf227120..c33c9f96 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -55,6 +55,9 @@ type ProxyManager struct { // config file path for editing configPath string + // embedded example config + configExample []byte + // peer proxy see: #296, #433 peerProxy *PeerProxy } @@ -975,3 +978,9 @@ func (pm *ProxyManager) SetConfigPath(configPath string) { defer pm.Unlock() pm.configPath = configPath } + +func (pm *ProxyManager) SetConfigExample(configExample []byte) { + pm.Lock() + defer pm.Unlock() + pm.configExample = configExample +} diff --git a/proxy/proxymanager_api.go b/proxy/proxymanager_api.go index f590cd6e..05058193 100644 --- a/proxy/proxymanager_api.go +++ b/proxy/proxymanager_api.go @@ -276,9 +276,12 @@ func (pm *ProxyManager) apiGetCurrentConfig(c *gin.Context) { } func (pm *ProxyManager) apiGetExampleConfig(c *gin.Context) { - data, err := os.ReadFile("config.example.yaml") - if err != nil { - pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to read example config: %v", err)) + pm.Lock() + data := pm.configExample + pm.Unlock() + + if data == nil { + pm.sendErrorResponse(c, http.StatusInternalServerError, "Example config not available") return } diff --git a/ui/package-lock.json b/ui/package-lock.json index c88133e7..097d3bcc 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -8,6 +8,10 @@ "name": "ui", "version": "0.0.0", "dependencies": { + "@codemirror/lang-yaml": "^6.1.1", + "@codemirror/state": "^6.4.1", + "codemirror": "^6.0.1", + "js-yaml": "^4.1.0", "react": "^19.1.0", "react-dom": "^19.1.0", "react-icons": "^5.5.0", @@ -17,6 +21,7 @@ "devDependencies": { "@eslint/js": "^9.25.0", "@tailwindcss/vite": "^4.1.8", + "@types/js-yaml": "^4.0.9", "@types/react": "^19.1.2", "@types/react-dom": "^19.1.2", "@vitejs/plugin-react": "^4.4.1", @@ -75,7 +80,6 @@ "integrity": "sha512-bXYxrXFubeYdvB0NhD/NBB3Qi6aZeV20GOWVI47t2dkecCEoneR4NPVcb7abpXDEvejgrUfFtG6vG/zxAKmg+g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.27.1", @@ -327,6 +331,94 @@ "node": ">=6.9.0" } }, + "node_modules/@codemirror/autocomplete": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/@codemirror/autocomplete/-/autocomplete-6.20.0.tgz", + "integrity": "sha512-bOwvTOIJcG5FVo5gUUupiwYh8MioPLQ4UcqbcRf7UQ98X90tCa9E1kZ3Z7tqwpZxYyOvh1YTYbmZE9RTfTp5hg==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.17.0", + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@codemirror/commands": { + "version": "6.10.1", + "resolved": "https://registry.npmjs.org/@codemirror/commands/-/commands-6.10.1.tgz", + "integrity": "sha512-uWDWFypNdQmz2y1LaNJzK7fL7TYKLeUAU0npEC685OKTF3KcQ2Vu3klIM78D7I6wGhktme0lh3CuQLv0ZCrD9Q==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.4.0", + "@codemirror/view": "^6.27.0", + "@lezer/common": "^1.1.0" + } + }, + "node_modules/@codemirror/lang-yaml": { + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-yaml/-/lang-yaml-6.1.2.tgz", + "integrity": "sha512-dxrfG8w5Ce/QbT7YID7mWZFKhdhsaTNOYjOkSIMt1qmC4VQnXSDSYVHHHn8k6kJUfIhtLo8t1JJgltlxWdsITw==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.2.0", + "@lezer/lr": "^1.0.0", + "@lezer/yaml": "^1.0.0" + } + }, + "node_modules/@codemirror/language": { + "version": "6.12.1", + "resolved": "https://registry.npmjs.org/@codemirror/language/-/language-6.12.1.tgz", + "integrity": "sha512-Fa6xkSiuGKc8XC8Cn96T+TQHYj4ZZ7RdFmXA3i9xe/3hLHfwPZdM+dqfX0Cp0zQklBKhVD8Yzc8LS45rkqcwpQ==", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.23.0", + "@lezer/common": "^1.5.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0", + "style-mod": "^4.0.0" + } + }, + "node_modules/@codemirror/lint": { + "version": "6.9.3", + "resolved": "https://registry.npmjs.org/@codemirror/lint/-/lint-6.9.3.tgz", + "integrity": "sha512-y3YkYhdnhjDBAe0VIA0c4wVoFOvnp8CnAvfLqi0TqotIv92wIlAAP7HELOpLBsKwjAX6W92rSflA6an/2zBvXw==", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.35.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/search": { + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/@codemirror/search/-/search-6.6.0.tgz", + "integrity": "sha512-koFuNXcDvyyotWcgOnZGmY7LZqEOXZaaxD/j6n18TCLx2/9HieZJ5H6hs1g8FiRxBD0DNfs0nXn17g872RmYdw==", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.37.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/state": { + "version": "6.5.4", + "resolved": "https://registry.npmjs.org/@codemirror/state/-/state-6.5.4.tgz", + "integrity": "sha512-8y7xqG/hpB53l25CIoit9/ngxdfoG+fx+V3SHBrinnhOtLvKHRyAJJuHzkWrR4YXXLX8eXBsejgAAxHUOdW1yw==", + "dependencies": { + "@marijn/find-cluster-break": "^1.0.0" + } + }, + "node_modules/@codemirror/view": { + "version": "6.39.12", + "resolved": "https://registry.npmjs.org/@codemirror/view/-/view-6.39.12.tgz", + "integrity": "sha512-f+/VsHVn/kOA9lltk/GFzuYwVVAKmOnNjxbrhkk3tPHntFqjWeI2TbIXx006YkBkqC10wZ4NsnWXCQiFPeAISQ==", + "dependencies": { + "@codemirror/state": "^6.5.0", + "crelt": "^1.0.6", + "style-mod": "^4.1.0", + "w3c-keyname": "^2.2.4" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.25.5", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz", @@ -1041,6 +1133,42 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@lezer/common": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.5.0.tgz", + "integrity": "sha512-PNGcolp9hr4PJdXR4ix7XtixDrClScvtSCYW3rQG106oVMOOI+jFb+0+J3mbeL/53g1Zd6s0kJzaw6Ri68GmAA==" + }, + "node_modules/@lezer/highlight": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.3.tgz", + "integrity": "sha512-qXdH7UqTvGfdVBINrgKhDsVTJTxactNNxLk7+UMwZhU13lMHaOBlJe9Vqp907ya56Y3+ed2tlqzys7jDkTmW0g==", + "dependencies": { + "@lezer/common": "^1.3.0" + } + }, + "node_modules/@lezer/lr": { + "version": "1.4.8", + "resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.8.tgz", + "integrity": "sha512-bPWa0Pgx69ylNlMlPvBPryqeLYQjyJjqPx+Aupm5zydLIF3NE+6MMLT8Yi23Bd9cif9VS00aUebn+6fDIGBcDA==", + "dependencies": { + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@lezer/yaml": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@lezer/yaml/-/yaml-1.0.4.tgz", + "integrity": "sha512-2lrrHqxalACEbxIbsjhqGpSW8kWpUKuY6RHgnSAFZa6qK62wvnPxA8hGOwOoDbwHcOFs5M4o27mjGu+P7TvBmw==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.4.0" + } + }, + "node_modules/@marijn/find-cluster-break": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@marijn/find-cluster-break/-/find-cluster-break-1.0.2.tgz", + "integrity": "sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==" + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -1755,6 +1883,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/js-yaml": { + "version": "4.0.9", + "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz", + "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==", + "dev": true + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -1768,7 +1902,6 @@ "integrity": "sha512-JeG0rEWak0N6Itr6QUx+X60uQmN+5t3j9r/OVDtWzFXKaj6kD1BwJzOksD0FF6iWxZlbE1kB0q9vtnU2ekqa1Q==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -1829,7 +1962,6 @@ "integrity": "sha512-qwxv6dq682yVvgKKp2qWwLgRbscDAYktPptK4JPojCwwi3R9cwrvIxS4lvBpzmcqzR4bdn54Z0IG1uHFskW4dA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.33.1", "@typescript-eslint/types": "8.33.1", @@ -2081,7 +2213,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2136,7 +2267,6 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true, "license": "Python-2.0" }, "node_modules/balanced-match": { @@ -2190,7 +2320,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001718", "electron-to-chromium": "^1.5.160", @@ -2262,6 +2391,20 @@ "node": ">=18" } }, + "node_modules/codemirror": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/codemirror/-/codemirror-6.0.2.tgz", + "integrity": "sha512-VhydHotNW5w1UGK0Qj96BwSk/Zqbp9WbnyK2W/eVMv4QyF41INRGpjUhFJY7/uDNuudSc33a/PKr4iDqRduvHw==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/commands": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/lint": "^6.0.0", + "@codemirror/search": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.0.0" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -2309,6 +2452,11 @@ "url": "https://opencollective.com/express" } }, + "node_modules/crelt": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/crelt/-/crelt-1.0.6.tgz", + "integrity": "sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==" + }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -2457,7 +2605,6 @@ "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -2927,7 +3074,6 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", - "dev": true, "license": "MIT", "dependencies": { "argparse": "^2.0.1" @@ -3567,7 +3713,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz", "integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -3577,7 +3722,6 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz", "integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==", "license": "MIT", - "peer": true, "dependencies": { "scheduler": "^0.26.0" }, @@ -3812,6 +3956,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/style-mod": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/style-mod/-/style-mod-4.1.3.tgz", + "integrity": "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ==" + }, "node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -3907,7 +4056,6 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -3960,7 +4108,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -4039,7 +4186,6 @@ "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", @@ -4130,7 +4276,6 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -4138,6 +4283,11 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/w3c-keyname": { + "version": "2.2.8", + "resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz", + "integrity": "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==" + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/ui/package.json b/ui/package.json index d8a5e090..07cc38ea 100644 --- a/ui/package.json +++ b/ui/package.json @@ -10,6 +10,10 @@ "preview": "vite preview" }, "dependencies": { + "@codemirror/lang-yaml": "^6.1.1", + "@codemirror/state": "^6.4.1", + "codemirror": "^6.0.1", + "js-yaml": "^4.1.0", "react": "^19.1.0", "react-dom": "^19.1.0", "react-icons": "^5.5.0", @@ -19,6 +23,7 @@ "devDependencies": { "@eslint/js": "^9.25.0", "@tailwindcss/vite": "^4.1.8", + "@types/js-yaml": "^4.0.9", "@types/react": "^19.1.2", "@types/react-dom": "^19.1.2", "@vitejs/plugin-react": "^4.4.1", From e6f9f9a9990cea88228dc67a62e072ef0854da5e Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Sat, 31 Jan 2026 00:27:33 +0000 Subject: [PATCH 09/13] proxy: fix requestTimeout feature to actually terminate requests The requestTimeout feature was not working because the timeout context was not connected to the HTTP request. When the timeout fired, it attempted to kill the process but the reverse proxy continued waiting for a response indefinitely. - Use context.WithTimeout() to create a timeout context for the HTTP request - Clone the request with the timeout context before proxying - When timeout fires, the HTTP request is immediately cancelled - Fix StopImmediately() to handle timeouts during model loading (StateStarting) - Add unit test to verify timeout behavior Before: requests would run for 60+ seconds despite requestTimeout: 20 After: requests terminate in exactly 20 seconds as configured Co-Authored-By: Claude Sonnet 4.5 --- proxy/process.go | 36 ++++++----- proxy/process_timeout_test.go | 109 ++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 15 deletions(-) create mode 100644 proxy/process_timeout_test.go diff --git a/proxy/process.go b/proxy/process.go index 5ada9723..7e311d11 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -381,13 +381,17 @@ func (p *Process) Stop() { // StopImmediately will transition the process to the stopping state and stop the process with a SIGTERM. // If the process does not stop within the specified timeout, it will be forcefully stopped with a SIGKILL. func (p *Process) StopImmediately() { - if !isValidTransition(p.CurrentState(), StateStopping) { + currentState := p.CurrentState() + if !isValidTransition(currentState, StateStopping) { return } - p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState()) - if curState, err := p.swapState(StateReady, StateStopping); err != nil { - p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState) + p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, currentState) + + // Try to transition from current state to StateStopping + // Process might be in StateReady or StateStarting when timeout fires + if _, err := p.swapState(currentState, StateStopping); err != nil { + p.proxyLogger.Infof("<%s> Stop() %s -> StateStopping err: %v", p.ID, currentState, err) return } @@ -502,30 +506,32 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) { // Start timeout monitoring if requestTimeout is configured var timeoutCancel context.CancelFunc + var requestCtx context.Context = r.Context() + if p.config.RequestTimeout > 0 { - timeoutCtx, cancel := context.WithCancel(context.Background()) + timeoutDuration := time.Duration(p.config.RequestTimeout) * time.Second + var cancel context.CancelFunc + requestCtx, cancel = context.WithTimeout(r.Context(), timeoutDuration) timeoutCancel = cancel go func() { - timeoutDuration := time.Duration(p.config.RequestTimeout) * time.Second - timer := time.NewTimer(timeoutDuration) - defer timer.Stop() - - select { - case <-timer.C: + <-requestCtx.Done() + if requestCtx.Err() == context.DeadlineExceeded { p.proxyLogger.Warnf("<%s> Request timeout exceeded (%v), force stopping process to prevent GPU blocking", p.ID, timeoutDuration) // Force stop the process - this will kill the underlying inference process p.StopImmediately() - case <-timeoutCtx.Done(): - // Request completed normally, cancel timeout - return } }() - // Ensure timeout goroutine is cancelled when request completes + // Ensure timeout is cancelled when request completes defer timeoutCancel() } + // Create a new request with the timeout context + if requestCtx != r.Context() { + r = r.Clone(requestCtx) + } + // for #366 // - extract streaming param from request context, should have been set by proxymanager var srw *statusResponseWriter diff --git a/proxy/process_timeout_test.go b/proxy/process_timeout_test.go new file mode 100644 index 00000000..9f048d9e --- /dev/null +++ b/proxy/process_timeout_test.go @@ -0,0 +1,109 @@ +package proxy + +import ( + "fmt" + "net/http" + "net/http/httptest" + "sync" + "testing" + "time" + + "github.com/mostlygeek/llama-swap/proxy/config" +) + +// TestProcess_RequestTimeout verifies that requestTimeout actually kills the process +func TestProcess_RequestTimeout(t *testing.T) { + // Create a mock server that simulates a long-running inference + mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Logf("Mock server received request") + + // Simulate streaming response that takes 60 seconds + w.Header().Set("Content-Type", "text/event-stream") + w.WriteHeader(http.StatusOK) + + flusher, ok := w.(http.Flusher) + if !ok { + t.Fatal("Expected http.ResponseWriter to be an http.Flusher") + } + + // Stream data for 60 seconds + for i := 0; i < 60; i++ { + select { + case <-r.Context().Done(): + t.Logf("Mock server: client disconnected after %d seconds", i) + return + default: + fmt.Fprintf(w, "data: token %d\n\n", i) + flusher.Flush() + time.Sleep(1 * time.Second) + } + } + t.Logf("Mock server completed full 60 second response") + })) + defer mockServer.Close() + + // Setup process logger - use NewLogMonitor() to avoid race in test + processLogger := NewLogMonitor() + proxyLogger := NewLogMonitor() + + // Create process with 5 second request timeout + cfg := config.ModelConfig{ + Proxy: mockServer.URL, + CheckEndpoint: "none", // skip health check + RequestTimeout: 5, // 5 second timeout + } + + p := NewProcess("test-timeout", 30, cfg, processLogger, proxyLogger) + p.gracefulStopTimeout = 2 * time.Second // shorter for testing + + // Manually set state to ready (skip actual process start) + p.forceState(StateReady) + + // Make a request that should timeout + req := httptest.NewRequest("POST", "/v1/chat/completions", nil) + w := httptest.NewRecorder() + + start := time.Now() + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + p.ProxyRequest(w, req) + }() + + // Wait for either completion or timeout + done := make(chan struct{}) + go func() { + wg.Wait() + close(done) + }() + + select { + case <-done: + elapsed := time.Since(start) + t.Logf("Request completed after %v", elapsed) + + // Request should complete within timeout + gracefulStopTimeout + some buffer + maxExpected := time.Duration(cfg.RequestTimeout+2)*time.Second + 3*time.Second + if elapsed > maxExpected { + t.Errorf("Request took %v, expected less than %v with 5s timeout", elapsed, maxExpected) + } else { + t.Logf("✓ Request was properly terminated by timeout") + } + + case <-time.After(15 * time.Second): + t.Fatalf("Test timed out after 15 seconds - request should have been killed by requestTimeout") + } +} + +// TestProcess_RequestTimeoutWithRealProcess tests with an actual process +func TestProcess_RequestTimeoutWithRealProcess(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test with real process in short mode") + } + + // This test would require a real llama.cpp server or similar + // For now, we can skip it or mock it + t.Skip("Requires real inference server") +} From 0e86bbcb688011bd625f23bf4b1a19f4d3aafae4 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Sat, 31 Jan 2026 00:33:41 +0000 Subject: [PATCH 10/13] docs: add requestTimeout to README features list Add brief mention of requestTimeout feature in the customizable features section of README. Co-Authored-By: Claude Sonnet 4.5 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c2696235..8d372c10 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,8 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and - ✅ API Key support - define keys to restrict access to API endpoints - ✅ Customizable - Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107)) - - Automatic unloading of models after timeout by setting a `ttl` + - Automatic unloading of models after idle timeout by setting a `ttl` + - Request timeout protection with `requestTimeout` to prevent runaway inference - Reliable Docker and Podman support using `cmd` and `cmdStop` together - Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235)) From 79332e309eefcae4434b92458ac9081e82b0f861 Mon Sep 17 00:00:00 2001 From: Overcuriousity Date: Sat, 31 Jan 2026 18:57:32 +0100 Subject: [PATCH 11/13] ui-svelte: improve Config editor dark mode styling Fix editor cleanup and improve dark mode appearance with better colors, contrast, and styling. - Add proper editor disposal in $effect cleanup - Update theme colors for better dark mode visibility - Improve button styling with teal export button - Better text contrast and subtle borders - Refine error message styling Co-Authored-By: Claude Sonnet 4.5 --- ui-svelte/src/routes/Config.svelte | 66 ++++++++++++++++++------------ 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/ui-svelte/src/routes/Config.svelte b/ui-svelte/src/routes/Config.svelte index cc830401..4d62bd38 100644 --- a/ui-svelte/src/routes/Config.svelte +++ b/ui-svelte/src/routes/Config.svelte @@ -31,39 +31,39 @@ function getTheme(dark: boolean, readOnly: boolean) { return EditorView.theme({ - "&": { + "&": { height: "100%", - backgroundColor: dark ? (readOnly ? "#1a1a1a" : "#1f1f1f") : (readOnly ? "#f9fafb" : "#ffffff"), + backgroundColor: dark ? (readOnly ? "#1a1a1a" : "#252525") : (readOnly ? "#f9fafb" : "#ffffff"), }, - ".cm-scroller": { + ".cm-scroller": { overflow: "auto", }, - ".cm-content": { + ".cm-content": { fontFamily: "monospace", - color: dark ? "#e0e0e0" : "#1f2937", + color: dark ? "#d1d5db" : "#1f2937", }, ".cm-gutters": { - backgroundColor: dark ? "#2a2a2a" : "#f3f4f6", + backgroundColor: dark ? (readOnly ? "#151515" : "#1f1f1f") : "#f3f4f6", color: dark ? "#6b7280" : "#9ca3af", border: "none", }, ".cm-activeLineGutter": { - backgroundColor: dark ? "#374151" : "#e5e7eb", + backgroundColor: dark ? "#2d3748" : "#e5e7eb", }, ".cm-activeLine": { - backgroundColor: dark ? "#374151" : "#f3f4f6", + backgroundColor: dark ? "#2d3748" : "#f3f4f6", }, ".cm-selectionBackground, ::selection": { - backgroundColor: dark ? "#3b82f6" : "#bfdbfe", + backgroundColor: dark ? "#2d5a7b" : "#bfdbfe", }, ".cm-cursor": { - borderLeftColor: dark ? "#60a5fa" : "#2563eb", + borderLeftColor: dark ? "#14b8a6" : "#2563eb", }, // YAML syntax colors ".cm-atom": { color: dark ? "#fbbf24" : "#d97706" }, // true/false/null - ".cm-number": { color: dark ? "#a78bfa" : "#7c3aed" }, // numbers - ".cm-string": { color: dark ? "#34d399" : "#059669" }, // strings - ".cm-property": { color: dark ? "#60a5fa" : "#2563eb" }, // keys + ".cm-number": { color: dark ? "#c4b5fd" : "#7c3aed" }, // numbers + ".cm-string": { color: dark ? "#6ee7b7" : "#059669" }, // strings + ".cm-property": { color: dark ? "#7dd3fc" : "#2563eb" }, // keys ".cm-comment": { color: dark ? "#6b7280" : "#9ca3af" }, // comments }, { dark }); } @@ -219,36 +219,50 @@ if (!loading && editorContainer && !editorView && currentConfig) { editorView = createEditor(editorContainer, currentConfig, false); } + + return () => { + if (editorView) { + editorView.destroy(); + editorView = null; + } + }; }); $effect(() => { if (!loading && exampleContainer && !exampleView && exampleConfig) { exampleView = createEditor(exampleContainer, exampleConfig, true); } + + return () => { + if (exampleView) { + exampleView.destroy(); + exampleView = null; + } + }; });
-

Configuration Editor

+

Configuration Editor

{#if validationError} -
+
Validation Error: {validationError}
{/if} {#if error} -
+
{error}
{/if} {#if loading}
-
Loading configuration...
+
Loading configuration...
{:else}
-

Current Config (Editable)

-
Current Config (Editable) +
-

Example Config (Reference)

-
Example Config (Reference) +
From 59db9f0754b65e80bd57e419134748be81500ea2 Mon Sep 17 00:00:00 2001 From: Overcuriousity Date: Sat, 31 Jan 2026 19:50:27 +0100 Subject: [PATCH 12/13] ui-svelte: fix Config editor compartment collision and error handling Fix theme compartment sharing bug and improve error response handling. - create separate Compartment instances for each CodeMirror editor - update createEditor to accept compartment parameter - improve saveConfig error handling to parse both JSON and non-JSON responses - include status code and statusText in error messages --- ui-svelte/src/routes/Config.svelte | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/ui-svelte/src/routes/Config.svelte b/ui-svelte/src/routes/Config.svelte index 4d62bd38..e13e9e79 100644 --- a/ui-svelte/src/routes/Config.svelte +++ b/ui-svelte/src/routes/Config.svelte @@ -19,6 +19,7 @@ let editorView: EditorView | null = null; let exampleView: EditorView | null = null; let themeCompartment = new Compartment(); + let exampleThemeCompartment = new Compartment(); function validateYAML(text: string): string | null { try { @@ -68,7 +69,7 @@ }, { dark }); } - function createEditor(parent: HTMLElement, content: string, readOnly: boolean) { + function createEditor(parent: HTMLElement, content: string, readOnly: boolean, compartment: Compartment) { const state = EditorState.create({ doc: content, extensions: [ @@ -76,7 +77,7 @@ yaml(), EditorView.lineWrapping, EditorView.editable.of(!readOnly), - themeCompartment.of(getTheme($isDarkMode, readOnly)), + compartment.of(getTheme($isDarkMode, readOnly)), EditorView.updateListener.of((update) => { if (!readOnly && update.docChanged) { currentConfig = update.state.doc.toString(); @@ -102,7 +103,7 @@ } if (exampleView) { exampleView.dispatch({ - effects: themeCompartment.reconfigure(getTheme($isDarkMode, true)) + effects: exampleThemeCompartment.reconfigure(getTheme($isDarkMode, true)) }); } }); @@ -155,8 +156,14 @@ }); if (!res.ok) { - const errData = await res.json(); - throw new Error(errData.error || "Failed to save config"); + let errMsg: string; + try { + const errData = await res.json(); + errMsg = errData.error || JSON.stringify(errData); + } catch { + errMsg = await res.text(); + } + throw new Error(`${res.status} ${res.statusText}: ${errMsg || "Failed to save config"}`); } alert("Config saved successfully! Application is reloading..."); @@ -217,7 +224,7 @@ $effect(() => { if (!loading && editorContainer && !editorView && currentConfig) { - editorView = createEditor(editorContainer, currentConfig, false); + editorView = createEditor(editorContainer, currentConfig, false, themeCompartment); } return () => { @@ -230,7 +237,7 @@ $effect(() => { if (!loading && exampleContainer && !exampleView && exampleConfig) { - exampleView = createEditor(exampleContainer, exampleConfig, true); + exampleView = createEditor(exampleContainer, exampleConfig, true, exampleThemeCompartment); } return () => { From 8e62ce1cd84f99acd98da5a8c473b8bb0310929b Mon Sep 17 00:00:00 2001 From: Overcuriousity Date: Sat, 31 Jan 2026 21:43:51 +0100 Subject: [PATCH 13/13] ui-svelte: fix Config editor cursor jumping on input Fix cursor jumping to top after typing by preventing reactive effect from re-running on content changes. Use untrack() to read config state without creating reactive dependency, ensuring editor is only created once and not destroyed/recreated on each keystroke. Co-Authored-By: Claude Sonnet 4.5 --- ui-svelte/src/routes/Config.svelte | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ui-svelte/src/routes/Config.svelte b/ui-svelte/src/routes/Config.svelte index e13e9e79..d3e1c036 100644 --- a/ui-svelte/src/routes/Config.svelte +++ b/ui-svelte/src/routes/Config.svelte @@ -1,5 +1,5 @@