diff --git a/README.md b/README.md index c2696235..40e7182a 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,11 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and - ✅ API Key support - define keys to restrict access to API endpoints - ✅ Customizable - Run multiple models at once with `Groups` ([#107](https://github.com/mostlygeek/llama-swap/issues/107)) - - Automatic unloading of models after timeout by setting a `ttl` + - Automatic unloading of models after idle timeout by setting a `ttl` + - Request timeout protection with `requestTimeout` to prevent runaway inference - Reliable Docker and Podman support using `cmd` and `cmdStop` together - Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235)) + - RPC health checking for distributed inference - conditionally expose models based on RPC server availability ### Web UI @@ -174,6 +176,7 @@ Almost all configuration settings are optional and can be added one step at a ti - `useModelName` to override model names sent to upstream servers - `${PORT}` automatic port variables for dynamic port assignment - `filters` rewrite parts of requests before sending to the upstream server + - `rpcHealthCheck` monitor RPC server health for distributed inference models See the [configuration documentation](docs/configuration.md) for all options. diff --git a/config-schema.json b/config-schema.json index 8baa0cc4..d0bfd2f5 100644 --- a/config-schema.json +++ b/config-schema.json @@ -216,10 +216,21 @@ "type": "boolean", "description": "Overrides the global sendLoadingState for this model. Ommitting this property will use the global setting." }, + "requestTimeout": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "Maximum time in seconds for a single request to complete before forcefully killing the model process. This prevents runaway inference processes from blocking the GPU indefinitely. 0 disables timeout (default). 
When exceeded, the process is terminated and must be restarted for the next request." + }, "unlisted": { "type": "boolean", "default": false, "description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests." + }, + "rpcHealthCheck": { + "type": "boolean", + "default": false, + "description": "Enable TCP health checks for RPC endpoints specified in cmd. When enabled, parses --rpc host:port[,host:port,...] from cmd and performs health checks every 30 seconds. Models with unhealthy RPC endpoints are filtered from /v1/models and return 503 on inference requests." } } } diff --git a/config.example.yaml b/config.example.yaml index d8282fc1..defb8e55 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -249,6 +249,16 @@ models: # - recommended to be omitted and the default used concurrencyLimit: 0 + # requestTimeout: maximum time in seconds for a single request to complete + # - optional, default: 0 (no timeout) + # - useful for preventing runaway inference processes that never complete + # - when exceeded, the model process is forcefully stopped + # - protects against GPU overheating and blocking from stuck processes + # - the process must be restarted for the next request + # - set to 0 to disable timeout + # - recommended for models that may have infinite loops or excessive generation + requestTimeout: 0 # disabled by default, set to e.g., 300 for 5 minutes + # sendLoadingState: overrides the global sendLoadingState setting for this model # - optional, default: undefined (use global setting) sendLoadingState: false @@ -262,6 +272,24 @@ models: unlisted: true cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0 + # RPC health check example for distributed inference: + "qwen-distributed": + # rpcHealthCheck: enable TCP health checks for RPC endpoints + # - optional, default: false + # - when enabled, parses --rpc host:port[,host:port,...] 
from cmd + # - performs TCP connectivity checks every 30 seconds + # - model is only listed in /v1/models when ALL RPC endpoints are healthy + # - inference requests to unhealthy models return HTTP 503 + # - useful for distributed inference with llama.cpp's rpc-server + rpcHealthCheck: true + cmd: | + llama-server --port ${PORT} + --rpc 192.168.1.10:50051,192.168.1.11:50051 + -m Qwen2.5-32B-Instruct-Q4_K_M.gguf + -ngl 99 + name: "Qwen 32B (Distributed)" + description: "Large model using distributed RPC inference" + # Docker example: # container runtimes like Docker and Podman can be used reliably with # a combination of cmd, cmdStop, and ${MODEL_ID} diff --git a/config_embed.go b/config_embed.go new file mode 100644 index 00000000..b158e944 --- /dev/null +++ b/config_embed.go @@ -0,0 +1,13 @@ +package main + +import ( + _ "embed" +) + +//go:embed config.example.yaml +var configExampleYAML []byte + +// GetConfigExampleYAML returns the embedded example config file +func GetConfigExampleYAML() []byte { + return configExampleYAML +} diff --git a/docs/configuration.md b/docs/configuration.md index 5aac2706..13b747d2 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -72,16 +72,17 @@ models: llama-swap supports many more features to customize how you want to manage your environment. 
-| Feature | Description | -| --------- | ---------------------------------------------- | -| `ttl` | automatic unloading of models after a timeout | -| `macros` | reusable snippets to use in configurations | -| `groups` | run multiple models at a time | -| `hooks` | event driven functionality | -| `env` | define environment variables per model | -| `aliases` | serve a model with different names | -| `filters` | modify requests before sending to the upstream | -| `...` | And many more tweaks | +| Feature | Description | +| ----------------- | ------------------------------------------------------- | +| `ttl` | automatic unloading of models after a timeout | +| `macros` | reusable snippets to use in configurations | +| `groups` | run multiple models at a time | +| `hooks` | event driven functionality | +| `env` | define environment variables per model | +| `aliases` | serve a model with different names | +| `filters` | modify requests before sending to the upstream | +| `rpcHealthCheck` | monitor RPC server health for distributed inference | +| `...` | And many more tweaks | ## Full Configuration Example @@ -319,6 +320,16 @@ models: # - recommended to be omitted and the default used concurrencyLimit: 0 + # requestTimeout: maximum time in seconds for a single request to complete + # - optional, default: 0 (no timeout) + # - useful for preventing runaway inference processes that never complete + # - when exceeded, the model process is forcefully stopped + # - protects against GPU overheating and blocking from stuck processes + # - the process must be restarted for the next request + # - set to 0 to disable timeout + # - recommended for models that may have infinite loops or excessive generation + requestTimeout: 300 # 5 minutes + # sendLoadingState: overrides the global sendLoadingState setting for this model # - optional, default: undefined (use global setting) sendLoadingState: false diff --git a/llama-swap.go b/llama-swap.go index 9706e07d..1c68a25c 100644 --- 
a/llama-swap.go +++ b/llama-swap.go @@ -97,6 +97,8 @@ func main() { currentPM.Shutdown() newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) + newPM.SetConfigPath(*configPath) + newPM.SetConfigExample(GetConfigExampleYAML()) srv.Handler = newPM fmt.Println("Configuration Reloaded") @@ -114,6 +116,8 @@ func main() { } newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) + newPM.SetConfigPath(*configPath) + newPM.SetConfigExample(GetConfigExampleYAML()) srv.Handler = newPM } } @@ -121,13 +125,15 @@ func main() { // load the initial proxy manager reloadProxyManager() debouncedReload := debounce(time.Second, reloadProxyManager) - if *watchConfig { - defer event.On(func(e proxy.ConfigFileChangedEvent) { - if e.ReloadingState == proxy.ReloadingStateStart { - debouncedReload() - } - })() + // Always listen for API-triggered config changes + defer event.On(func(e proxy.ConfigFileChangedEvent) { + if e.ReloadingState == proxy.ReloadingStateStart { + debouncedReload() + } + })() + + if *watchConfig { fmt.Println("Watching Configuration for changes") go func() { absConfigPath, err := filepath.Abs(*configPath) diff --git a/proxy/config/config.go b/proxy/config/config.go index 945a9d4b..786775dc 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -3,6 +3,7 @@ package config import ( "fmt" "io" + "net" "net/url" "os" "regexp" @@ -533,6 +534,63 @@ func SanitizeCommand(cmdStr string) ([]string, error) { return args, nil } +// ParseRPCEndpoints extracts RPC endpoints from command string +// Handles: --rpc host:port,host2:port2 or --rpc=host:port or -rpc host:port +func ParseRPCEndpoints(cmdStr string) ([]string, error) { + args, err := SanitizeCommand(cmdStr) + if err != nil { + return nil, err + } + + var endpoints []string + for i, arg := range args { + if arg == "--rpc" || arg == "-rpc" { + if i+1 < len(args) { + endpoints = parseEndpointList(args[i+1]) + } + } else if strings.HasPrefix(arg, "--rpc=") { + endpoints = 
parseEndpointList(strings.TrimPrefix(arg, "--rpc=")) + } else if strings.HasPrefix(arg, "-rpc=") { + endpoints = parseEndpointList(strings.TrimPrefix(arg, "-rpc=")) + } + } + + // Validate each endpoint + for _, ep := range endpoints { + if _, _, err := net.SplitHostPort(ep); err != nil { + return nil, fmt.Errorf("invalid RPC endpoint %q: %w", ep, err) + } + } + + return endpoints, nil +} + +func parseEndpointList(s string) []string { + s = strings.TrimSpace(s) + + // Strip surrounding quotes (both single and double) from the whole string + // if they match. This handles cases like: "host:port,host2:port2" + if len(s) >= 2 { + if (s[0] == '\'' && s[len(s)-1] == '\'') || (s[0] == '"' && s[len(s)-1] == '"') { + s = s[1 : len(s)-1] + } + } + + parts := strings.Split(s, ",") + var result []string + for _, p := range parts { + p = strings.TrimSpace(p) + // Strip any remaining leading/trailing quotes from individual parts + // This handles Windows where shlex doesn't handle single quotes and + // may split 'host:port, host2:port' into "'host:port," and "host2:port'" + p = strings.Trim(p, "'\"") + if p != "" { + result = append(result, p) + } + } + return result +} + func StripComments(cmdStr string) string { var cleanedLines []string for _, line := range strings.Split(cmdStr, "\n") { diff --git a/proxy/config/config_test.go b/proxy/config/config_test.go index 49bbdc9f..e949fd1b 100644 --- a/proxy/config/config_test.go +++ b/proxy/config/config_test.go @@ -1373,3 +1373,108 @@ models: }) } + +func TestParseRPCEndpoints_ValidFormats(t *testing.T) { + tests := []struct { + name string + cmd string + expected []string + }{ + { + name: "single endpoint with --rpc", + cmd: "llama-server --rpc localhost:50051 -ngl 99", + expected: []string{"localhost:50051"}, + }, + { + name: "single endpoint with --rpc=", + cmd: "llama-server --rpc=192.168.1.100:50051 -ngl 99", + expected: []string{"192.168.1.100:50051"}, + }, + { + name: "single endpoint with -rpc", + cmd: "llama-server -rpc 
localhost:50051 -ngl 99", + expected: []string{"localhost:50051"}, + }, + { + name: "single endpoint with -rpc=", + cmd: "llama-server -rpc=localhost:50051 -ngl 99", + expected: []string{"localhost:50051"}, + }, + { + name: "multiple endpoints comma-separated", + cmd: "llama-server --rpc 192.168.1.10:50051,192.168.1.11:50051 -ngl 99", + expected: []string{"192.168.1.10:50051", "192.168.1.11:50051"}, + }, + { + name: "multiple endpoints with spaces trimmed", + cmd: "llama-server --rpc '192.168.1.10:50051, 192.168.1.11:50051' -ngl 99", + expected: []string{"192.168.1.10:50051", "192.168.1.11:50051"}, + }, + { + name: "IPv6 endpoint", + cmd: "llama-server --rpc [::1]:50051 -ngl 99", + expected: []string{"[::1]:50051"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + endpoints, err := ParseRPCEndpoints(tt.cmd) + assert.NoError(t, err) + assert.Equal(t, tt.expected, endpoints) + }) + } +} + +func TestParseRPCEndpoints_NoRPCFlag(t *testing.T) { + cmd := "llama-server -ngl 99 -m model.gguf" + endpoints, err := ParseRPCEndpoints(cmd) + assert.NoError(t, err) + assert.Empty(t, endpoints) +} + +func TestParseRPCEndpoints_InvalidFormats(t *testing.T) { + tests := []struct { + name string + cmd string + wantErr string + }{ + { + name: "missing port", + cmd: "llama-server --rpc localhost -ngl 99", + wantErr: "invalid RPC endpoint", + }, + { + name: "invalid host:port format", + cmd: "llama-server --rpc not-a-valid-endpoint -ngl 99", + wantErr: "invalid RPC endpoint", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := ParseRPCEndpoints(tt.cmd) + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr) + }) + } +} + +func TestParseRPCEndpoints_EmptyEndpointsFiltered(t *testing.T) { + // Empty strings after commas are filtered out + cmd := "llama-server --rpc 'localhost:50051,,' -ngl 99" + endpoints, err := ParseRPCEndpoints(cmd) + assert.NoError(t, err) + assert.Equal(t, []string{"localhost:50051"}, 
endpoints) +} + +func TestParseRPCEndpoints_MultilineCommand(t *testing.T) { + cmd := `llama-server \ + --rpc localhost:50051 \ + -ngl 99 \ + -m model.gguf` + + endpoints, err := ParseRPCEndpoints(cmd) + assert.NoError(t, err) + assert.Equal(t, []string{"localhost:50051"}, endpoints) +} diff --git a/proxy/config/model_config.go b/proxy/config/model_config.go index 9dc37aea..92bed341 100644 --- a/proxy/config/model_config.go +++ b/proxy/config/model_config.go @@ -36,6 +36,12 @@ type ModelConfig struct { // override global setting SendLoadingState *bool `yaml:"sendLoadingState"` + + // RPC health checking + RPCHealthCheck bool `yaml:"rpcHealthCheck"` + // Maximum time in seconds for a request to complete before killing the process + // 0 means no timeout (default) + RequestTimeout int `yaml:"requestTimeout"` } func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { @@ -53,6 +59,8 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { ConcurrencyLimit: 0, Name: "", Description: "", + RPCHealthCheck: false, + RequestTimeout: 0, } // the default cmdStop to taskkill /f /t /pid ${PID} diff --git a/proxy/process.go b/proxy/process.go index 41427059..a464980e 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -79,18 +79,25 @@ type Process struct { // track the number of failed starts failedStartCount int + + // RPC health checking + rpcEndpoints []string + rpcHealthy atomic.Bool + rpcHealthTicker *time.Ticker + rpcHealthCancel context.CancelFunc + shutdownCtx context.Context // from ProxyManager for graceful shutdown } -func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor) *Process { +func NewProcess(ID string, healthCheckTimeout int, modelConfig config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor, shutdownCtx context.Context) *Process { concurrentLimit := 10 - if config.ConcurrencyLimit > 0 { - concurrentLimit = 
config.ConcurrencyLimit + if modelConfig.ConcurrencyLimit > 0 { + concurrentLimit = modelConfig.ConcurrencyLimit } // Setup the reverse proxy. - proxyURL, err := url.Parse(config.Proxy) + proxyURL, err := url.Parse(modelConfig.Proxy) if err != nil { - proxyLogger.Errorf("<%s> invalid proxy URL %q: %v", ID, config.Proxy, err) + proxyLogger.Errorf("<%s> invalid proxy URL %q: %v", ID, modelConfig.Proxy, err) } var reverseProxy *httputil.ReverseProxy @@ -105,9 +112,9 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr } } - return &Process{ + p := &Process{ ID: ID, - config: config, + config: modelConfig, cmd: nil, reverseProxy: reverseProxy, cancelUpstream: nil, @@ -124,7 +131,25 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr // stop timeout gracefulStopTimeout: 10 * time.Second, cmdWaitChan: make(chan struct{}), + shutdownCtx: shutdownCtx, + } + + // Parse RPC endpoints if health checking enabled + if modelConfig.RPCHealthCheck { + endpoints, err := config.ParseRPCEndpoints(modelConfig.Cmd) + if err != nil { + proxyLogger.Errorf("<%s> failed to parse RPC endpoints: %v", ID, err) + } else if len(endpoints) == 0 { + proxyLogger.Warnf("<%s> rpcHealthCheck enabled but no --rpc flag found in cmd", ID) + } else { + p.rpcEndpoints = endpoints + p.rpcHealthy.Store(false) // start unhealthy until first check passes + // Start health checker immediately - runs independent of process state + p.startRPCHealthChecker() + } } + + return p } // LogMonitor returns the log monitor associated with the process. @@ -381,13 +406,17 @@ func (p *Process) Stop() { // StopImmediately will transition the process to the stopping state and stop the process with a SIGTERM. // If the process does not stop within the specified timeout, it will be forcefully stopped with a SIGKILL. 
func (p *Process) StopImmediately() { - if !isValidTransition(p.CurrentState(), StateStopping) { + currentState := p.CurrentState() + if !isValidTransition(currentState, StateStopping) { return } - p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState()) - if curState, err := p.swapState(StateReady, StateStopping); err != nil { - p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState) + p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, currentState) + + // Try to transition from current state to StateStopping + // Process might be in StateReady or StateStarting when timeout fires + if _, err := p.swapState(currentState, StateStopping); err != nil { + p.proxyLogger.Infof("<%s> Stop() %s -> StateStopping err: %v", p.ID, currentState, err) return } @@ -500,6 +529,34 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) { p.inFlightRequests.Done() }() + // Start timeout monitoring if requestTimeout is configured + var timeoutCancel context.CancelFunc + var requestCtx context.Context = r.Context() + + if p.config.RequestTimeout > 0 { + timeoutDuration := time.Duration(p.config.RequestTimeout) * time.Second + var cancel context.CancelFunc + requestCtx, cancel = context.WithTimeout(r.Context(), timeoutDuration) + timeoutCancel = cancel + + go func() { + <-requestCtx.Done() + if requestCtx.Err() == context.DeadlineExceeded { + p.proxyLogger.Warnf("<%s> Request timeout exceeded (%v), force stopping process to prevent GPU blocking", p.ID, timeoutDuration) + // Force stop the process - this will kill the underlying inference process + p.StopImmediately() + } + }() + + // Ensure timeout is cancelled when request completes + defer timeoutCancel() + } + + // Create a new request with the timeout context + if requestCtx != r.Context() { + r = r.Clone(requestCtx) + } + // for #366 // - extract streaming param from request context, should have been set by 
proxymanager var srw *statusResponseWriter @@ -877,3 +934,67 @@ func (s *statusResponseWriter) Flush() { flusher.Flush() } } + +// startRPCHealthChecker launches background goroutine for RPC health monitoring. +// Runs independently of process state - checks RPC endpoints regardless of whether +// the model is loaded, starting, stopped, etc. +func (p *Process) startRPCHealthChecker() { + if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 { + return + } + + ctx, cancel := context.WithCancel(p.shutdownCtx) + p.rpcHealthCancel = cancel + p.rpcHealthTicker = time.NewTicker(30 * time.Second) + + go func() { + defer p.rpcHealthTicker.Stop() + + // Run initial check immediately + p.checkRPCHealth() + + for { + select { + case <-ctx.Done(): + p.proxyLogger.Debugf("<%s> RPC health checker shutting down", p.ID) + return + case <-p.rpcHealthTicker.C: + // Check regardless of process state + p.checkRPCHealth() + } + } + }() +} + +func (p *Process) checkRPCHealth() { + allHealthy := true + + for _, endpoint := range p.rpcEndpoints { + dialer := net.Dialer{Timeout: 500 * time.Millisecond} + conn, err := dialer.Dial("tcp", endpoint) + if err != nil { + p.proxyLogger.Warnf("<%s> RPC endpoint %s unhealthy: %v", p.ID, endpoint, err) + allHealthy = false + break + } + conn.Close() + } + + wasHealthy := p.rpcHealthy.Load() + p.rpcHealthy.Store(allHealthy) + + // Log state changes + if wasHealthy && !allHealthy { + p.proxyLogger.Infof("<%s> RPC endpoints now UNHEALTHY", p.ID) + } else if !wasHealthy && allHealthy { + p.proxyLogger.Infof("<%s> RPC endpoints now HEALTHY", p.ID) + } +} + +// IsRPCHealthy returns true if RPC health checking is disabled or all endpoints healthy +func (p *Process) IsRPCHealthy() bool { + if !p.config.RPCHealthCheck || len(p.rpcEndpoints) == 0 { + return true // not using RPC health checks + } + return p.rpcHealthy.Load() +} diff --git a/proxy/process_rpc_health_test.go b/proxy/process_rpc_health_test.go new file mode 100644 index 00000000..cb9d1d25 --- 
/dev/null +++ b/proxy/process_rpc_health_test.go @@ -0,0 +1,84 @@ +package proxy + +import ( + "context" + "io" + "testing" + + "github.com/mostlygeek/llama-swap/proxy/config" + "github.com/stretchr/testify/assert" +) + +func TestProcess_RPCHealthIndependentOfState(t *testing.T) { + testLogger := NewLogMonitorWriter(io.Discard) + proxyLogger := NewLogMonitorWriter(io.Discard) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + modelConfig := config.ModelConfig{ + Cmd: "llama-server --rpc 127.0.0.1:50051", + Proxy: "http://localhost:8080", + RPCHealthCheck: true, + } + + process := NewProcess("test-model", 5, modelConfig, testLogger, proxyLogger, ctx) + + // Verify endpoints were parsed + assert.NotEmpty(t, process.rpcEndpoints, "RPC endpoints should be parsed from cmd") + assert.Equal(t, []string{"127.0.0.1:50051"}, process.rpcEndpoints) + + // Initially should be unhealthy (false) until first check + assert.False(t, process.rpcHealthy.Load(), "RPC health should start as false") + + // Health checker should be running regardless of process state + assert.NotNil(t, process.rpcHealthTicker, "Health checker ticker should be running") + assert.NotNil(t, process.rpcHealthCancel, "Health checker should have cancel func") + + // Process state should not affect health checking + assert.Equal(t, StateStopped, process.CurrentState(), "Process should be in stopped state") + + // Health check runs independently - simulate RPC becoming healthy + process.rpcHealthy.Store(true) + assert.True(t, process.IsRPCHealthy(), "Process should report healthy regardless of state") +} + +func TestProcess_RPCHealthCheckDisabled(t *testing.T) { + testLogger := NewLogMonitorWriter(io.Discard) + proxyLogger := NewLogMonitorWriter(io.Discard) + ctx := context.Background() + + modelConfig := config.ModelConfig{ + Cmd: "llama-server --rpc 127.0.0.1:50051", + Proxy: "http://localhost:8080", + RPCHealthCheck: false, // Disabled + } + + process := NewProcess("test-model", 5, 
modelConfig, testLogger, proxyLogger, ctx) + + // Should always return healthy when disabled + assert.True(t, process.IsRPCHealthy(), "Should return true when RPC health check is disabled") +} + +func TestProcess_RPCHealthCheckNoEndpoints(t *testing.T) { + testLogger := NewLogMonitorWriter(io.Discard) + proxyLogger := NewLogMonitorWriter(io.Discard) + ctx := context.Background() + + modelConfig := config.ModelConfig{ + Cmd: "llama-server --port 8080", // No --rpc flag + Proxy: "http://localhost:8080", + RPCHealthCheck: true, // Enabled but no endpoints + } + + process := NewProcess("test-model", 5, modelConfig, testLogger, proxyLogger, ctx) + + // Should have no endpoints + assert.Empty(t, process.rpcEndpoints, "Should have no RPC endpoints when --rpc flag is missing") + + // Should return healthy when no endpoints configured (treat as not using RPC) + assert.True(t, process.IsRPCHealthy(), "Should return true when no RPC endpoints found") + + // Health checker should NOT start when no endpoints + assert.Nil(t, process.rpcHealthTicker, "Health checker should not run without endpoints") + assert.Nil(t, process.rpcHealthCancel, "Health checker cancel should be nil") +} diff --git a/proxy/process_test.go b/proxy/process_test.go index 3881c3dd..87e31d6d 100644 --- a/proxy/process_test.go +++ b/proxy/process_test.go @@ -1,6 +1,7 @@ package proxy import ( + "context" "fmt" "net/http" "net/http/httptest" @@ -33,7 +34,7 @@ func TestProcess_AutomaticallyStartsUpstream(t *testing.T) { config := getTestSimpleResponderConfig(expectedMessage) // Create a process - process := NewProcess("test-process", 5, config, debugLogger, debugLogger) + process := NewProcess("test-process", 5, config, debugLogger, debugLogger, context.Background()) defer process.Stop() req := httptest.NewRequest("GET", "/test", nil) @@ -69,7 +70,7 @@ func TestProcess_WaitOnMultipleStarts(t *testing.T) { expectedMessage := "testing91931" config := getTestSimpleResponderConfig(expectedMessage) - process := 
NewProcess("test-process", 5, config, debugLogger, debugLogger) + process := NewProcess("test-process", 5, config, debugLogger, debugLogger, context.Background()) defer process.Stop() var wg sync.WaitGroup @@ -97,7 +98,7 @@ func TestProcess_BrokenModelConfig(t *testing.T) { CheckEndpoint: "/health", } - process := NewProcess("broken", 1, config, debugLogger, debugLogger) + process := NewProcess("broken", 1, config, debugLogger, debugLogger, context.Background()) req := httptest.NewRequest("GET", "/", nil) w := httptest.NewRecorder() @@ -122,7 +123,7 @@ func TestProcess_UnloadAfterTTL(t *testing.T) { config.UnloadAfter = 3 // seconds assert.Equal(t, 3, config.UnloadAfter) - process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger) + process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger, context.Background()) defer process.Stop() // this should take 4 seconds @@ -164,7 +165,7 @@ func TestProcess_LowTTLValue(t *testing.T) { config.UnloadAfter = 1 // second assert.Equal(t, 1, config.UnloadAfter) - process := NewProcess("ttl", 2, config, debugLogger, debugLogger) + process := NewProcess("ttl", 2, config, debugLogger, debugLogger, context.Background()) defer process.Stop() for i := 0; i < 100; i++ { @@ -191,7 +192,7 @@ func TestProcess_HTTPRequestsHaveTimeToFinish(t *testing.T) { expectedMessage := "12345" config := getTestSimpleResponderConfig(expectedMessage) - process := NewProcess("t", 10, config, debugLogger, debugLogger) + process := NewProcess("t", 10, config, debugLogger, debugLogger, context.Background()) defer process.Stop() results := map[string]string{ @@ -264,7 +265,7 @@ func TestProcess_SwapState(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - p := NewProcess("test", 10, getTestSimpleResponderConfig("test"), debugLogger, debugLogger) + p := NewProcess("test", 10, getTestSimpleResponderConfig("test"), debugLogger, debugLogger, context.Background()) p.state = test.currentState resultState, err 
:= p.swapState(test.expectedState, test.newState) @@ -297,7 +298,7 @@ func TestProcess_ShutdownInterruptsHealthCheck(t *testing.T) { config.Proxy = "http://localhost:9998/test" healthCheckTTLSeconds := 30 - process := NewProcess("test-process", healthCheckTTLSeconds, config, debugLogger, debugLogger) + process := NewProcess("test-process", healthCheckTTLSeconds, config, debugLogger, debugLogger, context.Background()) // make it a lot faster process.healthCheckLoopInterval = time.Second @@ -332,7 +333,7 @@ func TestProcess_ExitInterruptsHealthCheck(t *testing.T) { CheckEndpoint: "/health", } - process := NewProcess("sleepy", checkHealthTimeout, config, debugLogger, debugLogger) + process := NewProcess("sleepy", checkHealthTimeout, config, debugLogger, debugLogger, context.Background()) process.healthCheckLoopInterval = time.Second // make it faster err := process.start() assert.Equal(t, "upstream command exited prematurely but successfully", err.Error()) @@ -350,7 +351,7 @@ func TestProcess_ConcurrencyLimit(t *testing.T) { // only allow 1 concurrent request at a time config.ConcurrencyLimit = 1 - process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger) + process := NewProcess("ttl_test", 2, config, debugLogger, debugLogger, context.Background()) assert.Equal(t, 1, cap(process.concurrencyLimitSemaphore)) defer process.Stop() @@ -375,7 +376,7 @@ func TestProcess_StopImmediately(t *testing.T) { expectedMessage := "test_stop_immediate" config := getTestSimpleResponderConfig(expectedMessage) - process := NewProcess("stop_immediate", 2, config, debugLogger, debugLogger) + process := NewProcess("stop_immediate", 2, config, debugLogger, debugLogger, context.Background()) defer process.Stop() err := process.start() @@ -415,7 +416,7 @@ func TestProcess_ForceStopWithKill(t *testing.T) { CheckEndpoint: "/health", } - process := NewProcess("stop_immediate", 2, conf, debugLogger, debugLogger) + process := NewProcess("stop_immediate", 2, conf, debugLogger, 
debugLogger, context.Background())
 	defer process.Stop()
 
 	// reduce to make testing go faster
@@ -465,7 +466,7 @@ func TestProcess_StopCmd(t *testing.T) {
 		conf.CmdStop = "kill -TERM ${PID}"
 	}
 
-	process := NewProcess("testStopCmd", 2, conf, debugLogger, debugLogger)
+	process := NewProcess("testStopCmd", 2, conf, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	err := process.start()
@@ -485,8 +486,8 @@ func TestProcess_EnvironmentSetCorrectly(t *testing.T) {
 	// ensure the additiona variables are appended to the process' environment
 	configWEnv.Env = append(configWEnv.Env, "TEST_ENV1=1", "TEST_ENV2=2")
 
-	process1 := NewProcess("env_test", 2, conf, debugLogger, debugLogger)
-	process2 := NewProcess("env_test", 2, configWEnv, debugLogger, debugLogger)
+	process1 := NewProcess("env_test", 2, conf, debugLogger, debugLogger, context.Background())
+	process2 := NewProcess("env_test", 2, configWEnv, debugLogger, debugLogger, context.Background())
 
 	process1.start()
 	defer process1.Stop()
@@ -521,7 +522,7 @@ func TestProcess_ReverseProxyPanicIsHandled(t *testing.T) {
 	expectedMessage := "panic_test"
 	config := getTestSimpleResponderConfig(expectedMessage)
 
-	process := NewProcess("panic-test", 5, config, debugLogger, debugLogger)
+	process := NewProcess("panic-test", 5, config, debugLogger, debugLogger, context.Background())
 	defer process.Stop()
 
 	// Start the process
diff --git a/proxy/process_timeout_test.go b/proxy/process_timeout_test.go
new file mode 100644
index 00000000..9f048d9e
--- /dev/null
+++ b/proxy/process_timeout_test.go
@@ -0,0 +1,110 @@
+package proxy
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/mostlygeek/llama-swap/proxy/config"
+)
+
+// TestProcess_RequestTimeout verifies that requestTimeout actually kills the process
+func TestProcess_RequestTimeout(t *testing.T) {
+	// Create a mock server that simulates a long-running inference
+	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		t.Logf("Mock server received request")
+
+		// Simulate streaming response that takes 60 seconds
+		w.Header().Set("Content-Type", "text/event-stream")
+		w.WriteHeader(http.StatusOK)
+
+		flusher, ok := w.(http.Flusher)
+		if !ok {
+			t.Fatal("Expected http.ResponseWriter to be an http.Flusher")
+		}
+
+		// Stream data for 60 seconds
+		for i := 0; i < 60; i++ {
+			select {
+			case <-r.Context().Done():
+				t.Logf("Mock server: client disconnected after %d seconds", i)
+				return
+			default:
+				fmt.Fprintf(w, "data: token %d\n\n", i)
+				flusher.Flush()
+				time.Sleep(1 * time.Second)
+			}
+		}
+		t.Logf("Mock server completed full 60 second response")
+	}))
+	defer mockServer.Close()
+
+	// Setup process logger - use NewLogMonitor() to avoid race in test
+	processLogger := NewLogMonitor()
+	proxyLogger := NewLogMonitor()
+
+	// Create process with 5 second request timeout
+	cfg := config.ModelConfig{
+		Proxy:          mockServer.URL,
+		CheckEndpoint:  "none", // skip health check
+		RequestTimeout: 5,      // 5 second timeout
+	}
+
+	// NewProcess takes a shutdown context since the rpcHealthCheck change;
+	// RPC health checking is disabled for this model so Background() is inert here.
+	p := NewProcess("test-timeout", 30, cfg, processLogger, proxyLogger, context.Background())
+	p.gracefulStopTimeout = 2 * time.Second // shorter for testing
+
+	// Manually set state to ready (skip actual process start)
+	p.forceState(StateReady)
+
+	// Make a request that should timeout
+	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
+	w := httptest.NewRecorder()
+
+	start := time.Now()
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	go func() {
+		defer wg.Done()
+		p.ProxyRequest(w, req)
+	}()
+
+	// Wait for either completion or timeout
+	done := make(chan struct{})
+	go func() {
+		wg.Wait()
+		close(done)
+	}()
+
+	select {
+	case <-done:
+		elapsed := time.Since(start)
+		t.Logf("Request completed after %v", elapsed)
+
+		// Request should complete within timeout + gracefulStopTimeout + some buffer
+		maxExpected := time.Duration(cfg.RequestTimeout+2)*time.Second + 3*time.Second
+		if elapsed > maxExpected {
+			t.Errorf("Request took %v, expected
less than %v with 5s timeout", elapsed, maxExpected) + } else { + t.Logf("✓ Request was properly terminated by timeout") + } + + case <-time.After(15 * time.Second): + t.Fatalf("Test timed out after 15 seconds - request should have been killed by requestTimeout") + } +} + +// TestProcess_RequestTimeoutWithRealProcess tests with an actual process +func TestProcess_RequestTimeoutWithRealProcess(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test with real process in short mode") + } + + // This test would require a real llama.cpp server or similar + // For now, we can skip it or mock it + t.Skip("Requires real inference server") +} diff --git a/proxy/processgroup.go b/proxy/processgroup.go index b401d8a6..c920f302 100644 --- a/proxy/processgroup.go +++ b/proxy/processgroup.go @@ -1,6 +1,7 @@ package proxy import ( + "context" "fmt" "net/http" "slices" @@ -24,9 +25,11 @@ type ProcessGroup struct { // map of current processes processes map[string]*Process lastUsedProcess string + + shutdownCtx context.Context } -func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup { +func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor, shutdownCtx context.Context) *ProcessGroup { groupConfig, ok := config.Groups[id] if !ok { panic("Unable to find configuration for group id: " + id) @@ -41,13 +44,14 @@ func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, u proxyLogger: proxyLogger, upstreamLogger: upstreamLogger, processes: make(map[string]*Process), + shutdownCtx: shutdownCtx, } // Create a Process for each member in the group for _, modelID := range groupConfig.Members { modelConfig, modelID, _ := pg.config.FindConfig(modelID) processLogger := NewLogMonitorWriter(upstreamLogger) - process := NewProcess(modelID, pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger) + process := NewProcess(modelID, 
pg.config.HealthCheckTimeout, modelConfig, processLogger, pg.proxyLogger, shutdownCtx) pg.processes[modelID] = process } diff --git a/proxy/processgroup_test.go b/proxy/processgroup_test.go index 6b90f443..55e5276a 100644 --- a/proxy/processgroup_test.go +++ b/proxy/processgroup_test.go @@ -2,6 +2,7 @@ package proxy import ( "bytes" + "context" "net/http" "net/http/httptest" "sync" @@ -35,12 +36,12 @@ var processGroupTestConfig = config.AddDefaultGroupToConfig(config.Config{ }) func TestProcessGroup_DefaultHasCorrectModel(t *testing.T) { - pg := NewProcessGroup(config.DEFAULT_GROUP_ID, processGroupTestConfig, testLogger, testLogger) + pg := NewProcessGroup(config.DEFAULT_GROUP_ID, processGroupTestConfig, testLogger, testLogger, context.Background()) assert.True(t, pg.HasMember("model5")) } func TestProcessGroup_HasMember(t *testing.T) { - pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger) + pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger, context.Background()) assert.True(t, pg.HasMember("model1")) assert.True(t, pg.HasMember("model2")) assert.False(t, pg.HasMember("model3")) @@ -74,7 +75,7 @@ func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) { }, }) - pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger) + pg := NewProcessGroup("G1", processGroupTestConfig, testLogger, testLogger, context.Background()) defer pg.StopProcesses(StopWaitForInflightRequest) tests := []string{"model1", "model2", "model3", "model4", "model5"} @@ -96,7 +97,7 @@ func TestProcessGroup_ProxyRequestSwapIsTrueParallel(t *testing.T) { } func TestProcessGroup_ProxyRequestSwapIsFalse(t *testing.T) { - pg := NewProcessGroup("G2", processGroupTestConfig, testLogger, testLogger) + pg := NewProcessGroup("G2", processGroupTestConfig, testLogger, testLogger, context.Background()) defer pg.StopProcesses(StopWaitForInflightRequest) tests := []string{"model3", "model4"} diff --git a/proxy/proxymanager.go 
b/proxy/proxymanager.go index 5a016bc5..3bb33b73 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -52,6 +52,12 @@ type ProxyManager struct { commit string version string + // config file path for editing + configPath string + + // embedded example config + configExample []byte + // peer proxy see: #296, #433 peerProxy *PeerProxy } @@ -167,7 +173,7 @@ func New(proxyConfig config.Config) *ProxyManager { // create the process groups for groupID := range proxyConfig.Groups { - processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger) + processGroup := NewProcessGroup(groupID, proxyConfig, proxyLogger, upstreamLogger, shutdownCtx) pm.processGroups[groupID] = processGroup } @@ -475,6 +481,16 @@ func (pm *ProxyManager) listModelsHandler(c *gin.Context) { continue } + // Filter models with unhealthy RPC endpoints + if processGroup := pm.findGroupByModelName(id); processGroup != nil { + if process, ok := processGroup.GetMember(id); ok { + if !process.IsRPCHealthy() { + pm.proxyLogger.Debugf("<%s> filtered from /v1/models (unhealthy RPC)", id) + continue + } + } + } + data = append(data, newRecord(id, modelConfig)) // Include aliases @@ -627,6 +643,15 @@ func (pm *ProxyManager) proxyInferenceHandler(c *gin.Context) { return } + // Check RPC health before processing request + if process, ok := processGroup.GetMember(modelID); ok { + if !process.IsRPCHealthy() { + pm.sendErrorResponse(c, http.StatusServiceUnavailable, + fmt.Sprintf("model %s unavailable (RPC endpoints unhealthy)", modelID)) + return + } + } + // issue #69 allow custom model names to be sent to upstream useModelName := pm.config.Models[modelID].UseModelName if useModelName != "" { @@ -966,3 +991,15 @@ func (pm *ProxyManager) SetVersion(buildDate string, commit string, version stri pm.commit = commit pm.version = version } + +func (pm *ProxyManager) SetConfigPath(configPath string) { + pm.Lock() + defer pm.Unlock() + pm.configPath = configPath +} + +func (pm 
*ProxyManager) SetConfigExample(configExample []byte) { + pm.Lock() + defer pm.Unlock() + pm.configExample = configExample +} diff --git a/proxy/proxymanager_api.go b/proxy/proxymanager_api.go index fe4326d0..05058193 100644 --- a/proxy/proxymanager_api.go +++ b/proxy/proxymanager_api.go @@ -4,7 +4,9 @@ import ( "context" "encoding/json" "fmt" + "io" "net/http" + "os" "sort" "strings" @@ -31,6 +33,9 @@ func addApiHandlers(pm *ProxyManager) { apiGroup.GET("/events", pm.apiSendEvents) apiGroup.GET("/metrics", pm.apiGetMetrics) apiGroup.GET("/version", pm.apiGetVersion) + apiGroup.GET("/config/current", pm.apiGetCurrentConfig) + apiGroup.GET("/config/example", pm.apiGetExampleConfig) + apiGroup.POST("/config", pm.apiUpdateConfig) } } @@ -250,3 +255,65 @@ func (pm *ProxyManager) apiGetVersion(c *gin.Context) { "build_date": pm.buildDate, }) } + +func (pm *ProxyManager) apiGetCurrentConfig(c *gin.Context) { + pm.Lock() + configPath := pm.configPath + pm.Unlock() + + if configPath == "" { + pm.sendErrorResponse(c, http.StatusNotFound, "Config file path not set") + return + } + + data, err := os.ReadFile(configPath) + if err != nil { + pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to read config file: %v", err)) + return + } + + c.Data(http.StatusOK, "text/yaml; charset=utf-8", data) +} + +func (pm *ProxyManager) apiGetExampleConfig(c *gin.Context) { + pm.Lock() + data := pm.configExample + pm.Unlock() + + if data == nil { + pm.sendErrorResponse(c, http.StatusInternalServerError, "Example config not available") + return + } + + c.Data(http.StatusOK, "text/yaml; charset=utf-8", data) +} + +func (pm *ProxyManager) apiUpdateConfig(c *gin.Context) { + pm.Lock() + configPath := pm.configPath + pm.Unlock() + + if configPath == "" { + pm.sendErrorResponse(c, http.StatusBadRequest, "Config file path not set") + return + } + + body, err := io.ReadAll(c.Request.Body) + if err != nil { + pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("Failed 
to read request body: %v", err)) + return + } + + // Write to config file + if err := os.WriteFile(configPath, body, 0644); err != nil { + pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("Failed to write config file: %v", err)) + return + } + + // Trigger config reload event + event.Emit(ConfigFileChangedEvent{ + ReloadingState: ReloadingStateStart, + }) + + c.JSON(http.StatusOK, gin.H{"message": "Config updated successfully. Reloading..."}) +} diff --git a/test-config.yaml b/test-config.yaml new file mode 100644 index 00000000..15fd5784 --- /dev/null +++ b/test-config.yaml @@ -0,0 +1,264 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mostlygeek/llama-swap/refs/heads/main/config-schema.json +# +# llama-swap configuration for 16GB VRAM AMD Radeon RX 6800 XT (gfx1030) +# Optimized for headless system with no display overhead +# ------------------------------------- + +healthCheckTimeout: 300 +logLevel: info +logTimeFormat: "rfc3339" +logToStdout: "proxy" +metricsMaxInMemory: 1000 +startPort: 10001 +sendLoadingState: false +includeAliasesInList: false + +macros: + "latest-llama": > + /home/svc-gpgpu/.local/bin/llama-server + --port ${PORT} --host 0.0.0.0 -b 512 -ub 32 -np 1 + "default_ctx": 4096 + "rocm_device": "0" + +models: + # ======================================== + # GENERAL PURPOSE MODELS + # ======================================== + + "qwen3:14b-q5_k_m-32768": + cmd: | + ${latest-llama} + -hf Qwen/Qwen3-14B-GGUF:q5_k_m + --ctx-size 32768 + -fa auto + -ctv q8_0 + -ctk q8_0 + -ngl 99 + --jinja + --mmap + -b 512 + name: "qwen3:14b-q5_k_m-32768" + description: "VRAM: 12505 MiB" + ttl: 600 + + "qwen3:8b-q5_k_m-40960": + cmd: | + ${latest-llama} + -hf Qwen/Qwen3-8B-GGUF:q5_k_m + --ctx-size 40960 + -fa auto + -ctv q8_0 + -ctk q8_0 + -ngl 99 + --jinja + --mmap + -b 512 + name: "qwen3:8b-q5_k_m-40960" + description: "VRAM: 8491 MiB" + ttl: 600 + + "qwen3:8b-q8_0-32768": + cmd: | + ${latest-llama} + -hf 
Qwen/Qwen3-8B-GGUF:q8_0 + --ctx-size 32768 + -fa auto + -ctv q8_0 + -ctk q8_0 + -ngl 99 + --jinja + --mmap + -b 512 + name: "qwen3:8b-q8_0-32768" + description: "VRAM: 10381 MiB" + ttl: 600 + + "ministral-3:14b-instruct-q5_k_m-20480-vision": + cmd: | + ${latest-llama} + -hf mistralai/Ministral-3-14B-Instruct-2512-GGUF:q5_k_m + --ctx-size 20480 + -fa off + -ngl 99 + --mmap + --jinja + --mmproj-auto + name: "ministral-3:14b-instruct-q5_k_m-20480-vision" + description: "VRAM: 13184 MiB" + ttl: 600 + + "ministral-3:14b-reasoning-q5_k_m-20480-vision": + cmd: | + ${latest-llama} + -hf mistralai/Ministral-3-14B-Reasoning-2512-GGUF:q5_k_m + --ctx-size 20480 + -fa off + -ngl 99 + --mmap + --jinja + --mmproj-auto + name: "ministral-3:14b-reasoning-q5_k_m-20480-vision" + description: "VRAM: 13184 MiB" + ttl: 600 + + "ministral-3:14b-instruct-q5_k_m-32768": + cmd: | + ${latest-llama} + -hf mistralai/Ministral-3-14B-Instruct-2512-GGUF:q5_k_m + --ctx-size 32768 + -fa off + -ngl 99 + --mmap + --jinja + --no-mmproj + name: "ministral-3:14b-instruct-q5_k_m-32768" + description: "VRAM: 14224 MiB" + ttl: 600 + + "ministral-3:14b-reasoning-q5_k_m-32768": + cmd: | + ${latest-llama} + -hf mistralai/Ministral-3-14B-Reasoning-2512-GGUF:q5_k_m + --ctx-size 32768 + -fa off + -ngl 99 + --mmap + --jinja + --no-mmproj + name: "ministral-3:14b-reasoning-q5_k_m-32768" + description: "VRAM: 14224 MiB" + ttl: 600 + + # ======================================== + # UTILITY MODELS (General Purpose) + # ======================================== + + "embeddinggemma:300m": + cmd: | + ${latest-llama} + -hf gaianet/embeddinggemma-300m-GGUF + --ctx-size 2048 + -fa off + -ngl 99 + --embeddings + --pooling mean + -b 1024 + -ub 1024 + name: "embeddinggemma:300m" + description: "VRAM: 512 MiB" + ttl: 3600 + + "bge-reranker-v2-m3": + cmd: | + ${latest-llama} + -hf Felladrin/bge-reranker-v2-m3-Q8_0-GGUF + --ctx-size 8192 + -ngl 99 + --mmap + --rerank + --embedding + --pooling rank + -b 8192 + -ub 8192 + name: 
"bge-reranker-v2-m3" + description: "VRAM: 1077 MiB" + ttl: 3600 + + # ======================================== + # CODING MODELS + # ======================================== + + "qwen2.5-coder:14b-q5_k_m-32768": + cmd: | + ${latest-llama} + -hf Qwen/Qwen2.5-Coder-14B-Instruct-GGUF:q5_k_m + --ctx-size 32768 + -fa auto + -ctv q8_0 + -ctk q8_0 + -ngl 99 + --jinja + --mmap + -b 512 + name: "qwen2.5-coder:14b-q5_k_m-32768" + description: "VRAM: ~12500 MiB" + ttl: 600 + + "qwen2.5-coder:1.5b-q4_k_m-autocomplete": + cmd: | + ${latest-llama} + -hf Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF:q4_k_m + --ctx-size 2048 + -fa off + -ngl 99 + -b 128 + -ub 32 + --mmap + --no-warmup + name: "qwen2.5-coder:1.5b-q4_k_m-autocomplete" + description: "VRAM: ~1000 MiB" + ttl: 3600 + + # ======================================== + # PERSISTENT CPU MODEL + # ======================================== + + "qwen3:1.7b-cpu-json": + cmd: | + ${latest-llama} + -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_M + --ctx-size 8192 + -fa off + -ngl 0 + -b 512 + --jinja + --mmap + name: "qwen3:1.7b-cpu-json" + description: "CPU-only - permanent RAM resident for tags/titles/queries" + ttl: 0 + +# ======================================== +# GROUPS CONFIGURATION +# ======================================== + +groups: + # General purpose models can coexist with utility models + # When loaded, they prevent coding group from running + "general-purpose": + swap: false # All models in group can run simultaneously + exclusive: true # Unloads other exclusive groups when active + members: + - "qwen3:14b-q5_k_m-32768" + - "qwen3:8b-q5_k_m-40960" + - "qwen3:8b-q8_0-32768" + - "ministral-3:14b-instruct-q5_k_m-20480-vision" + - "ministral-3:14b-reasoning-q5_k_m-20480-vision" + - "ministral-3:14b-instruct-q5_k_m-32768" + - "ministral-3:14b-reasoning-q5_k_m-32768" + - "bge-reranker-v2-m3" + + # Coding models can coexist with each other + # When loaded, they prevent general-purpose group from running + "coding": + swap: false # Both coder 
models can run simultaneously + exclusive: true # Unloads other exclusive groups when active + members: + - "qwen2.5-coder:14b-q5_k_m-32768" + - "qwen2.5-coder:1.5b-q4_k_m-autocomplete" + + # CPU-based persistent model - never unloaded, doesn't interfere + "persistent-cpu": + swap: false # No swapping (only one model anyway) + exclusive: false # Doesn't unload other groups + persistent: true # Other groups cannot unload this + members: + - "qwen3:1.7b-cpu-json" + +# ======================================== +# STARTUP HOOKS +# ======================================== + +hooks: + on_startup: + preload: + - "qwen3:1.7b-cpu-json" diff --git a/ui-svelte/package-lock.json b/ui-svelte/package-lock.json index 93150075..57723b8e 100644 --- a/ui-svelte/package-lock.json +++ b/ui-svelte/package-lock.json @@ -8,6 +8,12 @@ "name": "ui-svelte", "version": "0.0.0", "dependencies": { + "@codemirror/lang-yaml": "^6.1.2", + "@codemirror/language": "^6.12.1", + "@codemirror/state": "^6.5.4", + "@codemirror/view": "^6.39.12", + "codemirror": "^6.0.2", + "js-yaml": "^4.1.1", "svelte-spa-router": "^4.0.1" }, "devDependencies": { @@ -21,6 +27,102 @@ "vite": "^6.3.5" } }, + "node_modules/@codemirror/autocomplete": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/@codemirror/autocomplete/-/autocomplete-6.20.0.tgz", + "integrity": "sha512-bOwvTOIJcG5FVo5gUUupiwYh8MioPLQ4UcqbcRf7UQ98X90tCa9E1kZ3Z7tqwpZxYyOvh1YTYbmZE9RTfTp5hg==", + "license": "MIT", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.17.0", + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@codemirror/commands": { + "version": "6.10.1", + "resolved": "https://registry.npmjs.org/@codemirror/commands/-/commands-6.10.1.tgz", + "integrity": "sha512-uWDWFypNdQmz2y1LaNJzK7fL7TYKLeUAU0npEC685OKTF3KcQ2Vu3klIM78D7I6wGhktme0lh3CuQLv0ZCrD9Q==", + "license": "MIT", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.4.0", + 
"@codemirror/view": "^6.27.0", + "@lezer/common": "^1.1.0" + } + }, + "node_modules/@codemirror/lang-yaml": { + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-yaml/-/lang-yaml-6.1.2.tgz", + "integrity": "sha512-dxrfG8w5Ce/QbT7YID7mWZFKhdhsaTNOYjOkSIMt1qmC4VQnXSDSYVHHHn8k6kJUfIhtLo8t1JJgltlxWdsITw==", + "license": "MIT", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.2.0", + "@lezer/lr": "^1.0.0", + "@lezer/yaml": "^1.0.0" + } + }, + "node_modules/@codemirror/language": { + "version": "6.12.1", + "resolved": "https://registry.npmjs.org/@codemirror/language/-/language-6.12.1.tgz", + "integrity": "sha512-Fa6xkSiuGKc8XC8Cn96T+TQHYj4ZZ7RdFmXA3i9xe/3hLHfwPZdM+dqfX0Cp0zQklBKhVD8Yzc8LS45rkqcwpQ==", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.23.0", + "@lezer/common": "^1.5.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0", + "style-mod": "^4.0.0" + } + }, + "node_modules/@codemirror/lint": { + "version": "6.9.3", + "resolved": "https://registry.npmjs.org/@codemirror/lint/-/lint-6.9.3.tgz", + "integrity": "sha512-y3YkYhdnhjDBAe0VIA0c4wVoFOvnp8CnAvfLqi0TqotIv92wIlAAP7HELOpLBsKwjAX6W92rSflA6an/2zBvXw==", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.35.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/search": { + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/@codemirror/search/-/search-6.6.0.tgz", + "integrity": "sha512-koFuNXcDvyyotWcgOnZGmY7LZqEOXZaaxD/j6n18TCLx2/9HieZJ5H6hs1g8FiRxBD0DNfs0nXn17g872RmYdw==", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.37.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/state": { + "version": "6.5.4", + "resolved": 
"https://registry.npmjs.org/@codemirror/state/-/state-6.5.4.tgz", + "integrity": "sha512-8y7xqG/hpB53l25CIoit9/ngxdfoG+fx+V3SHBrinnhOtLvKHRyAJJuHzkWrR4YXXLX8eXBsejgAAxHUOdW1yw==", + "license": "MIT", + "dependencies": { + "@marijn/find-cluster-break": "^1.0.0" + } + }, + "node_modules/@codemirror/view": { + "version": "6.39.12", + "resolved": "https://registry.npmjs.org/@codemirror/view/-/view-6.39.12.tgz", + "integrity": "sha512-f+/VsHVn/kOA9lltk/GFzuYwVVAKmOnNjxbrhkk3tPHntFqjWeI2TbIXx006YkBkqC10wZ4NsnWXCQiFPeAISQ==", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.5.0", + "crelt": "^1.0.6", + "style-mod": "^4.1.0", + "w3c-keyname": "^2.2.4" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.25.12", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz", @@ -513,6 +615,47 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@lezer/common": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.5.0.tgz", + "integrity": "sha512-PNGcolp9hr4PJdXR4ix7XtixDrClScvtSCYW3rQG106oVMOOI+jFb+0+J3mbeL/53g1Zd6s0kJzaw6Ri68GmAA==", + "license": "MIT" + }, + "node_modules/@lezer/highlight": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.3.tgz", + "integrity": "sha512-qXdH7UqTvGfdVBINrgKhDsVTJTxactNNxLk7+UMwZhU13lMHaOBlJe9Vqp907ya56Y3+ed2tlqzys7jDkTmW0g==", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.3.0" + } + }, + "node_modules/@lezer/lr": { + "version": "1.4.8", + "resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.8.tgz", + "integrity": "sha512-bPWa0Pgx69ylNlMlPvBPryqeLYQjyJjqPx+Aupm5zydLIF3NE+6MMLT8Yi23Bd9cif9VS00aUebn+6fDIGBcDA==", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@lezer/yaml": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@lezer/yaml/-/yaml-1.0.4.tgz", + "integrity": 
"sha512-2lrrHqxalACEbxIbsjhqGpSW8kWpUKuY6RHgnSAFZa6qK62wvnPxA8hGOwOoDbwHcOFs5M4o27mjGu+P7TvBmw==", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.4.0" + } + }, + "node_modules/@marijn/find-cluster-break": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@marijn/find-cluster-break/-/find-cluster-break-1.0.2.tgz", + "integrity": "sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==", + "license": "MIT" + }, "node_modules/@rollup/rollup-android-arm-eabi": { "version": "4.57.0", "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.57.0.tgz", @@ -879,7 +1022,6 @@ "integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@sveltejs/vite-plugin-svelte-inspector": "^4.0.1", "debug": "^4.4.1", @@ -1206,7 +1348,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1214,6 +1355,12 @@ "node": ">=0.4.0" } }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "license": "Python-2.0" + }, "node_modules/aria-query": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz", @@ -1260,6 +1407,27 @@ "node": ">=6" } }, + "node_modules/codemirror": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/codemirror/-/codemirror-6.0.2.tgz", + "integrity": "sha512-VhydHotNW5w1UGK0Qj96BwSk/Zqbp9WbnyK2W/eVMv4QyF41INRGpjUhFJY7/uDNuudSc33a/PKr4iDqRduvHw==", + "license": "MIT", + "dependencies": { + 
"@codemirror/autocomplete": "^6.0.0", + "@codemirror/commands": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/lint": "^6.0.0", + "@codemirror/search": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.0.0" + } + }, + "node_modules/crelt": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/crelt/-/crelt-1.0.6.tgz", + "integrity": "sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==", + "license": "MIT" + }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", @@ -1438,6 +1606,18 @@ "jiti": "lib/jiti-cli.mjs" } }, + "node_modules/js-yaml": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, "node_modules/kleur": { "version": "4.1.5", "resolved": "https://registry.npmjs.org/kleur/-/kleur-4.1.5.tgz", @@ -1775,7 +1955,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -1903,13 +2082,18 @@ "node": ">=0.10.0" } }, + "node_modules/style-mod": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/style-mod/-/style-mod-4.1.3.tgz", + "integrity": "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ==", + "license": "MIT" + }, "node_modules/svelte": { "version": "5.48.5", "resolved": "https://registry.npmjs.org/svelte/-/svelte-5.48.5.tgz", "integrity": "sha512-NB3o70OxfmnE5UPyLr8uH3IV02Q43qJVAuWigYmsSOYsS0s/rHxP0TF81blG0onF/xkhNvZw4G8NfzIX+By5ZQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@jridgewell/remapping": "^2.3.4", 
"@jridgewell/sourcemap-codec": "^1.5.0", @@ -2011,7 +2195,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -2026,7 +2209,6 @@ "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", @@ -2116,6 +2298,12 @@ } } }, + "node_modules/w3c-keyname": { + "version": "2.2.8", + "resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz", + "integrity": "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==", + "license": "MIT" + }, "node_modules/zimmerframe": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/zimmerframe/-/zimmerframe-1.1.4.tgz", diff --git a/ui-svelte/package.json b/ui-svelte/package.json index 53296797..fbe35b9d 100644 --- a/ui-svelte/package.json +++ b/ui-svelte/package.json @@ -20,6 +20,12 @@ "vite": "^6.3.5" }, "dependencies": { + "@codemirror/lang-yaml": "^6.1.2", + "@codemirror/language": "^6.12.1", + "@codemirror/state": "^6.5.4", + "@codemirror/view": "^6.39.12", + "codemirror": "^6.0.2", + "js-yaml": "^4.1.1", "svelte-spa-router": "^4.0.1" } } diff --git a/ui-svelte/src/App.svelte b/ui-svelte/src/App.svelte index 69216703..2e4ab9d1 100644 --- a/ui-svelte/src/App.svelte +++ b/ui-svelte/src/App.svelte @@ -5,6 +5,7 @@ import LogViewer from "./routes/LogViewer.svelte"; import Models from "./routes/Models.svelte"; import Activity from "./routes/Activity.svelte"; + import Config from "./routes/Config.svelte"; import { enableAPIEvents } from "./stores/api"; import { initScreenWidth, isDarkMode, appTitle, connectionState } from "./stores/theme"; @@ -12,6 +13,7 @@ "/": Models, "/logs": LogViewer, "/activity": Activity, + "/config": Config, "*": Models, }; 
diff --git a/ui-svelte/src/components/Header.svelte b/ui-svelte/src/components/Header.svelte index 73c66874..4c7553ee 100644 --- a/ui-svelte/src/components/Header.svelte +++ b/ui-svelte/src/components/Header.svelte @@ -68,6 +68,14 @@ > Logs + + Config + + + + + + + {#if validationError} +
+ Validation Error: {validationError} +
+ {/if} + + {#if error} +
+ {error} +
+ {/if} + + {#if loading} +
+
Loading configuration...
+
+ {:else} +
+ +
+

Current Config (Editable)

+
+
+ + +
+

Example Config (Reference)

+
+
+
+ {/if} + diff --git a/ui/package-lock.json b/ui/package-lock.json index c88133e7..097d3bcc 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -8,6 +8,10 @@ "name": "ui", "version": "0.0.0", "dependencies": { + "@codemirror/lang-yaml": "^6.1.1", + "@codemirror/state": "^6.4.1", + "codemirror": "^6.0.1", + "js-yaml": "^4.1.0", "react": "^19.1.0", "react-dom": "^19.1.0", "react-icons": "^5.5.0", @@ -17,6 +21,7 @@ "devDependencies": { "@eslint/js": "^9.25.0", "@tailwindcss/vite": "^4.1.8", + "@types/js-yaml": "^4.0.9", "@types/react": "^19.1.2", "@types/react-dom": "^19.1.2", "@vitejs/plugin-react": "^4.4.1", @@ -75,7 +80,6 @@ "integrity": "sha512-bXYxrXFubeYdvB0NhD/NBB3Qi6aZeV20GOWVI47t2dkecCEoneR4NPVcb7abpXDEvejgrUfFtG6vG/zxAKmg+g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.27.1", @@ -327,6 +331,94 @@ "node": ">=6.9.0" } }, + "node_modules/@codemirror/autocomplete": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/@codemirror/autocomplete/-/autocomplete-6.20.0.tgz", + "integrity": "sha512-bOwvTOIJcG5FVo5gUUupiwYh8MioPLQ4UcqbcRf7UQ98X90tCa9E1kZ3Z7tqwpZxYyOvh1YTYbmZE9RTfTp5hg==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.17.0", + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@codemirror/commands": { + "version": "6.10.1", + "resolved": "https://registry.npmjs.org/@codemirror/commands/-/commands-6.10.1.tgz", + "integrity": "sha512-uWDWFypNdQmz2y1LaNJzK7fL7TYKLeUAU0npEC685OKTF3KcQ2Vu3klIM78D7I6wGhktme0lh3CuQLv0ZCrD9Q==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.4.0", + "@codemirror/view": "^6.27.0", + "@lezer/common": "^1.1.0" + } + }, + "node_modules/@codemirror/lang-yaml": { + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-yaml/-/lang-yaml-6.1.2.tgz", + "integrity": 
"sha512-dxrfG8w5Ce/QbT7YID7mWZFKhdhsaTNOYjOkSIMt1qmC4VQnXSDSYVHHHn8k6kJUfIhtLo8t1JJgltlxWdsITw==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.2.0", + "@lezer/lr": "^1.0.0", + "@lezer/yaml": "^1.0.0" + } + }, + "node_modules/@codemirror/language": { + "version": "6.12.1", + "resolved": "https://registry.npmjs.org/@codemirror/language/-/language-6.12.1.tgz", + "integrity": "sha512-Fa6xkSiuGKc8XC8Cn96T+TQHYj4ZZ7RdFmXA3i9xe/3hLHfwPZdM+dqfX0Cp0zQklBKhVD8Yzc8LS45rkqcwpQ==", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.23.0", + "@lezer/common": "^1.5.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0", + "style-mod": "^4.0.0" + } + }, + "node_modules/@codemirror/lint": { + "version": "6.9.3", + "resolved": "https://registry.npmjs.org/@codemirror/lint/-/lint-6.9.3.tgz", + "integrity": "sha512-y3YkYhdnhjDBAe0VIA0c4wVoFOvnp8CnAvfLqi0TqotIv92wIlAAP7HELOpLBsKwjAX6W92rSflA6an/2zBvXw==", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.35.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/search": { + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/@codemirror/search/-/search-6.6.0.tgz", + "integrity": "sha512-koFuNXcDvyyotWcgOnZGmY7LZqEOXZaaxD/j6n18TCLx2/9HieZJ5H6hs1g8FiRxBD0DNfs0nXn17g872RmYdw==", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.37.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/state": { + "version": "6.5.4", + "resolved": "https://registry.npmjs.org/@codemirror/state/-/state-6.5.4.tgz", + "integrity": "sha512-8y7xqG/hpB53l25CIoit9/ngxdfoG+fx+V3SHBrinnhOtLvKHRyAJJuHzkWrR4YXXLX8eXBsejgAAxHUOdW1yw==", + "dependencies": { + "@marijn/find-cluster-break": "^1.0.0" + } + }, + "node_modules/@codemirror/view": { + "version": "6.39.12", + "resolved": 
"https://registry.npmjs.org/@codemirror/view/-/view-6.39.12.tgz", + "integrity": "sha512-f+/VsHVn/kOA9lltk/GFzuYwVVAKmOnNjxbrhkk3tPHntFqjWeI2TbIXx006YkBkqC10wZ4NsnWXCQiFPeAISQ==", + "dependencies": { + "@codemirror/state": "^6.5.0", + "crelt": "^1.0.6", + "style-mod": "^4.1.0", + "w3c-keyname": "^2.2.4" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.25.5", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz", @@ -1041,6 +1133,42 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@lezer/common": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.5.0.tgz", + "integrity": "sha512-PNGcolp9hr4PJdXR4ix7XtixDrClScvtSCYW3rQG106oVMOOI+jFb+0+J3mbeL/53g1Zd6s0kJzaw6Ri68GmAA==" + }, + "node_modules/@lezer/highlight": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.3.tgz", + "integrity": "sha512-qXdH7UqTvGfdVBINrgKhDsVTJTxactNNxLk7+UMwZhU13lMHaOBlJe9Vqp907ya56Y3+ed2tlqzys7jDkTmW0g==", + "dependencies": { + "@lezer/common": "^1.3.0" + } + }, + "node_modules/@lezer/lr": { + "version": "1.4.8", + "resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.8.tgz", + "integrity": "sha512-bPWa0Pgx69ylNlMlPvBPryqeLYQjyJjqPx+Aupm5zydLIF3NE+6MMLT8Yi23Bd9cif9VS00aUebn+6fDIGBcDA==", + "dependencies": { + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@lezer/yaml": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@lezer/yaml/-/yaml-1.0.4.tgz", + "integrity": "sha512-2lrrHqxalACEbxIbsjhqGpSW8kWpUKuY6RHgnSAFZa6qK62wvnPxA8hGOwOoDbwHcOFs5M4o27mjGu+P7TvBmw==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.4.0" + } + }, + "node_modules/@marijn/find-cluster-break": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@marijn/find-cluster-break/-/find-cluster-break-1.0.2.tgz", + "integrity": 
"sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==" + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -1755,6 +1883,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/js-yaml": { + "version": "4.0.9", + "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz", + "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==", + "dev": true + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -1768,7 +1902,6 @@ "integrity": "sha512-JeG0rEWak0N6Itr6QUx+X60uQmN+5t3j9r/OVDtWzFXKaj6kD1BwJzOksD0FF6iWxZlbE1kB0q9vtnU2ekqa1Q==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -1829,7 +1962,6 @@ "integrity": "sha512-qwxv6dq682yVvgKKp2qWwLgRbscDAYktPptK4JPojCwwi3R9cwrvIxS4lvBpzmcqzR4bdn54Z0IG1uHFskW4dA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.33.1", "@typescript-eslint/types": "8.33.1", @@ -2081,7 +2213,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2136,7 +2267,6 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true, "license": "Python-2.0" }, "node_modules/balanced-match": { @@ -2190,7 +2320,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001718", "electron-to-chromium": "^1.5.160", @@ -2262,6 +2391,20 @@ "node": ">=18" } }, + "node_modules/codemirror": { + "version": "6.0.2", + "resolved": 
"https://registry.npmjs.org/codemirror/-/codemirror-6.0.2.tgz", + "integrity": "sha512-VhydHotNW5w1UGK0Qj96BwSk/Zqbp9WbnyK2W/eVMv4QyF41INRGpjUhFJY7/uDNuudSc33a/PKr4iDqRduvHw==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/commands": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/lint": "^6.0.0", + "@codemirror/search": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.0.0" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -2309,6 +2452,11 @@ "url": "https://opencollective.com/express" } }, + "node_modules/crelt": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/crelt/-/crelt-1.0.6.tgz", + "integrity": "sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==" + }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -2457,7 +2605,6 @@ "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -2927,7 +3074,6 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", - "dev": true, "license": "MIT", "dependencies": { "argparse": "^2.0.1" @@ -3567,7 +3713,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz", "integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -3577,7 +3722,6 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz", "integrity": 
"sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==", "license": "MIT", - "peer": true, "dependencies": { "scheduler": "^0.26.0" }, @@ -3812,6 +3956,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/style-mod": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/style-mod/-/style-mod-4.1.3.tgz", + "integrity": "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ==" + }, "node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -3907,7 +4056,6 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -3960,7 +4108,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -4039,7 +4186,6 @@ "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", @@ -4130,7 +4276,6 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -4138,6 +4283,11 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/w3c-keyname": { + "version": "2.2.8", + "resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz", + "integrity": "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==" + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git 
a/ui/package.json b/ui/package.json index d8a5e090..07cc38ea 100644 --- a/ui/package.json +++ b/ui/package.json @@ -10,6 +10,10 @@ "preview": "vite preview" }, "dependencies": { + "@codemirror/lang-yaml": "^6.1.1", + "@codemirror/state": "^6.4.1", + "codemirror": "^6.0.1", + "js-yaml": "^4.1.0", "react": "^19.1.0", "react-dom": "^19.1.0", "react-icons": "^5.5.0", @@ -19,6 +23,7 @@ "devDependencies": { "@eslint/js": "^9.25.0", "@tailwindcss/vite": "^4.1.8", + "@types/js-yaml": "^4.0.9", "@types/react": "^19.1.2", "@types/react-dom": "^19.1.2", "@vitejs/plugin-react": "^4.4.1",