From 1264d0ed001d97fcbe2962c9cb00e092bef5f3fa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 21:47:36 +0000 Subject: [PATCH 1/7] Initial plan From 41fc97d5d5e895fba262f5b74a3e8ade0cac7262 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 21:56:58 +0000 Subject: [PATCH 2/7] proxy: add per-model afterHealthy and beforeStop hooks Add two new lifecycle hooks to ModelConfig that run at specific points in each model's lifecycle: - afterHealthy: one-shot command that runs after the health check passes, before the process transitions to StateReady. Blocks the model from accepting requests until it completes. Failure is logged as a warning but does not prevent startup. - beforeStop: command that runs right before the upstream process is killed. Blocks shutdown until it completes (or fails), but the process is killed regardless of the outcome. Both hooks inherit the upstream process environment and log output through the process logger. Primary use case is loading/saving llama.cpp prompt cache slots. - proxy/config/model_config.go: add AfterHealthy and BeforeStop fields - proxy/process.go: add runHookCommand helper; wire hooks into start() and stopCommand() - proxy/process_test.go: add TestProcess_AfterHealthyHook and TestProcess_BeforeStopHook - config.example.yaml: document both new fields Co-authored-by: chand1012 <3521582+chand1012@users.noreply.github.com> --- config.example.yaml | 15 +++++++++++ proxy/config/model_config.go | 2 ++ proxy/process.go | 33 +++++++++++++++++++++++++ proxy/process_test.go | 48 ++++++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+) diff --git a/config.example.yaml b/config.example.yaml index 35f74c12..6ae40d0d 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -315,6 +315,21 @@ models: # - processes have 5 seconds to shutdown until forceful termination is attempted cmdStop: docker stop ${MODEL_ID} + # afterHealthy: a command to run once after the model passes its health check + # - optional, default: "" + # - runs as a one-shot command as soon as the server reports healthy + # - blocks the model from becoming ready until it completes + # - a failure is logged as a warning but does not prevent the model from starting + # - useful for loading saved prompt cache slots in llama.cpp (e.g. llama-save-load-state) + # afterHealthy: llama-cli --load-prompt-cache /tmp/my-model.cache + + # beforeStop: a command to run right before the model process is killed + # - optional, default: "" + # - blocks the model shutdown until it completes (or fails) + # - the model will be stopped regardless of whether this command succeeds + # - useful for saving prompt cache slots in llama.cpp before unloading + # beforeStop: llama-cli --save-prompt-cache /tmp/my-model.cache + # groups: a dictionary of group settings # - optional, default: empty dictionary # - provides advanced controls over model swapping behaviour diff --git a/proxy/config/model_config.go b/proxy/config/model_config.go index 685687ba..974b01aa 100644 --- a/proxy/config/model_config.go +++ b/proxy/config/model_config.go @@ -12,6 +12,8 @@ const ( type ModelConfig struct { Cmd string `yaml:"cmd"` CmdStop string `yaml:"cmdStop"` + AfterHealthy string `yaml:"afterHealthy"` + BeforeStop string `yaml:"beforeStop"` Proxy string `yaml:"proxy"` Aliases []string `yaml:"aliases"` Env []string `yaml:"env"` diff --git a/proxy/process.go b/proxy/process.go index 41427059..19939c7b 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -358,6 +358,13 @@ func (p *Process) start() error { }() } + if p.config.AfterHealthy != "" { + p.proxyLogger.Debugf("<%s> Running afterHealthy hook: %s", p.ID, p.config.AfterHealthy) + if err := p.runHookCommand(p.config.AfterHealthy); err != nil { + p.proxyLogger.Warnf("<%s> afterHealthy hook failed: %v", p.ID, err) + } + } + if curState, err := p.swapState(StateStarting, StateReady); err != nil { return fmt.Errorf("failed to set Process state to ready: current state: %v, error: %v", curState, err) } else { @@ -429,6 +436,13 @@ func (p *Process) stopCommand() { return } + if p.config.BeforeStop != "" { + p.proxyLogger.Debugf("<%s> Running beforeStop hook: %s", p.ID, p.config.BeforeStop) + if err := p.runHookCommand(p.config.BeforeStop); err != nil { + p.proxyLogger.Warnf("<%s> beforeStop hook failed: %v", p.ID, err) + } + } + cancelUpstream() <-cmdWaitChan } @@ -654,6 +668,25 @@ func (p *Process) Logger() *LogMonitor { return p.processLogger } +// runHookCommand executes a hook command, logging its output through the +// process logger. The command inherits the environment of the upstream process. +func (p *Process) runHookCommand(hookCmd string) error { + args, err := config.SanitizeCommand(hookCmd) + if err != nil { + return fmt.Errorf("failed to sanitize hook command %q: %v", hookCmd, err) + } + + cmd := exec.Command(args[0], args[1:]...) + cmd.Stdout = p.processLogger + cmd.Stderr = p.processLogger + if p.cmd != nil { + cmd.Env = p.cmd.Env + } + setProcAttributes(cmd) + + return cmd.Run() +} + var loadingRemarks = []string{ "Still faster than your last standup meeting...", "Reticulating splines...", diff --git a/proxy/process_test.go b/proxy/process_test.go index dd9e9d8a..797b360f 100644 --- a/proxy/process_test.go +++ b/proxy/process_test.go @@ -456,6 +456,54 @@ func TestProcess_ForceStopWithKill(t *testing.T) { <-waitChan } +func TestProcess_AfterHealthyHook(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("skipping AfterHealthy hook test on Windows") + } + + tmpFile := t.TempDir() + "/after_healthy_ran" + conf := getTestSimpleResponderConfig("after_healthy_test") + conf.AfterHealthy = fmt.Sprintf("touch %s", tmpFile) + + process := NewProcess("test-after-healthy", 5, conf, debugLogger, debugLogger) + defer process.Stop() + + err := process.start() + assert.Nil(t, err) + assert.Equal(t, StateReady, process.CurrentState()) + + // The hook should have created the temp file before the process became ready + _, statErr := os.Stat(tmpFile) + assert.Nil(t, statErr, "afterHealthy hook should have created temp file") +} + +func TestProcess_BeforeStopHook(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("skipping BeforeStop hook test on Windows") + } + + tmpFile := t.TempDir() + "/before_stop_ran" + conf := getTestSimpleResponderConfig("before_stop_test") + conf.BeforeStop = fmt.Sprintf("touch %s", tmpFile) + + process := NewProcess("test-before-stop", 5, conf, debugLogger, debugLogger) + + err := process.start() + assert.Nil(t, err) + assert.Equal(t, StateReady, process.CurrentState()) + + // Verify hook hasn't run yet + _, statErr := os.Stat(tmpFile) + assert.True(t, os.IsNotExist(statErr), "beforeStop hook should not have run yet") + + process.Stop() + assert.Equal(t, StateStopped, process.CurrentState()) + + // The hook should have created the temp file before the process was killed + _, statErr = os.Stat(tmpFile) + assert.Nil(t, statErr, "beforeStop hook should have created temp file") +} + func TestProcess_StopCmd(t *testing.T) { conf := getTestSimpleResponderConfig("test_stop_cmd") From 423b9a70b2a14a0c3093604a7bf810b5e0956eab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 22:07:25 +0000 Subject: [PATCH 3/7] config: use curl examples for afterHealthy and beforeStop hooks Replace llama-cli command examples with curl calls to the llama.cpp slots API, which is the correct way to save/restore prompt cache state via the server's HTTP interface. Co-authored-by: chand1012 <3521582+chand1012@users.noreply.github.com> --- config.example.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index 6ae40d0d..bbfbc7fa 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -320,15 +320,15 @@ models: # - runs as a one-shot command as soon as the server reports healthy # - blocks the model from becoming ready until it completes # - a failure is logged as a warning but does not prevent the model from starting - # - useful for loading saved prompt cache slots in llama.cpp (e.g. llama-save-load-state) - # afterHealthy: llama-cli --load-prompt-cache /tmp/my-model.cache + # - useful for loading saved prompt cache slots in llama.cpp + # afterHealthy: "curl -sf -X POST http://localhost:${PORT}/slots/0 -H 'Content-Type: application/json' -d '{\"action\": \"restore\", \"filename\": \"my-model.cache\"}'" # beforeStop: a command to run right before the model process is killed # - optional, default: "" # - blocks the model shutdown until it completes (or fails) # - the model will be stopped regardless of whether this command succeeds # - useful for saving prompt cache slots in llama.cpp before unloading - # beforeStop: llama-cli --save-prompt-cache /tmp/my-model.cache + # beforeStop: "curl -sf -X POST http://localhost:${PORT}/slots/0 -H 'Content-Type: application/json' -d '{\"action\": \"save\", \"filename\": \"my-model.cache\"}'" # groups: a dictionary of group settings # - optional, default: empty dictionary From c3d404b3c5ad57063f5b0a7fd270982836a96281 Mon Sep 17 00:00:00 2001 From: chand1012 Date: Fri, 20 Mar 2026 15:23:36 +0000 Subject: [PATCH 4/7] proxy: run afterHealthy hook after health check passes - move afterHealthy hook to run after health check instead of after state ready - expand ${PORT} and other macros in afterHealthy and beforeStop fields --- proxy/config/config.go | 12 ++++++++++-- proxy/process.go | 14 +++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/proxy/config/config.go b/proxy/config/config.go index 00f44970..91696471 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -305,6 +305,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr) modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr) + modelConfig.AfterHealthy = strings.ReplaceAll(modelConfig.AfterHealthy, macroSlug, macroStr) + modelConfig.BeforeStop = strings.ReplaceAll(modelConfig.BeforeStop, macroSlug, macroStr) modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr) modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr) modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr) @@ -339,10 +341,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { } } - // Handle PORT macro - only allocate if cmd uses it + // Handle PORT macro - only allocate if cmd, afterHealthy, or beforeStop uses it cmdHasPort := strings.Contains(modelConfig.Cmd, "${PORT}") + afterHealthyHasPort := strings.Contains(modelConfig.AfterHealthy, "${PORT}") + beforeStopHasPort := strings.Contains(modelConfig.BeforeStop, "${PORT}") proxyHasPort := strings.Contains(modelConfig.Proxy, "${PORT}") - if cmdHasPort || proxyHasPort { + if cmdHasPort || afterHealthyHasPort || beforeStopHasPort || proxyHasPort { if !cmdHasPort && proxyHasPort { return Config{}, fmt.Errorf("model %s: proxy uses ${PORT} but cmd does not - ${PORT} is only available when used in cmd", modelId) } @@ -352,6 +356,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr) modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr) + modelConfig.AfterHealthy = strings.ReplaceAll(modelConfig.AfterHealthy, macroSlug, macroStr) + modelConfig.BeforeStop = strings.ReplaceAll(modelConfig.BeforeStop, macroSlug, macroStr) modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr) modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr) modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr) @@ -371,6 +377,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { fieldMap := map[string]string{ "cmd": modelConfig.Cmd, "cmdStop": modelConfig.CmdStop, + "afterHealthy": modelConfig.AfterHealthy, + "beforeStop": modelConfig.BeforeStop, "proxy": modelConfig.Proxy, "checkEndpoint": modelConfig.CheckEndpoint, "filters.stripParams": modelConfig.Filters.StripParams, diff --git a/proxy/process.go b/proxy/process.go index 19939c7b..aaa44782 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -333,6 +333,13 @@ func (p *Process) start() error { } } + if p.config.AfterHealthy != "" { + p.proxyLogger.Debugf("<%s> Running afterHealthy hook: %s", p.ID, p.config.AfterHealthy) + if err := p.runHookCommand(p.config.AfterHealthy); err != nil { + p.proxyLogger.Warnf("<%s> afterHealthy hook failed: %v", p.ID, err) + } + } + if p.config.UnloadAfter > 0 { // start a goroutine to check every second if // the process should be stopped @@ -358,13 +365,6 @@ func (p *Process) start() error { }() } - if p.config.AfterHealthy != "" { - p.proxyLogger.Debugf("<%s> Running afterHealthy hook: %s", p.ID, p.config.AfterHealthy) - if err := p.runHookCommand(p.config.AfterHealthy); err != nil { - p.proxyLogger.Warnf("<%s> afterHealthy hook failed: %v", p.ID, err) - } - } - if curState, err := p.swapState(StateStarting, StateReady); err != nil { return fmt.Errorf("failed to set Process state to ready: current state: %v, error: %v", curState, err) } else { From d7f8e52adc1f8dffd0ca198794c33871b0556566 Mon Sep 17 00:00:00 2001 From: chandler Date: Fri, 20 Mar 2026 15:31:11 +0000 Subject: [PATCH 5/7] config: update curl examples to use query params --- config.example.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index bbfbc7fa..d8749796 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -321,14 +321,14 @@ models: # - blocks the model from becoming ready until it completes # - a failure is logged as a warning but does not prevent the model from starting # - useful for loading saved prompt cache slots in llama.cpp - # afterHealthy: "curl -sf -X POST http://localhost:${PORT}/slots/0 -H 'Content-Type: application/json' -d '{\"action\": \"restore\", \"filename\": \"my-model.cache\"}'" + # afterHealthy: "curl -sf -X POST \"http://localhost:${PORT}/slots/0?action=restore&filename=my-model.cache\"" # beforeStop: a command to run right before the model process is killed # - optional, default: "" # - blocks the model shutdown until it completes (or fails) # - the model will be stopped regardless of whether this command succeeds # - useful for saving prompt cache slots in llama.cpp before unloading - # beforeStop: "curl -sf -X POST http://localhost:${PORT}/slots/0 -H 'Content-Type: application/json' -d '{\"action\": \"save\", \"filename\": \"my-model.cache\"}'" + # beforeStop: "curl -sf -X POST \"http://localhost:${PORT}/slots/0?action=save&filename=my-model.cache\"" # groups: a dictionary of group settings # - optional, default: empty dictionary From 6ca9637171e2d4981807e39131fe3318b50466b7 Mon Sep 17 00:00:00 2001 From: chand1012 Date: Sat, 21 Mar 2026 13:14:51 -0400 Subject: [PATCH 6/7] Add 30 second timeout for hooks --- proxy/process.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/proxy/process.go b/proxy/process.go index aaa44782..73efa2ee 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -668,6 +668,8 @@ func (p *Process) Logger() *LogMonitor { return p.processLogger } +var hookCommandTimeout = 30 * time.Second + // runHookCommand executes a hook command, logging its output through the // process logger. The command inherits the environment of the upstream process. func (p *Process) runHookCommand(hookCmd string) error { @@ -676,7 +678,10 @@ func (p *Process) runHookCommand(hookCmd string) error { return fmt.Errorf("failed to sanitize hook command %q: %v", hookCmd, err) } - cmd := exec.Command(args[0], args[1:]...) + ctx, cancel := context.WithTimeout(context.Background(), hookCommandTimeout) + defer cancel() + + cmd := exec.CommandContext(ctx, args[0], args[1:]...) cmd.Stdout = p.processLogger cmd.Stderr = p.processLogger if p.cmd != nil { @@ -684,7 +689,15 @@ func (p *Process) runHookCommand(hookCmd string) error { } setProcAttributes(cmd) - return cmd.Run() + err = cmd.Run() + if err != nil { + if ctx.Err() == context.DeadlineExceeded { + return fmt.Errorf("hook command timed out after %v: %w", hookCommandTimeout, err) + } + return fmt.Errorf("hook command failed: %w", err) + } + + return nil } var loadingRemarks = []string{ From b82e763df74ea83a0c8755c41e8157ea05d9c98a Mon Sep 17 00:00:00 2001 From: chand1012 Date: Mon, 23 Mar 2026 11:56:35 -0400 Subject: [PATCH 7/7] Fix examples --- config.example.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index d8749796..da173eef 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -321,14 +321,14 @@ models: # - blocks the model from becoming ready until it completes # - a failure is logged as a warning but does not prevent the model from starting # - useful for loading saved prompt cache slots in llama.cpp - # afterHealthy: "curl -sf -X POST \"http://localhost:${PORT}/slots/0?action=restore&filename=my-model.cache\"" + # afterHealthy: "curl -X POST 'http://localhost:${PORT}/slots/0?action=restore' -H 'Content-Type: application/json' -d '{\"filename\": \"slot0.bin\"}'" # beforeStop: a command to run right before the model process is killed # - optional, default: "" # - blocks the model shutdown until it completes (or fails) # - the model will be stopped regardless of whether this command succeeds # - useful for saving prompt cache slots in llama.cpp before unloading - # beforeStop: "curl -sf -X POST \"http://localhost:${PORT}/slots/0?action=save&filename=my-model.cache\"" + # beforeStop: "curl -X POST 'http://localhost:${PORT}/slots/0?action=save' -H 'Content-Type: application/json' -d '{\"filename\": \"slot0.bin\"}'" # groups: a dictionary of group settings # - optional, default: empty dictionary