From 1264d0ed001d97fcbe2962c9cb00e092bef5f3fa Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 19 Mar 2026 21:47:36 +0000
Subject: [PATCH 1/7] Initial plan


From 41fc97d5d5e895fba262f5b74a3e8ade0cac7262 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 19 Mar 2026 21:56:58 +0000
Subject: [PATCH 2/7] proxy: add per-model afterHealthy and beforeStop hooks

Add two new lifecycle hooks to ModelConfig that run at specific points
in each model's lifecycle:

- afterHealthy: one-shot command that runs after the health check
  passes, before the process transitions to StateReady. Blocks the
  model from accepting requests until it completes. Failure is logged
  as a warning but does not prevent startup.

- beforeStop: command that runs right before the upstream process is
  killed. Blocks shutdown until it completes (or fails), but the
  process is killed regardless of the outcome.

Both hooks inherit the upstream process environment and log output
through the process logger.

Primary use case is loading/saving llama.cpp prompt cache slots.

- proxy/config/model_config.go: add AfterHealthy and BeforeStop fields
- proxy/process.go: add runHookCommand helper; wire hooks into start()
  and stopCommand()
- proxy/process_test.go: add TestProcess_AfterHealthyHook and
  TestProcess_BeforeStopHook
- config.example.yaml: document both new fields

Co-authored-by: chand1012 <3521582+chand1012@users.noreply.github.com>
---
 config.example.yaml          | 15 +++++++++++
 proxy/config/model_config.go |  2 ++
 proxy/process.go             | 33 +++++++++++++++++++++++++
 proxy/process_test.go        | 48 ++++++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+)

diff --git a/config.example.yaml b/config.example.yaml
index 35f74c12..6ae40d0d 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -315,6 +315,21 @@ models:
     # - processes have 5 seconds to shutdown until forceful termination is attempted
     cmdStop: docker stop ${MODEL_ID}
 
+    # afterHealthy: a command to run once after the model passes its health check
+    # - optional, default: ""
+    # - runs as a one-shot command as soon as the server reports healthy
+    # - blocks the model from becoming ready until it completes
+    # - a failure is logged as a warning but does not prevent the model from starting
+    # - useful for loading saved prompt cache slots in llama.cpp (e.g. llama-save-load-state)
+    # afterHealthy: llama-cli --load-prompt-cache /tmp/my-model.cache
+
+    # beforeStop: a command to run right before the model process is killed
+    # - optional, default: ""
+    # - blocks the model shutdown until it completes (or fails)
+    # - the model will be stopped regardless of whether this command succeeds
+    # - useful for saving prompt cache slots in llama.cpp before unloading
+    # beforeStop: llama-cli --save-prompt-cache /tmp/my-model.cache
+
 # groups: a dictionary of group settings
 # - optional, default: empty dictionary
 # - provides advanced controls over model swapping behaviour
diff --git a/proxy/config/model_config.go b/proxy/config/model_config.go
index 685687ba..974b01aa 100644
--- a/proxy/config/model_config.go
+++ b/proxy/config/model_config.go
@@ -12,6 +12,8 @@ const (
 type ModelConfig struct {
 	Cmd           string   `yaml:"cmd"`
 	CmdStop       string   `yaml:"cmdStop"`
+	AfterHealthy  string   `yaml:"afterHealthy"`
+	BeforeStop    string   `yaml:"beforeStop"`
 	Proxy         string   `yaml:"proxy"`
 	Aliases       []string `yaml:"aliases"`
 	Env           []string `yaml:"env"`
diff --git a/proxy/process.go b/proxy/process.go
index 41427059..19939c7b 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -358,6 +358,13 @@ func (p *Process) start() error {
 		}()
 	}
 
+	if p.config.AfterHealthy != "" {
+		p.proxyLogger.Debugf("<%s> Running afterHealthy hook: %s", p.ID, p.config.AfterHealthy)
+		if err := p.runHookCommand(p.config.AfterHealthy); err != nil {
+			p.proxyLogger.Warnf("<%s> afterHealthy hook failed: %v", p.ID, err)
+		}
+	}
+
 	if curState, err := p.swapState(StateStarting, StateReady); err != nil {
 		return fmt.Errorf("failed to set Process state to ready: current state: %v, error: %v", curState, err)
 	} else {
@@ -429,6 +436,13 @@ func (p *Process) stopCommand() {
 		return
 	}
 
+	if p.config.BeforeStop != "" {
+		p.proxyLogger.Debugf("<%s> Running beforeStop hook: %s", p.ID, p.config.BeforeStop)
+		if err := p.runHookCommand(p.config.BeforeStop); err != nil {
+			p.proxyLogger.Warnf("<%s> beforeStop hook failed: %v", p.ID, err)
+		}
+	}
+
 	cancelUpstream()
 	<-cmdWaitChan
 }
@@ -654,6 +668,25 @@ func (p *Process) Logger() *LogMonitor {
 	return p.processLogger
 }
 
+// runHookCommand executes a hook command, logging its output through the
+// process logger. The command inherits the environment of the upstream process.
+func (p *Process) runHookCommand(hookCmd string) error {
+	args, err := config.SanitizeCommand(hookCmd)
+	if err != nil {
+		return fmt.Errorf("failed to sanitize hook command %q: %v", hookCmd, err)
+	}
+
+	cmd := exec.Command(args[0], args[1:]...)
+	cmd.Stdout = p.processLogger
+	cmd.Stderr = p.processLogger
+	if p.cmd != nil {
+		cmd.Env = p.cmd.Env
+	}
+	setProcAttributes(cmd)
+
+	return cmd.Run()
+}
+
 var loadingRemarks = []string{
 	"Still faster than your last standup meeting...",
 	"Reticulating splines...",
diff --git a/proxy/process_test.go b/proxy/process_test.go
index dd9e9d8a..797b360f 100644
--- a/proxy/process_test.go
+++ b/proxy/process_test.go
@@ -456,6 +456,54 @@ func TestProcess_ForceStopWithKill(t *testing.T) {
 	<-waitChan
 }
 
+func TestProcess_AfterHealthyHook(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("skipping AfterHealthy hook test on Windows")
+	}
+
+	tmpFile := t.TempDir() + "/after_healthy_ran"
+	conf := getTestSimpleResponderConfig("after_healthy_test")
+	conf.AfterHealthy = fmt.Sprintf("touch %s", tmpFile)
+
+	process := NewProcess("test-after-healthy", 5, conf, debugLogger, debugLogger)
+	defer process.Stop()
+
+	err := process.start()
+	assert.Nil(t, err)
+	assert.Equal(t, StateReady, process.CurrentState())
+
+	// The hook should have created the temp file before the process became ready
+	_, statErr := os.Stat(tmpFile)
+	assert.Nil(t, statErr, "afterHealthy hook should have created temp file")
+}
+
+func TestProcess_BeforeStopHook(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("skipping BeforeStop hook test on Windows")
+	}
+
+	tmpFile := t.TempDir() + "/before_stop_ran"
+	conf := getTestSimpleResponderConfig("before_stop_test")
+	conf.BeforeStop = fmt.Sprintf("touch %s", tmpFile)
+
+	process := NewProcess("test-before-stop", 5, conf, debugLogger, debugLogger)
+
+	err := process.start()
+	assert.Nil(t, err)
+	assert.Equal(t, StateReady, process.CurrentState())
+
+	// Verify hook hasn't run yet
+	_, statErr := os.Stat(tmpFile)
+	assert.True(t, os.IsNotExist(statErr), "beforeStop hook should not have run yet")
+
+	process.Stop()
+	assert.Equal(t, StateStopped, process.CurrentState())
+
+	// The hook should have created the temp file before the process was killed
+	_, statErr = os.Stat(tmpFile)
+	assert.Nil(t, statErr, "beforeStop hook should have created temp file")
+}
+
 func TestProcess_StopCmd(t *testing.T) {
 	conf := getTestSimpleResponderConfig("test_stop_cmd")
 

From 423b9a70b2a14a0c3093604a7bf810b5e0956eab Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 19 Mar 2026 22:07:25 +0000
Subject: [PATCH 3/7] config: use curl examples for afterHealthy and beforeStop
 hooks

Replace llama-cli command examples with curl calls to the llama.cpp
slots API, which is the correct way to save/restore prompt cache state
via the server's HTTP interface.

Co-authored-by: chand1012 <3521582+chand1012@users.noreply.github.com>
---
 config.example.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index 6ae40d0d..bbfbc7fa 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -320,15 +320,15 @@ models:
     # - runs as a one-shot command as soon as the server reports healthy
     # - blocks the model from becoming ready until it completes
     # - a failure is logged as a warning but does not prevent the model from starting
-    # - useful for loading saved prompt cache slots in llama.cpp (e.g. llama-save-load-state)
-    # afterHealthy: llama-cli --load-prompt-cache /tmp/my-model.cache
+    # - useful for loading saved prompt cache slots in llama.cpp
+    # afterHealthy: "curl -sf -X POST http://localhost:${PORT}/slots/0 -H 'Content-Type: application/json' -d '{\"action\": \"restore\", \"filename\": \"my-model.cache\"}'"
 
     # beforeStop: a command to run right before the model process is killed
     # - optional, default: ""
     # - blocks the model shutdown until it completes (or fails)
     # - the model will be stopped regardless of whether this command succeeds
     # - useful for saving prompt cache slots in llama.cpp before unloading
-    # beforeStop: llama-cli --save-prompt-cache /tmp/my-model.cache
+    # beforeStop: "curl -sf -X POST http://localhost:${PORT}/slots/0 -H 'Content-Type: application/json' -d '{\"action\": \"save\", \"filename\": \"my-model.cache\"}'"
 
 # groups: a dictionary of group settings
 # - optional, default: empty dictionary

From c3d404b3c5ad57063f5b0a7fd270982836a96281 Mon Sep 17 00:00:00 2001
From: chand1012 <chandler@chand1012.dev>
Date: Fri, 20 Mar 2026 15:23:36 +0000
Subject: [PATCH 4/7] proxy: run afterHealthy hook after health check passes

- move afterHealthy hook to run right after the health check, before the
  unloadAfter goroutine is started (it previously ran just before the
  transition to StateReady)
- expand ${PORT} and other macros in afterHealthy and beforeStop fields
---
 proxy/config/config.go | 12 ++++++++++--
 proxy/process.go       | 14 +++++++-------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/proxy/config/config.go b/proxy/config/config.go
index 00f44970..91696471 100644
--- a/proxy/config/config.go
+++ b/proxy/config/config.go
@@ -305,6 +305,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 
 			modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
 			modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
+			modelConfig.AfterHealthy = strings.ReplaceAll(modelConfig.AfterHealthy, macroSlug, macroStr)
+			modelConfig.BeforeStop = strings.ReplaceAll(modelConfig.BeforeStop, macroSlug, macroStr)
 			modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
 			modelConfig.CheckEndpoint = strings.ReplaceAll(modelConfig.CheckEndpoint, macroSlug, macroStr)
 			modelConfig.Filters.StripParams = strings.ReplaceAll(modelConfig.Filters.StripParams, macroSlug, macroStr)
@@ -339,10 +341,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 			}
 		}
 
-		// Handle PORT macro - only allocate if cmd uses it
+		// Handle PORT macro - only allocate if cmd, afterHealthy, or beforeStop uses it
 		cmdHasPort := strings.Contains(modelConfig.Cmd, "${PORT}")
+		afterHealthyHasPort := strings.Contains(modelConfig.AfterHealthy, "${PORT}")
+		beforeStopHasPort := strings.Contains(modelConfig.BeforeStop, "${PORT}")
 		proxyHasPort := strings.Contains(modelConfig.Proxy, "${PORT}")
-		if cmdHasPort || proxyHasPort {
+		if cmdHasPort || afterHealthyHasPort || beforeStopHasPort || proxyHasPort {
 			if !cmdHasPort && proxyHasPort {
 				return Config{}, fmt.Errorf("model %s: proxy uses ${PORT} but cmd does not - ${PORT} is only available when used in cmd", modelId)
 			}
@@ -352,6 +356,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 
 			modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, macroSlug, macroStr)
 			modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, macroSlug, macroStr)
+			modelConfig.AfterHealthy = strings.ReplaceAll(modelConfig.AfterHealthy, macroSlug, macroStr)
+			modelConfig.BeforeStop = strings.ReplaceAll(modelConfig.BeforeStop, macroSlug, macroStr)
 			modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, macroSlug, macroStr)
 			modelConfig.Name = strings.ReplaceAll(modelConfig.Name, macroSlug, macroStr)
 			modelConfig.Description = strings.ReplaceAll(modelConfig.Description, macroSlug, macroStr)
@@ -371,6 +377,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 		fieldMap := map[string]string{
 			"cmd":                 modelConfig.Cmd,
 			"cmdStop":             modelConfig.CmdStop,
+			"afterHealthy":        modelConfig.AfterHealthy,
+			"beforeStop":          modelConfig.BeforeStop,
 			"proxy":               modelConfig.Proxy,
 			"checkEndpoint":       modelConfig.CheckEndpoint,
 			"filters.stripParams": modelConfig.Filters.StripParams,
diff --git a/proxy/process.go b/proxy/process.go
index 19939c7b..aaa44782 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -333,6 +333,13 @@ func (p *Process) start() error {
 		}
 	}
 
+	if p.config.AfterHealthy != "" {
+		p.proxyLogger.Debugf("<%s> Running afterHealthy hook: %s", p.ID, p.config.AfterHealthy)
+		if err := p.runHookCommand(p.config.AfterHealthy); err != nil {
+			p.proxyLogger.Warnf("<%s> afterHealthy hook failed: %v", p.ID, err)
+		}
+	}
+
 	if p.config.UnloadAfter > 0 {
 		// start a goroutine to check every second if
 		// the process should be stopped
@@ -358,13 +365,6 @@ func (p *Process) start() error {
 		}()
 	}
 
-	if p.config.AfterHealthy != "" {
-		p.proxyLogger.Debugf("<%s> Running afterHealthy hook: %s", p.ID, p.config.AfterHealthy)
-		if err := p.runHookCommand(p.config.AfterHealthy); err != nil {
-			p.proxyLogger.Warnf("<%s> afterHealthy hook failed: %v", p.ID, err)
-		}
-	}
-
 	if curState, err := p.swapState(StateStarting, StateReady); err != nil {
 		return fmt.Errorf("failed to set Process state to ready: current state: %v, error: %v", curState, err)
 	} else {

From d7f8e52adc1f8dffd0ca198794c33871b0556566 Mon Sep 17 00:00:00 2001
From: chandler <chandler@jank.afrino-beardie.ts.net>
Date: Fri, 20 Mar 2026 15:31:11 +0000
Subject: [PATCH 5/7] config: update curl examples to use query params

---
 config.example.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index bbfbc7fa..d8749796 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -321,14 +321,14 @@ models:
     # - blocks the model from becoming ready until it completes
     # - a failure is logged as a warning but does not prevent the model from starting
     # - useful for loading saved prompt cache slots in llama.cpp
-    # afterHealthy: "curl -sf -X POST http://localhost:${PORT}/slots/0 -H 'Content-Type: application/json' -d '{\"action\": \"restore\", \"filename\": \"my-model.cache\"}'"
+    # afterHealthy: "curl -sf -X POST \"http://localhost:${PORT}/slots/0?action=restore&filename=my-model.cache\""
 
     # beforeStop: a command to run right before the model process is killed
     # - optional, default: ""
     # - blocks the model shutdown until it completes (or fails)
     # - the model will be stopped regardless of whether this command succeeds
     # - useful for saving prompt cache slots in llama.cpp before unloading
-    # beforeStop: "curl -sf -X POST http://localhost:${PORT}/slots/0 -H 'Content-Type: application/json' -d '{\"action\": \"save\", \"filename\": \"my-model.cache\"}'"
+    # beforeStop: "curl -sf -X POST \"http://localhost:${PORT}/slots/0?action=save&filename=my-model.cache\""
 
 # groups: a dictionary of group settings
 # - optional, default: empty dictionary

From 6ca9637171e2d4981807e39131fe3318b50466b7 Mon Sep 17 00:00:00 2001
From: chand1012 <chandler@chand1012.dev>
Date: Sat, 21 Mar 2026 13:14:51 -0400
Subject: [PATCH 6/7] proxy: add 30-second timeout for hook commands

---
 proxy/process.go | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/proxy/process.go b/proxy/process.go
index aaa44782..73efa2ee 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -668,6 +668,8 @@ func (p *Process) Logger() *LogMonitor {
 	return p.processLogger
 }
 
+var hookCommandTimeout = 30 * time.Second
+
 // runHookCommand executes a hook command, logging its output through the
 // process logger. The command inherits the environment of the upstream process.
 func (p *Process) runHookCommand(hookCmd string) error {
@@ -676,7 +678,10 @@ func (p *Process) runHookCommand(hookCmd string) error {
 		return fmt.Errorf("failed to sanitize hook command %q: %v", hookCmd, err)
 	}
 
-	cmd := exec.Command(args[0], args[1:]...)
+	ctx, cancel := context.WithTimeout(context.Background(), hookCommandTimeout)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, args[0], args[1:]...)
 	cmd.Stdout = p.processLogger
 	cmd.Stderr = p.processLogger
 	if p.cmd != nil {
@@ -684,7 +689,15 @@ func (p *Process) runHookCommand(hookCmd string) error {
 	}
 	setProcAttributes(cmd)
 
-	return cmd.Run()
+	err = cmd.Run()
+	if err != nil {
+		if ctx.Err() == context.DeadlineExceeded {
+			return fmt.Errorf("hook command timed out after %v: %w", hookCommandTimeout, err)
+		}
+		return fmt.Errorf("hook command failed: %w", err)
+	}
+
+	return nil
 }
 
 var loadingRemarks = []string{

From b82e763df74ea83a0c8755c41e8157ea05d9c98a Mon Sep 17 00:00:00 2001
From: chand1012 <chandler@chand1012.dev>
Date: Mon, 23 Mar 2026 11:56:35 -0400
Subject: [PATCH 7/7] config: fix hook curl examples to send filename in JSON body

---
 config.example.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index d8749796..da173eef 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -321,14 +321,14 @@ models:
     # - blocks the model from becoming ready until it completes
     # - a failure is logged as a warning but does not prevent the model from starting
     # - useful for loading saved prompt cache slots in llama.cpp
-    # afterHealthy: "curl -sf -X POST \"http://localhost:${PORT}/slots/0?action=restore&filename=my-model.cache\""
+    # afterHealthy: "curl -X POST 'http://localhost:${PORT}/slots/0?action=restore' -H 'Content-Type: application/json' -d '{\"filename\": \"slot0.bin\"}'"
 
     # beforeStop: a command to run right before the model process is killed
     # - optional, default: ""
     # - blocks the model shutdown until it completes (or fails)
     # - the model will be stopped regardless of whether this command succeeds
     # - useful for saving prompt cache slots in llama.cpp before unloading
-    # beforeStop: "curl -sf -X POST \"http://localhost:${PORT}/slots/0?action=save&filename=my-model.cache\""
+    # beforeStop: "curl -X POST 'http://localhost:${PORT}/slots/0?action=save' -H 'Content-Type: application/json' -d '{\"filename\": \"slot0.bin\"}'"
 
 # groups: a dictionary of group settings
 # - optional, default: empty dictionary