Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ healthCheckTimeout: 60
# Valid log levels: debug, info (default), warn, error
logLevel: info

# Automatic Port Values
# use ${PORT} in model.cmd and model.proxy to use an automatic port number
# when you use ${PORT} you can omit a custom model.proxy value, as it will
# default to http://localhost:${PORT}

# override the default starting port (5800) used for automatic ${PORT} values
startPort: 10001

# define valid model values and the upstream server start
models:
"llama":
Expand All @@ -83,6 +91,7 @@ models:
- "CUDA_VISIBLE_DEVICES=0"

# where to reach the server started by cmd, make sure the ports match
# can be omitted if you use an automatic ${PORT} in cmd
proxy: http://127.0.0.1:8999

# aliases names to use this model for
Expand All @@ -109,14 +118,14 @@ models:
# but they can still be requested as normal
"qwen-unlisted":
unlisted: true
cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0

# Docker Support (v26.1.4+ required!)
"docker-llama":
proxy: "http://127.0.0.1:9790"
proxy: "http://127.0.0.1:${PORT}"
cmd: >
docker run --name dockertest
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
ghcr.io/ggerganov/llama.cpp:server
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'

Expand Down
37 changes: 37 additions & 0 deletions proxy/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"io"
"os"
"sort"
"strconv"
"strings"

"github.com/google/shlex"
Expand Down Expand Up @@ -63,6 +64,9 @@ type Config struct {

// map aliases to actual model IDs
aliases map[string]string

// automatic port assignments
StartPort int `yaml:"startPort"`
}

func (c *Config) RealModelName(search string) (string, bool) {
Expand Down Expand Up @@ -108,6 +112,14 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
config.HealthCheckTimeout = 15
}

// set default port ranges
if config.StartPort == 0 {
// default to 5800
config.StartPort = 5800
} else if config.StartPort < 1 {
return Config{}, fmt.Errorf("startPort must be greater than 1")
}

// Populate the aliases map
config.aliases = make(map[string]string)
for modelName, modelConfig := range config.Models {
Expand All @@ -119,6 +131,31 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
}
}

// iterate over the models and replace any ${PORT} with the next available port
// Get and sort all model IDs first, makes testing more consistent
modelIds := make([]string, 0, len(config.Models))
for modelId := range config.Models {
modelIds = append(modelIds, modelId)
}
sort.Strings(modelIds) // This guarantees stable iteration order

// iterate over the sorted models
nextPort := config.StartPort
for _, modelId := range modelIds {
modelConfig := config.Models[modelId]
if strings.Contains(modelConfig.Cmd, "${PORT}") {
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${PORT}", strconv.Itoa(nextPort))
if modelConfig.Proxy == "" {
modelConfig.Proxy = fmt.Sprintf("http://localhost:%d", nextPort)
} else {
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, "${PORT}", strconv.Itoa(nextPort))
}
nextPort++
config.Models[modelId] = modelConfig
} else if modelConfig.Proxy == "" {
return Config{}, fmt.Errorf("model %s requires a proxy value when not using automatic ${PORT}", modelId)
}
}
config = AddDefaultGroupToConfig(config)
// check that members are all unique in the groups
memberUsage := make(map[string]string) // maps member to group it appears in
Expand Down
86 changes: 83 additions & 3 deletions proxy/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ models:
checkEndpoint: "/"
model4:
cmd: path/to/cmd --arg1 one
proxy: "http://localhost:8082"
checkEndpoint: "/"

healthCheckTimeout: 15
Expand Down Expand Up @@ -74,6 +75,7 @@ groups:
}

expected := Config{
StartPort: 5800,
Models: map[string]ModelConfig{
"model1": {
Cmd: "path/to/cmd --arg1 one",
Expand All @@ -98,6 +100,7 @@ groups:
},
"model4": {
Cmd: "path/to/cmd --arg1 one",
Proxy: "http://localhost:8082",
CheckEndpoint: "/",
},
},
Expand Down Expand Up @@ -166,8 +169,9 @@ groups:
`
// Load the config and verify
_, err := LoadConfigFromReader(strings.NewReader(content))
assert.Equal(t, "model member model2 is used in multiple groups: group1 and group2", err.Error())

// use Contains since map iteration order is not guaranteed
assert.Contains(t, err.Error(), "model member model2 is used in multiple groups:")
}

func TestConfig_ModelAliasesAreUnique(t *testing.T) {
Expand All @@ -186,10 +190,12 @@ models:
- m1
- m2
`

// Load the config and verify
_, err := LoadConfigFromReader(strings.NewReader(content))
assert.Equal(t, "duplicate alias m1 found in model: model2", err.Error())

// use Contains because the duplicate may be reported against `model1` or
// `model2`, depending on Go's map iteration order
assert.Contains(t, err.Error(), "duplicate alias m1 found in model: model")
}

func TestConfig_ModelConfigSanitizedCommand(t *testing.T) {
Expand Down Expand Up @@ -279,3 +285,77 @@ func TestConfig_SanitizeCommand(t *testing.T) {
assert.Error(t, err)
assert.Nil(t, args)
}

// TestConfig_AutomaticPortAssignments covers the ${PORT} substitution feature:
// the default and user-supplied startPort values, rejection of invalid
// startPort settings, sequential port assignment across models (in sorted
// model-ID order), and the rule that a model without ${PORT} in its cmd must
// supply an explicit proxy value.
func TestConfig_AutomaticPortAssignments(t *testing.T) {

	t.Run("Default Port Ranges", func(t *testing.T) {
		content := ``
		config, err := LoadConfigFromReader(strings.NewReader(content))
		if !assert.NoError(t, err) {
			t.Fatalf("Failed to load config: %v", err)
		}

		// startPort omitted -> falls back to the documented default of 5800
		assert.Equal(t, 5800, config.StartPort)
	})
	t.Run("User specific port ranges", func(t *testing.T) {
		content := `startPort: 1000`
		config, err := LoadConfigFromReader(strings.NewReader(content))
		if !assert.NoError(t, err) {
			t.Fatalf("Failed to load config: %v", err)
		}

		assert.Equal(t, 1000, config.StartPort)
	})

	t.Run("Invalid start port", func(t *testing.T) {
		// non-numeric value should fail YAML unmarshalling
		content := `startPort: abcd`
		_, err := LoadConfigFromReader(strings.NewReader(content))
		// assert.Error for consistency with the other tests in this file
		assert.Error(t, err)
	})

	t.Run("start port must be greater than 1", func(t *testing.T) {
		// negative values are rejected by the startPort validation
		content := `startPort: -99`
		_, err := LoadConfigFromReader(strings.NewReader(content))
		assert.Error(t, err)
	})

	t.Run("Automatic port assignments", func(t *testing.T) {
		content := `
startPort: 5800
models:
  model1:
    cmd: svr --port ${PORT}
  model2:
    cmd: svr --port ${PORT}
    proxy: "http://172.11.22.33:${PORT}"
  model3:
    cmd: svr --port 1999
    proxy: "http://1.2.3.4:1999"
`
		config, err := LoadConfigFromReader(strings.NewReader(content))
		if !assert.NoError(t, err) {
			t.Fatalf("Failed to load config: %v", err)
		}

		assert.Equal(t, 5800, config.StartPort)

		// model1: ${PORT} in cmd, no proxy -> proxy defaults to localhost:<port>
		assert.Equal(t, "svr --port 5800", config.Models["model1"].Cmd)
		assert.Equal(t, "http://localhost:5800", config.Models["model1"].Proxy)

		// model2: next sequential port is substituted in both cmd and proxy
		assert.Equal(t, "svr --port 5801", config.Models["model2"].Cmd)
		assert.Equal(t, "http://172.11.22.33:5801", config.Models["model2"].Proxy)

		// model3: no ${PORT}, so cmd and proxy pass through unchanged
		assert.Equal(t, "svr --port 1999", config.Models["model3"].Cmd)
		assert.Equal(t, "http://1.2.3.4:1999", config.Models["model3"].Proxy)

	})

	t.Run("Proxy value required if no ${PORT} in cmd", func(t *testing.T) {
		content := `
models:
  model1:
    cmd: svr --port 111
`
		_, err := LoadConfigFromReader(strings.NewReader(content))
		assert.Equal(t, "model model1 requires a proxy value when not using automatic ${PORT}", err.Error())
	})
}