Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
- ✅ llama-server (llama.cpp) supported endpoints:
- `v1/rerank`, `v1/reranking`, `/rerank`
- `/infill` - for code infilling
- `/completion` - for text completion
- ✅ llama-swap custom API endpoints
- `/ui` - web UI
- `/log` - remote log monitoring
Expand Down
13 changes: 13 additions & 0 deletions misc/simple-responder/simple-responder.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,19 @@ func main() {

})

// llama-server compatibility: a stub /completion endpoint that echoes the
// configured response message along with fixed token-usage numbers.
r.POST("/completion", func(c *gin.Context) {
	// Pre-set the Content-Type explicitly (gin will then not append a charset).
	c.Header("Content-Type", "application/json")

	usage := gin.H{
		"completion_tokens": 10,
		"prompt_tokens":     25,
		"total_tokens":      35,
	}
	c.JSON(http.StatusOK, gin.H{
		"responseMessage": *responseMessage,
		"usage":           usage,
	})
})

// issue #41
r.POST("/v1/audio/transcriptions", func(c *gin.Context) {
// Parse the multipart form
Expand Down
3 changes: 3 additions & 0 deletions proxy/proxymanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ func (pm *ProxyManager) setupGinEngine() {
// llama-server's /infill endpoint for code infilling
pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler)

// llama-server's /completion endpoint — routed through the same
// model-resolving middleware (mm) and OpenAI proxy handler as /infill above.
pm.ginEngine.POST("/completion", mm, pm.proxyOAIHandler)

// Support audio/speech endpoint
pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)
Expand Down
22 changes: 22 additions & 0 deletions proxy/proxymanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,28 @@ func TestProxyManager_HealthEndpoint(t *testing.T) {
assert.Equal(t, "OK", rec.Body.String())
}

// TestProxyManager_CompletionEndpoint verifies that a POST to the
// llama-server-compatible /completion endpoint is proxied to the model's
// backend and that the backend's response makes it back to the client.
func TestProxyManager_CompletionEndpoint(t *testing.T) {
	cfg := AddDefaultGroupToConfig(Config{
		HealthCheckTimeout: 15,
		LogLevel:           "error",
		Models: map[string]ModelConfig{
			"model1": getTestSimpleResponderConfig("model1"),
		},
	})

	pm := New(cfg)
	defer pm.StopProcesses(StopWaitForInflightRequest)

	body := bytes.NewBufferString(`{"model":"model1"}`)
	request := httptest.NewRequest(http.MethodPost, "/completion", body)
	recorder := httptest.NewRecorder()

	pm.ServeHTTP(recorder, request)

	assert.Equal(t, http.StatusOK, recorder.Code)
	assert.Contains(t, recorder.Body.String(), "model1")
}

func TestProxyManager_StartupHooks(t *testing.T) {

// using real YAML as the configuration has gotten more complex
Expand Down
Loading