ericcurtin · Copilot · Dec 3, 2025 · Dec 3, 2025 · Dec 3, 2025 · Dec 3, 2025
diff --git a/main.go b/main.go
@@ -11,6 +11,7 @@ import (
 	"syscall"
 	"time"
 
+	"github.com/docker/model-runner/pkg/anthropic"
 	"github.com/docker/model-runner/pkg/gpuinfo"
 	"github.com/docker/model-runner/pkg/inference"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
@@ -183,6 +184,10 @@ func main() {
 	ollamaHandler := ollama.NewHandler(log, scheduler, nil, modelManager)
 	router.Handle(ollama.APIPrefix+"/", ollamaHandler)
 
+	// Add Anthropic Messages API compatibility layer
+	anthropicHandler := anthropic.NewHandler(log, scheduler, nil, modelManager)
+	router.Handle(anthropic.APIPrefix+"/", anthropicHandler)
+
 	// Register root handler LAST - it will only catch exact "/" requests that don't match other patterns
 	router.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
 		// Only respond to exact root path

diff --git a/pkg/anthropic/handler.go b/pkg/anthropic/handler.go
@@ -0,0 +1,171 @@
+package anthropic
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"io"
+	"net/http"
+
+	"github.com/docker/model-runner/pkg/inference"
+	"github.com/docker/model-runner/pkg/inference/models"
+	"github.com/docker/model-runner/pkg/inference/scheduling"
+	"github.com/docker/model-runner/pkg/internal/utils"
+	"github.com/docker/model-runner/pkg/logging"
+	"github.com/docker/model-runner/pkg/middleware"
+)
+
+const (
+	// APIPrefix is the prefix for Anthropic API routes.
+	// llama.cpp implements Anthropic API at /v1/messages, matching the official Anthropic API structure.
+	APIPrefix = "/anthropic"
+)
+
+// Handler is the http.Handler that exposes the Anthropic Messages API
+// compatibility layer. Requests are validated here and then handed to the
+// scheduler, which proxies them to llama.cpp.
+type Handler struct {
+	log          logging.Logger        // request and error logging
+	modelManager *models.Manager       // checks that the requested model exists locally
+	scheduler    *scheduling.Scheduler // receives the rewritten inference request
+	router       *http.ServeMux        // route table built from routeHandlers
+	httpHandler  http.Handler          // router wrapped in the CORS middleware
+}
+
+// NewHandler builds the Anthropic API handler: it registers every route from
+// routeHandlers on a fresh ServeMux and wraps that mux in the CORS middleware
+// configured with allowedOrigins.
+func NewHandler(log logging.Logger, scheduler *scheduling.Scheduler, allowedOrigins []string, modelManager *models.Manager) *Handler {
+	mux := http.NewServeMux()
+	handler := &Handler{
+		log:          log,
+		router:       mux,
+		scheduler:    scheduler,
+		modelManager: modelManager,
+	}
+
+	// Populate the mux before it is wrapped by the middleware.
+	for pattern, fn := range handler.routeHandlers() {
+		mux.HandleFunc(pattern, fn)
+	}
+	handler.httpHandler = middleware.CorsMiddleware(allowedOrigins, mux)
+	return handler
+}
+
+// ServeHTTP implements http.Handler. It logs the sanitized method and path of
+// the request, then dispatches it through the CORS-wrapped router.
+func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	method, path := utils.SanitizeForLog(r.Method, -1), utils.SanitizeForLog(r.URL.Path, -1)
+	h.log.Infof("Anthropic API request: %s %s", method, path)
+	h.httpHandler.ServeHTTP(w, r)
+}
+
+// routeHandlers builds the ServeMux pattern -> handler table for this API.
+func (h *Handler) routeHandlers() map[string]http.HandlerFunc {
+	routes := make(map[string]http.HandlerFunc, 2)
+	// Main Messages (chat completion) endpoint.
+	routes["POST "+APIPrefix+"/v1/messages"] = h.handleMessages
+	// Token counting endpoint.
+	routes["POST "+APIPrefix+"/v1/messages/count_tokens"] = h.handleCountTokens
+	return routes
+}
+
+// MessagesRequest captures the only field of an Anthropic Messages API request
+// that this package decodes: the model name. The raw body is forwarded as-is.
+type MessagesRequest struct {
+	Model string `json:"model"`
+}
+
+// handleMessages handles POST /anthropic/v1/messages requests.
+// It forwards the request to the scheduler which proxies to the llama.cpp backend.
+// The llama.cpp backend natively handles the Anthropic Messages API format conversion.
+func (h *Handler) handleMessages(w http.ResponseWriter, r *http.Request) {
+	h.proxyToBackend(w, r, "/v1/messages")
+}
+
+// handleCountTokens handles POST /anthropic/v1/messages/count_tokens requests.
+// It forwards the request to the scheduler which proxies to the llama.cpp backend.
+func (h *Handler) handleCountTokens(w http.ResponseWriter, r *http.Request) {
+	h.proxyToBackend(w, r, "/v1/messages/count_tokens")
+}
+
+// proxyToBackend validates an Anthropic request and forwards it to the scheduler.
+//
+// The body (capped at 10MB) is buffered so the "model" field can be read, the
+// model is looked up with modelManager.GetLocal, and a clone of r is then sent
+// to the scheduler with its path rewritten to inference.InferencePrefix+targetPath.
+// Any failure is reported to the client as an Anthropic-format error.
+func (h *Handler) proxyToBackend(w http.ResponseWriter, r *http.Request, targetPath string) {
+	const maxBodyBytes = 10 * 1024 * 1024 // 10MB limit
+
+	// Buffer the body: it is parsed here and replayed to the backend below.
+	body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, maxBodyBytes))
+	if err != nil {
+		var maxBytesError *http.MaxBytesError
+		if errors.As(err, &maxBytesError) {
+			h.writeAnthropicError(w, http.StatusRequestEntityTooLarge, "request_too_large", "Request body too large")
+			return
+		}
+		// "api_error" is the error type the Anthropic API documents for HTTP 500.
+		h.writeAnthropicError(w, http.StatusInternalServerError, "api_error", "Failed to read request body")
+		return
+	}
+
+	// Only the model field is decoded here; it is needed for routing.
+	var req MessagesRequest
+	if err := json.Unmarshal(body, &req); err != nil {
+		h.writeAnthropicError(w, http.StatusBadRequest, "invalid_request_error", "Invalid JSON in request body")
+		return
+	}
+	if req.Model == "" {
+		h.writeAnthropicError(w, http.StatusBadRequest, "invalid_request_error", "Missing required field: model")
+		return
+	}
+
+	// Reject models that are not available locally before involving the scheduler.
+	modelName := models.NormalizeModelName(req.Model)
+	if _, err := h.modelManager.GetLocal(modelName); err != nil {
+		h.writeAnthropicError(w, http.StatusNotFound, "not_found_error", "Model not found: "+modelName)
+		return
+	}
+
+	// Re-target a clone of the request at the inference endpoint; the scheduler
+	// routes it to the appropriate backend.
+	newReq := r.Clone(r.Context())
+	newReq.URL.Path = inference.InferencePrefix + targetPath
+	newReq.Body = io.NopCloser(bytes.NewReader(body))
+	newReq.ContentLength = int64(len(body))
+	newReq.Header.Set("Content-Type", "application/json")
+	newReq.Header.Set(inference.RequestOriginHeader, inference.OriginAnthropicMessages)
+	h.scheduler.ServeHTTP(w, newReq)
+}
+
+// AnthropicError is the top-level JSON envelope of an Anthropic-format error response.
+type AnthropicError struct {
+	Type  string            `json:"type"`  // set to "error" by writeAnthropicError
+	Error AnthropicErrorObj `json:"error"` // error category and message
+}
+
+// AnthropicErrorObj is the nested "error" object of an Anthropic-format error response.
+type AnthropicErrorObj struct {
+	Type    string `json:"type"`    // machine-readable category, e.g. "invalid_request_error"
+	Message string `json:"message"` // human-readable description
+}
+
+// writeAnthropicError sends an Anthropic-style JSON error envelope
+// ({"type":"error","error":{...}}) with the given HTTP status code.
+// A failure to encode or write the body is only logged, because the status
+// code has already been written by that point.
+func (h *Handler) writeAnthropicError(w http.ResponseWriter, statusCode int, errorType, message string) {
+	payload := AnthropicError{Type: "error"}
+	payload.Error.Type = errorType
+	payload.Error.Message = message
+
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(statusCode)
+
+	enc := json.NewEncoder(w)
+	if encErr := enc.Encode(payload); encErr != nil {
+		h.log.Errorf("Failed to encode error response: %v", encErr)
+	}
+}
diff --git a/pkg/anthropic/handler_test.go b/pkg/anthropic/handler_test.go
@@ -0,0 +1,98 @@
+package anthropic
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+func TestWriteAnthropicError(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name       string
+		statusCode int
+		errorType  string
+		message    string
+		wantBody   string
+	}{
+		{
+			name:       "invalid request error",
+			statusCode: http.StatusBadRequest,
+			errorType:  "invalid_request_error",
+			message:    "Missing required field: model",
+			wantBody:   `{"type":"error","error":{"type":"invalid_request_error","message":"Missing required field: model"}}`,
+		},
+		{
+			name:       "not found error",
+			statusCode: http.StatusNotFound,
+			errorType:  "not_found_error",
+			message:    "Model not found: test-model",
+			wantBody:   `{"type":"error","error":{"type":"not_found_error","message":"Model not found: test-model"}}`,
+		},
+		{
+			name:       "internal error",
+			statusCode: http.StatusInternalServerError,
+			errorType:  "internal_error",
+			message:    "An internal error occurred",
+			wantBody:   `{"type":"error","error":{"type":"internal_error","message":"An internal error occurred"}}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			rec := httptest.NewRecorder()
+			h := &Handler{}
+			h.writeAnthropicError(rec, tt.statusCode, tt.errorType, tt.message)
+
+			if rec.Code != tt.statusCode {
+				t.Errorf("expected status %d, got %d", tt.statusCode, rec.Code)
+			}
+
+			if contentType := rec.Header().Get("Content-Type"); contentType != "application/json" {
+				t.Errorf("expected Content-Type application/json, got %s", contentType)
+			}
+
+			body := strings.TrimSpace(rec.Body.String())
+			if body != tt.wantBody {
+				t.Errorf("expected body %s, got %s", tt.wantBody, body)
+			}
+		})
+	}
+}
+
+// TestRouteHandlers verifies that routeHandlers exposes exactly the two
+// expected POST patterns.
+func TestRouteHandlers(t *testing.T) {
+	t.Parallel()
+
+	handler := &Handler{router: http.NewServeMux()}
+	got := handler.routeHandlers()
+
+	want := []string{
+		"POST " + APIPrefix + "/v1/messages",
+		"POST " + APIPrefix + "/v1/messages/count_tokens",
+	}
+
+	// Every expected pattern must be present...
+	for i := range want {
+		if _, ok := got[want[i]]; !ok {
+			t.Errorf("expected route %s to be registered", want[i])
+		}
+	}
+	// ...and there must be no extra ones.
+	if len(got) != len(want) {
+		t.Errorf("expected %d routes, got %d", len(want), len(got))
+	}
+}
+
+// TestAPIPrefix pins the public mount point of the Anthropic routes.
+func TestAPIPrefix(t *testing.T) {
+	t.Parallel()
+	if got := APIPrefix; got != "/anthropic" {
+		t.Errorf("expected APIPrefix to be /anthropic, got %s", got)
+	}
+}
diff --git a/pkg/go-containerregistry/cmd/krane/go.mod b/pkg/go-containerregistry/cmd/krane/go.mod
@@ -7,7 +7,7 @@ replace github.com/google/go-containerregistry => ../../
 require (
 	github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.11.0
 	github.com/chrismellard/docker-credential-acr-env v0.0.0-20230304212654-82a0ddb27589
-	github.com/google/go-containerregistry v0.20.3
+	github.com/docker/model-runner/pkg/go-containerregistry v0.0.0-20251203142437-40446829248e
 )
 
 require (
@@ -38,7 +38,7 @@ require (
 	github.com/aws/smithy-go v1.23.2 // indirect
 	github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect
 	github.com/dimchansky/utfbom v1.1.1 // indirect
-	github.com/docker/cli v28.2.2+incompatible // indirect
+	github.com/docker/cli v28.3.0+incompatible // indirect
 	github.com/docker/distribution v2.8.3+incompatible // indirect
 	github.com/docker/docker-credential-helpers v0.9.4 // indirect
 	github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
@@ -50,12 +50,11 @@ require (
 	github.com/opencontainers/image-spec v1.1.1 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/sirupsen/logrus v1.9.3 // indirect
-	github.com/spf13/cobra v1.9.1 // indirect
-	github.com/spf13/pflag v1.0.6 // indirect
+	github.com/spf13/cobra v1.10.1 // indirect
+	github.com/spf13/pflag v1.0.9 // indirect
 	github.com/vbatts/tar-split v0.12.1 // indirect
 	golang.org/x/crypto v0.38.0 // indirect
-	golang.org/x/oauth2 v0.30.0 // indirect
-	golang.org/x/sync v0.15.0 // indirect
-	golang.org/x/sys v0.33.0 // indirect
-	gotest.tools/v3 v3.1.0 // indirect
+	golang.org/x/oauth2 v0.31.0 // indirect
+	golang.org/x/sync v0.17.0 // indirect
+	golang.org/x/sys v0.36.0 // indirect
 )