diff --git a/cmd/aigw/ai-gateway-default-resources.yaml b/cmd/aigw/ai-gateway-default-resources.yaml index 231d9f92f6..e01e375b10 100644 --- a/cmd/aigw/ai-gateway-default-resources.yaml +++ b/cmd/aigw/ai-gateway-default-resources.yaml @@ -38,6 +38,7 @@ apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: name: aigw-run + namespace: default spec: gatewayClassName: aigw-run listeners: @@ -54,6 +55,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1 kind: EnvoyProxy metadata: name: envoy-ai-gateway + namespace: default spec: logging: level: @@ -63,6 +65,7 @@ apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIGatewayRoute metadata: name: aigw-run + namespace: default spec: schema: name: OpenAI @@ -79,6 +82,7 @@ spec: value: openai backendRefs: - name: openai + namespace: default # Special rule to unconditionally route to the AWS Bedrock backend regardless of the model. - matches: - headers: @@ -87,6 +91,7 @@ spec: value: aws backendRefs: - name: aws + namespace: default # Special rule to unconditionally route to the ollama backend regardless of the model. - matches: - headers: @@ -95,6 +100,7 @@ spec: value: ollama backendRefs: - name: ollama + namespace: default # A Model-specific rule, routing to the OpenAI backend if the model is gpt-4o-mini. - matches: - headers: @@ -103,6 +109,7 @@ spec: value: gpt-4o-mini backendRefs: - name: openai + namespace: default # A Model-specific rule, routing to the AWS Bedrock backend if the model is us.meta.llama3-2-1b-instruct-v1:0. - matches: - headers: @@ -111,6 +118,7 @@ spec: value: us.meta.llama3-2-1b-instruct-v1:0 backendRefs: - name: aws + namespace: default # A Model-specific rule, routing to the Ollama backend if the model is mistral:latest. - matches: - headers: @@ -119,11 +127,13 @@ spec: value: mistral:latest backendRefs: - name: ollama + namespace: default --- apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIServiceBackend metadata: name: openai + namespace: default spec: timeouts: request: 3m @@ -133,15 +143,18 @@ spec: name: openai kind: Backend group: gateway.envoyproxy.io + namespace: default backendSecurityPolicyRef: name: openai-apikey kind: BackendSecurityPolicy group: aigateway.envoyproxy.io + namespace: default --- apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIServiceBackend metadata: name: aws + namespace: default spec: timeouts: request: 3m @@ -151,15 +164,18 @@ spec: name: aws kind: Backend group: gateway.envoyproxy.io + namespace: default backendSecurityPolicyRef: name: aws-credentials kind: BackendSecurityPolicy group: aigateway.envoyproxy.io + namespace: default --- apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIServiceBackend metadata: name: ollama + namespace: default spec: timeouts: request: 3m @@ -169,11 +185,13 @@ spec: name: ollama kind: Backend group: gateway.envoyproxy.io + namespace: default --- apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: BackendSecurityPolicy metadata: name: openai-apikey + namespace: default spec: type: APIKey apiKey: @@ -184,6 +202,7 @@ apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: BackendSecurityPolicy metadata: name: aws-credentials + namespace: default spec: type: AWSCredentials awsCredentials: @@ -196,6 +215,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1 kind: Backend metadata: name: openai + namespace: default spec: endpoints: - fqdn: @@ -206,6 +226,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1 kind: Backend metadata: name: aws + namespace: default spec: endpoints: - fqdn: @@ -216,6 +237,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1 kind: Backend metadata: name: ollama + namespace: default spec: endpoints: - ip: @@ -227,6 +249,7 @@ apiVersion: gateway.networking.k8s.io/v1alpha3 kind: BackendTLSPolicy metadata: name: openai-tls + namespace: default spec: targetRefs: - group: 'gateway.envoyproxy.io' @@ -240,6 +263,7 @@ apiVersion: gateway.networking.k8s.io/v1alpha3 kind: BackendTLSPolicy metadata: name: aws-tls + namespace: default spec: targetRefs: - group: 'gateway.envoyproxy.io' @@ -253,6 +277,7 @@ apiVersion: v1 kind: Secret metadata: name: openai-apikey + namespace: default annotations: # This will tell the CLI to replace the value of the apiKey field # with the value of the environment variable OPENAI_API_KEY. @@ -266,6 +291,7 @@ apiVersion: v1 kind: Secret metadata: name: aws-credentials + namespace: default annotations: # This will tell the CLI to symlink the file used via the credentials field # to the file at ~/.aws/credentials. diff --git a/cmd/aigw/envoy-gateway-config.yaml b/cmd/aigw/envoy-gateway-config.yaml index d51bf69f9a..a6cf7a6635 100644 --- a/cmd/aigw/envoy-gateway-config.yaml +++ b/cmd/aigw/envoy-gateway-config.yaml @@ -22,3 +22,13 @@ logging: default: error extensionApis: enableBackend: true +extensionManager: + hooks: + xdsTranslator: + post: + - VirtualHost + - Translation + service: + fqdn: + hostname: localhost + port: 1061 diff --git a/cmd/aigw/run.go b/cmd/aigw/run.go index 59cb2c22fa..8d0d42490f 100644 --- a/cmd/aigw/run.go +++ b/cmd/aigw/run.go @@ -20,16 +20,21 @@ import ( egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" "github.com/envoyproxy/gateway/cmd/envoy-gateway/root" + egextension "github.com/envoyproxy/gateway/proto/extension" + "google.golang.org/grpc" + "google.golang.org/grpc/health/grpc_health_v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/yaml" "github.com/envoyproxy/ai-gateway/cmd/extproc/mainlib" "github.com/envoyproxy/ai-gateway/filterapi" + "github.com/envoyproxy/ai-gateway/internal/extensionserver" ) // This is the default configuration for the AI Gateway when parameter is not given. @@ -134,11 +139,29 @@ func run(ctx context.Context, c cmdRun, stdout, stderr io.Writer) error { } aiGatewayResourcesYaml = string(yamlBytes) } - err = runCtx.writeEnvoyResourcesAndRunExtProc(ctx, aiGatewayResourcesYaml) + fakeCleint, err := runCtx.writeEnvoyResourcesAndRunExtProc(ctx, aiGatewayResourcesYaml) if err != nil { return err } + lis, err := net.Listen("tcp", "localhost:1061") + if err != nil { + return fmt.Errorf("failed to listen: %w", err) + } + s := grpc.NewServer() + extSrv := extensionserver.New(fakeCleint, ctrl.Log) + egextension.RegisterEnvoyGatewayExtensionServer(s, extSrv) + grpc_health_v1.RegisterHealthServer(s, extSrv) + go func() { + <-ctx.Done() + s.GracefulStop() + }() + go func() { + if err := s.Serve(lis); err != nil { + stderrLogger.Error("Failed to run extension server", "error", err) + } + }() + // At this point, we have two things prepared: // 1. The Envoy Gateway config in egConfigPath. // 2. The Envoy Gateway resources in resourceYamlPath pointed by the config at egConfigPath. @@ -173,22 +196,22 @@ func recreateDir(path string) error { // writeEnvoyResourcesAndRunExtProc reads all resources from the given string, writes them to the output file, and runs // external processes for EnvoyExtensionPolicy resources. -func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Context, original string) error { +func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Context, original string) (client.Client, error) { aigwRoutes, aigwBackends, backendSecurityPolicies, secrets, err := collectObjects(original, runCtx.envoyGatewayResourcesOut, runCtx.stderrLogger) if err != nil { - return fmt.Errorf("error collecting: %w", err) + return nil, fmt.Errorf("error collecting: %w", err) } for _, bsp := range backendSecurityPolicies { spec := bsp.Spec if spec.AWSCredentials != nil && spec.AWSCredentials.OIDCExchangeToken != nil { // TODO: We can make it work by generalizing the rotation logic. - return fmt.Errorf("OIDC exchange token is not supported: %s", bsp.Name) + return nil, fmt.Errorf("OIDC exchange token is not supported: %s", bsp.Name) } } - _fakeClientSet, httpRoutes, extensionPolicies, httpRouteFilter, _, _, _, _, err := translateCustomResourceObjects(ctx, aigwRoutes, aigwBackends, backendSecurityPolicies, runCtx.stderrLogger) + fakeClient, _fakeClientSet, httpRoutes, extensionPolicies, httpRouteFilter, _, _, _, _, err := translateCustomResourceObjects(ctx, aigwRoutes, aigwBackends, backendSecurityPolicies, runCtx.stderrLogger) if err != nil { - return fmt.Errorf("error translating: %w", err) + return nil, fmt.Errorf("error translating: %w", err) } runCtx.fakeClientSet = _fakeClientSet @@ -204,7 +227,7 @@ func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Contex // Store the user defined secrets in the fake client set. for _, s := range secrets { if _, err := runCtx.fakeClientSet.CoreV1().Secrets(s.Namespace).Create(ctx, s, metav1.CreateOptions{}); err != nil { - return fmt.Errorf("failed to create secret %s: %w", s.Name, err) + return nil, fmt.Errorf("failed to create secret %s: %w", s.Name, err) } } @@ -217,7 +240,7 @@ func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Contex } wd, port, filterCfg, err := runCtx.writeExtensionPolicy(ep) if err != nil { - return err + return nil, err } runCtx.stderrLogger.Info("Running external process", "policy", ep.Name, "port", port, @@ -225,7 +248,7 @@ func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Contex ) runCtx.mustStartExtProc(ctx, wd, port, filterCfg) } - return nil + return fakeClient, nil } // writeExtensionPolicy modifies the given EnvoyExtensionPolicy to run an external process locally, writes the diff --git a/cmd/aigw/run_test.go b/cmd/aigw/run_test.go index 056a0997bd..8dcc6287ca 100644 --- a/cmd/aigw/run_test.go +++ b/cmd/aigw/run_test.go @@ -50,7 +50,6 @@ func setupDefaultAIGatewayResourcesWithAvailableCredentials(t *testing.T) (strin } func TestRun(t *testing.T) { - t.Skip("TODO: https://github.com/envoyproxy/gateway/pull/5767") resourcePath, cc := setupDefaultAIGatewayResourcesWithAvailableCredentials(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -144,7 +143,7 @@ func TestRunCmdContext_writeEnvoyResourcesAndRunExtProc(t *testing.T) { content, err := os.ReadFile(resourcePath) require.NoError(t, err) ctx, cancel := context.WithCancel(context.Background()) - err = runCtx.writeEnvoyResourcesAndRunExtProc(ctx, string(content)) + _, err = runCtx.writeEnvoyResourcesAndRunExtProc(ctx, string(content)) require.NoError(t, err) time.Sleep(1 * time.Second) cancel() diff --git a/cmd/aigw/translate.go b/cmd/aigw/translate.go index 2177bda5bf..ea2bc5c8bf 100644 --- a/cmd/aigw/translate.go +++ b/cmd/aigw/translate.go @@ -53,7 +53,7 @@ func translate(ctx context.Context, cmd cmdTranslate, output, stderr io.Writer) return fmt.Errorf("error translating: %w", err) } - _, httpRoutes, extensionPolicies, httpRouteFilter, configMaps, secrets, deployments, services, err := translateCustomResourceObjects(ctx, aigwRoutes, aigwBackends, backendSecurityPolicies, stderrLogger) + _, _, httpRoutes, extensionPolicies, httpRouteFilter, configMaps, secrets, deployments, services, err := translateCustomResourceObjects(ctx, aigwRoutes, aigwBackends, backendSecurityPolicies, stderrLogger) if err != nil { return fmt.Errorf("error emitting: %w", err) } @@ -168,6 +168,7 @@ func translateCustomResourceObjects( backendSecurityPolicies []*aigv1a1.BackendSecurityPolicy, logger *slog.Logger, ) ( + fakeClient client.Client, fakeClientSet *fake2.Clientset, httpRoutes gwapiv1.HTTPRouteList, extensionPolicies egv1a1.EnvoyExtensionPolicyList, @@ -187,7 +188,7 @@ func translateCustomResourceObjects( builder = builder.WithIndex(obj, field, extractValue) return nil }) // Error should never happen. - fakeClient := builder.Build() + fakeClient = builder.Build() fakeClientSet = fake2.NewClientset() bspC := controller.NewBackendSecurityPolicyController(fakeClient, fakeClientSet, logr.Discard(), diff --git a/go.mod b/go.mod index 869a478352..3e17b531ff 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/aws/aws-sdk-go-v2/config v1.29.14 github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 github.com/coreos/go-oidc/v3 v3.14.1 - github.com/envoyproxy/gateway v1.4.0-rc.2 + github.com/envoyproxy/gateway v0.5.0-rc.1.0.20250513103415-8455177501fb github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250408134212-157c26b62099 github.com/go-logr/logr v1.4.2 github.com/google/cel-go v0.25.0 diff --git a/go.sum b/go.sum index 4616fd7946..2572c28ca9 100644 --- a/go.sum +++ b/go.sum @@ -328,8 +328,8 @@ github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtz github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= -github.com/envoyproxy/gateway v1.4.0-rc.2 h1:lnEuvTqOutOfvXDue5AYBnvfobtH7iab9ezKjABBOjc= -github.com/envoyproxy/gateway v1.4.0-rc.2/go.mod h1:ZyMCOCZOyWVZSNNS5c2fEVDp9g/JTebeW97YWrjNFZQ= +github.com/envoyproxy/gateway v0.5.0-rc.1.0.20250513103415-8455177501fb h1:XUBZGIHnz57Eo3pHrWCPZTr9wIUMrcNf5UgjZed4BqM= +github.com/envoyproxy/gateway v0.5.0-rc.1.0.20250513103415-8455177501fb/go.mod h1:6LPhGttzeXGLcu86IkwMkU5y7gK8cA9/v+fBjMIPlJk= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.13.5-0.20250408134212-157c26b62099 h1:Shucu2sY2a/KjKzO10Y0st2iN0Flzj5QsTEgMFtd0MY= github.com/envoyproxy/go-control-plane v0.13.5-0.20250408134212-157c26b62099/go.mod h1:Kf4hNGzgvzKhoKdlSXD+IZtG55h9r2SOpO1kRKLI03o=