Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cmd/aigw/ai-gateway-default-resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
name: aigw-run
namespace: default
spec:
gatewayClassName: aigw-run
listeners:
Expand All @@ -54,6 +55,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyProxy
metadata:
name: envoy-ai-gateway
namespace: default
spec:
logging:
level:
Expand All @@ -63,6 +65,7 @@ apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIGatewayRoute
metadata:
name: aigw-run
namespace: default
spec:
schema:
name: OpenAI
Expand All @@ -79,6 +82,7 @@ spec:
value: openai
backendRefs:
- name: openai
namespace: default
# Special rule to unconditionally route to the AWS Bedrock backend regardless of the model.
- matches:
- headers:
Expand All @@ -87,6 +91,7 @@ spec:
value: aws
backendRefs:
- name: aws
namespace: default
# Special rule to unconditionally route to the ollama backend regardless of the model.
- matches:
- headers:
Expand All @@ -95,6 +100,7 @@ spec:
value: ollama
backendRefs:
- name: ollama
namespace: default
# A Model-specific rule, routing to the OpenAI backend if the model is gpt-4o-mini.
- matches:
- headers:
Expand All @@ -103,6 +109,7 @@ spec:
value: gpt-4o-mini
backendRefs:
- name: openai
namespace: default
# A Model-specific rule, routing to the AWS Bedrock backend if the model is us.meta.llama3-2-1b-instruct-v1:0.
- matches:
- headers:
Expand All @@ -111,6 +118,7 @@ spec:
value: us.meta.llama3-2-1b-instruct-v1:0
backendRefs:
- name: aws
namespace: default
# A Model-specific rule, routing to the Ollama backend if the model is mistral:latest.
- matches:
- headers:
Expand All @@ -119,11 +127,13 @@ spec:
value: mistral:latest
backendRefs:
- name: ollama
namespace: default
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
metadata:
name: openai
namespace: default
spec:
timeouts:
request: 3m
Expand All @@ -133,15 +143,18 @@ spec:
name: openai
kind: Backend
group: gateway.envoyproxy.io
namespace: default
backendSecurityPolicyRef:
name: openai-apikey
kind: BackendSecurityPolicy
group: aigateway.envoyproxy.io
namespace: default
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
metadata:
name: aws
namespace: default
spec:
timeouts:
request: 3m
Expand All @@ -151,15 +164,18 @@ spec:
name: aws
kind: Backend
group: gateway.envoyproxy.io
namespace: default
backendSecurityPolicyRef:
name: aws-credentials
kind: BackendSecurityPolicy
group: aigateway.envoyproxy.io
namespace: default
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
metadata:
name: ollama
namespace: default
spec:
timeouts:
request: 3m
Expand All @@ -169,11 +185,13 @@ spec:
name: ollama
kind: Backend
group: gateway.envoyproxy.io
namespace: default
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: BackendSecurityPolicy
metadata:
name: openai-apikey
namespace: default
spec:
type: APIKey
apiKey:
Expand All @@ -184,6 +202,7 @@ apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: BackendSecurityPolicy
metadata:
name: aws-credentials
namespace: default
spec:
type: AWSCredentials
awsCredentials:
Expand All @@ -196,6 +215,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1
kind: Backend
metadata:
name: openai
namespace: default
spec:
endpoints:
- fqdn:
Expand All @@ -206,6 +226,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1
kind: Backend
metadata:
name: aws
namespace: default
spec:
endpoints:
- fqdn:
Expand All @@ -216,6 +237,7 @@ apiVersion: gateway.envoyproxy.io/v1alpha1
kind: Backend
metadata:
name: ollama
namespace: default
spec:
endpoints:
- ip:
Expand All @@ -227,6 +249,7 @@ apiVersion: gateway.networking.k8s.io/v1alpha3
kind: BackendTLSPolicy
metadata:
name: openai-tls
namespace: default
spec:
targetRefs:
- group: 'gateway.envoyproxy.io'
Expand All @@ -240,6 +263,7 @@ apiVersion: gateway.networking.k8s.io/v1alpha3
kind: BackendTLSPolicy
metadata:
name: aws-tls
namespace: default
spec:
targetRefs:
- group: 'gateway.envoyproxy.io'
Expand All @@ -253,6 +277,7 @@ apiVersion: v1
kind: Secret
metadata:
name: openai-apikey
namespace: default
annotations:
# This will tell the CLI to replace the value of the apiKey field
# with the value of the environment variable OPENAI_API_KEY.
Expand All @@ -266,6 +291,7 @@ apiVersion: v1
kind: Secret
metadata:
name: aws-credentials
namespace: default
annotations:
# This will tell the CLI to symlink the file used via the credentials field
# to the file at ~/.aws/credentials.
Expand Down
10 changes: 10 additions & 0 deletions cmd/aigw/envoy-gateway-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,13 @@ logging:
default: error
extensionApis:
enableBackend: true
extensionManager:
hooks:
xdsTranslator:
post:
- VirtualHost
- Translation
service:
fqdn:
hostname: localhost
port: 1061
41 changes: 32 additions & 9 deletions cmd/aigw/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,21 @@ import (

egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
"github.com/envoyproxy/gateway/cmd/envoy-gateway/root"
egextension "github.com/envoyproxy/gateway/proto/extension"
"google.golang.org/grpc"
"google.golang.org/grpc/health/grpc_health_v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
"sigs.k8s.io/yaml"

"github.com/envoyproxy/ai-gateway/cmd/extproc/mainlib"
"github.com/envoyproxy/ai-gateway/filterapi"
"github.com/envoyproxy/ai-gateway/internal/extensionserver"
)

// This is the default configuration for the AI Gateway when the <path> parameter is not given.
Expand Down Expand Up @@ -134,11 +139,29 @@ func run(ctx context.Context, c cmdRun, stdout, stderr io.Writer) error {
}
aiGatewayResourcesYaml = string(yamlBytes)
}
err = runCtx.writeEnvoyResourcesAndRunExtProc(ctx, aiGatewayResourcesYaml)
fakeCleint, err := runCtx.writeEnvoyResourcesAndRunExtProc(ctx, aiGatewayResourcesYaml)
if err != nil {
return err
}

lis, err := net.Listen("tcp", "localhost:1061")
if err != nil {
return fmt.Errorf("failed to listen: %w", err)
}
s := grpc.NewServer()
extSrv := extensionserver.New(fakeCleint, ctrl.Log)
egextension.RegisterEnvoyGatewayExtensionServer(s, extSrv)
grpc_health_v1.RegisterHealthServer(s, extSrv)
go func() {
<-ctx.Done()
s.GracefulStop()
}()
go func() {
if err := s.Serve(lis); err != nil {
stderrLogger.Error("Failed to run extension server", "error", err)
}
}()

// At this point, we have two things prepared:
// 1. The Envoy Gateway config in egConfigPath.
// 2. The Envoy Gateway resources in resourceYamlPath pointed to by the config at egConfigPath.
Expand Down Expand Up @@ -173,22 +196,22 @@ func recreateDir(path string) error {

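// writeEnvoyResourcesAndRunExtProc reads all resources from the given string, writes them to the output file, and runs
// external processes for EnvoyExtensionPolicy resources. On success it returns the client.Client obtained while
// translating the custom resources; on any error the returned client is nil.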
func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Context, original string) error {
func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Context, original string) (client.Client, error) {
aigwRoutes, aigwBackends, backendSecurityPolicies, secrets, err := collectObjects(original, runCtx.envoyGatewayResourcesOut, runCtx.stderrLogger)
if err != nil {
return fmt.Errorf("error collecting: %w", err)
return nil, fmt.Errorf("error collecting: %w", err)
}

for _, bsp := range backendSecurityPolicies {
spec := bsp.Spec
if spec.AWSCredentials != nil && spec.AWSCredentials.OIDCExchangeToken != nil {
// TODO: We can make it work by generalizing the rotation logic.
return fmt.Errorf("OIDC exchange token is not supported: %s", bsp.Name)
return nil, fmt.Errorf("OIDC exchange token is not supported: %s", bsp.Name)
}
}
_fakeClientSet, httpRoutes, extensionPolicies, httpRouteFilter, _, _, _, _, err := translateCustomResourceObjects(ctx, aigwRoutes, aigwBackends, backendSecurityPolicies, runCtx.stderrLogger)
fakeClient, _fakeClientSet, httpRoutes, extensionPolicies, httpRouteFilter, _, _, _, _, err := translateCustomResourceObjects(ctx, aigwRoutes, aigwBackends, backendSecurityPolicies, runCtx.stderrLogger)
if err != nil {
return fmt.Errorf("error translating: %w", err)
return nil, fmt.Errorf("error translating: %w", err)
}
runCtx.fakeClientSet = _fakeClientSet

Expand All @@ -204,7 +227,7 @@ func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Contex
// Store the user defined secrets in the fake client set.
for _, s := range secrets {
if _, err := runCtx.fakeClientSet.CoreV1().Secrets(s.Namespace).Create(ctx, s, metav1.CreateOptions{}); err != nil {
return fmt.Errorf("failed to create secret %s: %w", s.Name, err)
return nil, fmt.Errorf("failed to create secret %s: %w", s.Name, err)
}
}

Expand All @@ -217,15 +240,15 @@ func (runCtx *runCmdContext) writeEnvoyResourcesAndRunExtProc(ctx context.Contex
}
wd, port, filterCfg, err := runCtx.writeExtensionPolicy(ep)
if err != nil {
return err
return nil, err
}
runCtx.stderrLogger.Info("Running external process",
"policy", ep.Name, "port", port,
"working directory", wd, "config", filterCfg,
)
runCtx.mustStartExtProc(ctx, wd, port, filterCfg)
}
return nil
return fakeClient, nil
}

// writeExtensionPolicy modifies the given EnvoyExtensionPolicy to run an external process locally, writes the
Expand Down
3 changes: 1 addition & 2 deletions cmd/aigw/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ func setupDefaultAIGatewayResourcesWithAvailableCredentials(t *testing.T) (strin
}

func TestRun(t *testing.T) {
t.Skip("TODO: https://github.com/envoyproxy/gateway/pull/5767")
resourcePath, cc := setupDefaultAIGatewayResourcesWithAvailableCredentials(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
Expand Down Expand Up @@ -144,7 +143,7 @@ func TestRunCmdContext_writeEnvoyResourcesAndRunExtProc(t *testing.T) {
content, err := os.ReadFile(resourcePath)
require.NoError(t, err)
ctx, cancel := context.WithCancel(context.Background())
err = runCtx.writeEnvoyResourcesAndRunExtProc(ctx, string(content))
_, err = runCtx.writeEnvoyResourcesAndRunExtProc(ctx, string(content))
require.NoError(t, err)
time.Sleep(1 * time.Second)
cancel()
Expand Down
5 changes: 3 additions & 2 deletions cmd/aigw/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func translate(ctx context.Context, cmd cmdTranslate, output, stderr io.Writer)
return fmt.Errorf("error translating: %w", err)
}

_, httpRoutes, extensionPolicies, httpRouteFilter, configMaps, secrets, deployments, services, err := translateCustomResourceObjects(ctx, aigwRoutes, aigwBackends, backendSecurityPolicies, stderrLogger)
_, _, httpRoutes, extensionPolicies, httpRouteFilter, configMaps, secrets, deployments, services, err := translateCustomResourceObjects(ctx, aigwRoutes, aigwBackends, backendSecurityPolicies, stderrLogger)
if err != nil {
return fmt.Errorf("error emitting: %w", err)
}
Expand Down Expand Up @@ -168,6 +168,7 @@ func translateCustomResourceObjects(
backendSecurityPolicies []*aigv1a1.BackendSecurityPolicy,
logger *slog.Logger,
) (
fakeClient client.Client,
fakeClientSet *fake2.Clientset,
httpRoutes gwapiv1.HTTPRouteList,
extensionPolicies egv1a1.EnvoyExtensionPolicyList,
Expand All @@ -187,7 +188,7 @@ func translateCustomResourceObjects(
builder = builder.WithIndex(obj, field, extractValue)
return nil
}) // Error should never happen.
fakeClient := builder.Build()
fakeClient = builder.Build()
fakeClientSet = fake2.NewClientset()

bspC := controller.NewBackendSecurityPolicyController(fakeClient, fakeClientSet, logr.Discard(),
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
github.com/aws/aws-sdk-go-v2/config v1.29.14
github.com/aws/aws-sdk-go-v2/service/sts v1.33.19
github.com/coreos/go-oidc/v3 v3.14.1
github.com/envoyproxy/gateway v1.4.0-rc.2
github.com/envoyproxy/gateway v0.5.0-rc.1.0.20250513103415-8455177501fb
github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250408134212-157c26b62099
github.com/go-logr/logr v1.4.2
github.com/google/cel-go v0.25.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,8 @@ github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtz
github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/envoyproxy/gateway v1.4.0-rc.2 h1:lnEuvTqOutOfvXDue5AYBnvfobtH7iab9ezKjABBOjc=
github.com/envoyproxy/gateway v1.4.0-rc.2/go.mod h1:ZyMCOCZOyWVZSNNS5c2fEVDp9g/JTebeW97YWrjNFZQ=
github.com/envoyproxy/gateway v0.5.0-rc.1.0.20250513103415-8455177501fb h1:XUBZGIHnz57Eo3pHrWCPZTr9wIUMrcNf5UgjZed4BqM=
github.com/envoyproxy/gateway v0.5.0-rc.1.0.20250513103415-8455177501fb/go.mod h1:6LPhGttzeXGLcu86IkwMkU5y7gK8cA9/v+fBjMIPlJk=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.13.5-0.20250408134212-157c26b62099 h1:Shucu2sY2a/KjKzO10Y0st2iN0Flzj5QsTEgMFtd0MY=
github.com/envoyproxy/go-control-plane v0.13.5-0.20250408134212-157c26b62099/go.mod h1:Kf4hNGzgvzKhoKdlSXD+IZtG55h9r2SOpO1kRKLI03o=
Expand Down