Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions api/v1alpha1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,16 @@ type AIGatewayRouteRuleBackendRef struct {
// +kubebuilder:validation:Minimum=0
// +kubebuilder:default=1
Weight *int32 `json:"weight,omitempty"`
// Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints.
// See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority
// Note: This will override the `fallback` property of the underlying Envoy Gateway Backend
//
// Default is 0.
//
// +optional
// +kubebuilder:validation:Minimum=0
// +kubebuilder:default=0
Priority *uint32 `json:"priority,omitempty"`
}

type AIGatewayRouteRuleMatch struct {
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions cmd/aigw/testdata/translate_basic.out.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ kind: HTTPRoute
metadata:
name: envoy-ai-gateway-basic
namespace: default
annotations:
gateway.envoyproxy.io/backend-ref-priority: 0:envoy-ai-gateway-basic-openai:0,1:envoy-ai-gateway-basic-aws:0,2:envoy-ai-gateway-basic-testupstream:0
ownerReferences:
- apiVersion: aigateway.envoyproxy.io/v1alpha1
blockOwnerDeletion: true
Expand Down
4 changes: 2 additions & 2 deletions examples/provider_fallback/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ spec:
value: us.meta.llama3-2-1b-instruct-v1:0
backendRefs:
- name: provider-fallback-always-failing-upstream # This is the primary backend and trying to speak TLS, which always fails.
priority: 0
- name: provider-fallback-aws
priority: 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also delete the fallback: true below as it's overridden by this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I deleted the fallback flag below

---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
Expand Down Expand Up @@ -80,8 +82,6 @@ metadata:
name: provider-fallback-aws
namespace: default
spec:
# Indicate that this backend is a fallback backend, meaning that it will only be used if the primary backend fails.
fallback: true
endpoints:
- fqdn:
hostname: bedrock-runtime.us-east-1.amazonaws.com
Expand Down
39 changes: 33 additions & 6 deletions internal/controller/ai_gateway_route.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"cmp"
"context"
"fmt"
"strings"

egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
"github.com/go-logr/logr"
Expand All @@ -28,12 +29,16 @@ import (
)

const (
managedByLabel = "app.kubernetes.io/managed-by"
selectedRouteHeaderKey = "x-ai-eg-selected-route"
hostRewriteHTTPFilterName = "ai-eg-host-rewrite"
aigatewayUUIDAnnotationKey = "aigateway.envoyproxy.io/uuid"
egOwningGatewayNameLabel = "gateway.envoyproxy.io/owning-gateway-name"
egOwningGatewayNamespaceLabel = "gateway.envoyproxy.io/owning-gateway-namespace"
managedByLabel = "app.kubernetes.io/managed-by"
selectedRouteHeaderKey = "x-ai-eg-selected-route"
hostRewriteHTTPFilterName = "ai-eg-host-rewrite"
aigatewayUUIDAnnotationKey = "aigateway.envoyproxy.io/uuid"
// We use this annotation to ensure that Envoy Gateway reconciles the HTTPRoute when the backend refs change.
// This will result in metadata being added to the underlying Envoy route
// @see https://gateway.envoyproxy.io/contributions/design/metadata/
httpRouteBackendRefPriorityAnnotationKey = "gateway.envoyproxy.io/backend-ref-priority"
egOwningGatewayNameLabel = "gateway.envoyproxy.io/owning-gateway-name"
egOwningGatewayNamespaceLabel = "gateway.envoyproxy.io/owning-gateway-namespace"
// apiKeyInSecret is the key to store OpenAI API key.
apiKeyInSecret = "apiKey"
)
Expand Down Expand Up @@ -227,6 +232,12 @@ func (c *AIGatewayRouteController) newHTTPRoute(ctx context.Context, dst *gwapiv

dst.Spec.Rules = rules

if dst.ObjectMeta.Annotations == nil {
dst.ObjectMeta.Annotations = make(map[string]string)
}
// HACK: We need to set an annotation so that Envoy Gateway reconciles the HTTPRoute when the backend refs change.
dst.ObjectMeta.Annotations[httpRouteBackendRefPriorityAnnotationKey] = buildPriorityAnnotation(aiGatewayRoute.Spec.Rules)

targetRefs := aiGatewayRoute.Spec.TargetRefs
egNs := gwapiv1.Namespace(aiGatewayRoute.Namespace)
parentRefs := make([]gwapiv1.ParentReference, len(targetRefs))
Expand Down Expand Up @@ -276,3 +287,19 @@ func (c *AIGatewayRouteController) updateAIGatewayRouteStatus(ctx context.Contex
c.logger.Error(err, "failed to update AIGatewayRoute status")
}
}

// buildPriorityAnnotation renders the priority of every backend ref in rules as a single
// comma-separated string. Each entry has the form "<rule index>:<backend name>:<priority>",
// emitted in rule order and then backend-ref order. A backend ref whose Priority is nil is
// rendered with priority 0.
//
// The result is set as an annotation on the generated HTTPRoute so that Envoy Gateway
// reconciles the route whenever the priorities change.
func buildPriorityAnnotation(rules []aigv1a1.AIGatewayRouteRule) string {
	entries := make([]string, 0, len(rules))
	for ruleIdx := range rules {
		refs := rules[ruleIdx].BackendRefs
		for refIdx := range refs {
			ref := &refs[refIdx]
			// An unset priority falls back to the zero value.
			prio := uint32(0)
			if ref.Priority != nil {
				prio = *ref.Priority
			}
			entries = append(entries, fmt.Sprintf("%d:%s:%d", ruleIdx, ref.Name, prio))
		}
	}
	return strings.Join(entries, ",")
}
20 changes: 17 additions & 3 deletions internal/controller/ai_gateway_route_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ func Test_newHTTPRoute(t *testing.T) {
},
{
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{Name: "orange", Weight: ptr.To[int32](100)},
{Name: "apple", Weight: ptr.To[int32](100)},
{Name: "pineapple", Weight: ptr.To[int32](100)},
{Name: "orange", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](0)},
{Name: "apple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](1)},
{Name: "pineapple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](2)},
},
},
{
Expand Down Expand Up @@ -316,3 +316,17 @@ func TestAIGatewayRouteController_updateAIGatewayRouteStatus(t *testing.T) {
require.Equal(t, "ok", updatedRoute.Status.Conditions[0].Message)
require.Equal(t, aigv1a1.ConditionTypeAccepted, updatedRoute.Status.Conditions[0].Type)
}

// Test_buildPriorityAnnotation verifies that a single rule with three prioritized backend refs
// is rendered as comma-separated "<rule index>:<name>:<priority>" entries in declaration order.
func Test_buildPriorityAnnotation(t *testing.T) {
	const want = "0:orange:0,0:apple:1,0:pineapple:2"

	// One rule (index 0) holding three backends with ascending priorities.
	backendRefs := []aigv1a1.AIGatewayRouteRuleBackendRef{
		{Name: "orange", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](0)},
		{Name: "apple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](1)},
		{Name: "pineapple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](2)},
	}
	input := []aigv1a1.AIGatewayRouteRule{{BackendRefs: backendRefs}}

	got := buildPriorityAnnotation(input)
	require.Equal(t, want, got)
}
3 changes: 3 additions & 0 deletions internal/extensionserver/extensionserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ func (s *Server) maybeModifyCluster(cluster *clusterv3.Cluster) {
backendRef := httpRouteRule.BackendRefs[i]
name := backendRef.Name
namespace := aigwRoute.Namespace
if backendRef.Priority != nil {
endpoints.Priority = *backendRef.Priority
}
// We populate the same metadata for all endpoints in the LoadAssignment.
// This is because currently, an extproc cannot retrieve the endpoint set level metadata.
for _, endpoint := range endpoints.LbEndpoints {
Expand Down
13 changes: 11 additions & 2 deletions internal/extensionserver/extensionserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/go-logr/logr"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

Expand Down Expand Up @@ -108,7 +109,8 @@ func Test_maybeModifyCluster(t *testing.T) {
Rules: []aigv1a1.AIGatewayRouteRule{
{
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
{Name: "aaa"},
{Name: "aaa", Priority: ptr.To[uint32](0)},
{Name: "bbb", Priority: ptr.To[uint32](1)},
},
},
},
Expand Down Expand Up @@ -156,6 +158,11 @@ func Test_maybeModifyCluster(t *testing.T) {
{},
},
},
{
LbEndpoints: []*endpointv3.LbEndpoint{
{},
},
},
},
},
}
Expand All @@ -164,8 +171,10 @@ func Test_maybeModifyCluster(t *testing.T) {
s.maybeModifyCluster(cluster)
require.Empty(t, buf.String())

require.Len(t, cluster.LoadAssignment.Endpoints, 1)
require.Len(t, cluster.LoadAssignment.Endpoints, 2)
require.Len(t, cluster.LoadAssignment.Endpoints[0].LbEndpoints, 1)
require.Equal(t, uint32(0), cluster.LoadAssignment.Endpoints[0].Priority)
require.Equal(t, uint32(1), cluster.LoadAssignment.Endpoints[1].Priority)
md := cluster.LoadAssignment.Endpoints[0].LbEndpoints[0].Metadata
require.NotNil(t, md)
require.Len(t, md.FilterMetadata, 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,17 @@ spec:
description: Name is the name of the AIServiceBackend.
minLength: 1
type: string
priority:
default: 0
description: |-
Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints.
See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority
Note: This will override the `fallback` property of the underlying Envoy Gateway Backend

Default is 0.
format: int32
minimum: 0
type: integer
weight:
default: 1
description: |-
Expand Down
6 changes: 6 additions & 0 deletions site/docs/api/api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,12 @@ AIGatewayRouteRuleBackendRef is a reference to a backend with a weight.
required="false"
defaultValue="1"
description="Weight is the weight of the AIServiceBackend. This is exactly the same as the weight in<br />the BackendRef in the Gateway API. See for the details:<br />https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef<br />Default is 1."
/><ApiField
name="priority"
type="integer"
required="false"
defaultValue="0"
description="Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints.<br />See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority<br />Note: This will override the `fallback` property of the underlying Envoy Gateway Backend<br />Default is 0."
/>


Expand Down