diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go
index 132a715ff4..06ace550b8 100644
--- a/api/v1alpha1/api.go
+++ b/api/v1alpha1/api.go
@@ -277,6 +277,16 @@ type AIGatewayRouteRuleBackendRef struct {
// +kubebuilder:validation:Minimum=0
// +kubebuilder:default=1
Weight *int32 `json:"weight,omitempty"`
+ // Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints.
+ // See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority
+ // Note: This will override the `fallback` property of the underlying Envoy Gateway Backend
+ //
+ // Default is 0.
+ //
+ // +optional
+ // +kubebuilder:validation:Minimum=0
+ // +kubebuilder:default=0
+ Priority *uint32 `json:"priority,omitempty"`
}
type AIGatewayRouteRuleMatch struct {
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 33b844d7a9..372b6fbba4 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -172,6 +172,11 @@ func (in *AIGatewayRouteRuleBackendRef) DeepCopyInto(out *AIGatewayRouteRuleBack
*out = new(int32)
**out = **in
}
+ if in.Priority != nil {
+ in, out := &in.Priority, &out.Priority
+ *out = new(uint32)
+ **out = **in
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AIGatewayRouteRuleBackendRef.
diff --git a/cmd/aigw/testdata/translate_basic.out.yaml b/cmd/aigw/testdata/translate_basic.out.yaml
index f86d03cf2a..1711a6db73 100644
--- a/cmd/aigw/testdata/translate_basic.out.yaml
+++ b/cmd/aigw/testdata/translate_basic.out.yaml
@@ -66,6 +66,8 @@ kind: HTTPRoute
metadata:
name: envoy-ai-gateway-basic
namespace: default
+ annotations:
+ gateway.envoyproxy.io/backend-ref-priority: 0:envoy-ai-gateway-basic-openai:0,1:envoy-ai-gateway-basic-aws:0,2:envoy-ai-gateway-basic-testupstream:0
ownerReferences:
- apiVersion: aigateway.envoyproxy.io/v1alpha1
blockOwnerDeletion: true
diff --git a/examples/provider_fallback/base.yaml b/examples/provider_fallback/base.yaml
index b695ae41d1..e1a747915d 100644
--- a/examples/provider_fallback/base.yaml
+++ b/examples/provider_fallback/base.yaml
@@ -42,7 +42,9 @@ spec:
value: us.meta.llama3-2-1b-instruct-v1:0
backendRefs:
- name: provider-fallback-always-failing-upstream # This is the primary backend and trying to speak TLS, which always fails.
+ priority: 0
- name: provider-fallback-aws
+ priority: 1
---
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
@@ -80,8 +82,6 @@ metadata:
name: provider-fallback-aws
namespace: default
spec:
- # Indicate that this backend is a fallback backend, meaning that it will only be used if the primary backend fails.
- fallback: true
endpoints:
- fqdn:
hostname: bedrock-runtime.us-east-1.amazonaws.com
diff --git a/internal/controller/ai_gateway_route.go b/internal/controller/ai_gateway_route.go
index 30f06320f8..c396f05cd7 100644
--- a/internal/controller/ai_gateway_route.go
+++ b/internal/controller/ai_gateway_route.go
@@ -9,6 +9,7 @@ import (
"cmp"
"context"
"fmt"
+ "strings"
egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
"github.com/go-logr/logr"
@@ -28,12 +29,16 @@ import (
)
const (
- managedByLabel = "app.kubernetes.io/managed-by"
- selectedRouteHeaderKey = "x-ai-eg-selected-route"
- hostRewriteHTTPFilterName = "ai-eg-host-rewrite"
- aigatewayUUIDAnnotationKey = "aigateway.envoyproxy.io/uuid"
- egOwningGatewayNameLabel = "gateway.envoyproxy.io/owning-gateway-name"
- egOwningGatewayNamespaceLabel = "gateway.envoyproxy.io/owning-gateway-namespace"
+ managedByLabel = "app.kubernetes.io/managed-by"
+ selectedRouteHeaderKey = "x-ai-eg-selected-route"
+ hostRewriteHTTPFilterName = "ai-eg-host-rewrite"
+ aigatewayUUIDAnnotationKey = "aigateway.envoyproxy.io/uuid"
+ // We use this annotation to ensure that Envoy Gateway reconciles the HTTPRoute when the backend refs change.
+ // This will result in metadata being added to the underlying Envoy route
+ // @see https://gateway.envoyproxy.io/contributions/design/metadata/
+ httpRouteBackendRefPriorityAnnotationKey = "gateway.envoyproxy.io/backend-ref-priority"
+ egOwningGatewayNameLabel = "gateway.envoyproxy.io/owning-gateway-name"
+ egOwningGatewayNamespaceLabel = "gateway.envoyproxy.io/owning-gateway-namespace"
// apiKeyInSecret is the key to store OpenAI API key.
apiKeyInSecret = "apiKey"
)
@@ -227,6 +232,12 @@ func (c *AIGatewayRouteController) newHTTPRoute(ctx context.Context, dst *gwapiv
dst.Spec.Rules = rules
+ if dst.ObjectMeta.Annotations == nil {
+ dst.ObjectMeta.Annotations = make(map[string]string)
+ }
+ // HACK: We need to set an annotation so that Envoy Gateway reconciles the HTTPRoute when the backend refs change.
+ dst.ObjectMeta.Annotations[httpRouteBackendRefPriorityAnnotationKey] = buildPriorityAnnotation(aiGatewayRoute.Spec.Rules)
+
targetRefs := aiGatewayRoute.Spec.TargetRefs
egNs := gwapiv1.Namespace(aiGatewayRoute.Namespace)
parentRefs := make([]gwapiv1.ParentReference, len(targetRefs))
@@ -276,3 +287,19 @@ func (c *AIGatewayRouteController) updateAIGatewayRouteStatus(ctx context.Contex
c.logger.Error(err, "failed to update AIGatewayRoute status")
}
}
+
+// buildPriorityAnnotation encodes every backend ref as "<rule index>:<name>:<priority>" (a nil Priority is written as 0)
+// and joins the entries with commas. This is used to ensure Envoy Gateway reconciles the HTTP route when the priorities change.
+func buildPriorityAnnotation(rules []aigv1a1.AIGatewayRouteRule) string {
+ priorities := make([]string, 0, len(rules))
+ for i, rule := range rules {
+ for _, br := range rule.BackendRefs {
+ var priority uint32
+ if br.Priority != nil {
+ priority = *br.Priority
+ }
+ priorities = append(priorities, fmt.Sprintf("%d:%s:%d", i, br.Name, priority))
+ }
+ }
+ return strings.Join(priorities, ",")
+}
diff --git a/internal/controller/ai_gateway_route_test.go b/internal/controller/ai_gateway_route_test.go
index 83c4132b5a..6c25ae7ad5 100644
--- a/internal/controller/ai_gateway_route_test.go
+++ b/internal/controller/ai_gateway_route_test.go
@@ -176,9 +176,9 @@ func Test_newHTTPRoute(t *testing.T) {
},
{
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
- {Name: "orange", Weight: ptr.To[int32](100)},
- {Name: "apple", Weight: ptr.To[int32](100)},
- {Name: "pineapple", Weight: ptr.To[int32](100)},
+ {Name: "orange", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](0)},
+ {Name: "apple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](1)},
+ {Name: "pineapple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](2)},
},
},
{
@@ -316,3 +316,17 @@ func TestAIGatewayRouteController_updateAIGatewayRouteStatus(t *testing.T) {
require.Equal(t, "ok", updatedRoute.Status.Conditions[0].Message)
require.Equal(t, aigv1a1.ConditionTypeAccepted, updatedRoute.Status.Conditions[0].Type)
}
+
+func Test_buildPriorityAnnotation(t *testing.T) {
+ rules := []aigv1a1.AIGatewayRouteRule{
+ {
+ BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
+ {Name: "orange", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](0)},
+ {Name: "apple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](1)},
+ {Name: "pineapple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](2)},
+ },
+ },
+ }
+ annotation := buildPriorityAnnotation(rules)
+ require.Equal(t, "0:orange:0,0:apple:1,0:pineapple:2", annotation)
+}
diff --git a/internal/extensionserver/extensionserver.go b/internal/extensionserver/extensionserver.go
index 1402279f28..49b91015d1 100644
--- a/internal/extensionserver/extensionserver.go
+++ b/internal/extensionserver/extensionserver.go
@@ -187,6 +187,9 @@ func (s *Server) maybeModifyCluster(cluster *clusterv3.Cluster) {
backendRef := httpRouteRule.BackendRefs[i]
name := backendRef.Name
namespace := aigwRoute.Namespace
+ if backendRef.Priority != nil {
+ endpoints.Priority = *backendRef.Priority
+ }
// We populate the same metadata for all endpoints in the LoadAssignment.
// This is because currently, an extproc cannot retrieve the endpoint set level metadata.
for _, endpoint := range endpoints.LbEndpoints {
diff --git a/internal/extensionserver/extensionserver_test.go b/internal/extensionserver/extensionserver_test.go
index bdd7a3a15b..43507fd6a4 100644
--- a/internal/extensionserver/extensionserver_test.go
+++ b/internal/extensionserver/extensionserver_test.go
@@ -17,6 +17,7 @@ import (
"github.com/go-logr/logr"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
@@ -108,7 +109,8 @@ func Test_maybeModifyCluster(t *testing.T) {
Rules: []aigv1a1.AIGatewayRouteRule{
{
BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{
- {Name: "aaa"},
+ {Name: "aaa", Priority: ptr.To[uint32](0)},
+ {Name: "bbb", Priority: ptr.To[uint32](1)},
},
},
},
@@ -156,6 +158,11 @@ func Test_maybeModifyCluster(t *testing.T) {
{},
},
},
+ {
+ LbEndpoints: []*endpointv3.LbEndpoint{
+ {},
+ },
+ },
},
},
}
@@ -164,8 +171,10 @@ func Test_maybeModifyCluster(t *testing.T) {
s.maybeModifyCluster(cluster)
require.Empty(t, buf.String())
- require.Len(t, cluster.LoadAssignment.Endpoints, 1)
+ require.Len(t, cluster.LoadAssignment.Endpoints, 2)
require.Len(t, cluster.LoadAssignment.Endpoints[0].LbEndpoints, 1)
+ require.Equal(t, uint32(0), cluster.LoadAssignment.Endpoints[0].Priority)
+ require.Equal(t, uint32(1), cluster.LoadAssignment.Endpoints[1].Priority)
md := cluster.LoadAssignment.Endpoints[0].LbEndpoints[0].Metadata
require.NotNil(t, md)
require.Len(t, md.FilterMetadata, 1)
diff --git a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml
index fb1229a46c..30a71d4c31 100644
--- a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml
+++ b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml
@@ -302,6 +302,17 @@ spec:
description: Name is the name of the AIServiceBackend.
minLength: 1
type: string
+ priority:
+ default: 0
+ description: |-
+ Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints.
+ See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority
+ Note: This will override the `fallback` property of the underlying Envoy Gateway Backend
+
+ Default is 0.
+ format: int32
+ minimum: 0
+ type: integer
weight:
default: 1
description: |-
diff --git a/site/docs/api/api.mdx b/site/docs/api/api.mdx
index aabf71c89e..571e77a742 100644
--- a/site/docs/api/api.mdx
+++ b/site/docs/api/api.mdx
@@ -463,6 +463,12 @@ AIGatewayRouteRuleBackendRef is a reference to a backend with a weight.
required="false"
defaultValue="1"
description="Weight is the weight of the AIServiceBackend. This is exactly the same as the weight in
the BackendRef in the Gateway API. See for the details:
https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef
Default is 1."
+/>